I have a file about 4-5 gigs in size (almost a billion lines). From each line of the file I have to parse an array of integers and additional information about the integer value and update my own data structure. My class for storing such information looks like
/**
 * Pre-sized storage for the parsed file: one slot per line, indexed by line number.
 *
 * <p>Both arrays are allocated eagerly when a {@code Holder} is constructed.
 */
class Holder {
    /** Number of slots allocated up front (one per line of the input file). */
    private static final int LINE_SLOTS = 1000000000;

    /** Width of each row in {@link #arr}. */
    private static final int ROW_WIDTH = 5;

    /** Integers extracted from each line; first index is the line number. */
    private int[][] arr = new int[LINE_SLOTS][ROW_WIDTH];

    /** The single meta value extracted from each line; index is the line number. */
    private int[] meta = new int[LINE_SLOTS];
}
An example line from a file looks like
(1_23_4_55) 99
Each index in arr and meta corresponds to the line number in the file. From the above line, I first extract an array of integers, and then the meta information. In this case
--pseudo_code--
arr[line_num] = new int[]{1, 23, 4, 55}
meta[line_num]=99
BufferedReader, readLine - Holder. .
java Serialization Externalizable ( meta arr) HUGE Holder. , , .
, .
P.S. . 50 . BufferedReader 40 (, 100 , , 100 /). .
EDIT
, , ( );
/**
 * Reads a large text file line by line and feeds every parsed line into a {@link Holder}.
 *
 * <p>Each line is scanned character by character:
 * <ul>
 *   <li>decimal digits are collected into the current token;</li>
 *   <li>{@code '_'} ends the current token and appends it to the line's integer list
 *       (only before the first TAB has been seen);</li>
 *   <li>a TAB ends the current token, appends it to the integer list, and switches the
 *       scanner to the meta part of the line;</li>
 *   <li>every other character (for example the surrounding parentheses) is ignored.</li>
 * </ul>
 * Whatever token remains at the end of the line is parsed as the meta value.
 */
public class BigFileParser {

    /** Size, in characters, of the buffer handed to {@link BufferedReader}. */
    private static final int READ_BUFFER_CHARS = 40960;

    /**
     * Converts a run of decimal digits, optionally preceded by {@code '-'}, into an int.
     *
     * <p>The value is accumulated as a negative number and the sign is applied at the end,
     * so {@code "-2147483648"} is converted without an intermediate overflow. Characters are
     * not validated as digits and larger magnitudes wrap around silently.
     *
     * @param s the token to convert; must not be empty
     * @return the int value of {@code s}
     * @throws NumberFormatException if {@code s} is empty
     */
    private int parsePositiveInt(final CharSequence s) {
        final int len = s.length();
        if (len == 0) {
            // Previously surfaced as an opaque StringIndexOutOfBoundsException from charAt(0).
            throw new NumberFormatException("empty integer token");
        }
        int num = 0;
        int sign = -1;
        final char ch = s.charAt(0);
        if (ch == '-') {
            sign = 1;
        } else {
            num = '0' - ch;
        }
        for (int i = 1; i < len; i++) {
            num = num * 10 + '0' - s.charAt(i);
        }
        return sign * num;
    }

    /**
     * Parses the whole input file into a freshly created {@link Holder} and prints the
     * elapsed wall-clock time on success.
     *
     * <p>An {@link IOException} is reported via {@code printStackTrace()} and ends the load
     * without printing the timing line. The reader is closed on every exit path.
     *
     * <p>NOTE(review): the populated {@code Holder} is a local variable and is not returned
     * or stored anywhere visible here - confirm how callers are meant to reach the data.
     *
     * @throws NumberFormatException if a line contains an empty integer token (for example
     *         a blank line, two consecutive {@code '_'}, or a missing meta value)
     */
    private void loadBigFile() {
        final long startTime = System.nanoTime();
        final Holder holder = new Holder();
        // A single builder is reused for every token so the hot loop does not allocate a
        // new String for each digit appended.
        final StringBuilder token = new StringBuilder(16);

        try (BufferedReader bufferedReader =
                new BufferedReader(new FileReader("/path/to/BIG/file"), READ_BUFFER_CHARS)) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                boolean consumeNextInteger = true;
                token.setLength(0);
                final TIntArrayList arr = new TIntArrayList(5);

                for (int i = 0, len = line.length(); i < len; i++) {
                    final char c = line.charAt(i);
                    if (c == '_') {
                        // Separators only count while still inside the integer list.
                        if (consumeNextInteger) {
                            arr.add(parsePositiveInt(token));
                            token.setLength(0);
                        }
                    } else if (c >= '0' && c <= '9') {
                        token.append(c);
                    } else if (c == '\t') {
                        // TAB closes the last list element; what follows is the meta value.
                        arr.add(parsePositiveInt(token));
                        consumeNextInteger = false;
                        token.setLength(0);
                    }
                }

                final int meta = parsePositiveInt(token);
                holder.update(arr, meta);
            }
        } catch (IOException exp) {
            exp.printStackTrace();
            return;
        }

        final long endTime = System.nanoTime();
        System.out.println("@time -> " + (endTime - startTime) * 1.0
                / 1000000000 + " seconds");
    }
}
/**
 * Append-only store for parsed lines: the n-th call to {@link #update} fills slot n of the
 * per-line integer lists and appends the matching meta value.
 */
public class Holder {
    /** Number of line slots allocated up front, and the initial capacity of the meta list. */
    private static final int SIZE = 500000000;

    /** Integer list of each line, indexed by insertion order. */
    private final TIntArrayList[] arrs = new TIntArrayList[SIZE];

    /** Meta value of each line, in insertion order. */
    private final TIntArrayList metas = new TIntArrayList(SIZE);

    /** Index of the next free slot in {@link #arrs}. */
    private int idx = 0;

    /** Creates an empty holder with all storage pre-allocated. */
    public Holder() {
    }

    /**
     * Stores one parsed line in the next free slot.
     *
     * @param arr  the integers extracted from the line
     * @param meta the meta value extracted from the line
     */
    public void update(TIntArrayList arr, int meta) {
        final int slot = idx;
        // Array store first: if it fails, neither metas nor idx has been touched.
        arrs[slot] = arr;
        metas.add(meta);
        idx = slot + 1;
    }
}