Lines 1-5
Link Here
|
1 |
/******************************************************************************* |
1 |
/******************************************************************************* |
2 |
* Copyright (c) 2000, 2006 IBM Corporation and others. |
2 |
* Copyright (c) 2000, 2007 IBM Corporation and others. |
3 |
* All rights reserved. This program and the accompanying materials |
3 |
* All rights reserved. This program and the accompanying materials |
4 |
* are made available under the terms of the Eclipse Public License v1.0 |
4 |
* are made available under the terms of the Eclipse Public License v1.0 |
5 |
* which accompanies this distribution, and is available at |
5 |
* which accompanies this distribution, and is available at |
Lines 19-28
Link Here
|
19 |
import org.eclipse.jdt.internal.compiler.util.HashtableOfObject; |
19 |
import org.eclipse.jdt.internal.compiler.util.HashtableOfObject; |
20 |
import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable; |
20 |
import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable; |
21 |
import org.eclipse.jdt.internal.compiler.util.SimpleSet; |
21 |
import org.eclipse.jdt.internal.compiler.util.SimpleSet; |
|
|
22 |
import org.eclipse.jdt.internal.compiler.util.SimpleSetOfCharArray; |
22 |
|
23 |
|
23 |
public class DiskIndex { |
24 |
public class DiskIndex { |
24 |
|
25 |
|
25 |
String fileName; |
26 |
File indexFile; |
26 |
|
27 |
|
27 |
private int headerInfoOffset; |
28 |
private int headerInfoOffset; |
28 |
private int numberOfChunks; |
29 |
private int numberOfChunks; |
Lines 30-43
Link Here
|
30 |
private int[] chunkOffsets; |
31 |
private int[] chunkOffsets; |
31 |
private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names |
32 |
private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names |
32 |
private int startOfCategoryTables; |
33 |
private int startOfCategoryTables; |
33 |
private HashtableOfIntValues categoryOffsets; |
34 |
private HashtableOfIntValues categoryOffsets, categoryEnds; |
34 |
|
35 |
|
35 |
private int cacheUserCount; |
36 |
private int cacheUserCount; |
36 |
private String[][] cachedChunks; // decompressed chunks of document names |
37 |
private String[][] cachedChunks; // decompressed chunks of document names |
37 |
private HashtableOfObject categoryTables; // category name -> HashtableOfObject(words -> int[] of document #'s) or offset if not read yet |
38 |
private HashtableOfObject categoryTables; // category name -> HashtableOfObject(words -> int[] of document #'s) or offset if not read yet |
38 |
private char[] cachedCategoryName; |
39 |
private char[] cachedCategoryName; |
39 |
|
40 |
|
40 |
public static final String SIGNATURE= "INDEX VERSION 1.115"; //$NON-NLS-1$ |
41 |
private static final int DEFAULT_BUFFER_SIZE = 2048; |
|
|
42 |
private static int BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE; |
43 |
private static final int BUFFER_WRITE_SIZE = DEFAULT_BUFFER_SIZE; |
44 |
private byte[] streamBuffer; |
45 |
private int bufferIndex, bufferEnd; // used when reading from the file into the streamBuffer |
46 |
private int streamEnd; // used when writing data from the streamBuffer to the file |
47 |
|
48 |
public static final String SIGNATURE= "INDEX VERSION 1.116"; //$NON-NLS-1$ |
49 |
private static final char[] SIGNATURE_CHARS = SIGNATURE.toCharArray(); |
41 |
public static boolean DEBUG = false; |
50 |
public static boolean DEBUG = false; |
42 |
|
51 |
|
43 |
private static final int RE_INDEXED = -1; |
52 |
private static final int RE_INDEXED = -1; |
Lines 45-51
Link Here
|
45 |
|
54 |
|
46 |
private static final int CHUNK_SIZE = 100; |
55 |
private static final int CHUNK_SIZE = 100; |
47 |
|
56 |
|
48 |
class IntList { |
57 |
private static final SimpleSetOfCharArray INTERNED_CATEGORY_NAMES = new SimpleSetOfCharArray(20); |
|
|
58 |
|
59 |
static class IntList { |
49 |
|
60 |
|
50 |
int size; |
61 |
int size; |
51 |
int[] elements; |
62 |
int[] elements; |
Lines 71-77
Link Here
|
71 |
|
82 |
|
72 |
|
83 |
|
73 |
DiskIndex(String fileName) { |
84 |
DiskIndex(String fileName) { |
74 |
this.fileName = fileName; |
85 |
if (fileName == null) |
|
|
86 |
throw new java.lang.IllegalArgumentException(); |
87 |
this.indexFile = new File(fileName); |
75 |
|
88 |
|
76 |
// clear cached items |
89 |
// clear cached items |
77 |
this.headerInfoOffset = -1; |
90 |
this.headerInfoOffset = -1; |
Lines 84-89
Link Here
|
84 |
this.categoryTables = null; |
97 |
this.categoryTables = null; |
85 |
this.cachedCategoryName = null; |
98 |
this.cachedCategoryName = null; |
86 |
this.categoryOffsets = null; |
99 |
this.categoryOffsets = null; |
|
|
100 |
this.categoryEnds = null; |
87 |
} |
101 |
} |
88 |
SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex) throws IOException { |
102 |
SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex) throws IOException { |
89 |
// must skip over documents which have been added/changed/deleted in the memory index |
103 |
// must skip over documents which have been added/changed/deleted in the memory index |
Lines 129-136
Link Here
|
129 |
result.addDocumentTable(wordsToDocNumbers); |
143 |
result.addDocumentTable(wordsToDocNumbers); |
130 |
} else { |
144 |
} else { |
131 |
SimpleLookupTable docsToRefs = memoryIndex.docsToReferences; |
145 |
SimpleLookupTable docsToRefs = memoryIndex.docsToReferences; |
132 |
if (result == null) |
146 |
if (result == null) result = new EntryResult(word, null); |
133 |
result = new EntryResult(word, null); |
|
|
134 |
int[] docNumbers = readDocumentNumbers(wordsToDocNumbers.get(word)); |
147 |
int[] docNumbers = readDocumentNumbers(wordsToDocNumbers.get(word)); |
135 |
for (int i = 0, l = docNumbers.length; i < l; i++) { |
148 |
for (int i = 0, l = docNumbers.length; i < l; i++) { |
136 |
String docName = readDocumentName(docNumbers[i]); |
149 |
String docName = readDocumentName(docNumbers[i]); |
Lines 204-218
Link Here
|
204 |
private void cacheDocumentNames() throws IOException { |
217 |
private void cacheDocumentNames() throws IOException { |
205 |
// will need all document names so get them now |
218 |
// will need all document names so get them now |
206 |
this.cachedChunks = new String[this.numberOfChunks][]; |
219 |
this.cachedChunks = new String[this.numberOfChunks][]; |
207 |
DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(getIndexFile()), this.numberOfChunks > 5 ? 4096 : 2048)); |
220 |
FileInputStream stream = new FileInputStream(this.indexFile); |
208 |
try { |
221 |
try { |
209 |
stream.skip(this.chunkOffsets[0]); |
222 |
if (this.numberOfChunks > 5) BUFFER_READ_SIZE <<= 1; |
|
|
223 |
int offset = this.chunkOffsets[0]; |
224 |
stream.skip(offset); |
225 |
this.streamBuffer = new byte[BUFFER_READ_SIZE]; |
226 |
this.bufferIndex = 0; |
227 |
this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); |
210 |
for (int i = 0; i < this.numberOfChunks; i++) { |
228 |
for (int i = 0; i < this.numberOfChunks; i++) { |
211 |
int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE; |
229 |
int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE; |
212 |
readChunk(this.cachedChunks[i] = new String[size], stream, 0, size); |
230 |
readChunk(this.cachedChunks[i] = new String[size], stream, 0, size); |
213 |
} |
231 |
} |
|
|
232 |
} catch (IOException e) { |
233 |
this.cachedChunks = null; |
234 |
throw e; |
214 |
} finally { |
235 |
} finally { |
215 |
stream.close(); |
236 |
stream.close(); |
|
|
237 |
this.streamBuffer = null; |
238 |
BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE; |
216 |
} |
239 |
} |
217 |
} |
240 |
} |
218 |
private String[] computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) { |
241 |
private String[] computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) { |
Lines 335-350
Link Here
|
335 |
} |
358 |
} |
336 |
} |
359 |
} |
337 |
} |
360 |
} |
338 |
File getIndexFile() { |
|
|
339 |
if (this.fileName == null) return null; |
340 |
|
341 |
return new File(this.fileName); |
342 |
} |
343 |
void initialize(boolean reuseExistingFile) throws IOException { |
361 |
void initialize(boolean reuseExistingFile) throws IOException { |
344 |
File indexFile = getIndexFile(); |
362 |
if (this.indexFile.exists()) { |
345 |
if (indexFile.exists()) { |
|
|
346 |
if (reuseExistingFile) { |
363 |
if (reuseExistingFile) { |
347 |
RandomAccessFile file = new RandomAccessFile(this.fileName, "r"); //$NON-NLS-1$ |
364 |
RandomAccessFile file = new RandomAccessFile(this.indexFile, "r"); //$NON-NLS-1$ |
348 |
try { |
365 |
try { |
349 |
String signature = file.readUTF(); |
366 |
String signature = file.readUTF(); |
350 |
if (!signature.equals(SIGNATURE)) |
367 |
if (!signature.equals(SIGNATURE)) |
Lines 358-371
Link Here
|
358 |
} |
375 |
} |
359 |
return; |
376 |
return; |
360 |
} |
377 |
} |
361 |
if (!indexFile.delete()) { |
378 |
if (!this.indexFile.delete()) { |
362 |
if (DEBUG) |
379 |
if (DEBUG) |
363 |
System.out.println("initialize - Failed to delete index " + this.fileName); //$NON-NLS-1$ |
380 |
System.out.println("initialize - Failed to delete index " + this.indexFile); //$NON-NLS-1$ |
364 |
throw new IOException("Failed to delete index " + this.fileName); //$NON-NLS-1$ |
381 |
throw new IOException("Failed to delete index " + this.indexFile); //$NON-NLS-1$ |
365 |
} |
382 |
} |
366 |
} |
383 |
} |
367 |
if (indexFile.createNewFile()) { |
384 |
if (this.indexFile.createNewFile()) { |
368 |
RandomAccessFile file = new RandomAccessFile(this.fileName, "rw"); //$NON-NLS-1$ |
385 |
RandomAccessFile file = new RandomAccessFile(this.indexFile, "rw"); //$NON-NLS-1$ |
369 |
try { |
386 |
try { |
370 |
file.writeUTF(SIGNATURE); |
387 |
file.writeUTF(SIGNATURE); |
371 |
file.writeInt(-1); // file is empty |
388 |
file.writeInt(-1); // file is empty |
Lines 374-398
Link Here
|
374 |
} |
391 |
} |
375 |
} else { |
392 |
} else { |
376 |
if (DEBUG) |
393 |
if (DEBUG) |
377 |
System.out.println("initialize - Failed to create new index " + this.fileName); //$NON-NLS-1$ |
394 |
System.out.println("initialize - Failed to create new index " + this.indexFile); //$NON-NLS-1$ |
378 |
throw new IOException("Failed to create new index " + this.fileName); //$NON-NLS-1$ |
395 |
throw new IOException("Failed to create new index " + this.indexFile); //$NON-NLS-1$ |
379 |
} |
396 |
} |
380 |
} |
397 |
} |
381 |
private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException { |
398 |
private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException { |
382 |
if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file |
399 |
if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file |
383 |
if (DEBUG) |
400 |
if (DEBUG) |
384 |
System.out.println("initializeFrom - Failed to delete temp index " + this.fileName); //$NON-NLS-1$ |
401 |
System.out.println("initializeFrom - Failed to delete temp index " + this.indexFile); //$NON-NLS-1$ |
385 |
} else if (!newIndexFile.createNewFile()) { |
402 |
} else if (!newIndexFile.createNewFile()) { |
386 |
if (DEBUG) |
403 |
if (DEBUG) |
387 |
System.out.println("initializeFrom - Failed to create temp index " + this.fileName); //$NON-NLS-1$ |
404 |
System.out.println("initializeFrom - Failed to create temp index " + this.indexFile); //$NON-NLS-1$ |
388 |
throw new IOException("Failed to create temp index " + this.fileName); //$NON-NLS-1$ |
405 |
throw new IOException("Failed to create temp index " + this.indexFile); //$NON-NLS-1$ |
389 |
} |
406 |
} |
390 |
|
407 |
|
391 |
int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize; |
408 |
int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize; |
392 |
this.categoryOffsets = new HashtableOfIntValues(size); |
409 |
this.categoryOffsets = new HashtableOfIntValues(size); |
|
|
410 |
this.categoryEnds = new HashtableOfIntValues(size); |
393 |
this.categoryTables = new HashtableOfObject(size); |
411 |
this.categoryTables = new HashtableOfObject(size); |
394 |
} |
412 |
} |
395 |
private void mergeCategories(DiskIndex onDisk, int[] positions, DataOutputStream stream) throws IOException { |
413 |
private void mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException { |
396 |
// at this point, this.categoryTables contains the names -> wordsToDocs added in copyQueryResults() |
414 |
// at this point, this.categoryTables contains the names -> wordsToDocs added in copyQueryResults() |
397 |
char[][] oldNames = onDisk.categoryOffsets.keyTable; |
415 |
char[][] oldNames = onDisk.categoryOffsets.keyTable; |
398 |
for (int i = 0, l = oldNames.length; i < l; i++) { |
416 |
for (int i = 0, l = oldNames.length; i < l; i++) { |
Lines 407-413
Link Here
|
407 |
mergeCategory(categoryNames[i], onDisk, positions, stream); |
425 |
mergeCategory(categoryNames[i], onDisk, positions, stream); |
408 |
this.categoryTables = null; |
426 |
this.categoryTables = null; |
409 |
} |
427 |
} |
410 |
private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, DataOutputStream stream) throws IOException { |
428 |
private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException { |
411 |
HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName); |
429 |
HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName); |
412 |
if (wordsToDocs == null) |
430 |
if (wordsToDocs == null) |
413 |
wordsToDocs = new HashtableOfObject(3); |
431 |
wordsToDocs = new HashtableOfObject(3); |
Lines 465-480
Link Here
|
465 |
if (previousLength == 0) return this; // nothing to do... memory index contained deleted documents that had never been saved |
483 |
if (previousLength == 0) return this; // nothing to do... memory index contained deleted documents that had never been saved |
466 |
|
484 |
|
467 |
// index is now empty since all the saved documents were removed |
485 |
// index is now empty since all the saved documents were removed |
468 |
DiskIndex newDiskIndex = new DiskIndex(this.fileName); |
486 |
DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath()); |
469 |
newDiskIndex.initialize(false); |
487 |
newDiskIndex.initialize(false); |
470 |
return newDiskIndex; |
488 |
return newDiskIndex; |
471 |
} |
489 |
} |
472 |
|
490 |
|
473 |
DiskIndex newDiskIndex = new DiskIndex(this.fileName + ".tmp"); //$NON-NLS-1$ |
491 |
DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath() + ".tmp"); //$NON-NLS-1$ |
474 |
File newIndexFile = newDiskIndex.getIndexFile(); |
|
|
475 |
try { |
492 |
try { |
476 |
newDiskIndex.initializeFrom(this, newIndexFile); |
493 |
newDiskIndex.initializeFrom(this, newDiskIndex.indexFile); |
477 |
DataOutputStream stream = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(newIndexFile, false), 2048)); |
494 |
FileOutputStream stream = new FileOutputStream(newDiskIndex.indexFile, false); |
478 |
int offsetToHeader = -1; |
495 |
int offsetToHeader = -1; |
479 |
try { |
496 |
try { |
480 |
newDiskIndex.writeAllDocumentNames(docNames, stream); |
497 |
newDiskIndex.writeAllDocumentNames(docNames, stream); |
Lines 487-494
Link Here
|
487 |
for (int i = 0, l = names.length; i < l; i++) |
504 |
for (int i = 0, l = names.length; i < l; i++) |
488 |
if (names[i] != null) |
505 |
if (names[i] != null) |
489 |
newDiskIndex.copyQueryResults( |
506 |
newDiskIndex.copyQueryResults( |
490 |
(HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), |
507 |
(HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), ((Integer) integerPositions[i]).intValue()); |
491 |
((Integer) integerPositions[i]).intValue()); |
|
|
492 |
} |
508 |
} |
493 |
indexedDocuments = null; // free up the space |
509 |
indexedDocuments = null; // free up the space |
494 |
|
510 |
|
Lines 497-539
Link Here
|
497 |
newDiskIndex.writeCategories(stream); |
513 |
newDiskIndex.writeCategories(stream); |
498 |
else |
514 |
else |
499 |
newDiskIndex.mergeCategories(this, positions, stream); |
515 |
newDiskIndex.mergeCategories(this, positions, stream); |
500 |
offsetToHeader = stream.size(); |
516 |
offsetToHeader = newDiskIndex.streamEnd; |
501 |
newDiskIndex.writeHeaderInfo(stream); |
517 |
newDiskIndex.writeHeaderInfo(stream); |
502 |
positions = null; // free up the space |
518 |
positions = null; // free up the space |
503 |
} finally { |
519 |
} finally { |
504 |
stream.close(); |
520 |
stream.close(); |
|
|
521 |
this.streamBuffer = null; |
505 |
} |
522 |
} |
506 |
newDiskIndex.writeOffsetToHeader(offsetToHeader); |
523 |
newDiskIndex.writeOffsetToHeader(offsetToHeader); |
507 |
|
524 |
|
508 |
// rename file by deleting previous index file & renaming temp one |
525 |
// rename file by deleting previous index file & renaming temp one |
509 |
File old = getIndexFile(); |
526 |
if (this.indexFile.exists() && !this.indexFile.delete()) { |
510 |
if (old.exists() && !old.delete()) { |
|
|
511 |
if (DEBUG) |
527 |
if (DEBUG) |
512 |
System.out.println("mergeWith - Failed to delete " + this.fileName); //$NON-NLS-1$ |
528 |
System.out.println("mergeWith - Failed to delete " + this.indexFile); //$NON-NLS-1$ |
513 |
throw new IOException("Failed to delete index file " + this.fileName); //$NON-NLS-1$ |
529 |
throw new IOException("Failed to delete index file " + this.indexFile); //$NON-NLS-1$ |
514 |
} |
530 |
} |
515 |
if (!newIndexFile.renameTo(old)) { |
531 |
if (!newDiskIndex.indexFile.renameTo(this.indexFile)) { |
516 |
if (DEBUG) |
532 |
if (DEBUG) |
517 |
System.out.println("mergeWith - Failed to rename " + this.fileName); //$NON-NLS-1$ |
533 |
System.out.println("mergeWith - Failed to rename " + this.indexFile); //$NON-NLS-1$ |
518 |
throw new IOException("Failed to rename index file " + this.fileName); //$NON-NLS-1$ |
534 |
throw new IOException("Failed to rename index file " + this.indexFile); //$NON-NLS-1$ |
519 |
} |
535 |
} |
520 |
} catch (IOException e) { |
536 |
} catch (IOException e) { |
521 |
if (newIndexFile.exists() && !newIndexFile.delete()) |
537 |
if (newDiskIndex.indexFile.exists() && !newDiskIndex.indexFile.delete()) |
522 |
if (DEBUG) |
538 |
if (DEBUG) |
523 |
System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.fileName); //$NON-NLS-1$ |
539 |
System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.indexFile); //$NON-NLS-1$ |
524 |
throw e; |
540 |
throw e; |
525 |
} |
541 |
} |
526 |
|
542 |
|
527 |
newDiskIndex.fileName = this.fileName; |
543 |
newDiskIndex.indexFile = this.indexFile; |
528 |
return newDiskIndex; |
544 |
return newDiskIndex; |
529 |
} |
545 |
} |
530 |
private synchronized String[] readAllDocumentNames() throws IOException { |
546 |
private synchronized String[] readAllDocumentNames() throws IOException { |
531 |
if (this.numberOfChunks <= 0) |
547 |
if (this.numberOfChunks <= 0) |
532 |
return new String[0]; |
548 |
return CharOperation.NO_STRINGS; |
533 |
|
549 |
|
534 |
DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(getIndexFile()), this.numberOfChunks > 5 ? 4096 : 2048)); |
550 |
FileInputStream stream = new FileInputStream(this.indexFile); |
535 |
try { |
551 |
try { |
536 |
stream.skip(this.chunkOffsets[0]); |
552 |
int offset = this.chunkOffsets[0]; |
|
|
553 |
stream.skip(offset); |
554 |
this.streamBuffer = new byte[BUFFER_READ_SIZE]; |
555 |
this.bufferIndex = 0; |
556 |
this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); |
537 |
int lastIndex = this.numberOfChunks - 1; |
557 |
int lastIndex = this.numberOfChunks - 1; |
538 |
String[] docNames = new String[lastIndex * CHUNK_SIZE + sizeOfLastChunk]; |
558 |
String[] docNames = new String[lastIndex * CHUNK_SIZE + sizeOfLastChunk]; |
539 |
for (int i = 0; i < this.numberOfChunks; i++) |
559 |
for (int i = 0; i < this.numberOfChunks; i++) |
Lines 541-553
Link Here
|
541 |
return docNames; |
561 |
return docNames; |
542 |
} finally { |
562 |
} finally { |
543 |
stream.close(); |
563 |
stream.close(); |
|
|
564 |
this.streamBuffer = null; |
544 |
} |
565 |
} |
545 |
} |
566 |
} |
546 |
private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException { |
567 |
private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException { |
547 |
// result will be null if categoryName is unknown |
568 |
// result will be null if categoryName is unknown |
548 |
int offset = this.categoryOffsets.get(categoryName); |
569 |
int offset = this.categoryOffsets.get(categoryName); |
549 |
if (offset == HashtableOfIntValues.NO_VALUE) |
570 |
if (offset == HashtableOfIntValues.NO_VALUE) { |
550 |
return null; |
571 |
return null; |
|
|
572 |
} |
551 |
|
573 |
|
552 |
if (this.categoryTables == null) { |
574 |
if (this.categoryTables == null) { |
553 |
this.categoryTables = new HashtableOfObject(3); |
575 |
this.categoryTables = new HashtableOfObject(3); |
Lines 564-581
Link Here
|
564 |
} |
586 |
} |
565 |
} |
587 |
} |
566 |
|
588 |
|
567 |
DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(getIndexFile()), 2048)); |
589 |
FileInputStream stream = new FileInputStream(this.indexFile); |
568 |
HashtableOfObject categoryTable = null; |
590 |
HashtableOfObject categoryTable = null; |
569 |
char[][] matchingWords = null; |
591 |
char[][] matchingWords = null; |
570 |
int count = 0; |
592 |
int count = 0; |
571 |
int firstOffset = -1; |
593 |
int firstOffset = -1; |
|
|
594 |
this.streamBuffer = new byte[BUFFER_READ_SIZE]; |
572 |
try { |
595 |
try { |
573 |
stream.skip(offset); |
596 |
stream.skip(offset); |
574 |
int size = stream.readInt(); |
597 |
this.bufferIndex = 0; |
|
|
598 |
this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); |
599 |
int size = readStreamInt(stream); |
575 |
try { |
600 |
try { |
576 |
if (size < 0) { // DEBUG |
601 |
if (size < 0) { // DEBUG |
577 |
System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$ |
602 |
System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$ |
578 |
System.err.println("file = "+getIndexFile()); //$NON-NLS-1$ |
603 |
System.err.println("file = "+this.indexFile); //$NON-NLS-1$ |
579 |
System.err.println("offset = "+offset); //$NON-NLS-1$ |
604 |
System.err.println("offset = "+offset); //$NON-NLS-1$ |
580 |
System.err.println("size = "+size); //$NON-NLS-1$ |
605 |
System.err.println("size = "+size); //$NON-NLS-1$ |
581 |
System.err.println("-------------------- END --------------------"); //$NON-NLS-1$ |
606 |
System.err.println("-------------------- END --------------------"); //$NON-NLS-1$ |
Lines 585-591
Link Here
|
585 |
// DEBUG |
610 |
// DEBUG |
586 |
oom.printStackTrace(); |
611 |
oom.printStackTrace(); |
587 |
System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$ |
612 |
System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$ |
588 |
System.err.println("file = "+getIndexFile()); //$NON-NLS-1$ |
613 |
System.err.println("file = "+this.indexFile); //$NON-NLS-1$ |
589 |
System.err.println("offset = "+offset); //$NON-NLS-1$ |
614 |
System.err.println("offset = "+offset); //$NON-NLS-1$ |
590 |
System.err.println("size = "+size); //$NON-NLS-1$ |
615 |
System.err.println("size = "+size); //$NON-NLS-1$ |
591 |
System.err.println("-------------------- END --------------------"); //$NON-NLS-1$ |
616 |
System.err.println("-------------------- END --------------------"); //$NON-NLS-1$ |
Lines 593-600
Link Here
|
593 |
} |
618 |
} |
594 |
int largeArraySize = 256; |
619 |
int largeArraySize = 256; |
595 |
for (int i = 0; i < size; i++) { |
620 |
for (int i = 0; i < size; i++) { |
596 |
char[] word = stream.readUTF().toCharArray(); |
621 |
char[] word = readStreamChars(stream); |
597 |
int arrayOffset = stream.readInt(); |
622 |
int arrayOffset = readStreamInt(stream); |
598 |
// if arrayOffset is: |
623 |
// if arrayOffset is: |
599 |
// <= 0 then the array size == 1 with the value -> -arrayOffset |
624 |
// <= 0 then the array size == 1 with the value -> -arrayOffset |
600 |
// > 1 & < 256 then the size of the array is > 1 & < 256, the document array follows immediately |
625 |
// > 1 & < 256 then the size of the array is > 1 & < 256, the document array follows immediately |
Lines 602-610
Link Here
|
602 |
if (arrayOffset <= 0) { |
627 |
if (arrayOffset <= 0) { |
603 |
categoryTable.put(word, new int[] {-arrayOffset}); // store 1 element array by negating documentNumber |
628 |
categoryTable.put(word, new int[] {-arrayOffset}); // store 1 element array by negating documentNumber |
604 |
} else if (arrayOffset < largeArraySize) { |
629 |
} else if (arrayOffset < largeArraySize) { |
605 |
categoryTable.put(word, readDocumentArray(stream, arrayOffset)); // read in-lined array providing size |
630 |
categoryTable.put(word, readStreamDocumentArray(stream, arrayOffset)); // read in-lined array providing size |
606 |
} else { |
631 |
} else { |
607 |
arrayOffset = stream.readInt(); // read actual offset |
632 |
arrayOffset = readStreamInt(stream); // read actual offset |
608 |
if (readDocNumbers) { |
633 |
if (readDocNumbers) { |
609 |
if (matchingWords == null) |
634 |
if (matchingWords == null) |
610 |
matchingWords = new char[size][]; |
635 |
matchingWords = new char[size][]; |
Lines 615-647
Link Here
|
615 |
categoryTable.put(word, new Integer(arrayOffset)); // offset to array in the file |
640 |
categoryTable.put(word, new Integer(arrayOffset)); // offset to array in the file |
616 |
} |
641 |
} |
617 |
} |
642 |
} |
618 |
this.categoryTables.put(categoryName, categoryTable); |
643 |
this.categoryTables.put(INTERNED_CATEGORY_NAMES.get(categoryName), categoryTable); |
619 |
// cache the table as long as its not too big |
644 |
// cache the table as long as its not too big |
620 |
// in practise, some tables can be greater than 500K when the contain more than 10K elements |
645 |
// in practice, some tables can be greater than 500K when they contain more than 10K elements |
621 |
this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null; |
646 |
this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null; |
|
|
647 |
} catch (IOException ioe) { |
648 |
this.streamBuffer = null; |
649 |
throw ioe; |
622 |
} finally { |
650 |
} finally { |
623 |
stream.close(); |
651 |
stream.close(); |
624 |
} |
652 |
} |
625 |
|
653 |
|
626 |
if (matchingWords != null && count > 0) { |
654 |
if (matchingWords != null && count > 0) { |
627 |
stream = new DataInputStream(new BufferedInputStream(new FileInputStream(getIndexFile()), 2048)); |
655 |
stream = new FileInputStream(this.indexFile); |
628 |
try { |
656 |
try { |
629 |
stream.skip(firstOffset); |
657 |
stream.skip(firstOffset); |
630 |
for (int i = 0; i < count; i++) // each array follows the previous one |
658 |
this.bufferIndex = 0; |
631 |
categoryTable.put(matchingWords[i], readDocumentArray(stream, stream.readInt())); |
659 |
this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); |
|
|
660 |
for (int i = 0; i < count; i++) { // each array follows the previous one |
661 |
categoryTable.put(matchingWords[i], readStreamDocumentArray(stream, readStreamInt(stream))); |
662 |
} |
663 |
} catch (IOException ioe) { |
664 |
this.streamBuffer = null; |
665 |
throw ioe; |
632 |
} finally { |
666 |
} finally { |
633 |
stream.close(); |
667 |
stream.close(); |
634 |
} |
668 |
} |
635 |
} |
669 |
} |
|
|
670 |
this.streamBuffer = null; |
636 |
return categoryTable; |
671 |
return categoryTable; |
637 |
} |
672 |
} |
638 |
private void readChunk(String[] docNames, DataInputStream stream, int index, int size) throws IOException { |
673 |
private void readChunk(String[] docNames, FileInputStream stream, int index, int size) throws IOException { |
639 |
String current = stream.readUTF(); |
674 |
String current = new String(readStreamChars(stream)); |
640 |
docNames[index++] = current; |
675 |
docNames[index++] = current; |
641 |
for (int i = 1; i < size; i++) { |
676 |
for (int i = 1; i < size; i++) { |
642 |
int start = stream.readUnsignedByte(); // number of identical characters at the beginning |
677 |
if (stream != null && this.bufferIndex + 2 >= this.bufferEnd) |
643 |
int end = stream.readUnsignedByte(); // number of identical characters at the end |
678 |
readStreamBuffer(stream); |
644 |
String next = stream.readUTF(); |
679 |
int start = streamBuffer[this.bufferIndex++] & 0xFF; |
|
|
680 |
int end = streamBuffer[this.bufferIndex++] & 0xFF; |
681 |
String next = new String(readStreamChars(stream)); |
645 |
if (start > 0) { |
682 |
if (start > 0) { |
646 |
if (end > 0) { |
683 |
if (end > 0) { |
647 |
int length = current.length(); |
684 |
int length = current.length(); |
Lines 657-680
Link Here
|
657 |
current = next; |
694 |
current = next; |
658 |
} |
695 |
} |
659 |
} |
696 |
} |
660 |
private int[] readDocumentArray(DataInputStream stream, int arraySize) throws IOException { |
|
|
661 |
int[] result = new int[arraySize]; |
662 |
switch (this.documentReferenceSize) { |
663 |
case 1 : |
664 |
for (int i = 0; i < arraySize; i++) |
665 |
result[i] = stream.readUnsignedByte(); |
666 |
break; |
667 |
case 2 : |
668 |
for (int i = 0; i < arraySize; i++) |
669 |
result[i] = stream.readUnsignedShort(); |
670 |
break; |
671 |
default : |
672 |
for (int i = 0; i < arraySize; i++) |
673 |
result[i] = stream.readInt(); |
674 |
break; |
675 |
} |
676 |
return result; |
677 |
} |
678 |
synchronized String readDocumentName(int docNumber) throws IOException { |
697 |
synchronized String readDocumentName(int docNumber) throws IOException { |
679 |
if (this.cachedChunks == null) |
698 |
if (this.cachedChunks == null) |
680 |
this.cachedChunks = new String[this.numberOfChunks][]; |
699 |
this.cachedChunks = new String[this.numberOfChunks][]; |
Lines 687-706
Link Here
|
687 |
int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start; |
706 |
int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start; |
688 |
if (numberOfBytes < 0) |
707 |
if (numberOfBytes < 0) |
689 |
throw new IllegalArgumentException(); |
708 |
throw new IllegalArgumentException(); |
690 |
byte[] bytes = new byte[numberOfBytes]; |
709 |
this.streamBuffer = new byte[numberOfBytes]; |
691 |
FileInputStream file = new FileInputStream(getIndexFile()); |
710 |
this.bufferIndex = 0; |
|
|
711 |
FileInputStream file = new FileInputStream(this.indexFile); |
692 |
try { |
712 |
try { |
693 |
file.skip(start); |
713 |
file.skip(start); |
694 |
if (file.read(bytes, 0, numberOfBytes) != numberOfBytes) |
714 |
if (file.read(this.streamBuffer, 0, numberOfBytes) != numberOfBytes) |
695 |
throw new IOException(); |
715 |
throw new IOException(); |
|
|
716 |
} catch (IOException ioe) { |
717 |
this.streamBuffer = null; |
718 |
throw ioe; |
696 |
} finally { |
719 |
} finally { |
697 |
file.close(); |
720 |
file.close(); |
698 |
} |
721 |
} |
699 |
DataInputStream stream = new DataInputStream(new ByteArrayInputStream(bytes)); |
|
|
700 |
int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE; |
722 |
int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE; |
701 |
chunk = this.cachedChunks[chunkNumber] = new String[numberOfNames]; |
723 |
chunk = new String[numberOfNames]; |
702 |
readChunk(chunk, stream, 0, numberOfNames); |
724 |
try { |
|
|
725 |
readChunk(chunk, null, 0, numberOfNames); |
726 |
} catch (IOException ioe) { |
727 |
this.streamBuffer = null; |
728 |
throw ioe; |
729 |
} |
730 |
this.cachedChunks[chunkNumber] = chunk; |
703 |
} |
731 |
} |
|
|
732 |
this.streamBuffer = null; |
704 |
return chunk[docNumber - (chunkNumber * CHUNK_SIZE)]; |
733 |
return chunk[docNumber - (chunkNumber * CHUNK_SIZE)]; |
705 |
} |
734 |
} |
706 |
synchronized int[] readDocumentNumbers(Object arrayOffset) throws IOException { |
735 |
synchronized int[] readDocumentNumbers(Object arrayOffset) throws IOException { |
Lines 708-719
Link Here
|
708 |
if (arrayOffset instanceof int[]) |
737 |
if (arrayOffset instanceof int[]) |
709 |
return (int[]) arrayOffset; |
738 |
return (int[]) arrayOffset; |
710 |
|
739 |
|
711 |
DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(getIndexFile()), 2048)); |
740 |
FileInputStream stream = new FileInputStream(this.indexFile); |
712 |
try { |
741 |
try { |
713 |
stream.skip(((Integer) arrayOffset).intValue()); |
742 |
int offset = ((Integer) arrayOffset).intValue(); |
714 |
return readDocumentArray(stream, stream.readInt()); |
743 |
stream.skip(offset); |
|
|
744 |
this.streamBuffer = new byte[BUFFER_READ_SIZE]; |
745 |
this.bufferIndex = 0; |
746 |
this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); |
747 |
return readStreamDocumentArray(stream, readStreamInt(stream)); |
715 |
} finally { |
748 |
} finally { |
716 |
stream.close(); |
749 |
stream.close(); |
|
|
750 |
this.streamBuffer = null; |
717 |
} |
751 |
} |
718 |
} |
752 |
} |
719 |
private void readHeaderInfo(RandomAccessFile file) throws IOException { |
753 |
private void readHeaderInfo(RandomAccessFile file) throws IOException { |
Lines 732-739
Link Here
|
732 |
|
766 |
|
733 |
int size = file.readInt(); |
767 |
int size = file.readInt(); |
734 |
this.categoryOffsets = new HashtableOfIntValues(size); |
768 |
this.categoryOffsets = new HashtableOfIntValues(size); |
735 |
for (int i = 0; i < size; i++) |
769 |
this.categoryEnds = new HashtableOfIntValues(size); |
736 |
this.categoryOffsets.put(file.readUTF().toCharArray(), file.readInt()); // cache offset to category table |
770 |
char[] previousCategory = null; |
|
|
771 |
int offset = -1; |
772 |
for (int i = 0; i < size; i++) { |
773 |
char[] categoryName = INTERNED_CATEGORY_NAMES.get(file.readUTF().toCharArray()); |
774 |
offset = file.readInt(); |
775 |
this.categoryOffsets.put(categoryName, offset); // cache offset to category table |
776 |
if (previousCategory != null) { |
777 |
this.categoryEnds.put(previousCategory, offset); // cache end of the category table |
778 |
} |
779 |
previousCategory = categoryName; |
780 |
} |
781 |
if (previousCategory != null) { |
782 |
this.categoryEnds.put(previousCategory, this.headerInfoOffset); // cache end of the category table |
783 |
} |
737 |
this.categoryTables = new HashtableOfObject(3); |
784 |
this.categoryTables = new HashtableOfObject(3); |
738 |
} |
785 |
} |
739 |
synchronized void startQuery() { |
786 |
synchronized void startQuery() { |
Lines 755-769
Link Here
|
755 |
} |
802 |
} |
756 |
} |
803 |
} |
757 |
} |
804 |
} |
758 |
private void writeAllDocumentNames(String[] sortedDocNames, DataOutputStream stream) throws IOException { |
805 |
private void readStreamBuffer(FileInputStream stream) throws IOException { |
|
|
806 |
// if we're about to read a known amount at the end of the existing buffer, but it does not completely fit |
807 |
// so we need to shift the remaining bytes to be read, and fill the buffer from the stream |
808 |
if (this.bufferEnd < this.streamBuffer.length) |
809 |
return; // we're at the end of the stream - nothing left to read |
810 |
|
811 |
int bytesInBuffer = this.bufferEnd - this.bufferIndex; |
812 |
if (bytesInBuffer > 0) |
813 |
System.arraycopy(this.streamBuffer, this.bufferIndex, this.streamBuffer, 0, bytesInBuffer); |
814 |
this.bufferEnd = bytesInBuffer + stream.read(this.streamBuffer, bytesInBuffer, this.bufferIndex); |
815 |
this.bufferIndex = 0; |
816 |
} |
817 |
/** |
818 |
* Reads in a string from the specified data input stream. The |
819 |
* string has been encoded using a modified UTF-8 format. |
820 |
* <p> |
821 |
* The first two bytes are read as an unsigned short. |
822 |
* This value gives the number of following bytes that are in the encoded string, |
823 |
* not the length of the resulting string. The following bytes are then |
824 |
* interpreted as bytes encoding characters in the UTF-8 format |
825 |
* and are converted into characters. |
826 |
* <p> |
827 |
* This method blocks until all the bytes are read, the end of the |
828 |
* stream is detected, or an exception is thrown. |
829 |
* |
830 |
* @param stream a data input stream. |
831 |
* @return UTF decoded string as a char array |
832 |
* @exception EOFException if this end of data input is reached while reading it. |
833 |
* @exception IOException if an I/O error occurs while reading data input. |
834 |
* @exception UTFDataFormatException if the bytes do not represent a |
835 |
* valid UTF-8 encoding of a Unicode string. |
836 |
*/ |
837 |
private char[] readStreamChars(FileInputStream stream) throws IOException { |
838 |
// read chars array length |
839 |
if (stream != null && this.bufferIndex + 2 >= this.bufferEnd) |
840 |
readStreamBuffer(stream); |
841 |
int length = (streamBuffer[this.bufferIndex++] & 0xFF) << 8; |
842 |
length += this.streamBuffer[this.bufferIndex++] & 0xFF; |
843 |
|
844 |
// fill the chars from bytes buffer |
845 |
char[] word = new char[length]; |
846 |
int i = 0; |
847 |
while (i < length) { |
848 |
// how many characters can be decoded without refilling the buffer? |
849 |
int charsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 3); |
850 |
// all the characters must already be in the buffer if we're at the end of the stream |
851 |
if (charsInBuffer > length || this.bufferEnd != this.streamBuffer.length) |
852 |
charsInBuffer = length; |
853 |
while (i < charsInBuffer) { |
854 |
byte b = this.streamBuffer[this.bufferIndex++]; |
855 |
switch (b & 0xF0) { |
856 |
case 0x00 : |
857 |
case 0x10 : |
858 |
case 0x20 : |
859 |
case 0x30 : |
860 |
case 0x40 : |
861 |
case 0x50 : |
862 |
case 0x60 : |
863 |
case 0x70 : |
864 |
word[i++]= (char) b; |
865 |
break; |
866 |
case 0xC0 : |
867 |
case 0xD0 : |
868 |
char next = (char) this.streamBuffer[this.bufferIndex++]; |
869 |
if ((next & 0xC0) != 0x80) { |
870 |
throw new UTFDataFormatException(); |
871 |
} |
872 |
char ch = (char) ((b & 0x1F) << 6); |
873 |
ch |= next & 0x3F; |
874 |
word[i++] = ch; |
875 |
break; |
876 |
case 0xE0 : |
877 |
char first = (char) this.streamBuffer[this.bufferIndex++]; |
878 |
char second = (char) this.streamBuffer[this.bufferIndex++]; |
879 |
if ((first & second & 0xC0) != 0x80) { |
880 |
throw new UTFDataFormatException(); |
881 |
} |
882 |
ch = (char) ((b & 0x0F) << 12); |
883 |
ch |= ((first& 0x3F) << 6); |
884 |
ch |= second & 0x3F; |
885 |
word[i++] = ch; |
886 |
break; |
887 |
default: |
888 |
throw new UTFDataFormatException(); |
889 |
} |
890 |
} |
891 |
if (i < length && stream != null) |
892 |
readStreamBuffer(stream); |
893 |
} |
894 |
return word; |
895 |
} |
896 |
private int[] readStreamDocumentArray(FileInputStream stream, int arraySize) throws IOException { |
897 |
int[] indexes = new int[arraySize]; |
898 |
if (arraySize == 0) return indexes; |
899 |
|
900 |
int i = 0; |
901 |
switch (this.documentReferenceSize) { |
902 |
case 1 : |
903 |
while (i < arraySize) { |
904 |
// how many bytes without refilling the buffer? |
905 |
int bytesInBuffer = i + this.bufferEnd - this.bufferIndex; |
906 |
if (bytesInBuffer > arraySize) |
907 |
bytesInBuffer = arraySize; |
908 |
while (i < bytesInBuffer) { |
909 |
indexes[i++] = this.streamBuffer[this.bufferIndex++] & 0xFF; |
910 |
} |
911 |
if (i < arraySize && stream != null) |
912 |
readStreamBuffer(stream); |
913 |
} |
914 |
break; |
915 |
case 2 : |
916 |
while (i < arraySize) { |
917 |
// how many shorts without refilling the buffer? |
918 |
int shortsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 2); |
919 |
if (shortsInBuffer > arraySize) |
920 |
shortsInBuffer = arraySize; |
921 |
while (i < shortsInBuffer) { |
922 |
int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8; |
923 |
indexes[i++] = val + (this.streamBuffer[this.bufferIndex++] & 0xFF); |
924 |
} |
925 |
if (i < arraySize && stream != null) |
926 |
readStreamBuffer(stream); |
927 |
} |
928 |
break; |
929 |
default : |
930 |
while (i < arraySize) { |
931 |
indexes[i++] = readStreamInt(stream); |
932 |
} |
933 |
break; |
934 |
} |
935 |
return indexes; |
936 |
} |
937 |
private int readStreamInt(FileInputStream stream) throws IOException { |
938 |
if (this.bufferIndex + 4 >= this.bufferEnd) { |
939 |
readStreamBuffer(stream); |
940 |
} |
941 |
int val = (streamBuffer[this.bufferIndex++] & 0xFF) << 24; |
942 |
val += (streamBuffer[this.bufferIndex++] & 0xFF) << 16; |
943 |
val += (streamBuffer[this.bufferIndex++] & 0xFF) << 8; |
944 |
return val + (streamBuffer[this.bufferIndex++] & 0xFF); |
945 |
} |
946 |
private void writeAllDocumentNames(String[] sortedDocNames, FileOutputStream stream) throws IOException { |
759 |
if (sortedDocNames.length == 0) |
947 |
if (sortedDocNames.length == 0) |
760 |
throw new IllegalArgumentException(); |
948 |
throw new IllegalArgumentException(); |
761 |
|
949 |
|
762 |
// assume the file was just created by initializeFrom() |
950 |
// assume the file was just created by initializeFrom() |
|
|
951 |
this.streamBuffer = new byte[BUFFER_WRITE_SIZE]; |
952 |
this.bufferIndex = 0; |
953 |
this.streamEnd = 0; |
954 |
|
763 |
// in order, write: SIGNATURE & headerInfoOffset place holder, then each compressed chunk of document names |
955 |
// in order, write: SIGNATURE & headerInfoOffset place holder, then each compressed chunk of document names |
764 |
stream.writeUTF(SIGNATURE); |
956 |
writeStreamChars(stream, SIGNATURE_CHARS); |
765 |
this.headerInfoOffset = stream.size(); |
957 |
this.headerInfoOffset = this.streamEnd; |
766 |
stream.writeInt(-1); // will overwrite with correct value later |
958 |
writeStreamInt(stream, -1); // will overwrite with correct value later |
767 |
|
959 |
|
768 |
int size = sortedDocNames.length; |
960 |
int size = sortedDocNames.length; |
769 |
this.numberOfChunks = (size / CHUNK_SIZE) + 1; |
961 |
this.numberOfChunks = (size / CHUNK_SIZE) + 1; |
Lines 777-788
Link Here
|
777 |
this.chunkOffsets = new int[this.numberOfChunks]; |
969 |
this.chunkOffsets = new int[this.numberOfChunks]; |
778 |
int lastIndex = this.numberOfChunks - 1; |
970 |
int lastIndex = this.numberOfChunks - 1; |
779 |
for (int i = 0; i < this.numberOfChunks; i++) { |
971 |
for (int i = 0; i < this.numberOfChunks; i++) { |
780 |
this.chunkOffsets[i] = stream.size(); |
972 |
this.chunkOffsets[i] = this.streamEnd; |
781 |
|
973 |
|
782 |
int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE; |
974 |
int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE; |
783 |
int chunkIndex = i * CHUNK_SIZE; |
975 |
int chunkIndex = i * CHUNK_SIZE; |
784 |
String current = sortedDocNames[chunkIndex]; |
976 |
String current = sortedDocNames[chunkIndex]; |
785 |
stream.writeUTF(current); |
977 |
writeStreamChars(stream, current.toCharArray()); |
786 |
for (int j = 1; j < chunkSize; j++) { |
978 |
for (int j = 1; j < chunkSize; j++) { |
787 |
String next = sortedDocNames[chunkIndex + j]; |
979 |
String next = sortedDocNames[chunkIndex + j]; |
788 |
int len1 = current.length(); |
980 |
int len1 = current.length(); |
Lines 802-818
Link Here
|
802 |
if (len1 == 0) break; // current is 'xabc', next is 'xyabc' |
994 |
if (len1 == 0) break; // current is 'xabc', next is 'xyabc' |
803 |
} |
995 |
} |
804 |
if (end > 255) end = 255; |
996 |
if (end > 255) end = 255; |
805 |
stream.writeByte(start); |
997 |
if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) { |
806 |
stream.writeByte(end); |
998 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
|
|
999 |
this.bufferIndex = 0; |
1000 |
} |
1001 |
this.streamBuffer[this.bufferIndex++] = (byte) start; |
1002 |
this.streamBuffer[this.bufferIndex++] = (byte) end; |
1003 |
this.streamEnd += 2; |
807 |
|
1004 |
|
808 |
int last = next.length() - end; |
1005 |
int last = next.length() - end; |
809 |
stream.writeUTF(start < last ? next.substring(start, last) : ""); //$NON-NLS-1$ |
1006 |
writeStreamChars(stream, (start < last ? CharOperation.subarray(next.toCharArray(), start, last) : CharOperation.NO_CHAR)); |
810 |
current = next; |
1007 |
current = next; |
811 |
} |
1008 |
} |
812 |
} |
1009 |
} |
813 |
this.startOfCategoryTables = stream.size() + 1; |
1010 |
this.startOfCategoryTables = this.streamEnd + 1; |
814 |
} |
1011 |
} |
815 |
private void writeCategories(DataOutputStream stream) throws IOException { |
1012 |
private void writeCategories(FileOutputStream stream) throws IOException { |
816 |
char[][] categoryNames = this.categoryTables.keyTable; |
1013 |
char[][] categoryNames = this.categoryTables.keyTable; |
817 |
Object[] tables = this.categoryTables.valueTable; |
1014 |
Object[] tables = this.categoryTables.valueTable; |
818 |
for (int i = 0, l = categoryNames.length; i < l; i++) |
1015 |
for (int i = 0, l = categoryNames.length; i < l; i++) |
Lines 820-826
Link Here
|
820 |
writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream); |
1017 |
writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream); |
821 |
this.categoryTables = null; |
1018 |
this.categoryTables = null; |
822 |
} |
1019 |
} |
823 |
private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, DataOutputStream stream) throws IOException { |
1020 |
private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream) throws IOException { |
824 |
// the format of a category table is as follows: |
1021 |
// the format of a category table is as follows: |
825 |
// any document number arrays with >= 256 elements are written before the table (the offset to each array is remembered) |
1022 |
// any document number arrays with >= 256 elements are written before the table (the offset to each array is remembered) |
826 |
// then the number of word->int[] pairs in the table is written |
1023 |
// then the number of word->int[] pairs in the table is written |
Lines 838-915
Link Here
|
838 |
o = values[i] = ((IntList) values[i]).asArray(); |
1035 |
o = values[i] = ((IntList) values[i]).asArray(); |
839 |
int[] documentNumbers = (int[]) o; |
1036 |
int[] documentNumbers = (int[]) o; |
840 |
if (documentNumbers.length >= largeArraySize) { |
1037 |
if (documentNumbers.length >= largeArraySize) { |
841 |
values[i] = new Integer(stream.size()); |
1038 |
values[i] = new Integer(this.streamEnd); |
842 |
writeDocumentNumbers(documentNumbers, stream); |
1039 |
writeDocumentNumbers(documentNumbers, stream); |
843 |
} |
1040 |
} |
844 |
} |
1041 |
} |
845 |
} |
1042 |
} |
846 |
|
1043 |
|
847 |
this.categoryOffsets.put(categoryName, stream.size()); // remember the offset to the start of the table |
1044 |
this.categoryOffsets.put(categoryName, this.streamEnd); // remember the offset to the start of the table |
848 |
this.categoryTables.put(categoryName, null); // flush cached table |
1045 |
this.categoryTables.put(categoryName, null); // flush cached table |
849 |
stream.writeInt(wordsToDocs.elementSize); |
1046 |
writeStreamInt(stream, wordsToDocs.elementSize); |
850 |
char[][] words = wordsToDocs.keyTable; |
1047 |
char[][] words = wordsToDocs.keyTable; |
851 |
for (int i = 0, l = words.length; i < l; i++) { |
1048 |
for (int i = 0, l = words.length; i < l; i++) { |
852 |
Object o = values[i]; |
1049 |
Object o = values[i]; |
853 |
if (o != null) { |
1050 |
if (o != null) { |
854 |
Util.writeUTF(stream, words[i]); |
1051 |
writeStreamChars(stream, words[i]); |
855 |
if (o instanceof int[]) { |
1052 |
if (o instanceof int[]) { |
856 |
int[] documentNumbers = (int[]) o; |
1053 |
int[] documentNumbers = (int[]) o; |
857 |
if (documentNumbers.length == 1) |
1054 |
if (documentNumbers.length == 1) |
858 |
stream.writeInt(-documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero) |
1055 |
writeStreamInt(stream, -documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero) |
859 |
else |
1056 |
else |
860 |
writeDocumentNumbers(documentNumbers, stream); |
1057 |
writeDocumentNumbers(documentNumbers, stream); |
861 |
} else { |
1058 |
} else { |
862 |
stream.writeInt(largeArraySize); // mark to identify that an offset follows |
1059 |
writeStreamInt(stream, largeArraySize); // mark to identify that an offset follows |
863 |
stream.writeInt(((Integer) o).intValue()); // offset in the file of the array of document numbers |
1060 |
writeStreamInt(stream, ((Integer) o).intValue()); // offset in the file of the array of document numbers |
864 |
} |
1061 |
} |
865 |
} |
1062 |
} |
866 |
} |
1063 |
} |
867 |
} |
1064 |
} |
868 |
private void writeDocumentNumbers(int[] documentNumbers, DataOutputStream stream) throws IOException { |
1065 |
private void writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream) throws IOException { |
869 |
// must store length as a positive int to detect in-lined array of 1 element |
1066 |
// must store length as a positive int to detect in-lined array of 1 element |
870 |
int length = documentNumbers.length; |
1067 |
int length = documentNumbers.length; |
871 |
stream.writeInt(length); |
1068 |
writeStreamInt(stream, length); |
872 |
Util.sort(documentNumbers); |
1069 |
Util.sort(documentNumbers); |
|
|
1070 |
int start = 0; |
873 |
switch (this.documentReferenceSize) { |
1071 |
switch (this.documentReferenceSize) { |
874 |
case 1 : |
1072 |
case 1 : |
875 |
for (int i = 0; i < length; i++) |
1073 |
while ((this.bufferIndex + length - start) >= BUFFER_WRITE_SIZE) { |
876 |
stream.writeByte(documentNumbers[i]); |
1074 |
// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part |
|
|
1075 |
int bytesLeft = BUFFER_WRITE_SIZE - this.bufferIndex; |
1076 |
for (int i=0; i < bytesLeft; i++) { |
1077 |
this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; |
1078 |
} |
1079 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
1080 |
this.bufferIndex = 0; |
1081 |
} |
1082 |
while (start < length) { |
1083 |
this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; |
1084 |
} |
1085 |
this.streamEnd += length; |
877 |
break; |
1086 |
break; |
878 |
case 2 : |
1087 |
case 2 : |
879 |
for (int i = 0; i < length; i++) |
1088 |
while ((this.bufferIndex + ((length - start) * 2)) >= BUFFER_WRITE_SIZE) { |
880 |
stream.writeShort(documentNumbers[i]); |
1089 |
// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part |
|
|
1090 |
int shortsLeft = (BUFFER_WRITE_SIZE - this.bufferIndex) / 2; |
1091 |
for (int i=0; i < shortsLeft; i++) { |
1092 |
this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8); |
1093 |
this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; |
1094 |
} |
1095 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
1096 |
this.bufferIndex = 0; |
1097 |
} |
1098 |
while (start < length) { |
1099 |
this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8); |
1100 |
this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; |
1101 |
} |
1102 |
this.streamEnd += length * 2; |
881 |
break; |
1103 |
break; |
882 |
default : |
1104 |
default : |
883 |
for (int i = 0; i < length; i++) |
1105 |
while (start < length) { |
884 |
stream.writeInt(documentNumbers[i]); |
1106 |
writeStreamInt(stream, documentNumbers[start++]); |
|
|
1107 |
} |
885 |
break; |
1108 |
break; |
886 |
} |
1109 |
} |
887 |
} |
1110 |
} |
888 |
private void writeHeaderInfo(DataOutputStream stream) throws IOException { |
1111 |
private void writeHeaderInfo(FileOutputStream stream) throws IOException { |
889 |
stream.writeInt(this.numberOfChunks); |
1112 |
writeStreamInt(stream, this.numberOfChunks); |
890 |
stream.writeByte(this.sizeOfLastChunk); |
1113 |
if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) { |
891 |
stream.writeByte(this.documentReferenceSize); |
1114 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
|
|
1115 |
this.bufferIndex = 0; |
1116 |
} |
1117 |
this.streamBuffer[this.bufferIndex++] = (byte) this.sizeOfLastChunk; |
1118 |
this.streamBuffer[this.bufferIndex++] = (byte) this.documentReferenceSize; |
1119 |
this.streamEnd += 2; |
892 |
|
1120 |
|
893 |
// append the file with chunk offsets |
1121 |
// append the file with chunk offsets |
894 |
for (int i = 0; i < this.numberOfChunks; i++) |
1122 |
for (int i = 0; i < this.numberOfChunks; i++) { |
895 |
stream.writeInt(this.chunkOffsets[i]); |
1123 |
writeStreamInt(stream, this.chunkOffsets[i]); |
|
|
1124 |
} |
896 |
|
1125 |
|
897 |
stream.writeInt(this.startOfCategoryTables); |
1126 |
writeStreamInt(stream, this.startOfCategoryTables); |
898 |
|
1127 |
|
899 |
// append the file with the category offsets... # of name -> offset pairs, followed by each name & an offset to its word->doc# table |
1128 |
// append the file with the category offsets... # of name -> offset pairs, followed by each name & an offset to its word->doc# table |
900 |
stream.writeInt(this.categoryOffsets.elementSize); |
1129 |
writeStreamInt(stream, this.categoryOffsets.elementSize); |
901 |
char[][] categoryNames = this.categoryOffsets.keyTable; |
1130 |
char[][] categoryNames = this.categoryOffsets.keyTable; |
902 |
int[] offsets = this.categoryOffsets.valueTable; |
1131 |
int[] offsets = this.categoryOffsets.valueTable; |
903 |
for (int i = 0, l = categoryNames.length; i < l; i++) { |
1132 |
for (int i = 0, l = categoryNames.length; i < l; i++) { |
904 |
if (categoryNames[i] != null) { |
1133 |
if (categoryNames[i] != null) { |
905 |
Util.writeUTF(stream, categoryNames[i]); |
1134 |
writeStreamChars(stream, categoryNames[i]); |
906 |
stream.writeInt(offsets[i]); |
1135 |
writeStreamInt(stream, offsets[i]); |
907 |
} |
1136 |
} |
908 |
} |
1137 |
} |
|
|
1138 |
// ensure buffer is written to the stream |
1139 |
if (this.bufferIndex > 0) { |
1140 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
1141 |
this.bufferIndex = 0; |
1142 |
} |
909 |
} |
1143 |
} |
910 |
private void writeOffsetToHeader(int offsetToHeader) throws IOException { |
1144 |
private void writeOffsetToHeader(int offsetToHeader) throws IOException { |
911 |
if (offsetToHeader > 0) { |
1145 |
if (offsetToHeader > 0) { |
912 |
RandomAccessFile file = new RandomAccessFile(this.fileName, "rw"); //$NON-NLS-1$ |
1146 |
RandomAccessFile file = new RandomAccessFile(this.indexFile, "rw"); //$NON-NLS-1$ |
913 |
try { |
1147 |
try { |
914 |
file.seek(this.headerInfoOffset); // offset to position in header |
1148 |
file.seek(this.headerInfoOffset); // offset to position in header |
915 |
file.writeInt(offsetToHeader); |
1149 |
file.writeInt(offsetToHeader); |
Lines 919-922
Link Here
|
919 |
} |
1153 |
} |
920 |
} |
1154 |
} |
921 |
} |
1155 |
} |
|
|
1156 |
/** |
1157 |
* Writes a string to the given output stream using UTF-8 |
1158 |
* encoding in a machine-independent manner. |
1159 |
* <p> |
1160 |
* First, two bytes of the array are giving the number of bytes to |
1161 |
* follow. This value is the number of bytes actually written out, |
1162 |
* not the length of the string. Following the length, each character |
1163 |
* of the string is put in the bytes array, in sequence, using the UTF-8 |
1164 |
* encoding for the character. |
1165 |
* </p> |
1166 |
* <p> |
1167 |
* Then the entire byte array is written to the output stream |
1168 |
* using {@link OutputStream#write(byte[], int, int)} method. |
1169 |
* </p> |
1170 |
* |
1171 |
* @param array char array to be written. |
1172 |
* @exception IOException if an I/O error occurs while writting |
1173 |
* the bytes array to the stream. |
1174 |
*/ |
1175 |
private void writeStreamChars(FileOutputStream stream, char[] array) throws IOException { |
1176 |
if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) { |
1177 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
1178 |
this.bufferIndex = 0; |
1179 |
} |
1180 |
int length = array.length; |
1181 |
this.streamBuffer[this.bufferIndex++] = (byte) ((length >>> 8) & 0xFF); // store chars array length instead of bytes |
1182 |
this.streamBuffer[this.bufferIndex++] = (byte) (length & 0xFF); // this will allow to read it faster |
1183 |
this.streamEnd += 2; |
1184 |
|
1185 |
// we're assuming that very few char[] are so large that we need to flush the buffer more than once, if at all |
1186 |
int totalBytesNeeded = length * 3; |
1187 |
if (totalBytesNeeded <= BUFFER_WRITE_SIZE) { |
1188 |
if (this.bufferIndex + totalBytesNeeded > BUFFER_WRITE_SIZE) { |
1189 |
// flush the buffer now to make sure there is room for the array |
1190 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
1191 |
this.bufferIndex = 0; |
1192 |
} |
1193 |
writeStreamChars(stream, array, 0, length); |
1194 |
} else { |
1195 |
int charsPerWrite = BUFFER_WRITE_SIZE / 3; |
1196 |
int start = 0; |
1197 |
while (start < length) { |
1198 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
1199 |
this.bufferIndex = 0; |
1200 |
int charsLeftToWrite = length - start; |
1201 |
int end = start + (charsPerWrite < charsLeftToWrite ? charsPerWrite : charsLeftToWrite); |
1202 |
writeStreamChars(stream, array, start, end); |
1203 |
start = end; |
1204 |
} |
1205 |
} |
1206 |
} |
1207 |
private void writeStreamChars(FileOutputStream stream, char[] array, int start, int end) throws IOException { |
1208 |
// start can NOT be == end |
1209 |
// must have checked that there is enough room for end - start * 3 bytes in the buffer |
1210 |
|
1211 |
int oldIndex = this.bufferIndex; |
1212 |
while (start < end) { |
1213 |
int ch = array[start++]; |
1214 |
if ((ch & 0x007F) == ch) { |
1215 |
this.streamBuffer[this.bufferIndex++] = (byte) ch; |
1216 |
} else if ((ch & 0x07FF) == ch) { |
1217 |
// first two bits are stored in first byte |
1218 |
byte b = (byte) (ch >> 6); |
1219 |
b &= 0x1F; |
1220 |
b |= 0xC0; |
1221 |
this.streamBuffer[this.bufferIndex++] = b; |
1222 |
// last six bits are stored in second byte |
1223 |
b = (byte) (ch & 0x3F); |
1224 |
b |= 0x80; |
1225 |
this.streamBuffer[this.bufferIndex++] = b; |
1226 |
} else { |
1227 |
// first four bits are stored in first byte |
1228 |
byte b = (byte) (ch >> 12); |
1229 |
b &= 0x0F; |
1230 |
b |= 0xE0; |
1231 |
this.streamBuffer[this.bufferIndex++] = b; |
1232 |
// six following bits are stored in second byte |
1233 |
b = (byte) (ch >> 6); |
1234 |
b &= 0x3F; |
1235 |
b |= 0x80; |
1236 |
this.streamBuffer[this.bufferIndex++] = b; |
1237 |
// last six bits are stored in third byte |
1238 |
b = (byte) (ch & 0x3F); |
1239 |
b |= 0x80; |
1240 |
this.streamBuffer[this.bufferIndex++] = b; |
1241 |
} |
1242 |
} |
1243 |
this.streamEnd += this.bufferIndex - oldIndex; |
1244 |
} |
1245 |
private void writeStreamInt(FileOutputStream stream, int val) throws IOException { |
1246 |
if ((this.bufferIndex + 4) >= BUFFER_WRITE_SIZE) { |
1247 |
stream.write(this.streamBuffer, 0, this.bufferIndex); |
1248 |
this.bufferIndex = 0; |
1249 |
} |
1250 |
this.streamBuffer[this.bufferIndex++] = (byte) (val >> 24); |
1251 |
this.streamBuffer[this.bufferIndex++] = (byte) (val >> 16); |
1252 |
this.streamBuffer[this.bufferIndex++] = (byte) (val >> 8); |
1253 |
this.streamBuffer[this.bufferIndex++] = (byte) val; |
1254 |
this.streamEnd += 4; |
1255 |
} |
922 |
} |
1256 |
} |