Module org.apache.lucene.codecs
Class SimpleTextBKDWriter
java.lang.Object
org.apache.lucene.codecs.simpletext.SimpleTextBKDWriter
- All Implemented Interfaces:
Closeable, AutoCloseable
Forked from BKDWriter and simplified/specialized for SimpleText's usage.
-
Nested Class Summary
Nested Classes -
Field Summary
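Fields
| Modifier and Type | Field | Description |
| --- | --- | --- |
| static final String | CODEC_NAME | |
| (package private) final int[] | commonPrefixLengths | |
| protected final BKDConfig | config | How many dimensions we are storing at the leaf (data) nodes |
| static final float | DEFAULT_MAX_MB_SORT_IN_HEAP | Default maximum heap to use, before spilling to (slower) disk |
| protected final FixedBitSet | docsSeen | |
| private boolean | finished | |
| private final int | maxDoc | |
| (package private) final double | maxMBSortInHeap | |
| protected final byte[] | maxPackedValue | Maximum per-dim values, packed |
| private final int | maxPointsSortInHeap | |
| protected final byte[] | minPackedValue | Minimum per-dim values, packed |
| protected long | pointCount | |
| private PointWriter | pointWriter | |
| (package private) final BytesRefBuilder | scratch | |
| (package private) final byte[] | scratch1 | |
| (package private) final byte[] | scratch2 | |
| (package private) final BytesRef | scratchBytesRef1 | |
| (package private) final BytesRef | scratchBytesRef2 | |
| (package private) final byte[] | scratchDiff | |
| (package private) final TrackingDirectoryWrapper | tempDir | |
| (package private) final String | tempFileNamePrefix | |
| private IndexOutput | tempInput | |
| private final long | totalPointCount | An upper bound on how many points the caller will add (includes deletions) |
| static final int | VERSION_COMPRESSED_DOC_IDS | |
| static final int | VERSION_COMPRESSED_VALUES | |
| static final int | VERSION_CURRENT | |
| static final int | VERSION_IMPLICIT_SPLIT_DIM_1D | |
| static final int | VERSION_START | |
-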
Constructor Summary
Constructors
| Constructor | Description |
| --- | --- |
| SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, BKDConfig config, double maxMBSortInHeap, long totalPointCount) | |
-
Method Summary
| Modifier and Type | Method | Description |
| --- | --- | --- |
| void | add(byte[] packedValue, int docID) | |
| private void | build(int nodeID, int leafNodeOffset, MutablePointTree reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) | |
| private void | build(int nodeID, int leafNodeOffset, BKDRadixSelector.PathSlice points, IndexOutput out, BKDRadixSelector radixSelector, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) | The array (sized numDims) of PathSlice describes the cell we have currently recursed to. |
| private void | checkMaxLeafNodeCount(int numLeaves) | |
| void | close() | |
| private void | computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix) | |
| long | finish(IndexOutput out) | Writes the BKD tree to the provided IndexOutput and returns the file offset where index was written. |
| long | getPointCount() | How many points have been added so far |
| private void | newline(IndexOutput out) | |
| private void | rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) | |
| protected int | split(byte[] minPackedValue, byte[] maxPackedValue) | |
| private HeapPointWriter | switchToHeap(PointWriter source) | Pull a partition back into heap once the point count is low enough while recursing. |
| private boolean | valueInBounds(BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue) | Called only in assert |
| private boolean | valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset, int doc, int lastDoc) | |
| private boolean | valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue, IntFunction<BytesRef> values, int[] docs, int docsOffset) | |
| private Error | verifyChecksum(Throwable priorException, PointWriter writer) | Called on exception, to check whether the checksum is also corrupt in this source, and add that information (checksum matched or didn't) as a suppressed exception. |
| static void | verifyParams(double maxMBSortInHeap, long totalPointCount) | |
| private void | write(IndexOutput out, String s) | |
| private void | write(IndexOutput out, BytesRef b) | |
| long | writeField(IndexOutput out, String fieldName, MutablePointTree reader) | Write a field from a MutablePointTree. |
| private long | writeField1Dim(IndexOutput out, String fieldName, MutablePointTree reader) | |
| private long | writeFieldNDims(IndexOutput out, String fieldName, MutablePointTree values) | |
| private void | writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode) | Subclass can change how it writes the index. |
| private void | writeInt(IndexOutput out, int x) | |
| protected void | writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) | |
| protected void | writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) | |
| private void | writeLong(IndexOutput out, long x) | |
-
Field Details
-
CODEC_NAME
- See Also:
-
VERSION_START
public static final int VERSION_START
- See Also:
-
VERSION_COMPRESSED_DOC_IDS
public static final int VERSION_COMPRESSED_DOC_IDS
- See Also:
-
VERSION_COMPRESSED_VALUES
public static final int VERSION_COMPRESSED_VALUES
- See Also:
-
VERSION_IMPLICIT_SPLIT_DIM_1D
public static final int VERSION_IMPLICIT_SPLIT_DIM_1D
- See Also:
-
VERSION_CURRENT
public static final int VERSION_CURRENT
- See Also:
-
DEFAULT_MAX_MB_SORT_IN_HEAP
public static final float DEFAULT_MAX_MB_SORT_IN_HEAP
Default maximum heap to use, before spilling to (slower) disk
- See Also:
-
config
How many dimensions we are storing at the leaf (data) nodes -
scratch
-
tempDir
-
tempFileNamePrefix
-
maxMBSortInHeap
final double maxMBSortInHeap -
scratchDiff
final byte[] scratchDiff -
scratch1
final byte[] scratch1 -
scratch2
final byte[] scratch2 -
scratchBytesRef1
-
scratchBytesRef2
-
commonPrefixLengths
final int[] commonPrefixLengths -
docsSeen
-
pointWriter
-
finished
private boolean finished -
tempInput
-
maxPointsSortInHeap
private final int maxPointsSortInHeap -
minPackedValue
protected final byte[] minPackedValue
Minimum per-dim values, packed
-
maxPackedValue
protected final byte[] maxPackedValue
Maximum per-dim values, packed
-
pointCount
protected long pointCount -
totalPointCount
private final long totalPointCount
An upper bound on how many points the caller will add (includes deletions)
-
maxDoc
private final int maxDoc
-
-
Constructor Details
-
SimpleTextBKDWriter
public SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, BKDConfig config, double maxMBSortInHeap, long totalPointCount) throws IOException - Throws:
IOException
-
-
Method Details
-
verifyParams
public static void verifyParams(double maxMBSortInHeap, long totalPointCount) -
add
- Throws:
IOException
-
getPointCount
public long getPointCount()
How many points have been added so far
-
writeField
public long writeField(IndexOutput out, String fieldName, MutablePointTree reader) throws IOException
Write a field from a MutablePointTree. This way of writing points is faster than regular writes with BKDWriter.add(byte[], int) since there is opportunity for reordering points before writing them to disk. This method does not use transient disk in order to reorder points.
- Throws:
IOException
-
writeFieldNDims
private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointTree values) throws IOException - Throws:
IOException
-
writeField1Dim
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointTree reader) throws IOException - Throws:
IOException
-
rotateToTree
private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) -
checkMaxLeafNodeCount
private void checkMaxLeafNodeCount(int numLeaves) -
finish
Writes the BKD tree to the provided IndexOutput and returns the file offset where index was written.
- Throws:
IOException
-
writeIndex
private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode) throws IOException
Subclass can change how it writes the index.
- Throws:
IOException
-
writeLeafBlockDocs
protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException - Throws:
IOException
-
writeLeafBlockPackedValues
protected void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException - Throws:
IOException
-
close
- Specified by:
close in interface AutoCloseable
- Specified by:
close in interface Closeable
- Throws:
IOException
-
verifyChecksum
Called on exception, to check whether the checksum is also corrupt in this source, and add that information (checksum matched or didn't) as a suppressed exception.
- Throws:
IOException
-
valueInBounds
Called only in assert -
split
protected int split(byte[] minPackedValue, byte[] maxPackedValue) -
switchToHeap
Pull a partition back into heap once the point count is low enough while recursing.
- Throws:
IOException
-
build
private void build(int nodeID, int leafNodeOffset, MutablePointTree reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException - Throws:
IOException
-
build
private void build(int nodeID, int leafNodeOffset, BKDRadixSelector.PathSlice points, IndexOutput out, BKDRadixSelector radixSelector, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException
The array (sized numDims) of PathSlice describes the cell we have currently recursed to.
- Throws:
IOException
-
computeCommonPrefixLength
-
valuesInOrderAndBounds
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue, IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException - Throws:
IOException
-
valueInOrder
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset, int doc, int lastDoc) -
write
- Throws:
IOException
-
writeInt
- Throws:
IOException
-
writeLong
- Throws:
IOException
-
write
- Throws:
IOException
-
newline
- Throws:
IOException
-