Skip to content

Commit f6d3ca9

Browse files
star tree file formats refactoring and fixing offset bug
Signed-off-by: Bharathwaj G <[email protected]>
1 parent 7c427d9 commit f6d3ca9

22 files changed

+532
-462
lines changed

server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ public class CompositeIndexConstants {
1919
public static final long COMPOSITE_FIELD_MARKER = 0xC0950513F1E1DL; // Composite Field
2020

2121
/**
22-
* Represents the key to fetch number of non-star aggregated segment documents.
22+
* Represents the key to fetch the number of non-star aggregated records/entries
2323
*/
24-
public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount";
24+
public static final String SEGMENT_ENTRIES_COUNT = "segmentEntriesCount";
2525

2626
/**
27-
* Represents the key to fetch number of total star tree documents in a segment.
27+
* Represents the key to fetch the total number of star tree records/entries in a segment.
2828
*/
29-
public static final String STAR_TREE_DOCS_COUNT = "starTreeDocsCount";
29+
public static final String STAR_TREE_ENTRIES_COUNT = "starTreeEntriesCount";
3030

3131
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
3434
import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode;
3535
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType;
36-
import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator;
36+
import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialValuesIterator;
37+
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator;
3738
import org.opensearch.index.mapper.DocCountFieldMapper;
3839
import org.opensearch.index.mapper.FieldMapper;
3940
import org.opensearch.index.mapper.FieldValueConverter;
@@ -177,13 +178,13 @@ public List<MetricAggregatorInfo> generateMetricAggregatorInfos(MapperService ma
177178
*
178179
* @return list of MetricAggregatorInfo
179180
*/
180-
public List<SequentialDocValuesIterator> getMetricReaders(SegmentWriteState state, Map<String, DocValuesProducer> fieldProducerMap)
181+
public List<SequentialValuesIterator> getMetricReaders(SegmentWriteState state, Map<String, DocValuesProducer> fieldProducerMap)
181182
throws IOException {
182183

183-
List<SequentialDocValuesIterator> metricReaders = new ArrayList<>();
184+
List<SequentialValuesIterator> metricReaders = new ArrayList<>();
184185
for (Metric metric : this.starTreeField.getMetrics()) {
185186
for (MetricStat metricStat : metric.getBaseMetrics()) {
186-
SequentialDocValuesIterator metricReader;
187+
SequentialValuesIterator metricReader;
187188
FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField());
188189
if (metricStat.equals(MetricStat.DOC_COUNT)) {
189190
// _doc_count is a numeric field, so we convert it to sortedNumericDocValues and get an iterator
@@ -192,8 +193,10 @@ public List<SequentialDocValuesIterator> getMetricReaders(SegmentWriteState stat
192193
if (metricFieldInfo == null) {
193194
metricFieldInfo = getFieldInfo(metric.getField(), DocValuesType.SORTED_NUMERIC);
194195
}
195-
metricReader = new SequentialDocValuesIterator(
196-
fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo)
196+
metricReader = new SequentialValuesIterator(
197+
new SortedNumericStarTreeValuesIterator(
198+
fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo)
199+
)
197200
);
198201
}
199202
metricReaders.add(metricReader);
@@ -218,17 +221,17 @@ public void build(
218221
long startTime = System.currentTimeMillis();
219222
logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName());
220223

221-
List<SequentialDocValuesIterator> metricReaders = getMetricReaders(writeState, fieldProducerMap);
224+
List<SequentialValuesIterator> metricReaders = getMetricReaders(writeState, fieldProducerMap);
222225
List<Dimension> dimensionsSplitOrder = starTreeField.getDimensionsOrder();
223-
SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()];
226+
SequentialValuesIterator[] dimensionReaders = new SequentialValuesIterator[dimensionsSplitOrder.size()];
224227
for (int i = 0; i < numDimensions; i++) {
225228
String dimension = dimensionsSplitOrder.get(i).getField();
226229
FieldInfo dimensionFieldInfo = writeState.fieldInfos.fieldInfo(dimension);
227230
if (dimensionFieldInfo == null) {
228231
dimensionFieldInfo = getFieldInfo(dimension, DocValuesType.SORTED_NUMERIC);
229232
}
230-
dimensionReaders[i] = new SequentialDocValuesIterator(
231-
fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo)
233+
dimensionReaders[i] = new SequentialValuesIterator(
234+
new SortedNumericStarTreeValuesIterator(fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo))
232235
);
233236
}
234237
Iterator<StarTreeDocument> starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders);
@@ -287,7 +290,7 @@ void appendDocumentsToStarTree(Iterator<StarTreeDocument> starTreeDocumentIterat
287290
}
288291
}
289292

290-
private void serializeStarTree(int numSegmentStarTreeDocument, int numStarTreeDocs) throws IOException {
293+
private void serializeStarTree(int numSegmentStarTreeEntries, int numStarTreeEntries) throws IOException {
291294
// serialize the star tree data
292295
long dataFilePointer = dataOut.getFilePointer();
293296
StarTreeWriter starTreeWriter = new StarTreeWriter();
@@ -299,8 +302,8 @@ private void serializeStarTree(int numSegmentStarTreeDocument, int numStarTreeDo
299302
starTreeField,
300303
metricAggregatorInfos,
301304
numStarTreeNodes,
302-
numSegmentStarTreeDocument,
303-
numStarTreeDocs,
305+
numSegmentStarTreeEntries,
306+
numStarTreeEntries,
304307
dataFilePointer,
305308
totalStarTreeDataLength
306309
);
@@ -395,27 +398,25 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
395398
*/
396399
protected StarTreeDocument getStarTreeDocument(
397400
int currentDocId,
398-
SequentialDocValuesIterator[] dimensionReaders,
399-
List<SequentialDocValuesIterator> metricReaders
401+
SequentialValuesIterator[] dimensionReaders,
402+
List<SequentialValuesIterator> metricReaders
400403
) throws IOException {
401404
Long[] dims = new Long[numDimensions];
402405
int i = 0;
403-
for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) {
404-
dimensionDocValueIterator.nextDoc(currentDocId);
405-
Long val = dimensionDocValueIterator.value(currentDocId);
406+
for (SequentialValuesIterator dimensionValueIterator : dimensionReaders) {
407+
dimensionValueIterator.nextEntry(currentDocId);
408+
Long val = dimensionValueIterator.value(currentDocId);
406409
dims[i] = val;
407410
i++;
408411
}
409412
i = 0;
410413
Object[] metrics = new Object[metricReaders.size()];
411-
for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) {
412-
metricDocValuesIterator.nextDoc(currentDocId);
414+
for (SequentialValuesIterator metricValuesIterator : metricReaders) {
415+
metricValuesIterator.nextEntry(currentDocId);
413416
// As part of merge, we traverse the star tree doc values
414417
// The type of data stored in metric fields is different from the
415418
// actual indexing field they're based on
416-
metrics[i] = metricAggregatorInfos.get(i)
417-
.getValueAggregators()
418-
.toAggregatedValueType(metricDocValuesIterator.value(currentDocId));
419+
metrics[i] = metricAggregatorInfos.get(i).getValueAggregators().toAggregatedValueType(metricValuesIterator.value(currentDocId));
419420
i++;
420421
}
421422
return new StarTreeDocument(dims, metrics);
@@ -463,8 +464,8 @@ protected StarTreeDocument getStarTreeDocument(
463464
* @return Iterator for the aggregated star-tree document
464465
*/
465466
public abstract Iterator<StarTreeDocument> sortAndAggregateSegmentDocuments(
466-
SequentialDocValuesIterator[] dimensionReaders,
467-
List<SequentialDocValuesIterator> metricReaders
467+
SequentialValuesIterator[] dimensionReaders,
468+
List<SequentialValuesIterator> metricReaders
468469
) throws IOException;
469470

470471
/**
@@ -483,8 +484,8 @@ public abstract Iterator<StarTreeDocument> generateStarTreeDocumentsForStarNode(
483484
*/
484485
protected StarTreeDocument getSegmentStarTreeDocument(
485486
int currentDocId,
486-
SequentialDocValuesIterator[] dimensionReaders,
487-
List<SequentialDocValuesIterator> metricReaders
487+
SequentialValuesIterator[] dimensionReaders,
488+
List<SequentialValuesIterator> metricReaders
488489
) throws IOException {
489490
Long[] dimensions = getStarTreeDimensionsFromSegment(currentDocId, dimensionReaders);
490491
Object[] metrics = getStarTreeMetricsFromSegment(currentDocId, metricReaders);
@@ -497,12 +498,12 @@ protected StarTreeDocument getSegmentStarTreeDocument(
497498
* @return dimension values for each of the star-tree dimension
498499
* @throws IOException when we are unable to iterate to the next doc for the given dimension readers
499500
*/
500-
Long[] getStarTreeDimensionsFromSegment(int currentDocId, SequentialDocValuesIterator[] dimensionReaders) throws IOException {
501+
Long[] getStarTreeDimensionsFromSegment(int currentDocId, SequentialValuesIterator[] dimensionReaders) throws IOException {
501502
Long[] dimensions = new Long[numDimensions];
502503
for (int i = 0; i < numDimensions; i++) {
503504
if (dimensionReaders[i] != null) {
504505
try {
505-
dimensionReaders[i].nextDoc(currentDocId);
506+
dimensionReaders[i].nextEntry(currentDocId);
506507
} catch (IOException e) {
507508
logger.error("unable to iterate to next doc", e);
508509
throw new RuntimeException("unable to iterate to next doc", e);
@@ -524,13 +525,13 @@ Long[] getStarTreeDimensionsFromSegment(int currentDocId, SequentialDocValuesIte
524525
* @return metric values for each of the star-tree metric
525526
* @throws IOException when we are unable to iterate to the next doc for the given metric readers
526527
*/
527-
private Object[] getStarTreeMetricsFromSegment(int currentDocId, List<SequentialDocValuesIterator> metricsReaders) throws IOException {
528+
private Object[] getStarTreeMetricsFromSegment(int currentDocId, List<SequentialValuesIterator> metricsReaders) throws IOException {
528529
Object[] metrics = new Object[numMetrics];
529530
for (int i = 0; i < numMetrics; i++) {
530-
SequentialDocValuesIterator metricStatReader = metricsReaders.get(i);
531+
SequentialValuesIterator metricStatReader = metricsReaders.get(i);
531532
if (metricStatReader != null) {
532533
try {
533-
metricStatReader.nextDoc(currentDocId);
534+
metricStatReader.nextEntry(currentDocId);
534535
} catch (IOException e) {
535536
logger.error("unable to iterate to next doc", e);
536537
throw new RuntimeException("unable to iterate to next doc", e);
@@ -661,18 +662,18 @@ public StarTreeDocument reduceStarTreeDocuments(StarTreeDocument aggregatedDocum
661662
/**
662663
* Converts numericDocValues to sortedNumericDocValues and returns SequentialValuesIterator
663664
*/
664-
private SequentialDocValuesIterator getIteratorForNumericField(
665+
private SequentialValuesIterator getIteratorForNumericField(
665666
Map<String, DocValuesProducer> fieldProducerMap,
666667
FieldInfo fieldInfo,
667668
String name
668669
) throws IOException {
669670
if (fieldInfo == null) {
670671
fieldInfo = getFieldInfo(name, DocValuesType.NUMERIC);
671672
}
672-
SequentialDocValuesIterator sequentialDocValuesIterator;
673+
SequentialValuesIterator sequentialDocValuesIterator;
673674
assert fieldProducerMap.containsKey(fieldInfo.name);
674-
sequentialDocValuesIterator = new SequentialDocValuesIterator(
675-
DocValues.singleton(fieldProducerMap.get(fieldInfo.name).getNumeric(fieldInfo))
675+
sequentialDocValuesIterator = new SequentialValuesIterator(
676+
new SortedNumericStarTreeValuesIterator(DocValues.singleton(fieldProducerMap.get(fieldInfo.name).getNumeric(fieldInfo)))
676677
);
677678
return sequentialDocValuesIterator;
678679
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import org.apache.logging.log4j.Logger;
1313
import org.apache.lucene.codecs.DocValuesConsumer;
1414
import org.apache.lucene.index.SegmentWriteState;
15-
import org.apache.lucene.search.DocIdSetIterator;
1615
import org.apache.lucene.store.IndexOutput;
1716
import org.opensearch.common.annotation.ExperimentalApi;
1817
import org.opensearch.common.util.io.IOUtils;
@@ -22,8 +21,9 @@
2221
import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument;
2322
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
2423
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
25-
import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator;
24+
import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialValuesIterator;
2625
import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDocumentsSorter;
26+
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.StarTreeValuesIterator;
2727
import org.opensearch.index.mapper.MapperService;
2828

2929
import java.io.IOException;
@@ -36,7 +36,7 @@
3636
import java.util.Objects;
3737
import java.util.concurrent.atomic.AtomicInteger;
3838

39-
import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT;
39+
import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_ENTRIES_COUNT;
4040
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues;
4141

4242
/**
@@ -113,8 +113,8 @@ public void build(
113113
*/
114114
@Override
115115
public Iterator<StarTreeDocument> sortAndAggregateSegmentDocuments(
116-
SequentialDocValuesIterator[] dimensionReaders,
117-
List<SequentialDocValuesIterator> metricReaders
116+
SequentialValuesIterator[] dimensionReaders,
117+
List<SequentialValuesIterator> metricReaders
118118
) throws IOException {
119119
// Write all dimensions for segment documents into the buffer,
120120
// and sort all documents using an int array
@@ -143,19 +143,19 @@ public Iterator<StarTreeDocument> sortAndAggregateSegmentDocuments(
143143
* @return iterator of star tree documents
144144
*/
145145
Iterator<StarTreeDocument> mergeStarTrees(List<StarTreeValues> starTreeValuesSubs) throws IOException {
146-
int numDocs = 0;
147-
int[] docIds;
146+
int numEntries = 0;
147+
int[] entryIds;
148148
try {
149149
for (StarTreeValues starTreeValues : starTreeValuesSubs) {
150150
List<Dimension> dimensionsSplitOrder = starTreeValues.getStarTreeField().getDimensionsOrder();
151-
SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[starTreeValues.getStarTreeField()
151+
SequentialValuesIterator[] dimensionReaders = new SequentialValuesIterator[starTreeValues.getStarTreeField()
152152
.getDimensionsOrder()
153153
.size()];
154154
for (int i = 0; i < dimensionsSplitOrder.size(); i++) {
155155
String dimension = dimensionsSplitOrder.get(i).getField();
156-
dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocIdSetIterator(dimension));
156+
dimensionReaders[i] = new SequentialValuesIterator(starTreeValues.getDimensionValuesIterator(dimension));
157157
}
158-
List<SequentialDocValuesIterator> metricReaders = new ArrayList<>();
158+
List<SequentialValuesIterator> metricReaders = new ArrayList<>();
159159
// get values iterators for metrics
160160
for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) {
161161
for (MetricStat metricStat : metric.getBaseMetrics()) {
@@ -164,34 +164,35 @@ Iterator<StarTreeDocument> mergeStarTrees(List<StarTreeValues> starTreeValuesSub
164164
metric.getField(),
165165
metricStat.getTypeName()
166166
);
167-
metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricDocIdSetIterator(metricFullName)));
167+
metricReaders.add(new SequentialValuesIterator(starTreeValues.getMetricValuesIterator(metricFullName)));
168168
}
169169
}
170-
int currentDocId = 0;
171-
int numSegmentDocs = Integer.parseInt(
172-
starTreeValues.getAttributes().getOrDefault(SEGMENT_DOCS_COUNT, String.valueOf(DocIdSetIterator.NO_MORE_DOCS))
170+
int currentEntryId = 0;
171+
int numSegmentEntries = Integer.parseInt(
172+
starTreeValues.getAttributes()
173+
.getOrDefault(SEGMENT_ENTRIES_COUNT, String.valueOf(StarTreeValuesIterator.NO_MORE_ENTRIES))
173174
);
174-
while (currentDocId < numSegmentDocs) {
175-
StarTreeDocument starTreeDocument = getStarTreeDocument(currentDocId, dimensionReaders, metricReaders);
175+
while (currentEntryId < numSegmentEntries) {
176+
StarTreeDocument starTreeDocument = getStarTreeDocument(currentEntryId, dimensionReaders, metricReaders);
176177
segmentDocumentFileManager.writeStarTreeDocument(starTreeDocument, true);
177-
numDocs++;
178-
currentDocId++;
178+
numEntries++;
179+
currentEntryId++;
179180
}
180181
}
181-
docIds = new int[numDocs];
182-
for (int i = 0; i < numDocs; i++) {
183-
docIds[i] = i;
182+
entryIds = new int[numEntries];
183+
for (int i = 0; i < numEntries; i++) {
184+
entryIds[i] = i;
184185
}
185186
} catch (IOException ex) {
186187
segmentDocumentFileManager.close();
187188
throw ex;
188189
}
189190

190-
if (numDocs == 0) {
191+
if (numEntries == 0) {
191192
return Collections.emptyIterator();
192193
}
193194

194-
return sortAndReduceDocuments(docIds, numDocs, true);
195+
return sortAndReduceDocuments(entryIds, numEntries, true);
195196
}
196197

197198
/**

0 commit comments

Comments
 (0)