Skip to content

Commit 40a7d02

Browse files
Pull match_only_text fixes into main (#130049)
This brings in the fixes from #130020, with minor fixes to address review nits from that PR. Co-authored-by: Martijn van Groningen <[email protected]>
1 parent 1d3bd46 commit 40a7d02

File tree

6 files changed

+328
-8
lines changed

6 files changed

+328
-8
lines changed

build-tools-internal/src/main/resources/checkstyle_suppressions.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIT.java" checks="LineLength" />
3838
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
3939
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]LogsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
40+
<suppress files="plugin[/\\]logsdb[/\\]qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]MatchOnlyTextRollingUpgradeIT.java" checks="LineLength" />
4041

4142
<!-- Gradle requires inputs to be serializable -->
4243
<suppress files="build-tools-internal[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]gradle[/\\]internal[/\\]precommit[/\\]TestingConventionRule.java" checks="RegexpSinglelineJava" />

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,10 +362,38 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
362362
return toQuery(query, queryShardContext);
363363
}
364364

365+
/**
 * Stored-fields block loader that builds {@link BytesRef} blocks from a stored field whose
 * values may arrive either as {@link BytesRef} or as {@link String}.
 */
private static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
366+
/**
 * @param field name of the stored field to load, passed through to the parent loader
 */
BytesFromMixedStringsBytesRefBlockLoader(String field) {
367+
super(field);
368+
}
369+
370+
/** Creates a bytes-ref block builder sized for {@code expectedCount} entries. */
@Override
371+
public Builder builder(BlockFactory factory, int expectedCount) {
372+
return factory.bytesRefs(expectedCount);
373+
}
374+
375+
/**
 * Returns a row-stride reader for {@code field} that accepts both {@link BytesRef} and
 * {@link String} stored values.
 */
@Override
376+
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
377+
return new BlockStoredFieldsReader.Bytes(field) {
378+
// Scratch buffer handed to BlockSourceReader.toBytesRef for the String conversion below.
private final BytesRef scratch = new BytesRef();
379+
380+
@Override
381+
protected BytesRef toBytesRef(Object v) {
382+
// A value that is already a BytesRef is returned unchanged.
if (v instanceof BytesRef b) {
383+
return b;
384+
} else {
385+
// The only other expected representation is a String; convert it via the shared helper.
assert v instanceof String;
386+
return BlockSourceReader.toBytesRef(scratch, v.toString());
387+
}
388+
}
389+
};
390+
}
391+
}
392+
365393
@Override
366394
public BlockLoader blockLoader(BlockLoaderContext blContext) {
367395
if (textFieldType.isSyntheticSource()) {
368-
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
396+
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
369397
}
370398
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
371399
// MatchOnlyText never has norms, so we have to use the field names field
@@ -386,7 +414,12 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
386414
) {
387415
@Override
388416
protected BytesRef storedToBytesRef(Object stored) {
389-
return (BytesRef) stored;
417+
if (stored instanceof BytesRef storedBytes) {
418+
return storedBytes;
419+
} else {
420+
assert stored instanceof String;
421+
return new BytesRef(stored.toString());
422+
}
390423
}
391424
};
392425
}
@@ -477,7 +510,12 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
477510
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
478511
@Override
479512
protected void write(XContentBuilder b, Object value) throws IOException {
480-
b.value(((BytesRef) value).utf8ToString());
513+
if (value instanceof BytesRef valueBytes) {
514+
b.value(valueBytes.utf8ToString());
515+
} else {
516+
assert value instanceof String;
517+
b.value(value.toString());
518+
}
481519
}
482520
}
483521
);

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
package org.elasticsearch.index.mapper.extras;
1111

1212
import org.apache.lucene.analysis.TokenStream;
13+
import org.apache.lucene.document.Field;
14+
import org.apache.lucene.document.StoredField;
15+
import org.apache.lucene.document.StringField;
1316
import org.apache.lucene.index.DirectoryReader;
1417
import org.apache.lucene.index.DocValuesType;
1518
import org.apache.lucene.index.IndexOptions;
@@ -21,6 +24,7 @@
2124
import org.apache.lucene.tests.analysis.CannedTokenStream;
2225
import org.apache.lucene.tests.analysis.Token;
2326
import org.apache.lucene.tests.index.RandomIndexWriter;
27+
import org.apache.lucene.util.BytesRef;
2428
import org.elasticsearch.common.Strings;
2529
import org.elasticsearch.core.Tuple;
2630
import org.elasticsearch.index.IndexSettings;
@@ -350,4 +354,29 @@ public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() thr
350354
assertThat(fields, empty());
351355
}
352356
}
357+
358+
/**
 * Checks that synthetic source is produced for {@code match_only_text} fields whether the
 * {@code ._original} stored field holds a {@link String} (field1) or a {@link BytesRef} (field2).
 */
public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException {
359+
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
360+
b.startObject("field1").field("type", "match_only_text").endObject();
361+
b.startObject("field2").field("type", "match_only_text").endObject();
362+
})).documentMapper();
363+
try (Directory directory = newDirectory()) {
364+
RandomIndexWriter iw = indexWriterForSyntheticSource(directory);
365+
366+
// The Lucene document is built by hand so the stored-field representation can be chosen per field.
LuceneDocument document = new LuceneDocument();
367+
document.add(new StringField("field1", "foo", Field.Store.NO));
368+
// field1: original value stored as a String.
document.add(new StoredField("field1._original", "foo"));
369+
370+
document.add(new StringField("field2", "bar", Field.Store.NO));
371+
// field2: original value stored as a BytesRef.
document.add(new StoredField("field2._original", new BytesRef("bar")));
372+
373+
iw.addDocument(document);
374+
iw.close();
375+
376+
try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
377+
String syntheticSource = syntheticSource(mapper, null, indexReader, 0);
378+
// Both representations must produce the same plain string values in the rebuilt source.
assertEquals("{\"field1\":\"foo\",\"field2\":\"bar\"}", syntheticSource);
379+
}
380+
}
381+
}
353382
}

server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ public String toString() {
469469
/**
470470
* Convert a {@link String} into a utf-8 {@link BytesRef}.
471471
*/
472-
static BytesRef toBytesRef(BytesRef scratch, String v) {
472+
public static BytesRef toBytesRef(BytesRef scratch, String v) {
473473
int len = UnicodeUtil.maxUTF8Length(v.length());
474474
if (scratch.bytes.length < len) {
475475
scratch.bytes = new byte[len];

server/src/main/java/org/elasticsearch/index/mapper/BlockStoredFieldsReader.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ public boolean canReuse(int startingDocID) {
3535
return true;
3636
}
3737

38-
private abstract static class StoredFieldsBlockLoader implements BlockLoader {
38+
public abstract static class StoredFieldsBlockLoader implements BlockLoader {
3939
protected final String field;
4040

41-
StoredFieldsBlockLoader(String field) {
41+
public StoredFieldsBlockLoader(String field) {
4242
this.field = field;
4343
}
4444

@@ -112,10 +112,10 @@ protected BytesRef toBytesRef(Object v) {
112112
}
113113
}
114114

115-
private abstract static class Bytes extends BlockStoredFieldsReader {
115+
public abstract static class Bytes extends BlockStoredFieldsReader {
116116
private final String field;
117117

118-
Bytes(String field) {
118+
public Bytes(String field) {
119119
this.field = field;
120120
}
121121

0 commit comments

Comments
 (0)