Skip to content

Commit f562e63

Browse files
[8.19] Pull match_only_text fixes into main (#130049) (#130137)
* Pull match_only_text fixes into main (#130049)

  This brings in the fixes from #130020, with minor fixes to address review nits from that PR.

  Co-authored-by: Martijn van Groningen <[email protected]>

  (cherry picked from commit 40a7d02)

  # Conflicts:
  #   qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/MatchOnlyTextRollingUpgradeIT.java

* Fix base class of MatchOnlyTextRollingUpgradeIT

* Add missing assumption to MatchOnlyTextRollingUpgradeIT
1 parent 7312cf6 commit f562e63

File tree

6 files changed

+329
-8
lines changed

6 files changed

+329
-8
lines changed

build-tools-internal/src/main/resources/checkstyle_suppressions.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIT.java" checks="LineLength" />
3838
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
3939
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]LogsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
40+
<suppress files="plugin[/\\]logsdb[/\\]qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]MatchOnlyTextRollingUpgradeIT.java" checks="LineLength" />
4041

4142
<!-- Gradle requires inputs to be serializable -->
4243
<suppress files="build-tools-internal[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]gradle[/\\]internal[/\\]precommit[/\\]TestingConventionRule.java" checks="RegexpSinglelineJava" />

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,10 +361,38 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
361361
return toQuery(query, queryShardContext);
362362
}
363363

364+
private static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
365+
BytesFromMixedStringsBytesRefBlockLoader(String field) {
366+
super(field);
367+
}
368+
369+
@Override
370+
public Builder builder(BlockFactory factory, int expectedCount) {
371+
return factory.bytesRefs(expectedCount);
372+
}
373+
374+
@Override
375+
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
376+
return new BlockStoredFieldsReader.Bytes(field) {
377+
private final BytesRef scratch = new BytesRef();
378+
379+
@Override
380+
protected BytesRef toBytesRef(Object v) {
381+
if (v instanceof BytesRef b) {
382+
return b;
383+
} else {
384+
assert v instanceof String;
385+
return BlockSourceReader.toBytesRef(scratch, v.toString());
386+
}
387+
}
388+
};
389+
}
390+
}
391+
364392
@Override
365393
public BlockLoader blockLoader(BlockLoaderContext blContext) {
366394
if (textFieldType.isSyntheticSource()) {
367-
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
395+
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
368396
}
369397
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
370398
// MatchOnlyText never has norms, so we have to use the field names field
@@ -385,7 +413,12 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
385413
) {
386414
@Override
387415
protected BytesRef storedToBytesRef(Object stored) {
388-
return (BytesRef) stored;
416+
if (stored instanceof BytesRef storedBytes) {
417+
return storedBytes;
418+
} else {
419+
assert stored instanceof String;
420+
return new BytesRef(stored.toString());
421+
}
389422
}
390423
};
391424
}
@@ -476,7 +509,12 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
476509
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
477510
@Override
478511
protected void write(XContentBuilder b, Object value) throws IOException {
479-
b.value(((BytesRef) value).utf8ToString());
512+
if (value instanceof BytesRef valueBytes) {
513+
b.value(valueBytes.utf8ToString());
514+
} else {
515+
assert value instanceof String;
516+
b.value(value.toString());
517+
}
480518
}
481519
}
482520
);

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
package org.elasticsearch.index.mapper.extras;
1111

1212
import org.apache.lucene.analysis.TokenStream;
13+
import org.apache.lucene.document.Field;
14+
import org.apache.lucene.document.StoredField;
15+
import org.apache.lucene.document.StringField;
1316
import org.apache.lucene.index.DirectoryReader;
1417
import org.apache.lucene.index.DocValuesType;
1518
import org.apache.lucene.index.IndexOptions;
@@ -21,6 +24,7 @@
2124
import org.apache.lucene.tests.analysis.CannedTokenStream;
2225
import org.apache.lucene.tests.analysis.Token;
2326
import org.apache.lucene.tests.index.RandomIndexWriter;
27+
import org.apache.lucene.util.BytesRef;
2428
import org.elasticsearch.common.Strings;
2529
import org.elasticsearch.core.Tuple;
2630
import org.elasticsearch.index.IndexSettings;
@@ -350,4 +354,29 @@ public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() thr
350354
assertThat(fields, empty());
351355
}
352356
}
357+
358+
public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException {
359+
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
360+
b.startObject("field1").field("type", "match_only_text").endObject();
361+
b.startObject("field2").field("type", "match_only_text").endObject();
362+
})).documentMapper();
363+
try (Directory directory = newDirectory()) {
364+
RandomIndexWriter iw = indexWriterForSyntheticSource(directory);
365+
366+
LuceneDocument document = new LuceneDocument();
367+
document.add(new StringField("field1", "foo", Field.Store.NO));
368+
document.add(new StoredField("field1._original", "foo"));
369+
370+
document.add(new StringField("field2", "bar", Field.Store.NO));
371+
document.add(new StoredField("field2._original", new BytesRef("bar")));
372+
373+
iw.addDocument(document);
374+
iw.close();
375+
376+
try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
377+
String syntheticSource = syntheticSource(mapper, null, indexReader, 0);
378+
assertEquals("{\"field1\":\"foo\",\"field2\":\"bar\"}", syntheticSource);
379+
}
380+
}
381+
}
353382
}
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.upgrades;
11+
12+
import com.carrotsearch.randomizedtesting.annotations.Name;
13+
14+
import org.elasticsearch.client.Request;
15+
import org.elasticsearch.client.Response;
16+
import org.elasticsearch.client.ResponseException;
17+
import org.elasticsearch.common.network.NetworkAddress;
18+
import org.elasticsearch.common.time.DateFormatter;
19+
import org.elasticsearch.common.time.FormatNames;
20+
import org.elasticsearch.common.xcontent.XContentHelper;
21+
import org.elasticsearch.test.rest.ObjectPath;
22+
import org.elasticsearch.xcontent.XContentType;
23+
24+
import java.io.IOException;
25+
import java.io.InputStream;
26+
import java.time.Instant;
27+
import java.util.List;
28+
import java.util.Locale;
29+
import java.util.Map;
30+
31+
import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.enableLogsdbByDefault;
32+
import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.getWriteBackingIndex;
33+
import static org.hamcrest.Matchers.containsString;
34+
import static org.hamcrest.Matchers.equalTo;
35+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
36+
import static org.hamcrest.Matchers.notNullValue;
37+
38+
public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeTestCase {
39+
40+
static String BULK_ITEM_TEMPLATE =
41+
"""
42+
{"@timestamp": "$now", "host.name": "$host", "method": "$method", "ip": "$ip", "message": "$message", "length": $length, "factor": $factor}
43+
""";
44+
45+
private static final String TEMPLATE = """
46+
{
47+
"mappings": {
48+
"properties": {
49+
"@timestamp" : {
50+
"type": "date"
51+
},
52+
"method": {
53+
"type": "keyword"
54+
},
55+
"message": {
56+
"type": "match_only_text"
57+
},
58+
"ip": {
59+
"type": "ip"
60+
},
61+
"length": {
62+
"type": "long"
63+
},
64+
"factor": {
65+
"type": "double"
66+
}
67+
}
68+
}
69+
}""";
70+
71+
public MatchOnlyTextRollingUpgradeIT(@Name("upgradedNodes") int upgradedNodes) {
72+
super(upgradedNodes);
73+
}
74+
75+
public void testIndexing() throws Exception {
76+
assumeTrue("test relies on index.mapping.source.mode setting", getOldClusterTestVersion().onOrAfter("8.16.0"));
77+
String dataStreamName = "logs-bwc-test";
78+
if (isOldCluster()) {
79+
startTrial();
80+
enableLogsdbByDefault();
81+
createTemplate(dataStreamName, getClass().getSimpleName().toLowerCase(Locale.ROOT), TEMPLATE);
82+
83+
Instant startTime = Instant.now().minusSeconds(60 * 60);
84+
bulkIndex(dataStreamName, 4, 1024, startTime);
85+
86+
String firstBackingIndex = getWriteBackingIndex(client(), dataStreamName, 0);
87+
var settings = (Map<?, ?>) getIndexSettingsWithDefaults(firstBackingIndex).get(firstBackingIndex);
88+
assertThat(((Map<?, ?>) settings.get("settings")).get("index.mode"), equalTo("logsdb"));
89+
assertThat(((Map<?, ?>) settings.get("defaults")).get("index.mapping.source.mode"), equalTo("SYNTHETIC"));
90+
91+
ensureGreen(dataStreamName);
92+
search(dataStreamName);
93+
query(dataStreamName);
94+
} else if (isMixedCluster()) {
95+
Instant startTime = Instant.now().minusSeconds(60 * 30);
96+
bulkIndex(dataStreamName, 4, 1024, startTime);
97+
98+
ensureGreen(dataStreamName);
99+
search(dataStreamName);
100+
query(dataStreamName);
101+
} else if (isUpgradedCluster()) {
102+
ensureGreen(dataStreamName);
103+
Instant startTime = Instant.now();
104+
bulkIndex(dataStreamName, 4, 1024, startTime);
105+
search(dataStreamName);
106+
query(dataStreamName);
107+
108+
var forceMergeRequest = new Request("POST", "/" + dataStreamName + "/_forcemerge");
109+
forceMergeRequest.addParameter("max_num_segments", "1");
110+
assertOK(client().performRequest(forceMergeRequest));
111+
112+
ensureGreen(dataStreamName);
113+
search(dataStreamName);
114+
query(dataStreamName);
115+
}
116+
}
117+
118+
static void createTemplate(String dataStreamName, String id, String template) throws IOException {
119+
final String INDEX_TEMPLATE = """
120+
{
121+
"index_patterns": ["$DATASTREAM"],
122+
"template": $TEMPLATE,
123+
"data_stream": {
124+
}
125+
}""";
126+
var putIndexTemplateRequest = new Request("POST", "/_index_template/" + id);
127+
putIndexTemplateRequest.setJsonEntity(INDEX_TEMPLATE.replace("$TEMPLATE", template).replace("$DATASTREAM", dataStreamName));
128+
assertOK(client().performRequest(putIndexTemplateRequest));
129+
}
130+
131+
static String bulkIndex(String dataStreamName, int numRequest, int numDocs, Instant startTime) throws Exception {
132+
String firstIndex = null;
133+
for (int i = 0; i < numRequest; i++) {
134+
var bulkRequest = new Request("POST", "/" + dataStreamName + "/_bulk");
135+
StringBuilder requestBody = new StringBuilder();
136+
for (int j = 0; j < numDocs; j++) {
137+
String hostName = "host" + j % 50; // Not realistic, but makes asserting search / query response easier.
138+
String methodName = "method" + j % 5;
139+
String ip = NetworkAddress.format(randomIp(true));
140+
String param = "chicken" + randomInt(5);
141+
String message = "the quick brown fox jumps over the " + param;
142+
long length = randomLong();
143+
double factor = randomDouble();
144+
145+
requestBody.append("{\"create\": {}}");
146+
requestBody.append('\n');
147+
requestBody.append(
148+
BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime))
149+
.replace("$host", hostName)
150+
.replace("$method", methodName)
151+
.replace("$ip", ip)
152+
.replace("$message", message)
153+
.replace("$length", Long.toString(length))
154+
.replace("$factor", Double.toString(factor))
155+
);
156+
requestBody.append('\n');
157+
158+
startTime = startTime.plusMillis(1);
159+
}
160+
bulkRequest.setJsonEntity(requestBody.toString());
161+
bulkRequest.addParameter("refresh", "true");
162+
var response = client().performRequest(bulkRequest);
163+
assertOK(response);
164+
var responseBody = entityAsMap(response);
165+
assertThat("errors in response:\n " + responseBody, responseBody.get("errors"), equalTo(false));
166+
if (firstIndex == null) {
167+
firstIndex = (String) ((Map<?, ?>) ((Map<?, ?>) ((List<?>) responseBody.get("items")).get(0)).get("create")).get("_index");
168+
}
169+
}
170+
return firstIndex;
171+
}
172+
173+
void search(String dataStreamName) throws Exception {
174+
var searchRequest = new Request("POST", "/" + dataStreamName + "/_search");
175+
searchRequest.addParameter("pretty", "true");
176+
searchRequest.setJsonEntity("""
177+
{
178+
"size": 500,
179+
"query": {
180+
"match_phrase": {
181+
"message": "chicken"
182+
}
183+
}
184+
}
185+
""".replace("chicken", "chicken" + randomInt(5)));
186+
var response = client().performRequest(searchRequest);
187+
assertOK(response);
188+
var responseBody = entityAsMap(response);
189+
logger.info("{}", responseBody);
190+
191+
Integer totalCount = ObjectPath.evaluate(responseBody, "hits.total.value");
192+
assertThat(totalCount, greaterThanOrEqualTo(512));
193+
}
194+
195+
void query(String dataStreamName) throws Exception {
196+
var queryRequest = new Request("POST", "/_query");
197+
queryRequest.addParameter("pretty", "true");
198+
queryRequest.setJsonEntity("""
199+
{
200+
"query": "FROM $ds | STATS max(length), max(factor) BY message | SORT message | LIMIT 5"
201+
}
202+
""".replace("$ds", dataStreamName));
203+
var response = client().performRequest(queryRequest);
204+
assertOK(response);
205+
var responseBody = entityAsMap(response);
206+
logger.info("{}", responseBody);
207+
208+
String column1 = ObjectPath.evaluate(responseBody, "columns.0.name");
209+
String column2 = ObjectPath.evaluate(responseBody, "columns.1.name");
210+
String column3 = ObjectPath.evaluate(responseBody, "columns.2.name");
211+
assertThat(column1, equalTo("max(length)"));
212+
assertThat(column2, equalTo("max(factor)"));
213+
assertThat(column3, equalTo("message"));
214+
215+
String key = ObjectPath.evaluate(responseBody, "values.0.2");
216+
assertThat(key, equalTo("the quick brown fox jumps over the chicken0"));
217+
Long maxRx = ObjectPath.evaluate(responseBody, "values.0.0");
218+
assertThat(maxRx, notNullValue());
219+
Double maxTx = ObjectPath.evaluate(responseBody, "values.0.1");
220+
assertThat(maxTx, notNullValue());
221+
}
222+
223+
protected static void startTrial() throws IOException {
224+
Request startTrial = new Request("POST", "/_license/start_trial");
225+
startTrial.addParameter("acknowledge", "true");
226+
try {
227+
assertOK(client().performRequest(startTrial));
228+
} catch (ResponseException e) {
229+
var responseBody = entityAsMap(e.getResponse());
230+
String error = ObjectPath.evaluate(responseBody, "error_message");
231+
assertThat(error, containsString("Trial was already activated."));
232+
}
233+
}
234+
235+
static Map<String, Object> getIndexSettingsWithDefaults(String index) throws IOException {
236+
Request request = new Request("GET", "/" + index + "/_settings");
237+
request.addParameter("flat_settings", "true");
238+
request.addParameter("include_defaults", "true");
239+
Response response = client().performRequest(request);
240+
try (InputStream is = response.getEntity().getContent()) {
241+
return XContentHelper.convertToMap(
242+
XContentType.fromMediaType(response.getEntity().getContentType().getValue()).xContent(),
243+
is,
244+
true
245+
);
246+
}
247+
}
248+
249+
static String formatInstant(Instant instant) {
250+
return DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(instant);
251+
}
252+
253+
}

server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ public String toString() {
468468
/**
469469
* Convert a {@link String} into a utf-8 {@link BytesRef}.
470470
*/
471-
static BytesRef toBytesRef(BytesRef scratch, String v) {
471+
public static BytesRef toBytesRef(BytesRef scratch, String v) {
472472
int len = UnicodeUtil.maxUTF8Length(v.length());
473473
if (scratch.bytes.length < len) {
474474
scratch.bytes = new byte[len];

0 commit comments

Comments
 (0)