Skip to content

Commit c7da98f

Browse files
committed
Avoid infinite loop in flat_object parsing (opensearch-project#15985)
* Avoid infinite loop in flat_object parsing We had logic in flat_object parsing that would: 1. Try parsing a flat object field that is not an object or null. 2. Would see an END_ARRAY token, ignore it, and not advance the parser. Combined, this would create a scenario where passing an array of strings for a flat_object would parse the string values, then loop infinitely on the END_ARRAY token. Signed-off-by: Michael Froh <[email protected]> * Remove some unused code and add more tests The removed code does not actually seem to affect the logic. Also, I want to be 100% sure that every call to parseToken is guaranteed to call parser.nextToken() at some point. Signed-off-by: Michael Froh <[email protected]> * Remove unused parameter from parseToken Thanks for the reminder, @kkewwei! Signed-off-by: Michael Froh <[email protected]> * Add skip for newly-added test The test fails on MixedClusterClientYamlTestSuiteIT because 2.x still has the infinite loop until backport. Signed-off-by: Michael Froh <[email protected]> --------- Signed-off-by: Michael Froh <[email protected]> (cherry picked from commit 05dab3b)
1 parent ecefe47 commit c7da98f

File tree

4 files changed

+90
-53
lines changed

4 files changed

+90
-53
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3232
### Fixed
3333
- Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737))
3434
- Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501))
35+
- Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985))
3536

3637
### Security
3738

rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ setup:
6262
},
6363
"required_matches": 1
6464
}
65-
6665
# Do index refresh
6766
- do:
6867
indices.refresh:
@@ -74,7 +73,52 @@ teardown:
7473
- do:
7574
indices.delete:
7675
index: test
77-
76+
---
77+
"Invalid docs":
78+
- skip:
79+
version: "- 2.99.99"
80+
reason: "parsing of these objects would infinite loop prior to 2.18"
81+
# The following documents are invalid.
82+
- do:
83+
catch: /parsing_exception/
84+
index:
85+
index: test
86+
id: 3
87+
body: {
88+
"ISBN13": "V12154942123242",
89+
"catalog": [ "Arrays in Action" ],
90+
"required_matches": 1
91+
}
92+
- do:
93+
catch: /parsing_exception/
94+
index:
95+
index: test
96+
id: 3
97+
body: {
98+
"ISBN13": "V12154942123242",
99+
"catalog": "Strings in Action",
100+
"required_matches": 1
101+
}
102+
- do:
103+
catch: /parsing_exception/
104+
index:
105+
index: test
106+
id: 3
107+
body: {
108+
"ISBN13": "V12154942123242",
109+
"catalog": 12345,
110+
"required_matches": 1
111+
}
112+
- do:
113+
catch: /parsing_exception/
114+
index:
115+
index: test
116+
id: 3
117+
body: {
118+
"ISBN13": "V12154942123242",
119+
"catalog": [ 12345 ],
120+
"required_matches": 1
121+
}
78122
---
79123
# Verify that mappings under the catalog field did not expand
80124
# and no dynamic fields were created.

server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
package org.opensearch.common.xcontent;
1010

1111
import org.opensearch.common.xcontent.json.JsonXContent;
12+
import org.opensearch.core.common.ParsingException;
1213
import org.opensearch.core.common.Strings;
1314
import org.opensearch.core.common.bytes.BytesReference;
1415
import org.opensearch.core.xcontent.AbstractXContentParser;
@@ -73,7 +74,7 @@ public XContentParser parseObject() throws IOException {
7374
builder.startObject();
7475
LinkedList<String> path = new LinkedList<>(Collections.singleton(fieldTypeName));
7576
while (currentToken() != Token.END_OBJECT) {
76-
parseToken(path, null);
77+
parseToken(path);
7778
}
7879
// deduplication the fieldName,valueList,valueAndPathList
7980
builder.field(this.fieldTypeName, new HashSet<>(keyList));
@@ -87,14 +88,11 @@ public XContentParser parseObject() throws IOException {
8788
/**
8889
* @return true if the child object contains no_null value, false otherwise
8990
*/
90-
private boolean parseToken(Deque<String> path, String currentFieldName) throws IOException {
91-
if (path.size() == 1 && processNoNestedValue()) {
92-
return true;
93-
}
91+
private boolean parseToken(Deque<String> path) throws IOException {
9492
boolean isChildrenValueValid = false;
9593
boolean visitFieldName = false;
9694
if (this.parser.currentToken() == Token.FIELD_NAME) {
97-
currentFieldName = this.parser.currentName();
95+
final String currentFieldName = this.parser.currentName();
9896
path.addLast(currentFieldName); // Pushing onto the stack *must* be matched by pop
9997
visitFieldName = true;
10098
String parts = currentFieldName;
@@ -106,23 +104,21 @@ private boolean parseToken(Deque<String> path, String currentFieldName) throws I
106104
}
107105
this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part
108106
this.parser.nextToken(); // advance to the value of fieldName
109-
isChildrenValueValid = parseToken(path, currentFieldName); // parse the value for fieldName (which will be an array, an object,
110-
// or a primitive value)
107+
isChildrenValueValid = parseToken(path); // parse the value for fieldName (which will be an array, an object,
108+
// or a primitive value)
111109
path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName)
112110
// Note that whichever other branch we just passed through has already ended with nextToken(), so we
113111
// don't need to call it.
114112
} else if (this.parser.currentToken() == Token.START_ARRAY) {
115113
parser.nextToken();
116114
while (this.parser.currentToken() != Token.END_ARRAY) {
117-
isChildrenValueValid |= parseToken(path, currentFieldName);
115+
isChildrenValueValid |= parseToken(path);
118116
}
119117
this.parser.nextToken();
120-
} else if (this.parser.currentToken() == Token.END_ARRAY) {
121-
// skip
122118
} else if (this.parser.currentToken() == Token.START_OBJECT) {
123119
parser.nextToken();
124120
while (this.parser.currentToken() != Token.END_OBJECT) {
125-
isChildrenValueValid |= parseToken(path, currentFieldName);
121+
isChildrenValueValid |= parseToken(path);
126122
}
127123
this.parser.nextToken();
128124
} else {
@@ -148,21 +144,6 @@ public void removeKeyOfNullValue() {
148144
this.keyList.remove(keyList.size() - 1);
149145
}
150146

151-
private boolean processNoNestedValue() throws IOException {
152-
if (parser.currentToken() == Token.VALUE_NULL) {
153-
return true;
154-
} else if (this.parser.currentToken() == Token.VALUE_STRING
155-
|| this.parser.currentToken() == Token.VALUE_NUMBER
156-
|| this.parser.currentToken() == Token.VALUE_BOOLEAN) {
157-
String value = this.parser.textOrNull();
158-
if (value != null) {
159-
this.valueList.add(value);
160-
}
161-
return true;
162-
}
163-
return false;
164-
}
165-
166147
private String parseValue() throws IOException {
167148
switch (this.parser.currentToken()) {
168149
case VALUE_BOOLEAN:
@@ -172,7 +153,7 @@ private String parseValue() throws IOException {
172153
return this.parser.textOrNull();
173154
// Handle other token types as needed
174155
default:
175-
throw new IOException("Unsupported value token type [" + parser.currentToken() + "]");
156+
throw new ParsingException(parser.getTokenLocation(), "Unexpected value token type [" + parser.currentToken() + "]");
176157
}
177158
}
178159

server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.opensearch.common.lucene.Lucene;
3131
import org.opensearch.common.lucene.search.AutomatonQueries;
3232
import org.opensearch.common.xcontent.JsonToStringXContentParser;
33+
import org.opensearch.core.common.ParsingException;
3334
import org.opensearch.core.xcontent.DeprecationHandler;
3435
import org.opensearch.core.xcontent.NamedXContentRegistry;
3536
import org.opensearch.core.xcontent.XContentParser;
@@ -568,31 +569,41 @@ protected void parseCreateField(ParseContext context) throws IOException {
568569
if (context.externalValueSet()) {
569570
String value = context.externalValue().toString();
570571
parseValueAddFields(context, value, fieldType().name());
571-
} else if (context.parser().currentToken() != XContentParser.Token.VALUE_NULL) {
572-
JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser(
573-
NamedXContentRegistry.EMPTY,
574-
DeprecationHandler.IGNORE_DEPRECATIONS,
575-
context.parser(),
576-
fieldType().name()
577-
);
578-
/*
579-
JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
580-
It reads the JSON object and parsed to a list of string
581-
*/
582-
XContentParser parser = jsonToStringParser.parseObject();
583-
584-
XContentParser.Token currentToken;
585-
while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
586-
switch (currentToken) {
587-
case FIELD_NAME:
588-
fieldName = parser.currentName();
589-
break;
590-
case VALUE_STRING:
591-
String value = parser.textOrNull();
592-
parseValueAddFields(context, value, fieldName);
593-
break;
572+
} else {
573+
XContentParser ctxParser = context.parser();
574+
if (ctxParser.currentToken() != XContentParser.Token.VALUE_NULL) {
575+
if (ctxParser.currentToken() != XContentParser.Token.START_OBJECT) {
576+
throw new ParsingException(
577+
ctxParser.getTokenLocation(),
578+
"[" + this.name() + "] unexpected token [" + ctxParser.currentToken() + "] in flat_object field value"
579+
);
594580
}
595581

582+
JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser(
583+
NamedXContentRegistry.EMPTY,
584+
DeprecationHandler.IGNORE_DEPRECATIONS,
585+
ctxParser,
586+
fieldType().name()
587+
);
588+
/*
589+
JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
590+
It reads the JSON object and parsed to a list of string
591+
*/
592+
XContentParser parser = jsonToStringParser.parseObject();
593+
594+
XContentParser.Token currentToken;
595+
while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
596+
switch (currentToken) {
597+
case FIELD_NAME:
598+
fieldName = parser.currentName();
599+
break;
600+
case VALUE_STRING:
601+
String value = parser.textOrNull();
602+
parseValueAddFields(context, value, fieldName);
603+
break;
604+
}
605+
606+
}
596607
}
597608
}
598609

0 commit comments

Comments
 (0)