Skip to content

Commit d4f5f46

Browse files
authored
[INLONG-11943][Sort] TransformFunction: url_encode supports specifying character sets (#11946)
1 parent a5a2a70 commit d4f5f46

File tree

2 files changed

+50
-5
lines changed

2 files changed

+50
-5
lines changed

inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
2525
import org.apache.inlong.sdk.transform.process.parser.ValueParser;
2626

27+
import net.sf.jsqlparser.expression.Expression;
2728
import net.sf.jsqlparser.expression.Function;
2829

2930
import java.net.URLEncoder;
3031
import java.nio.charset.StandardCharsets;
32+
import java.util.List;
3133

3234
/**
3335
* UrlEncodeFunction -> url_encode(str[, charset])
@@ -37,20 +39,24 @@
3739
* - Return the result of translating 'str' into 'application/x-www-form-urlencoded' format using the given charset encoding scheme (UTF-8 when no charset is specified).
3840
*/
3941
@TransformFunction(type = FunctionConstant.STRING_TYPE, names = {
40-
"url_encode"}, parameter = "(String str)", descriptions = {
42+
"url_encode"}, parameter = "(String str[, String charset])", descriptions = {
4143
"- Return \"\" if 'str' is NULL, or there is an issue with the decoding process(such as encountering an "
4244
+
4345
"illegal escape pattern), or the encoding scheme is not supported;",
4446
"- Return the result of translating 'str' into 'application/x-www-form-urlencoded' format using the " +
45-
"UTF-8 encoding scheme."
47+
"charset(default:UTF-8) encoding scheme."
4648
}, examples = {
47-
"url_encode('https://apache.inlong.com/search?q=java url encode') = \"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\""})
49+
"url_encode('https://apache.inlong.com/search?q=java url encode') = \"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\"",
50+
"url_encode('https://apache.inlong.com/search?q=java url encode','UTF-8') = \"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\""})
4851
public class UrlEncodeFunction implements ValueParser {
4952

5053
private final ValueParser stringParser;
54+
private final ValueParser charsetParser;
5155

5256
public UrlEncodeFunction(Function expr) {
53-
stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
57+
List<Expression> params = expr.getParameters().getExpressions();
58+
stringParser = OperatorTools.buildParser(params.get(0));
59+
charsetParser = params.size() > 1 ? OperatorTools.buildParser(params.get(1)) : null;
5460
}
5561

5662
@Override
@@ -66,7 +72,19 @@ public Object parse(SourceData sourceData, int rowIndex, Context context) {
6672
}
6773

6874
try {
69-
return URLEncoder.encode(string, StandardCharsets.UTF_8.toString());
75+
if (charsetParser == null) {
76+
return URLEncoder.encode(string, StandardCharsets.UTF_8.toString());
77+
} else {
78+
Object charsetObj = charsetParser.parse(sourceData, rowIndex, context);
79+
if (charsetObj == null) {
80+
return null;
81+
}
82+
String charset = OperatorTools.parseString(charsetObj);
83+
if (charset == null) {
84+
return null;
85+
}
86+
return URLEncoder.encode(string, charset);
87+
}
7088
} catch (Exception e) {
7189
return null;
7290
}

inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,31 @@ public void testUrlEncodeFunction() throws Exception {
5454
Assert.assertEquals(1, output2.size());
5555
Assert.assertEquals(output2.get(0), "result=");
5656
}
57+
58+
@Test
59+
public void testUrlEncodeCharsetFunction() throws Exception {
60+
String transformSql = "select url_encode(string1,'GBK') from source";
61+
TransformConfig config = new TransformConfig(transformSql);
62+
TransformProcessor<String, String> processor = TransformProcessor
63+
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
64+
SinkEncoderFactory.createKvEncoder(kvSink));
65+
66+
// case1: url_encode('A160=汕头市&vuserid=&version_build=76','GBK')
67+
List<String> output1 = processor.transform("A160=汕头市&vuserid=&version_build=76|banana|cloud|1",
68+
new HashMap<>());
69+
Assert.assertEquals(1, output1.size());
70+
Assert.assertEquals(output1.get(0), "result=A160%3D%C9%C7%CD%B7%CA%D0%26vuserid%3D%26version_build%3D76");
71+
72+
String transformSql2 = "select url_encode(string1,'UTF-8') from source";
73+
TransformConfig config2 = new TransformConfig(transformSql2);
74+
TransformProcessor<String, String> processor2 = TransformProcessor
75+
.create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
76+
SinkEncoderFactory.createKvEncoder(kvSink));
77+
// case2: url_encode('A160=汕头市&vuserid=&version_build=76','UTF-8')
78+
List<String> output2 = processor2.transform("A160=汕头市&vuserid=&version_build=76|banana|cloud|1",
79+
new HashMap<>());
80+
Assert.assertEquals(1, output2.size());
81+
Assert.assertEquals(output2.get(0),
82+
"result=A160%3D%E6%B1%95%E5%A4%B4%E5%B8%82%26vuserid%3D%26version_build%3D76");
83+
}
5784
}

0 commit comments

Comments
 (0)