Skip to content

Commit 0a9b48f

Browse files
authored
Merge pull request #12929 from harpocrates/alec/indy-string-concat
Use `StringConcatFactory` for string concatenation on JDK 9+
2 parents d627128 + 84ed337 commit 0a9b48f

File tree

5 files changed

+204
-18
lines changed

5 files changed

+204
-18
lines changed

compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala

Lines changed: 88 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,30 +1063,109 @@ trait BCodeBodyBuilder extends BCodeSkelBuilder {
10631063
}
10641064
}
10651065

1066+
/* Generate string concatenation
1067+
*
1068+
* When targeting JDK 8 (classfile version below 9): create a `StringBuilder` and append to it
1069+
* When targeting JDK 9+: use `invokedynamic` with `StringConcatFactory`
1070+
*/
10661071
def genStringConcat(tree: Tree): BType = {
10671072
lineNumber(tree)
10681073
liftStringConcat(tree) match {
1069-
// Optimization for expressions of the form "" + x. We can avoid the StringBuilder.
1074+
// Optimization for expressions of the form "" + x
10701075
case List(Literal(Constant("")), arg) =>
10711076
genLoad(arg, ObjectReference)
10721077
genCallMethod(defn.String_valueOf_Object, InvokeStyle.Static)
10731078

10741079
case concatenations =>
1075-
bc.genStartConcat
1076-
for (elem <- concatenations) {
1077-
val loadedElem = elem match {
1080+
val concatArguments = concatenations.view
1081+
.filter {
1082+
case Literal(Constant("")) => false // empty strings are no-ops in concatenation
1083+
case _ => true
1084+
}
1085+
.map {
10781086
case Apply(boxOp, value :: Nil) if Erasure.Boxing.isBox(boxOp.symbol) && boxOp.symbol.denot.owner != defn.UnitModuleClass =>
10791087
// Eliminate boxing of primitive values. Boxing is introduced by erasure because
10801088
// there's only a single synthetic `+` method "added" to the string class.
10811089
value
1090+
case other => other
1091+
}
1092+
.toList
1093+
1094+
// `StringConcatFactory` was only added in JDK 9, so fall back to `StringBuilder` for older classfile versions
1095+
if (classfileVersion < asm.Opcodes.V9) {
1096+
1097+
// Estimate capacity needed for the string builder
1098+
val approxBuilderSize = concatArguments.view.map {
1099+
case Literal(Constant(s: String)) => s.length
1100+
case Literal(c @ Constant(_)) if c.isNonUnitAnyVal => String.valueOf(c).length
1101+
case _ => 0
1102+
}.sum
1103+
bc.genNewStringBuilder(approxBuilderSize)
1104+
1105+
for (elem <- concatArguments) {
1106+
val elemType = tpeTK(elem)
1107+
genLoad(elem, elemType)
1108+
bc.genStringBuilderAppend(elemType)
1109+
}
1110+
bc.genStringBuilderEnd
1111+
} else {
1112+
1113+
/* `StringConcatFactory#makeConcatWithConstants` accepts at most 200 argument slots. If
1114+
* the concatenation needs more than that (unlikely), we spill into multiple calls and join their results
1115+
*/
1116+
val MaxIndySlots = 200
1117+
val TagArg = '\u0001' // indicates a hole (in the recipe string) for an argument
1118+
val TagConst = '\u0002' // indicates a hole (in the recipe string) for a constant
1119+
1120+
val recipe = new StringBuilder()
1121+
val argTypes = Seq.newBuilder[asm.Type]
1122+
val constVals = Seq.newBuilder[String]
1123+
var totalArgSlots = 0
1124+
var countConcats = 1 // ie. 1 + how many times we spilled
1125+
1126+
for (elem <- concatArguments) {
1127+
val tpe = tpeTK(elem)
1128+
val elemSlots = tpe.size
1129+
1130+
// Unlikely spill case: concatenate the arguments gathered so far into an intermediate string and start over
1131+
if (totalArgSlots + elemSlots >= MaxIndySlots) {
1132+
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())
1133+
countConcats += 1
1134+
totalArgSlots = 0
1135+
recipe.setLength(0)
1136+
argTypes.clear()
1137+
constVals.clear()
1138+
}
10821139

1083-
case _ => elem
1140+
elem match {
1141+
case Literal(Constant(s: String)) =>
1142+
if (s.contains(TagArg) || s.contains(TagConst)) {
1143+
totalArgSlots += elemSlots
1144+
recipe.append(TagConst)
1145+
constVals += s
1146+
} else {
1147+
recipe.append(s)
1148+
}
1149+
1150+
case other =>
1151+
totalArgSlots += elemSlots
1152+
recipe.append(TagArg)
1153+
val tpe = tpeTK(elem)
1154+
argTypes += tpe.toASMType
1155+
genLoad(elem, tpe)
1156+
}
1157+
}
1158+
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())
1159+
1160+
// If we spilled, generate one final concat that joins the intermediate strings
1161+
if (countConcats > 1) {
1162+
bc.genIndyStringConcat(
1163+
TagArg.toString * countConcats,
1164+
Seq.fill(countConcats)(StringRef.toASMType),
1165+
Seq.empty
1166+
)
10841167
}
1085-
val elemType = tpeTK(loadedElem)
1086-
genLoad(loadedElem, elemType)
1087-
bc.genConcat(elemType)
10881168
}
1089-
bc.genEndConcat
10901169
}
10911170
StringRef
10921171
}

compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -224,24 +224,27 @@ trait BCodeIdiomatic {
224224

225225
} // end of method genPrimitiveShift()
226226

227-
/*
227+
/* Creates a new `StringBuilder` instance with the requested capacity
228+
*
228229
* can-multi-thread
229230
*/
230-
final def genStartConcat: Unit = {
231+
final def genNewStringBuilder(size: Int): Unit = {
231232
jmethod.visitTypeInsn(Opcodes.NEW, JavaStringBuilderClassName)
232233
jmethod.visitInsn(Opcodes.DUP)
234+
jmethod.visitLdcInsn(Integer.valueOf(size))
233235
invokespecial(
234236
JavaStringBuilderClassName,
235237
INSTANCE_CONSTRUCTOR_NAME,
236-
"()V",
238+
"(I)V",
237239
itf = false
238240
)
239241
}
240242

241-
/*
243+
/* Issue a call to `StringBuilder#append` for the right element type
244+
*
242245
* can-multi-thread
243246
*/
244-
def genConcat(elemType: BType): Unit = {
247+
final def genStringBuilderAppend(elemType: BType): Unit = {
245248
val paramType = elemType match {
246249
case ct: ClassBType if ct.isSubtypeOf(StringRef) => StringRef
247250
case ct: ClassBType if ct.isSubtypeOf(jlStringBufferRef) => jlStringBufferRef
@@ -257,13 +260,38 @@ trait BCodeIdiomatic {
257260
invokevirtual(JavaStringBuilderClassName, "append", bt.descriptor)
258261
}
259262

260-
/*
263+
/* Extract the built `String` from the `StringBuilder`
264+
*
261265
* can-multi-thread
262266
*/
263-
final def genEndConcat: Unit = {
267+
final def genStringBuilderEnd: Unit = {
264268
invokevirtual(JavaStringBuilderClassName, "toString", "()Ljava/lang/String;")
265269
}
266270

271+
/* Concatenate top N arguments on the stack with `StringConcatFactory#makeConcatWithConstants`
272+
* (only works for JDK 9+)
273+
*
274+
* can-multi-thread
275+
*/
276+
final def genIndyStringConcat(
277+
recipe: String,
278+
argTypes: Seq[asm.Type],
279+
constants: Seq[String]
280+
): Unit = {
281+
jmethod.visitInvokeDynamicInsn(
282+
"makeConcatWithConstants",
283+
asm.Type.getMethodDescriptor(StringRef.toASMType, argTypes:_*),
284+
new asm.Handle(
285+
asm.Opcodes.H_INVOKESTATIC,
286+
"java/lang/invoke/StringConcatFactory",
287+
"makeConcatWithConstants",
288+
"(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/String;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;",
289+
false
290+
),
291+
(recipe +: constants):_*
292+
)
293+
}
294+
267295
/*
268296
* Emits one or more conversion instructions based on the types given as arguments.
269297
*

compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class StringConcatTest extends DottyBytecodeTest {
6161
}
6262

6363
assertEquals(List(
64-
"<init>()V",
64+
"<init>(I)V",
6565
"toString()Ljava/lang/String;",
6666
"append(Ljava/lang/String;)Ljava/lang/StringBuilder;",
6767
"append(Ljava/lang/Object;)Ljava/lang/StringBuilder;",
@@ -82,7 +82,7 @@ class StringConcatTest extends DottyBytecodeTest {
8282
)
8383

8484
assertEquals(List(
85-
"<init>()V",
85+
"<init>(I)V",
8686
"toString()Ljava/lang/String;",
8787
"append(Ljava/lang/String;)Ljava/lang/StringBuilder;",
8888
"append(Ljava/lang/String;)Ljava/lang/StringBuilder;",

tests/run/StringConcat.check

1.1 KB
Binary file not shown.

tests/run/StringConcat.scala

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
@main def Test() = {
2+
3+
// This should generally obey section 15.18.1 of the JLS (String Concatenation Operator +)
4+
def concatenatingVariousTypes(): String = {
5+
val str: String = "some string"
6+
val sb: StringBuffer = new StringBuffer("some stringbuffer")
7+
val cs: CharSequence = java.nio.CharBuffer.allocate(50).append("charsequence")
8+
val i: Int = 123456789
9+
val s: Short = 345
10+
val b: Byte = 12
11+
val z: Boolean = true
12+
val f: Float = 3.14f
13+
val j: Long = 98762147483647L
14+
val d: Double = 3.1415d
15+
16+
"String " + str + "\n" +
17+
"StringBuffer " + sb + "\n" +
18+
"CharSequence " + cs + "\n" +
19+
"Int " + i + "\n" +
20+
"Short " + s + "\n" +
21+
"Byte " + b + "\n" +
22+
"Boolean " + z + "\n" +
23+
"Float " + f + "\n" +
24+
"Long " + j + "\n" +
25+
"Double " + d + "\n"
26+
}
27+
// The characters `\u0001` and `\u0002` play a special role in `StringConcatFactory`
28+
def concatenationInvolvingSpecialCharacters(): String = {
29+
val s1 = "Qux"
30+
val s2 = "Quux"
31+
32+
s"Foo \u0001 $s1 Bar \u0002 $s2 Baz"
33+
}
34+
// Concatenation involving more than 200 elements
35+
def largeConcatenation(): String = {
36+
val s00 = "s00"
37+
val s01 = "s01"
38+
val s02 = "s02"
39+
val s03 = "s03"
40+
val s04 = "s04"
41+
val s05 = "s05"
42+
val s06 = "s06"
43+
val s07 = "s07"
44+
val s08 = "s08"
45+
46+
// 24 rows follow
47+
((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
48+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
49+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
50+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
51+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
52+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") +
53+
(s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
54+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
55+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
56+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
57+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
58+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n")) +
59+
((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
60+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
61+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
62+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
63+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
64+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") +
65+
(s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
66+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
67+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
68+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
69+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
70+
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n"))
71+
}
72+
println("----------")
73+
println(concatenatingVariousTypes())
74+
println("----------")
75+
println(concatenationInvolvingSpecialCharacters())
76+
println("----------")
77+
println(largeConcatenation())
78+
println("----------")
79+
}

0 commit comments

Comments
 (0)