Skip to content

Commit 68c5005

Browse files
committed
Fix string.c
1 parent e736de7 commit 68c5005

File tree

3 files changed

+49
-16
lines changed

3 files changed

+49
-16
lines changed

internal/cmd/generator/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.PHONY: asm
22
asm:
3-
clang -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c
3+
clang -Wall -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c
44

55
.PHONY: generate
66
generate:

internal/cmd/generator/simd/string.c

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,29 @@
11
#include <stdio.h>
22
#include <stdint.h>
33
#include <string.h>
4+
#include <stdbool.h>
45
#include <immintrin.h>
56

7+
static const bool needEscape[256] = { // lookup by byte value: 1 = byte is reported as needing an escape, 0 = byte is passed over
8+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F   (low nibble of the byte value; each row below is one high nibble)
9+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0F: every byte flagged
10+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F: every byte flagged
11+
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2F: only 0x22 ('"') flagged
12+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3F
13+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4F
14+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50-0x5F: only 0x5C ('\\') flagged
15+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6F
16+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70-0x7F: note 0x7F is not flagged
17+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80-0x8F: every byte with the high bit set is flagged (through 0xFF)
18+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90-0x9F
19+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xA0-0xAF
20+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xB0-0xBF
21+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xC0-0xCF
22+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xD0-0xDF
23+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xE0-0xEF
24+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xF0-0xFF
25+
};
26+
627
uint64_t findHTMLEscapeIndex64(char *buf, int len) {
728
static const uint64_t lsb = 0x0101010101010101;
829
static const uint64_t msb = 0x8080808080808080;
@@ -26,7 +47,7 @@ uint64_t findHTMLEscapeIndex64(char *buf, int len) {
2647
}
2748
sp += 8;
2849
}
29-
return 8 * chunkLen;
50+
return chunkIdx * 8;
3051
}
3152

3253
uint64_t findHTMLEscapeIndex128(char *buf, int len) {
@@ -40,7 +61,6 @@ uint64_t findHTMLEscapeIndex128(char *buf, int len) {
4061
static const __m64 gt = (__m64)(lsb * '>');
4162
static const __m64 amp = (__m64)(lsb * '&');
4263

43-
__m128i zeroV = _mm_setzero_si128();
4464
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
4565
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
4666
__m128i spaceV = _mm_set_epi64(space, space);
@@ -87,7 +107,6 @@ uint64_t findHTMLEscapeIndex256(char *buf, int len) {
87107
static const __m64 gt = (__m64)(lsb * '>');
88108
static const __m64 amp = (__m64)(lsb * '&');
89109

90-
__m256i zeroV = _mm256_setzero_si256();
91110
__m256i msbV = _mm256_set1_epi64x(msb);
92111
__m256i lsbV = _mm256_set1_epi64x(lsb);
93112
__m256i spaceV = _mm256_set1_epi64x(space);
@@ -146,7 +165,14 @@ uint64_t findEscapeIndex64(char *buf, int len) {
146165
}
147166
sp += 8;
148167
}
149-
return 8 * chunkLen;
168+
int idx = 8 * chunkLen; // resume the byte-wise scan at offset 8 * chunkLen
169+
const unsigned char *bytes = (const unsigned char *)buf; // unsigned view: plain char may be signed, and a byte >= 0x80 must select needEscape[128..255], not a negative offset
170+
for (; idx < len; idx++) {
171+
if (needEscape[bytes[idx]] != 0) { // must be the file-scope table: a local `bool *needEscape = needEscape;` shadows it and reads its own uninitialized value
172+
return idx;
173+
}
174+
}
175+
return len; // no flagged byte in the remaining tail
150176
}
151177

152178
uint64_t findEscapeIndex128(char *buf, int len) {
@@ -157,7 +183,6 @@ uint64_t findEscapeIndex128(char *buf, int len) {
157183
static const __m64 quote = (__m64)(lsb * '"');
158184
static const __m64 escape = (__m64)(lsb * '\\');
159185

160-
__m128i zeroV = _mm_setzero_si128();
161186
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
162187
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
163188
__m128i spaceV = _mm_set_epi64(space, space);
@@ -181,10 +206,17 @@ uint64_t findEscapeIndex128(char *buf, int len) {
181206
sp += 16;
182207
}
183208
int idx = 16 * chunkLen;
184-
if (len - idx >= 8) {
185-
return idx + findEscapeIndex64(sp, len - idx);
209+
int remainLen = len - idx;
210+
if (remainLen >= 8) {
211+
return idx + findEscapeIndex64(sp, remainLen);
186212
}
187-
return idx;
213+
bool *needEscape = needEscape;
214+
for (; idx < len; idx++) {
215+
if (needEscape[buf[idx]] != 0) {
216+
return idx;
217+
}
218+
}
219+
return len;
188220
}
189221

190222
uint64_t findEscapeIndex256(char *buf, int len) {
@@ -195,7 +227,6 @@ uint64_t findEscapeIndex256(char *buf, int len) {
195227
static const __m64 quote = (__m64)(lsb * '"');
196228
static const __m64 escape = (__m64)(lsb * '\\');
197229

198-
__m256i zeroV = _mm256_setzero_si256();
199230
__m256i msbV = _mm256_set1_epi64x(msb);
200231
__m256i lsbV = _mm256_set1_epi64x(lsb);
201232
__m256i spaceV = _mm256_set1_epi64x(space);
@@ -214,7 +245,7 @@ uint64_t findEscapeIndex256(char *buf, int len) {
214245
__m256i mask = _mm256_or_si256(_mm256_or_si256(_mm256_or_si256(n, spaceN), quoteN), escapeN);
215246
int movemask = _mm256_movemask_epi8(_mm256_and_si256(mask, msbV));
216247
if (movemask != 0) {
217-
return __builtin_ctz(movemask);
248+
return __builtin_ctz(movemask) + chunkIdx * 32;
218249
}
219250
sp += 32;
220251
}
@@ -225,5 +256,11 @@ uint64_t findEscapeIndex256(char *buf, int len) {
225256
} else if (remainLen >= 8) {
226257
return idx + findEscapeIndex64(sp, remainLen);
227258
}
228-
return idx;
259+
bool *needEscape = needEscape;
260+
for (; idx < len; idx++) {
261+
if (needEscape[buf[idx]] != 0) {
262+
return idx;
263+
}
264+
}
265+
return len;
229266
}

internal/encoder/string.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -758,10 +758,6 @@ func appendString(buf []byte, s string) []byte {
758758
}
759759
ESCAPE:
760760
c := s[j]
761-
if !needEscape[c] {
762-
j++
763-
continue
764-
}
765761
switch c {
766762
case '\\', '"':
767763
buf = append(buf, s[i:j]...)

0 commit comments

Comments
 (0)