|
1 | 1 | #!/usr/bin/env bash |
2 | | -# Builds ts-ts.wasm: the OFFICIAL tree-sitter C runtime + the official |
3 | | -# TypeScript grammar, compiled to a standalone wasm32-wasi reactor module. |
4 | | -# No emscripten, no JS glue, no third-party Go host — just official C sources |
5 | | -# driven from Go via wazero (see wasmts.go). |
| 2 | +# Builds ts-core.wasm: the OFFICIAL tree-sitter C runtime + the base grammars + |
| 3 | +# our host_extra.c (the batched ts_dump_tree walk), compiled to ONE standalone |
| 4 | +# wasm32-wasi reactor module via `zig cc`. No emscripten, no JS glue. |
6 | 5 | # |
7 | | -# Requires: zig (provides clang + wasi-libc cross-compilation), git. |
8 | | -# brew install zig |
| 6 | +# Requires: zig (clang + wasi-libc cross-compile), git, and tree-sitter CLI (only |
| 7 | +# for grammars whose repo ships no committed parser.c — gen=1 rows). |
9 | 8 | # |
10 | | -# Key point: the only wasmtime-dependent part of the runtime (wasm_store.c) is |
11 | | -# guarded by `#ifdef TREE_SITTER_FEATURE_WASM`, which we do NOT define — so the |
12 | | -# stock amalgamation (lib/src/lib.c) compiles to wasi cleanly with no stubs. |
| 9 | +# For wasm we compile each grammar IN PLACE from a full clone, so relative |
| 10 | +# includes (e.g. typescript's ../../common/scanner.h) and src-root headers (html |
| 11 | +# tag.h, haskell unicode.h) resolve naturally — none of the vendor.sh copy/rewrite |
| 12 | +# dance is needed. Quirks that remain: SHA pins (dart), `tree-sitter generate` |
| 13 | +# (sql), and a 2nd grammar from one repo (tsx). See plan §6.1. |
# Strict mode: stop on the first failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Work from the directory that holds this script, so relative paths used later
# (csrc/host_extra.c, the default output file) resolve next to it.
cd "$(dirname "$0")"
15 | 16 |
|
16 | | -TS_VERSION="${TS_VERSION:-v0.25.10}" # tree-sitter runtime |
17 | | -TS_TS_VERSION="${TS_TS_VERSION:-v0.23.2}" # tree-sitter-typescript grammar |
# Build settings; each keeps a non-empty value already present in the
# environment and otherwise falls back to the default shown here.
: "${TS_VERSION:=v0.25.10}" # tree-sitter runtime tag to clone
: "${OUT:=ts-core.wasm}"    # output path of the compiled module
# Scratch directory that receives every clone; removed when the script exits.
WORK=$(mktemp -d)
trap 'rm -rf "$WORK"' EXIT
20 | 21 |
|
21 | | -git clone --depth 1 --branch "$TS_VERSION" https://github.com/tree-sitter/tree-sitter "$WORK/tree-sitter" |
22 | | -git clone --depth 1 --branch "$TS_TS_VERSION" https://github.com/tree-sitter/tree-sitter-typescript "$WORK/ts-typescript" |
# Grammar table, one whitespace-separated row per grammar:
#   id  repo  ref  srcsubdir  [gen]
# id        suffix of the exported tree_sitter_<id> symbol and clone dir name
# repo      GitHub "owner/name"
# ref       tag or branch, or a commit SHA (handled by clone's fallback path)
# srcsubdir directory inside the repo that holds parser.c (and any scanner)
# gen       optional; 1 = run `tree-sitter generate` when parser.c is missing
GRAMMARS=()
while IFS= read -r row; do
  GRAMMARS+=("$row")
done <<'EOF'
python tree-sitter/tree-sitter-python v0.25.0 src
typescript tree-sitter/tree-sitter-typescript v0.23.2 typescript/src
tsx tree-sitter/tree-sitter-typescript v0.23.2 tsx/src
javascript tree-sitter/tree-sitter-javascript v0.25.0 src
go tree-sitter/tree-sitter-go v0.25.0 src
rust tree-sitter/tree-sitter-rust v0.24.2 src
java tree-sitter/tree-sitter-java v0.23.5 src
c tree-sitter/tree-sitter-c v0.24.2 src
cpp tree-sitter/tree-sitter-cpp v0.23.4 src
ruby tree-sitter/tree-sitter-ruby v0.23.1 src
c_sharp tree-sitter/tree-sitter-c-sharp v0.23.5 src
php tree-sitter/tree-sitter-php v0.24.2 php/src
swift alex-pinkus/tree-sitter-swift 0.7.3-with-generated-files src
kotlin tree-sitter-grammars/tree-sitter-kotlin v1.1.0 src
scala tree-sitter/tree-sitter-scala v0.26.0 src
bash tree-sitter/tree-sitter-bash v0.25.1 src
lua tree-sitter-grammars/tree-sitter-lua v0.5.0 src
dart UserNobody14/tree-sitter-dart a9bdfa3 src
r r-lib/tree-sitter-r v1.2.0 src
objc tree-sitter-grammars/tree-sitter-objc v3.0.2 src
html tree-sitter/tree-sitter-html v0.23.2 src
css tree-sitter/tree-sitter-css v0.25.0 src
scss tree-sitter-grammars/tree-sitter-scss v1.0.0 src
sql DerekStride/tree-sitter-sql v0.3.11 src 1
markdown tree-sitter-grammars/tree-sitter-markdown v0.5.3 tree-sitter-markdown/src
zig tree-sitter-grammars/tree-sitter-zig v1.1.2 src
julia tree-sitter/tree-sitter-julia v0.25.0 src
fortran stadelmanma/tree-sitter-fortran v0.6.0 src
haskell tree-sitter/tree-sitter-haskell v0.23.1 src
ocaml tree-sitter/tree-sitter-ocaml v0.25.0 grammars/ocaml/src
solidity JoranHonig/tree-sitter-solidity v1.2.13 src
EOF
23 | 56 |
|
#######################################
# Clone a GitHub repository at a given ref into a destination directory.
# Arguments:
#   $1 - repository as "owner/name"
#   $2 - ref: a tag or branch name, or a commit SHA
#   $3 - destination directory
# Outputs:
#   nothing; all git output is discarded
# Returns:
#   0 on success; 1 if the fallback clone fails; otherwise the status of the
#   final `git checkout`
#######################################
clone() {
  local repo="$1" ref="$2" dest="$3"
  local url="https://github.com/$repo"

  # Fast path: a shallow clone works whenever the ref is a tag or a branch.
  if git clone --depth 1 --branch "$ref" "$url" "$dest" >/dev/null 2>&1; then
    return 0
  fi

  # Fallback (e.g. the ref is a commit SHA): clone without --depth/--branch,
  # then check the ref out.
  if ! git clone "$url" "$dest" >/dev/null 2>&1; then
    return 1
  fi
  git -C "$dest" checkout "$ref" >/dev/null 2>&1
}
| 63 | + |
# Fetch the pinned tree-sitter runtime. git's stderr is discarded to keep the
# log short, so a failure is reported explicitly here instead of letting
# `set -e` end the script without any message.
echo "→ tree-sitter runtime $TS_VERSION"
if ! git clone --depth 1 --branch "$TS_VERSION" https://github.com/tree-sitter/tree-sitter "$WORK/tree-sitter" 2>/dev/null; then
  echo "error: could not clone tree-sitter runtime $TS_VERSION" >&2
  exit 1
fi

# Accumulators filled by the per-grammar loop and consumed by the compile step.
# SRCS starts with the runtime amalgamation (lib.c) plus our host_extra.c.
SRCS=("$WORK/tree-sitter/lib/src/lib.c" "csrc/host_extra.c")
# Include paths: the runtime's public and private header directories.
INCS=(-I "$WORK/tree-sitter/lib/include" -I "$WORK/tree-sitter/lib/src")
# One -Wl,--export=tree_sitter_<id> linker flag per grammar that was added.
EXPORTS=()
# Ids of grammars that were added / that failed to clone or had no parser.c.
BUILT=()
FAILED=()
| 71 | + |
# Clone every grammar listed in GRAMMARS and queue its sources for the compile
# step. A grammar that cannot be cloned, or that has no parser.c, is recorded
# in FAILED and skipped; the remaining grammars are still processed.
for row in "${GRAMMARS[@]}"; do
  # Split the row into its columns; gen is empty when the row has no 5th field.
  read -r id repo ref sub gen <<<"$row"
  printf ' %-12s %s@%s ' "$id" "$repo" "$ref"

  if ! clone "$repo" "$ref" "$WORK/$id"; then
    echo "CLONE FAIL"
    FAILED+=("$id")
    continue
  fi

  gsrc="$WORK/$id/$sub"

  # gen=1 rows: generate parser.c from the repo root when none is committed.
  # A failed generate is tolerated here; the parser.c check below catches it.
  if [[ "${gen:-0}" == "1" && ! -f "$gsrc/parser.c" ]]; then
    (cd "$WORK/$id" && tree-sitter generate >/dev/null 2>&1) || true
  fi

  if [[ ! -f "$gsrc/parser.c" ]]; then
    echo "NO parser.c"
    FAILED+=("$id")
    continue
  fi

  SRCS+=("$gsrc/parser.c")
  # An external scanner is optional and may be written in C or C++.
  for scanner in scanner.c scanner.cc; do
    if [[ -f "$gsrc/$scanner" ]]; then
      SRCS+=("$gsrc/$scanner")
    fi
  done

  INCS+=(-I "$gsrc")
  EXPORTS+=("-Wl,--export=tree_sitter_${id}")
  BUILT+=("$id")
  echo "ok"
done
| 89 | + |
# Compile the runtime, host_extra.c and every collected grammar source into a
# single wasm module with `zig cc`, exporting malloc/free, the listed
# tree-sitter runtime entry points, the ts_dump_* helpers, and one
# tree_sitter_<id> symbol per grammar that was added.
echo "→ compiling ${#SRCS[@]} sources, ${#BUILT[@]} grammars → $OUT"
# EXPORTS is empty when no grammar was added. Under `set -u`, Bash older than
# 4.4 treats "${EXPORTS[@]}" on an empty array as an unbound variable, so it is
# expanded through the ${arr[@]+...} guard. INCS and SRCS always start with
# elements and need no guard.
zig cc --target=wasm32-wasi-musl -mexec-model=reactor \
  "${INCS[@]}" "${SRCS[@]}" \
  -o "$OUT" -Oz -fPIC -Wl,--no-entry -Wl,--strip-debug \
  -Wl,--export=malloc -Wl,--export=free \
  -Wl,--export=ts_parser_new -Wl,--export=ts_parser_delete \
  -Wl,--export=ts_parser_set_language -Wl,--export=ts_parser_parse_string \
  -Wl,--export=ts_parser_reset \
  -Wl,--export=ts_tree_delete -Wl,--export=ts_tree_root_node \
  -Wl,--export=ts_node_child_count -Wl,--export=ts_node_child \
  -Wl,--export=ts_node_type -Wl,--export=ts_node_start_byte \
  -Wl,--export=ts_node_end_byte -Wl,--export=ts_node_has_error \
  -Wl,--export=ts_dump_tree -Wl,--export=ts_dump_rec_size \
  -Wl,--export=ts_language_symbol_count -Wl,--export=ts_language_symbol_name \
  ${EXPORTS[@]+"${EXPORTS[@]}"}
39 | 105 |
|
40 | | -echo "built ts-ts.wasm ($(du -h ts-ts.wasm | cut -f1)) from tree-sitter $TS_VERSION + tree-sitter-typescript $TS_TS_VERSION" |
# Summary: output size, runtime version, and which grammars were included.
echo "built $OUT ($(du -h "$OUT" | cut -f1)) — runtime $TS_VERSION, ${#BUILT[@]} grammars"
# List skipped grammars, if any. An explicit `if` keeps a false test from
# becoming the status of this statement.
if [ "${#FAILED[@]}" -gt 0 ]; then
  echo "FAILED: ${FAILED[*]}"
fi
# ${BUILT[*]-} rather than ${BUILT[*]}: with `set -u`, Bash older than 4.4
# rejects expanding an empty array as an unbound variable.
echo "grammars: ${BUILT[*]-}"
0 commit comments