diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..93d047b --- /dev/null +++ b/.clang-format @@ -0,0 +1,12 @@ +AlignArrayOfStructures: Left +BasedOnStyle: LLVM +IndentCaseLabels: true +IndentGotoLabels: true +IndentPPDirectives: AfterHash +IndentWidth: 2 +KeepEmptyLinesAtTheStartOfBlocks: false +SeparateDefinitionBlocks: Always +SortIncludes: CaseInsensitive +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceBeforeCaseColon: false diff --git a/.gitattributes b/.gitattributes index 7da892b..21ab52e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,4 @@ /src/**/* linguist-generated=true -/src/scanner.cc linguist-generated=false +/src/scanner.c linguist-generated=false # Exclude test files from language stats /test/**/*.ex linguist-documentation=true diff --git a/Package.swift b/Package.swift index 19e6fef..a3414ba 100644 --- a/Package.swift +++ b/Package.swift @@ -30,7 +30,7 @@ let package = Package( ], sources: [ "src/parser.c", - "src/scanner.cc", + "src/scanner.c", ], resources: [ .copy("queries") diff --git a/binding.gyp b/binding.gyp index b3fbb75..a78b7a6 100644 --- a/binding.gyp +++ b/binding.gyp @@ -9,7 +9,7 @@ "sources": [ "bindings/node/binding.cc", "src/parser.c", - "src/scanner.cc" + "src/scanner.c" ], "cflags_c": [ "-std=c99", diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index 673642f..a90672e 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -2,7 +2,7 @@ fn main() { let src_dir = std::path::Path::new("src"); let mut c_config = cc::Build::new(); - c_config.include(&src_dir); + c_config.include(src_dir); c_config .flag_if_supported("-Wno-unused-parameter") .flag_if_supported("-Wno-unused-but-set-variable") @@ -10,26 +10,7 @@ fn main() { let parser_path = src_dir.join("parser.c"); c_config.file(&parser_path); - // If your language uses an external scanner written in C, - // then include this block of code: - - /* let scanner_path = src_dir.join("scanner.c"); c_config.file(&scanner_path); println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); - */ - - c_config.compile("parser"); - println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); - - let mut cpp_config = cc::Build::new(); - cpp_config.cpp(true); - cpp_config.include(&src_dir); - cpp_config - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-unused-but-set-variable"); - let scanner_path = src_dir.join("scanner.cc"); - cpp_config.file(&scanner_path); - cpp_config.compile("scanner"); - println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); } diff --git a/docs/parser.md b/docs/parser.md index 8aedee9..7bba6af 100644 --- a/docs/parser.md +++ b/docs/parser.md @@ -33,7 +33,7 @@ For detailed introduction see the official guide on [Creating parsers](https://t Essentially, we define relevant language rules in `grammar.js`, based on which Tree-sitter generates parser code (under `src/`). In some cases, we want to write -custom C++ code for tokenizing specific character sequences (in `src/scanner.cc`). +custom C code for tokenizing specific character sequences (in `src/scanner.c`). The grammar rules may often conflict with each other, meaning that the given sequence of tokens has multiple valid interpretations given one _token_ of lookahead. diff --git a/package-lock.json b/package-lock.json index 30873ee..4865476 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "0.1.0", "license": "Apache-2.0", "dependencies": { + "clang-format": "^1.8.0", "nan": "^2.15.0" }, "devDependencies": { @@ -16,11 +17,147 @@ "tree-sitter-cli": "^0.20.7" } }, + "node_modules/async": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.4.tgz", + "integrity": "sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ==" + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/clang-format": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/clang-format/-/clang-format-1.8.0.tgz", + "integrity": "sha512-pK8gzfu55/lHzIpQ1givIbWfn3eXnU7SfxqIwVgnn5jEM6j4ZJYjpFqFs4iSBPNedzRMmfjYjuQhu657WAXHXw==", + "dependencies": { + "async": "^3.2.3", + "glob": "^7.0.0", + "resolve": "^1.1.6" + }, + "bin": { + "check-clang-format": "bin/check-clang-format.js", + "clang-format": "index.js", + "git-clang-format": "bin/git-clang-format" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==" + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + }, + "node_modules/function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" + }, + "node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dependencies": { + "function-bind": "^1.1.1" + }, + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/is-core-module": { + "version": "2.12.1", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.12.1.tgz", + "integrity": "sha512-Q4ZuBAe2FUsKtyQJoQHlvP8OvBERxO3jEmy1I7hcRXcJBGGHFh/aJBswbXuS9sgrDH2QUO8ilkwNPHvHMd8clg==", + "dependencies": { + "has": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, "node_modules/nan": { "version": "2.15.0", "resolved": "https://registry.npmjs.org/nan/-/nan-2.15.0.tgz", "integrity": "sha512-8ZtvEnA2c5aYCZYd1cvgdnU6cqwixRoYg70xPLWUws5ORTa/lnw+u4amixRS/Ac5U5mQVgp9pnlSUnbNWFaWZQ==" }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==" + }, "node_modules/prettier": { "version": "2.3.2", "resolved": "https://registry.npmjs.org/prettier/-/prettier-2.3.2.tgz", @@ -33,6 +170,33 @@ "node": ">=10.13.0" } }, + "node_modules/resolve": { + "version": "1.22.2", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.2.tgz", + "integrity": "sha512-Sb+mjNHOULsBv818T40qSPeRiuWLyaGMa5ewydRLFimneixmVy2zdivRl+AF6jaYPC8ERxGDmFSiqui6SfPd+g==", + "dependencies": { + "is-core-module": "^2.11.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/tree-sitter-cli": { "version": "0.20.7", "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.7.tgz", @@ -42,25 +206,163 @@ "bin": { "tree-sitter": "cli.js" } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" } }, "dependencies": { + "async": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.4.tgz", + "integrity": "sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ==" + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "clang-format": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/clang-format/-/clang-format-1.8.0.tgz", + "integrity": "sha512-pK8gzfu55/lHzIpQ1givIbWfn3eXnU7SfxqIwVgnn5jEM6j4ZJYjpFqFs4iSBPNedzRMmfjYjuQhu657WAXHXw==", + "requires": { + "async": "^3.2.3", + "glob": "^7.0.0", + "resolve": "^1.1.6" + } + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==" + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + }, + "function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" + }, + "glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "requires": { + "function-bind": "^1.1.1" + } + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "is-core-module": { + "version": "2.12.1", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.12.1.tgz", + "integrity": "sha512-Q4ZuBAe2FUsKtyQJoQHlvP8OvBERxO3jEmy1I7hcRXcJBGGHFh/aJBswbXuS9sgrDH2QUO8ilkwNPHvHMd8clg==", + "requires": { + "has": "^1.0.3" + } + }, + "minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "requires": { + "brace-expansion": "^1.1.7" + } + }, "nan": { "version": "2.15.0", "resolved": "https://registry.npmjs.org/nan/-/nan-2.15.0.tgz", "integrity": "sha512-8ZtvEnA2c5aYCZYd1cvgdnU6cqwixRoYg70xPLWUws5ORTa/lnw+u4amixRS/Ac5U5mQVgp9pnlSUnbNWFaWZQ==" }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "requires": { + "wrappy": "1" + } + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==" + }, + "path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==" + }, "prettier": { "version": "2.3.2", "resolved": "https://registry.npmjs.org/prettier/-/prettier-2.3.2.tgz", "integrity": "sha512-lnJzDfJ66zkMy58OL5/NY5zp70S7Nz6KqcKkXYzn2tMVrNxvbqaBpg7H3qHaLxCJ5lNMsGuM8+ohS7cZrthdLQ==", "dev": true }, + "resolve": { + "version": "1.22.2", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.2.tgz", + "integrity": "sha512-Sb+mjNHOULsBv818T40qSPeRiuWLyaGMa5ewydRLFimneixmVy2zdivRl+AF6jaYPC8ERxGDmFSiqui6SfPd+g==", + "requires": { + "is-core-module": "^2.11.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + } + }, + "supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==" + }, "tree-sitter-cli": { "version": "0.20.7", "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.7.tgz", "integrity": "sha512-MHABT8oCPr4D0fatsPo6ATQ9H4h9vHpPRjlxkxJs80tpfAEKGn6A1zU3eqfCKBcgmfZDe9CiL3rKOGMzYHwA3w==", "dev": true + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" } } } diff --git a/package.json b/package.json index 81d3ac6..0f27d24 100644 --- a/package.json +++ b/package.json @@ -16,13 +16,14 @@ }, "scripts": { "test": "tree-sitter test", - "format": "prettier --trailing-comma es5 --write grammar.js", - "format-check": "prettier --trailing-comma es5 --check grammar.js" + "format": "prettier --trailing-comma es5 --write grammar.js && clang-format -i src/scanner.c", + "format-check": "prettier --trailing-comma es5 --check grammar.js && cat src/scanner.c | clang-format src/scanner.c | diff src/scanner.c -" }, "dependencies": { "nan": "^2.15.0" }, "devDependencies": { + "clang-format": "^1.8.0", "prettier": "^2.3.2", "tree-sitter-cli": "^0.20.7" }, diff --git a/src/scanner.cc b/src/scanner.c similarity index 72% rename from src/scanner.cc rename to src/scanner.c index 8889957..4f69b09 100644 --- a/src/scanner.cc +++ b/src/scanner.c @@ -1,7 +1,5 @@ #include -namespace { - // See references in grammar.externals enum TokenType { QUOTED_CONTENT_I_SINGLE, @@ -36,37 +34,31 @@ enum TokenType { QUOTED_ATOM_START }; -void advance(TSLexer* lexer) { - lexer->advance(lexer, false); -} +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } -void skip(TSLexer *lexer) { - lexer->advance(lexer, true); -} +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } // Note: some checks require several lexer steps of lookahead // and alter its state, for these we use names check_* -bool is_whitespace(int32_t c) { +static inline bool is_whitespace(int32_t c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } -bool is_inline_whitespace(int32_t c) { +static inline bool is_inline_whitespace(int32_t c) { return c == ' ' || c == '\t'; } -bool is_newline(int32_t c) { +static inline bool is_newline(int32_t c) { // Note: this implies \r\n is treated as two line breaks, // but in our case it's fine, since multiple line breaks // make no difference return c == '\n' || c == '\r'; } -bool is_digit(int32_t c) { - return '0' <= c && c <= '9'; -} +static inline bool is_digit(int32_t c) { return '0' <= c && c <= '9'; } -bool check_keyword_end(TSLexer* lexer) { +static inline bool check_keyword_end(TSLexer *lexer) { if (lexer->lookahead == ':') { advance(lexer); return is_whitespace(lexer->lookahead); @@ -74,7 +66,7 @@ bool check_keyword_end(TSLexer* lexer) { return false; } -bool check_operator_end(TSLexer* lexer) { +static bool check_operator_end(TSLexer *lexer) { // Keyword if (lexer->lookahead == ':') { return !check_keyword_end(lexer); @@ -97,21 +89,22 @@ bool check_operator_end(TSLexer* lexer) { } const char token_terminators[] = { - // Operator starts - '@', '.', '+', '-', '^', '-', '*', '/', '<', '>', '|', '~', '=', '&', '\\', '%', - // Delimiters - '{', '}', '[', ']', '(', ')', '"', '\'', - // Separators - ',', ';', - // Comment - '#' -}; - -const uint8_t token_terminators_length = sizeof(token_terminators) / sizeof(char); + // Operator starts + '@', '.', '+', '-', '^', '-', '*', '/', '<', '>', '|', '~', '=', '&', '\\', + '%', + // Delimiters + '{', '}', '[', ']', '(', ')', '"', '\'', + // Separators + ',', ';', + // Comment + '#'}; + +const uint8_t token_terminators_length = + sizeof(token_terminators) / sizeof(char); // Note: this is a heuristic as we only use this to distinguish word // operators and we don't want to include complex Unicode ranges -bool is_token_end(int32_t c) { +static inline bool is_token_end(int32_t c) { for (uint8_t i = 0; i < token_terminators_length; i++) { if (c == token_terminators[i]) { return true; @@ -121,44 +114,46 @@ bool is_token_end(int32_t c) { return is_whitespace(c); } -struct QuotedContentInfo { - const TokenType token_type; +typedef struct { + const enum TokenType token_type; const bool supports_interpol; const int32_t end_delimiter; const uint8_t delimiter_length; -}; +} QuotedContentInfo; const QuotedContentInfo quoted_content_infos[] = { - { QUOTED_CONTENT_I_SINGLE, true, '\'', 1 }, - { QUOTED_CONTENT_I_DOUBLE, true, '"', 1 }, - { QUOTED_CONTENT_I_HEREDOC_SINGLE, true, '\'', 3 }, - { QUOTED_CONTENT_I_HEREDOC_DOUBLE, true, '"', 3 }, - { QUOTED_CONTENT_I_PARENTHESIS, true, ')', 1 }, - { QUOTED_CONTENT_I_CURLY, true, '}', 1 }, - { QUOTED_CONTENT_I_SQUARE, true, ']', 1 }, - { QUOTED_CONTENT_I_ANGLE, true, '>', 1 }, - { QUOTED_CONTENT_I_BAR, true, '|', 1 }, - { QUOTED_CONTENT_I_SLASH, true, '/', 1 }, - { QUOTED_CONTENT_SINGLE, false, '\'', 1 }, - { QUOTED_CONTENT_DOUBLE, false, '"', 1 }, - { QUOTED_CONTENT_HEREDOC_SINGLE, false, '\'', 3 }, - { QUOTED_CONTENT_HEREDOC_DOUBLE, false, '"', 3 }, - { QUOTED_CONTENT_PARENTHESIS, false, ')', 1 }, - { QUOTED_CONTENT_CURLY, false, '}', 1 }, - { QUOTED_CONTENT_SQUARE, false, ']', 1 }, - { QUOTED_CONTENT_ANGLE, false, '>', 1 }, - { QUOTED_CONTENT_BAR, false, '|', 1 }, - { QUOTED_CONTENT_SLASH, false, '/', 1 }, + {QUOTED_CONTENT_I_SINGLE, true, '\'', 1}, + {QUOTED_CONTENT_I_DOUBLE, true, '"', 1}, + {QUOTED_CONTENT_I_HEREDOC_SINGLE, true, '\'', 3}, + {QUOTED_CONTENT_I_HEREDOC_DOUBLE, true, '"', 3}, + {QUOTED_CONTENT_I_PARENTHESIS, true, ')', 1}, + {QUOTED_CONTENT_I_CURLY, true, '}', 1}, + {QUOTED_CONTENT_I_SQUARE, true, ']', 1}, + {QUOTED_CONTENT_I_ANGLE, true, '>', 1}, + {QUOTED_CONTENT_I_BAR, true, '|', 1}, + {QUOTED_CONTENT_I_SLASH, true, '/', 1}, + {QUOTED_CONTENT_SINGLE, false, '\'', 1}, + {QUOTED_CONTENT_DOUBLE, false, '"', 1}, + {QUOTED_CONTENT_HEREDOC_SINGLE, false, '\'', 3}, + {QUOTED_CONTENT_HEREDOC_DOUBLE, false, '"', 3}, + {QUOTED_CONTENT_PARENTHESIS, false, ')', 1}, + {QUOTED_CONTENT_CURLY, false, '}', 1}, + {QUOTED_CONTENT_SQUARE, false, ']', 1}, + {QUOTED_CONTENT_ANGLE, false, '>', 1}, + {QUOTED_CONTENT_BAR, false, '|', 1}, + {QUOTED_CONTENT_SLASH, false, '/', 1}, }; -const uint8_t quoted_content_infos_length = sizeof(quoted_content_infos) / sizeof(QuotedContentInfo); +const uint8_t quoted_content_infos_length = + sizeof(quoted_content_infos) / sizeof(QuotedContentInfo); -int8_t find_quoted_token_info(const bool* valid_symbols) { +static inline int8_t find_quoted_token_info(const bool *valid_symbols) { // Quoted tokens are mutually exclusive and only one should be valid // at a time. If multiple are valid it means we parse an arbitrary // code outside quotes, in which case we don't want to tokenize it as // quoted content. - if (valid_symbols[QUOTED_CONTENT_I_SINGLE] && valid_symbols[QUOTED_CONTENT_I_DOUBLE]) { + if (valid_symbols[QUOTED_CONTENT_I_SINGLE] && + valid_symbols[QUOTED_CONTENT_I_DOUBLE]) { return -1; } @@ -171,10 +166,10 @@ int8_t find_quoted_token_info(const bool* valid_symbols) { return -1; } -bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) { - lexer->result_symbol = info.token_type; +bool scan_quoted_content(TSLexer *lexer, const QuotedContentInfo *info) { + lexer->result_symbol = info->token_type; - bool is_heredoc = (info.delimiter_length == 3); + bool is_heredoc = (info->delimiter_length == 3); for (bool has_content = false; true; has_content = true) { bool newline = false; @@ -192,33 +187,35 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) { lexer->mark_end(lexer); - if (lexer->lookahead == info.end_delimiter) { + if (lexer->lookahead == info->end_delimiter) { uint8_t length = 1; - while (length < info.delimiter_length) { + while (length < info->delimiter_length) { advance(lexer); - if (lexer->lookahead == info.end_delimiter) { + if (lexer->lookahead == info->end_delimiter) { length++; } else { break; } } - if (length == info.delimiter_length && (!is_heredoc || newline)) { + if (length == info->delimiter_length && (!is_heredoc || newline)) { return has_content; } } else { if (lexer->lookahead == '#') { advance(lexer); - if (info.supports_interpol && lexer->lookahead == '{') { + if (info->supports_interpol && lexer->lookahead == '{') { return has_content; } } else if (lexer->lookahead == '\\') { advance(lexer); if (is_heredoc && lexer->lookahead == '\n') { // We need to know about the newline to correctly recognise - // heredoc end delimiter, so we intentionally ignore escaping - } else if (info.supports_interpol || lexer->lookahead == info.end_delimiter) { + // heredoc end delimiter, so we intentionally ignore + // escaping + } else if (info->supports_interpol || + lexer->lookahead == info->end_delimiter) { return has_content; } } else if (lexer->lookahead == '\0') { @@ -235,7 +232,7 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) { return false; } -bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { +static bool scan_newline(TSLexer *lexer, const bool *valid_symbols) { advance(lexer); while (is_whitespace(lexer->lookahead)) { @@ -261,7 +258,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return false; } - if (valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR] ) { + if (valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR]) { lexer->result_symbol = NEWLINE_BEFORE_BINARY_OPERATOR; // &&, &&& @@ -276,7 +273,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return check_operator_end(lexer); } } - // =, ==, ===, =~, => + // =, ==, ===, =~, => } else if (lexer->lookahead == '=') { advance(lexer); if (lexer->lookahead == '=') { @@ -296,16 +293,17 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { } else { return check_operator_end(lexer); } - // :: + // :: } else if (lexer->lookahead == ':') { advance(lexer); if (lexer->lookahead == ':') { advance(lexer); // Ignore ::: atom - if (lexer->lookahead == ':') return false; + if (lexer->lookahead == ':') + return false; return check_operator_end(lexer); } - // ++, +++ + // ++, +++ } else if (lexer->lookahead == '+') { advance(lexer); if (lexer->lookahead == '+') { @@ -317,7 +315,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return check_operator_end(lexer); } } - // --, ---, -> + // --, ---, -> } else if (lexer->lookahead == '-') { advance(lexer); if (lexer->lookahead == '-') { @@ -332,11 +330,10 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { advance(lexer); return check_operator_end(lexer); } - // <, <=, <-, <>, <~, <~>, <|>, <<<, <<~ + // <, <=, <-, <>, <~, <~>, <|>, <<<, <<~ } else if (lexer->lookahead == '<') { advance(lexer); - if (lexer->lookahead == '=' || - lexer->lookahead == '-' || + if (lexer->lookahead == '=' || lexer->lookahead == '-' || lexer->lookahead == '>') { advance(lexer); return check_operator_end(lexer); @@ -356,15 +353,14 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { } } else if (lexer->lookahead == '<') { advance(lexer); - if (lexer->lookahead == '<' || - lexer->lookahead == '~') { + if (lexer->lookahead == '<' || lexer->lookahead == '~') { advance(lexer); return check_operator_end(lexer); } } else { return check_operator_end(lexer); } - // >, >=, >>> + // >, >=, >>> } else if (lexer->lookahead == '>') { advance(lexer); if (lexer->lookahead == '=') { @@ -379,7 +375,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { } else { return check_operator_end(lexer); } - // ^^^ + // ^^^ } else if (lexer->lookahead == '^') { advance(lexer); if (lexer->lookahead == '^') { @@ -389,7 +385,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return check_operator_end(lexer); } } - // !=, !== + // !=, !== } else if (lexer->lookahead == '!') { advance(lexer); if (lexer->lookahead == '=') { @@ -401,7 +397,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return check_operator_end(lexer); } } - // ~>, ~>> + // ~>, ~>> } else if (lexer->lookahead == '~') { advance(lexer); if (lexer->lookahead == '>') { @@ -413,7 +409,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return check_operator_end(lexer); } } - // |, ||, |||, |> + // |, ||, |||, |> } else if (lexer->lookahead == '|') { advance(lexer); if (lexer->lookahead == '|') { @@ -430,7 +426,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { } else { return check_operator_end(lexer); } - // *, ** + // *, ** } else if (lexer->lookahead == '*') { advance(lexer); if (lexer->lookahead == '*') { @@ -439,7 +435,7 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { } else { return check_operator_end(lexer); } - // / // + // / // } else if (lexer->lookahead == '/') { advance(lexer); if (lexer->lookahead == '/') { @@ -448,18 +444,19 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { } else { return check_operator_end(lexer); } - // ., .. + // ., .. } else if (lexer->lookahead == '.') { advance(lexer); if (lexer->lookahead == '.') { advance(lexer); // Ignore ... identifier - if (lexer->lookahead == '.') return false; + if (lexer->lookahead == '.') + return false; return check_operator_end(lexer); } else { return check_operator_end(lexer); } - // double slash + // double slash } else if (lexer->lookahead == '\\') { advance(lexer); if (lexer->lookahead == '\\') { @@ -487,21 +484,21 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return is_token_end(lexer->lookahead) && check_operator_end(lexer); } } - // or + // or } else if (lexer->lookahead == 'o') { advance(lexer); if (lexer->lookahead == 'r') { advance(lexer); return is_token_end(lexer->lookahead) && check_operator_end(lexer); } - // in + // in } else if (lexer->lookahead == 'i') { advance(lexer); if (lexer->lookahead == 'n') { advance(lexer); return is_token_end(lexer->lookahead) && check_operator_end(lexer); } - // not in + // not in } else if (lexer->lookahead == 'n') { advance(lexer); if (lexer->lookahead == 'o') { @@ -515,7 +512,8 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { advance(lexer); if (lexer->lookahead == 'n') { advance(lexer); - return is_token_end(lexer->lookahead) && check_operator_end(lexer); + return is_token_end(lexer->lookahead) && + check_operator_end(lexer); } } } @@ -526,14 +524,15 @@ bool scan_newline(TSLexer* lexer, const bool* valid_symbols) { return false; } -bool scan(TSLexer* lexer, const bool* valid_symbols) { +bool scan(TSLexer *lexer, const bool *valid_symbols) { int8_t quoted_content_info_idx = find_quoted_token_info(valid_symbols); // Quoted content, which matches any character except for close // delimiters, escapes and interpolations if (quoted_content_info_idx != -1) { - const QuotedContentInfo& info = quoted_content_infos[quoted_content_info_idx]; - return scan_quoted_content(lexer, info); + const QuotedContentInfo info = + quoted_content_infos[quoted_content_info_idx]; + return scan_quoted_content(lexer, &info); } bool skipped_whitespace = false; @@ -544,10 +543,10 @@ bool scan(TSLexer* lexer, const bool* valid_symbols) { } // Newline, which is either tokenized as a special newline or ignored - if (is_newline(lexer->lookahead) && ( - valid_symbols[NEWLINE_BEFORE_DO] || - valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR] || - valid_symbols[NEWLINE_BEFORE_COMMENT])) { + if (is_newline(lexer->lookahead) && + (valid_symbols[NEWLINE_BEFORE_DO] || + valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR] || + valid_symbols[NEWLINE_BEFORE_COMMENT])) { return scan_newline(lexer, valid_symbols); } @@ -556,7 +555,8 @@ bool scan(TSLexer* lexer, const bool* valid_symbols) { if (skipped_whitespace && valid_symbols[BEFORE_UNARY_OPERATOR]) { lexer->mark_end(lexer); advance(lexer); - if (lexer->lookahead == '+' || lexer->lookahead == ':' || lexer->lookahead == '/') { + if (lexer->lookahead == '+' || lexer->lookahead == ':' || + lexer->lookahead == '/') { return false; } if (is_whitespace(lexer->lookahead)) { @@ -565,13 +565,14 @@ bool scan(TSLexer* lexer, const bool* valid_symbols) { lexer->result_symbol = BEFORE_UNARY_OPERATOR; return true; } - // before unary - + // before unary - } else if (lexer->lookahead == '-') { if (skipped_whitespace && valid_symbols[BEFORE_UNARY_OPERATOR]) { lexer->mark_end(lexer); lexer->result_symbol = BEFORE_UNARY_OPERATOR; advance(lexer); - if (lexer->lookahead == '-' || lexer->lookahead == '>' || lexer->lookahead == ':' || lexer->lookahead == '/') { + if (lexer->lookahead == '-' || lexer->lookahead == '>' || + lexer->lookahead == ':' || lexer->lookahead == '/') { return false; } if (is_whitespace(lexer->lookahead)) { @@ -579,7 +580,7 @@ bool scan(TSLexer* lexer, const bool* valid_symbols) { } return true; } - // not in + // not in } else if (lexer->lookahead == 'n') { if (valid_symbols[NOT_IN]) { lexer->result_symbol = NOT_IN; @@ -601,7 +602,7 @@ bool scan(TSLexer* lexer, const bool* valid_symbols) { } } } - // quoted atom start + // quoted atom start } else if (lexer->lookahead == ':') { if (valid_symbols[QUOTED_ATOM_START]) { advance(lexer); @@ -616,25 +617,20 @@ bool scan(TSLexer* lexer, const bool* valid_symbols) { return false; } -// Expose the API expected by tree-sitter +void *tree_sitter_elixir_external_scanner_create() { return NULL; } -extern "C" { - void* tree_sitter_elixir_external_scanner_create() { - return NULL; - } - - bool tree_sitter_elixir_external_scanner_scan(void* payload, TSLexer* lexer, const bool* valid_symbols) { - return scan(lexer, valid_symbols); - } - - unsigned tree_sitter_elixir_external_scanner_serialize(void* payload, char* buffer) { - return 0; - } - - void tree_sitter_elixir_external_scanner_deserialize(void* payload, const char* buffer, unsigned length) {} - - void tree_sitter_elixir_external_scanner_destroy(void* payload) {} +bool tree_sitter_elixir_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + return scan(lexer, valid_symbols); } -// end anonymous namespace +unsigned tree_sitter_elixir_external_scanner_serialize(void *payload, + char *buffer) { + return 0; } + +void tree_sitter_elixir_external_scanner_deserialize(void *payload, + const char *buffer, + unsigned length) {} + +void tree_sitter_elixir_external_scanner_destroy(void *payload) {}