diff --git a/coconut/compiler/compiler.py b/coconut/compiler/compiler.py index 59dd26c7..446928b4 100644 --- a/coconut/compiler/compiler.py +++ b/coconut/compiler/compiler.py @@ -891,6 +891,10 @@ def bind(cls): cls.ellipsis <<= attach(cls.ellipsis_tokens, cls.method("ellipsis_handle")) cls.f_string <<= attach(cls.f_string_tokens, cls.method("f_string_handle")) cls.t_string <<= attach(cls.t_string_tokens, cls.method("t_string_handle")) + cls.d_string <<= attach(cls.d_string_ref, cls.method("d_string_handle")) + cls.db_string <<= attach(cls.db_string_ref, cls.method("d_string_handle")) + cls.df_string <<= attach(cls.df_string_ref, cls.method("d_f_string_handle")) + cls.dt_string <<= attach(cls.dt_string_ref, cls.method("d_f_string_handle", is_t=True)) cls.funcname_typeparams <<= attach(cls.funcname_typeparams_tokens, cls.method("funcname_typeparams_handle")) # standard handlers of the form name <<= attach(name_ref, method("name_handle")) @@ -4778,6 +4782,122 @@ def t_string_handle(self, original, loc, tokens): """Process Python 3.14 template strings.""" return self.f_string_handle(original, loc, tokens, is_t=True) + @staticmethod + def _d_string_dedent(text, loc, placeholder=None): + """Apply PEP 822 dedentation to string contents. + The text must start with a newline (the required newline after opening quotes). + If placeholder is given, it is treated as non-whitespace for indentation calculation + but preserved in the output.""" + if not text.startswith("\n"): + raise CoconutDeferredSyntaxError("d-string contents must start with a newline after opening quotes", loc) + text = text[1:] # remove leading newline (not included in result) + + lines = text.split("\n") + + # determine common indentation + # blank lines are ignored except the last line (closing quotes line) + indent = None + for i, line in enumerate(lines): + is_last = i == len(lines) - 1 + check_line = line.replace(placeholder, "X") if placeholder else line + if not is_last and check_line.strip() == "": + continue + stripped = check_line.lstrip() + line_indent = check_line[:len(check_line) - len(stripped)] + if indent is None: + indent = line_indent + else: + common = "" + for a, b in zip(indent, line_indent): + if a == b: + common += a + else: + break + indent = common + + if indent is None: + indent = "" + + # apply dedentation + result_lines = [] + for i, line in enumerate(lines): + is_last = i == len(lines) - 1 + check_line = line.replace(placeholder, "X") if placeholder else line + if check_line.strip() == "" and not is_last: + result_lines.append("") + elif line.startswith(indent): + result_lines.append(line[len(indent):]) + elif indent.startswith(check_line) and check_line.strip() == "": + result_lines.append("") + else: + raise CoconutDeferredSyntaxError("inconsistent indentation in d-string", loc) + + return "\n".join(result_lines) + + @staticmethod + def _strip_raw_and_b(string): + """Strip r and b prefixes from a string token, returning (raw, has_b, string).""" + raw = False + has_b = False + while string: + if string[0] in "rR": + raw = True + elif string[0] in "bB": + has_b = True + else: + break + string = string[1:] + return raw, has_b, string + + def d_string_handle(self, original, loc, tokens): + """Process PEP 822 d-strings (dedented strings).""" + string, = tokens + + raw, has_b, string = self._strip_raw_and_b(string) + + # unwrap string ref + internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid d string item", string) + text, strchar = self.get_ref("str", string[1:-1]) + + # must be triple-quoted + if len(strchar) != 3: + raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc) + + # apply dedentation + text = self._d_string_dedent(text, loc) + + return ("b" if has_b else "") + ("r" if raw else "") + self.wrap_str(text, strchar[0], multiline=True) + + def d_f_string_handle(self, original, loc, tokens, is_t=False): + """Process d-string combined with f or t prefix.""" + string, = tokens + + # strip raw r + raw = string.startswith("r") + if raw: + string = string[1:] + + # unwrap f-string ref + internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid df string item", string) + strchar, string_parts, exprs = self.get_ref("f_str", string[1:-1]) + + # must be triple-quoted + if len(strchar) != 3: + raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc) + + # apply dedentation to the f-string parts using placeholder for expressions; + # null bytes can't appear in Python source code so they're safe to use here + placeholder = "\x00" + internal_assert(placeholder not in "".join(string_parts), "placeholder character found in d-string contents", string_parts) + full_text = placeholder.join(string_parts) + dedented = self._d_string_dedent(full_text, loc, placeholder=placeholder) + new_parts = dedented.split(placeholder) + + # re-wrap as f-string ref and delegate to f_string_handle + new_ref = self.wrap_f_str(strchar, new_parts, exprs) + new_token = ("r" if raw else "") + new_ref + return self.f_string_handle(original, loc, [new_token], is_t=is_t) + def decorators_handle(self, loc, tokens): """Process decorators.""" defs = [] diff --git a/coconut/compiler/grammar.py b/coconut/compiler/grammar.py index 1a37448e..39fa2e04 100644 --- a/coconut/compiler/grammar.py +++ b/coconut/compiler/grammar.py @@ -935,12 +935,17 @@ class Grammar(object): u_string = Forward() f_string = Forward() t_string = Forward() + d_string = Forward() + db_string = Forward() + df_string = Forward() + dt_string = Forward() bit_b = caseless_literal("b") raw_r = caseless_literal("r") unicode_u = caseless_literal("u", suppress=True) format_f = caseless_literal("f", suppress=True) template_t = caseless_literal("t", suppress=True) + dedent_d = caseless_literal("d", suppress=True) string = combine(Optional(raw_r) + string_item) # Python 2 only supports br"..." not rb"..." @@ -949,9 +954,14 @@ class Grammar(object): u_string_ref = combine(unicode_u + string_item) f_string_tokens = combine((format_f + Optional(raw_r) | raw_r + format_f) + string_item) t_string_tokens = combine((template_t + Optional(raw_r) | raw_r + template_t) + string_item) - nonbf_string = string | u_string - nonb_string = nonbf_string | f_string | t_string - any_string = nonb_string | b_string + # d-string (PEP 822) dedented string variants + d_string_ref = combine(any_len_perm(raw_r, required=(dedent_d,)) + string_item) + db_string_ref = combine(any_len_perm(raw_r, required=(dedent_d, bit_b)) + string_item) + df_string_ref = combine(any_len_perm(raw_r, required=(dedent_d, format_f)) + string_item) + dt_string_ref = combine(any_len_perm(raw_r, required=(dedent_d, template_t)) + string_item) + nonbf_string = string | u_string | d_string + nonb_string = nonbf_string | f_string | t_string | df_string | dt_string + any_string = nonb_string | b_string | db_string moduledoc = any_string + newline docstring = condense(moduledoc) @@ -1342,10 +1352,10 @@ class Grammar(object): ) string_atom = Forward() - string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string) - fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string) + string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string | db_string) + fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string | db_string) f_string_atom = Forward() - f_string_atom_ref = ZeroOrMore(nonbf_string) + f_string + ZeroOrMore(nonb_string) + f_string_atom_ref = ZeroOrMore(nonbf_string) + (f_string | df_string) + ZeroOrMore(nonb_string) keyword_atom = any_keyword_in(const_vars) passthrough_atom = addspace(OneOrMore(passthrough_item)) @@ -2897,8 +2907,8 @@ class Grammar(object): | fixto(end_of_line, "misplaced newline (maybe missing ':')") ) - start_f_str_regex = compile_regex(r"\br?[ft]r?$") - start_f_str_regex_len = 4 + start_f_str_regex = compile_regex(r"\b[dr]{0,2}[ft][dr]{0,2}$") + start_f_str_regex_len = 5 end_f_str_expr = StartOfStrGrammar(combine(rbrace | colon | bang).leaveWhitespace()) diff --git a/coconut/tests/src/cocotest/agnostic/primary_2.coco b/coconut/tests/src/cocotest/agnostic/primary_2.coco index 5ac07bb0..1254a8f9 100644 --- a/coconut/tests/src/cocotest/agnostic/primary_2.coco +++ b/coconut/tests/src/cocotest/agnostic/primary_2.coco @@ -597,4 +597,94 @@ def primary_test_2() -> bool: assert final_outer_fn() == 5 assert final_nested == 1 + # d-string (PEP 822) tests + assert d""" + Hello + World! + """ == "Hello\nWorld!\n" + assert d""" + Hello + World!""" == "Hello\nWorld!" + assert d""" + Hello + + World! + """ == "Hello\n\nWorld!\n" + assert d""" + Hello + World! + """ == " Hello\nWorld!\n" + assert d""" + Hello + """ == " Hello\n" + + # d-string prefix permutations: dr/rd + assert dr""" + Hello\n + World! + """ == "Hello\\n\nWorld!\n" + assert rd""" + Hello\n + World! + """ == "Hello\\n\nWorld!\n" + + # d-string prefix permutations: db/bd + assert db""" + Hello + World! + """ == b"Hello\nWorld!\n" + assert bd""" + Hello + World! + """ == b"Hello\nWorld!\n" + + # d-string prefix permutations: dbr/drb/bdr/brd/rdb/rbd + assert dbr""" + Hello\n + """ == b"Hello\\n\n" + assert drb""" + Hello\n + """ == b"Hello\\n\n" + assert bdr""" + Hello\n + """ == b"Hello\\n\n" + assert brd""" + Hello\n + """ == b"Hello\\n\n" + assert rdb""" + Hello\n + """ == b"Hello\\n\n" + assert rbd""" + Hello\n + """ == b"Hello\\n\n" + + # d-string prefix permutations: df/fd + name = "World" + assert df""" + Hello, {name}! + """ == "Hello, World!\n" + assert fd""" + Hello, {name}! + """ == "Hello, World!\n" + + # d-string prefix permutations: dfr/drf/fdr/frd/rdf/rfd + assert dfr""" + Hello\n, {name}! + """ == "Hello\\n, World!\n" + assert drf""" + Hello\n, {name}! + """ == "Hello\\n, World!\n" + assert fdr""" + Hello\n, {name}! + """ == "Hello\\n, World!\n" + assert frd""" + Hello\n, {name}! + """ == "Hello\\n, World!\n" + assert rdf""" + Hello\n, {name}! + """ == "Hello\\n, World!\n" + assert rfd""" + Hello\n, {name}! + """ == "Hello\\n, World!\n" + return True diff --git a/coconut/tests/src/cocotest/agnostic/specific.coco b/coconut/tests/src/cocotest/agnostic/specific.coco index 8b93487c..05d69c5c 100644 --- a/coconut/tests/src/cocotest/agnostic/specific.coco +++ b/coconut/tests/src/cocotest/agnostic/specific.coco @@ -237,4 +237,49 @@ def py310_spec_test() -> bool: assert template5.interpolations[1].value == 2 assert template5.interpolations[2].value == 3 + # dt-string (PEP 822 + t-string) tests + dt_name = "World" + dt_template = dt""" + Hello, {dt_name}! + """ + assert type(dt_template).__name__ == "Template" + assert dt_template.strings == ("Hello, ", "!\n") + assert len(dt_template.interpolations) == 1 + assert dt_template.interpolations[0].value == "World" + + # dt-string prefix permutations: td + td_template = td""" + Hello, {dt_name}! + """ + assert type(td_template).__name__ == "Template" + assert td_template.strings == ("Hello, ", "!\n") + assert td_template.interpolations[0].value == "World" + + # dt-string prefix permutations: dtr/drt/tdr/trd/rdt/rtd + dtr_template = dtr""" + Hello\n, {dt_name}! + """ + assert type(dtr_template).__name__ == "Template" + assert dtr_template.strings == ("Hello\\n, ", "!\n") + drt_template = drt""" + Hello\n, {dt_name}! + """ + assert drt_template.strings == ("Hello\\n, ", "!\n") + tdr_template = tdr""" + Hello\n, {dt_name}! + """ + assert tdr_template.strings == ("Hello\\n, ", "!\n") + trd_template = trd""" + Hello\n, {dt_name}! + """ + assert trd_template.strings == ("Hello\\n, ", "!\n") + rdt_template = rdt""" + Hello\n, {dt_name}! + """ + assert rdt_template.strings == ("Hello\\n, ", "!\n") + rtd_template = rtd""" + Hello\n, {dt_name}! + """ + assert rtd_template.strings == ("Hello\\n, ", "!\n") + return True