Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions coconut/compiler/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,10 @@ def bind(cls):
cls.ellipsis <<= attach(cls.ellipsis_tokens, cls.method("ellipsis_handle"))
cls.f_string <<= attach(cls.f_string_tokens, cls.method("f_string_handle"))
cls.t_string <<= attach(cls.t_string_tokens, cls.method("t_string_handle"))
cls.d_string <<= attach(cls.d_string_ref, cls.method("d_string_handle"))
cls.db_string <<= attach(cls.db_string_ref, cls.method("d_string_handle", is_b=True))
cls.df_string <<= attach(cls.df_string_ref, cls.method("d_f_string_handle"))
cls.dt_string <<= attach(cls.dt_string_ref, cls.method("d_f_string_handle", is_t=True))
cls.funcname_typeparams <<= attach(cls.funcname_typeparams_tokens, cls.method("funcname_typeparams_handle"))

# standard handlers of the form name <<= attach(name_ref, method("name_handle"))
Expand Down Expand Up @@ -4778,6 +4782,118 @@ def t_string_handle(self, original, loc, tokens):
"""Process Python 3.14 template strings."""
return self.f_string_handle(original, loc, tokens, is_t=True)

@staticmethod
def _d_string_dedent(text, loc, placeholder=None):
    """Apply PEP 822 dedentation to string contents.

    The text must start with a newline (the required newline after the opening quotes);
    that newline is not included in the result.

    The indentation that gets removed is the longest common leading-whitespace prefix of
    every line that is not whitespace-only, plus the last line (the closing quotes line),
    which always takes part even when it is blank. Whitespace-only lines other than the
    last line become empty lines in the result.

    If placeholder is given, it is treated as non-whitespace for indentation calculation
    but preserved in the output.

    Raises CoconutDeferredSyntaxError at loc if text does not start with a newline."""
    if not text.startswith("\n"):
        raise CoconutDeferredSyntaxError("d-string contents must start with a newline after opening quotes", loc)
    lines = text[1:].split("\n")  # drop the leading newline (not included in result)
    last = len(lines) - 1

    # classify every line exactly once:
    #  None -> whitespace-only line that is ignored for indentation and emitted as ""
    #  str  -> that line's leading whitespace
    line_indents = []
    for i, line in enumerate(lines):
        # the placeholder stands in for an interpolated expression, so it must count as content
        check_line = line.replace(placeholder, "X") if placeholder else line
        if i != last and not check_line.strip():
            line_indents.append(None)
        else:
            line_indents.append(check_line[:len(check_line) - len(check_line.lstrip())])

    # determine common indentation; the last line always contributes,
    #  so indent is always a string once this loop finishes
    indent = None
    for line_indent in line_indents:
        if line_indent is None:
            continue
        if indent is None:
            indent = line_indent
            continue
        shared = 0
        for a, b in zip(indent, line_indent):
            if a != b:
                break
            shared += 1
        indent = indent[:shared]

    # apply dedentation
    result_lines = []
    for line, line_indent in zip(lines, line_indents):
        if line_indent is None:
            result_lines.append("")
        elif line.startswith(indent):
            result_lines.append(line[len(indent):])
        else:
            # defensive only: indent is a common prefix of every line that reaches this point
            raise CoconutDeferredSyntaxError("inconsistent indentation in d-string", loc)

    return "\n".join(result_lines)

@staticmethod
def _strip_raw_and_b(string):
    """Strip r and b prefixes from a string token, returning (raw, has_b, string).

    Leading r/R and b/B characters are consumed in any order and any number;
    raw is True if at least one r/R was seen, has_b is True if at least one b/B was seen,
    and the returned string is whatever follows the prefix characters."""
    raw = False
    has_b = False
    while string:
        if string[0] in "rR":
            raw = True
        elif string[0] in "bB":
            has_b = True
        else:
            # first non-prefix character: the rest is the string item itself
            break
        string = string[1:]
    return raw, has_b, string

def d_string_handle(self, original, loc, tokens, is_b=False):
    """Process PEP 822 d-strings (dedented strings), with optional b prefix.

    tokens must hold exactly one item: any r/b prefix characters followed by a wrapped string ref.
    The result is a b prefix if is_b is set or a b was found in the token, then an r prefix
    if the token was raw, then the re-wrapped dedented string.

    Raises CoconutDeferredSyntaxError at loc if the string is not triple-quoted or if its
    contents do not start with a newline."""
    string, = tokens

    # NOTE(review): open question from review — if has_b can be recovered from the token here,
    #  is_b (and a separate db_string grammar element) may be unnecessary; compare with how
    #  normal b strings are handled before changing the interface
    raw, has_b, string = self._strip_raw_and_b(string)

    # unwrap string ref
    internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid d string item", string)
    text, strchar = self.get_ref("str", string[1:-1])

    # must be triple-quoted
    if len(strchar) != 3:
        raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)

    # apply dedentation
    text = self._d_string_dedent(text, loc)

    # always emit b before r, whatever order the prefixes were written in
    prefix = ""
    if is_b or has_b:
        prefix += "b"
    if raw:
        prefix += "r"
    return prefix + self.wrap_str(text, strchar[0], multiline=True)

def d_f_string_handle(self, original, loc, tokens, is_t=False):
    """Process d-string combined with f or t prefix.

    tokens must hold exactly one item: an optional r followed by a wrapped f-string ref.
    The literal parts of the f-string are dedented as one text, re-wrapped as a new
    f-string ref, and handed to f_string_handle (with is_t passed through).

    Raises CoconutDeferredSyntaxError at loc if the string is not triple-quoted or if its
    contents do not start with a newline."""
    string, = tokens

    # strip raw r
    raw = string.startswith("r")
    if raw:
        string = string[1:]

    # unwrap f-string ref
    internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid df string item", string)
    strchar, string_parts, exprs = self.get_ref("f_str", string[1:-1])

    # must be triple-quoted
    if len(strchar) != 3:
        raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)

    # apply dedentation to the f-string parts using placeholder for expressions;
    #  the parts are joined with the placeholder, dedented as one text, then split back,
    #  so the placeholder must not already occur in any part or the split would misalign
    placeholder = "\x00"
    internal_assert(not any(placeholder in part for part in string_parts), "d-string placeholder found in f-string parts", string_parts)
    # NOTE(review): a placeholder that can never appear in the string and that does not count
    #  as whitespace would make the placeholder handling inside _d_string_dedent unnecessary
    full_text = placeholder.join(string_parts)
    dedented = self._d_string_dedent(full_text, loc, placeholder=placeholder)
    new_parts = dedented.split(placeholder)
    internal_assert(len(new_parts) == len(string_parts), "d-string dedent changed number of f-string parts", (string_parts, new_parts))

    # re-wrap as f-string ref and delegate to f_string_handle
    new_ref = self.wrap_f_str(strchar, new_parts, exprs)
    new_token = ("r" if raw else "") + new_ref
    return self.f_string_handle(original, loc, [new_token], is_t=is_t)

def decorators_handle(self, loc, tokens):
"""Process decorators."""
defs = []
Expand Down
26 changes: 18 additions & 8 deletions coconut/compiler/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,12 +935,17 @@ class Grammar(object):
u_string = Forward()
f_string = Forward()
t_string = Forward()
d_string = Forward()
db_string = Forward()
df_string = Forward()
dt_string = Forward()

bit_b = caseless_literal("b")
raw_r = caseless_literal("r")
unicode_u = caseless_literal("u", suppress=True)
format_f = caseless_literal("f", suppress=True)
template_t = caseless_literal("t", suppress=True)
dedent_d = caseless_literal("d", suppress=True)

string = combine(Optional(raw_r) + string_item)
# Python 2 only supports br"..." not rb"..."
Expand All @@ -949,9 +954,14 @@ class Grammar(object):
u_string_ref = combine(unicode_u + string_item)
f_string_tokens = combine((format_f + Optional(raw_r) | raw_r + format_f) + string_item)
t_string_tokens = combine((template_t + Optional(raw_r) | raw_r + template_t) + string_item)
nonbf_string = string | u_string
nonb_string = nonbf_string | f_string | t_string
any_string = nonb_string | b_string
# d-string (PEP 822) dedented string variants
d_string_ref = combine(any_len_perm(raw_r, required=(dedent_d,)) + string_item)
db_string_ref = combine(any_len_perm(raw_r, required=(dedent_d, bit_b)) + string_item)
df_string_ref = combine(any_len_perm(raw_r, required=(dedent_d, format_f)) + string_item)
dt_string_ref = combine(any_len_perm(raw_r, required=(dedent_d, template_t)) + string_item)
nonbf_string = string | u_string | d_string
nonb_string = nonbf_string | f_string | t_string | df_string | dt_string
any_string = nonb_string | b_string | db_string
moduledoc = any_string + newline
docstring = condense(moduledoc)

Expand Down Expand Up @@ -1342,10 +1352,10 @@ class Grammar(object):
)

string_atom = Forward()
string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string)
fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string)
string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string | db_string)
fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string | db_string)
f_string_atom = Forward()
f_string_atom_ref = ZeroOrMore(nonbf_string) + f_string + ZeroOrMore(nonb_string)
f_string_atom_ref = ZeroOrMore(nonbf_string) + (f_string | df_string) + ZeroOrMore(nonb_string)

keyword_atom = any_keyword_in(const_vars)
passthrough_atom = addspace(OneOrMore(passthrough_item))
Expand Down Expand Up @@ -2897,8 +2907,8 @@ class Grammar(object):
| fixto(end_of_line, "misplaced newline (maybe missing ':')")
)

start_f_str_regex = compile_regex(r"\br?[ft]r?$")
start_f_str_regex_len = 4
start_f_str_regex = compile_regex(r"\b[dr]{0,2}[ft][dr]{0,2}$")
start_f_str_regex_len = 5

end_f_str_expr = StartOfStrGrammar(combine(rbrace | colon | bang).leaveWhitespace())

Expand Down
90 changes: 90 additions & 0 deletions coconut/tests/src/cocotest/agnostic/primary_2.coco
Original file line number Diff line number Diff line change
Expand Up @@ -597,4 +597,94 @@ def primary_test_2() -> bool:
assert final_outer_fn() == 5
assert final_nested == 1

# d-string (PEP 822) tests
assert d"""
Hello
World!
""" == "Hello\nWorld!\n"
assert d"""
Hello
World!""" == "Hello\nWorld!"
assert d"""
Hello

World!
""" == "Hello\n\nWorld!\n"
assert d"""
Hello
World!
""" == " Hello\nWorld!\n"
assert d"""
Hello
""" == " Hello\n"

# d-string prefix permutations: dr/rd
assert dr"""
Hello\n
World!
""" == "Hello\\n\nWorld!\n"
assert rd"""
Hello\n
World!
""" == "Hello\\n\nWorld!\n"

# d-string prefix permutations: db/bd
assert db"""
Hello
World!
""" == b"Hello\nWorld!\n"
assert bd"""
Hello
World!
""" == b"Hello\nWorld!\n"

# d-string prefix permutations: dbr/drb/bdr/brd/rdb/rbd
assert dbr"""
Hello\n
""" == b"Hello\\n\n"
assert drb"""
Hello\n
""" == b"Hello\\n\n"
assert bdr"""
Hello\n
""" == b"Hello\\n\n"
assert brd"""
Hello\n
""" == b"Hello\\n\n"
assert rdb"""
Hello\n
""" == b"Hello\\n\n"
assert rbd"""
Hello\n
""" == b"Hello\\n\n"

# d-string prefix permutations: df/fd
name = "World"
assert df"""
Hello, {name}!
""" == "Hello, World!\n"
assert fd"""
Hello, {name}!
""" == "Hello, World!\n"

# d-string prefix permutations: dfr/drf/fdr/frd/rdf/rfd
assert dfr"""
Hello\n, {name}!
""" == "Hello\\n, World!\n"
assert drf"""
Hello\n, {name}!
""" == "Hello\\n, World!\n"
assert fdr"""
Hello\n, {name}!
""" == "Hello\\n, World!\n"
assert frd"""
Hello\n, {name}!
""" == "Hello\\n, World!\n"
assert rdf"""
Hello\n, {name}!
""" == "Hello\\n, World!\n"
assert rfd"""
Hello\n, {name}!
""" == "Hello\\n, World!\n"

return True
45 changes: 45 additions & 0 deletions coconut/tests/src/cocotest/agnostic/specific.coco
Original file line number Diff line number Diff line change
Expand Up @@ -237,4 +237,49 @@ def py310_spec_test() -> bool:
assert template5.interpolations[1].value == 2
assert template5.interpolations[2].value == 3

# dt-string (PEP 822 + t-string) tests
dt_name = "World"
dt_template = dt"""
Hello, {dt_name}!
"""
assert type(dt_template).__name__ == "Template"
assert dt_template.strings == ("Hello, ", "!\n")
assert len(dt_template.interpolations) == 1
assert dt_template.interpolations[0].value == "World"

# dt-string prefix permutations: td
td_template = td"""
Hello, {dt_name}!
"""
assert type(td_template).__name__ == "Template"
assert td_template.strings == ("Hello, ", "!\n")
assert td_template.interpolations[0].value == "World"

# dt-string prefix permutations: dtr/drt/tdr/trd/rdt/rtd
dtr_template = dtr"""
Hello\n, {dt_name}!
"""
assert type(dtr_template).__name__ == "Template"
assert dtr_template.strings == ("Hello\\n, ", "!\n")
drt_template = drt"""
Hello\n, {dt_name}!
"""
assert drt_template.strings == ("Hello\\n, ", "!\n")
tdr_template = tdr"""
Hello\n, {dt_name}!
"""
assert tdr_template.strings == ("Hello\\n, ", "!\n")
trd_template = trd"""
Hello\n, {dt_name}!
"""
assert trd_template.strings == ("Hello\\n, ", "!\n")
rdt_template = rdt"""
Hello\n, {dt_name}!
"""
assert rdt_template.strings == ("Hello\\n, ", "!\n")
rtd_template = rtd"""
Hello\n, {dt_name}!
"""
assert rtd_template.strings == ("Hello\\n, ", "!\n")

return True
Loading