Write spokenForms.json from Talon

pokey · pokey · commit 3bfd6aa6151c · 2023-10-20T13:47:08.000+01:00
diff --git a/cursorless-talon/src/csv_overrides.py b/cursorless-talon/src/csv_overrides.py
@@ -1,8 +1,9 @@
 import csv
 from collections.abc import Container
+from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
-from typing import Optional
+from typing import Callable, Optional, TypedDict
 
 from talon import Context, Module, actions, app, fs
 
@@ -25,50 +26,74 @@
     desc="The directory to use for cursorless settings csvs relative to talon user directory",
 )
 
-default_ctx = Context()
-default_ctx.matches = r"""
+# The global context we use for our lists
+ctx = Context()
+
+# A context that contains default vocabulary, for use in testing
+normalized_ctx = Context()
+normalized_ctx.matches = r"""
 tag: user.cursorless_default_vocabulary
 """
 
 
+# Maps from Talon list name to a map from spoken form to value
+ListToSpokenForms = dict[str, dict[str, str]]
+
+
+@dataclass
+class SpokenFormEntry:
+    list_name: str
+    id: str
+    spoken_forms: list[str]
+
+
 def init_csv_and_watch_changes(
     filename: str,
-    default_values: dict[str, dict[str, str]],
+    default_values: ListToSpokenForms,
+    handle_new_values: Optional[Callable[[list[SpokenFormEntry]], None]] = None,
     extra_ignored_values: Optional[list[str]] = None,
     extra_allowed_values: Optional[list[str]] = None,
     allow_unknown_values: bool = False,
     default_list_name: Optional[str] = None,
     headers: list[str] = [SPOKEN_FORM_HEADER, CURSORLESS_IDENTIFIER_HEADER],
-    ctx: Context = Context(),
     no_update_file: bool = False,
-    pluralize_lists: Optional[list[str]] = None,
+    pluralize_lists: list[str] = [],
 ):
     """
     Initialize a cursorless settings csv, creating it if necessary, and watch
     for changes to the csv.  Talon lists will be generated based on the keys of
     `default_values`.  For example, if there is a key `foo`, there will be a
-    list created called `user.cursorless_foo` that will contain entries from
-    the original dict at the key `foo`, updated according to customization in
-    the csv at
+    list created called `user.cursorless_foo` that will contain entries from the
+    original dict at the key `foo`, updated according to customization in the
+    csv at
 
-        actions.path.talon_user() / "cursorless-settings" / filename
+    ```
+    actions.path.talon_user() / "cursorless-settings" / filename
+    ```
 
     Note that the settings directory location can be customized using the
     `user.cursorless_settings_directory` setting.
 
     Args:
         filename (str): The name of the csv file to be placed in
-        `cursorles-settings` dir
-        default_values (dict[str, dict]): The default values for the lists to
-        be customized in the given csv
-        extra_ignored_values list[str]: Don't throw an exception if any of
-        these appear as values; just ignore them and don't add them to any list
-        allow_unknown_values bool: If unknown values appear, just put them in the list
-        default_list_name Optional[str]: If unknown values are allowed, put any
-        unknown values in this list
-        no_update_file Optional[bool]: Set this to `TRUE` to indicate that we should
-        not update the csv. This is used generally in case there was an issue coming up with the default set of values so we don't want to persist those to disk
-        pluralize_lists: Create plural version of given lists
+            `cursorless-settings` dir
+        default_values (ListToSpokenForms): The default values for the lists to
+            be customized in the given csv
+        handle_new_values (Optional[Callable[[list[SpokenFormEntry]], None]]): A
+            callback to be called when the lists are updated
+        extra_ignored_values (Optional[list[str]]): Don't throw an exception if
+            any of these appear as values; just ignore them and don't add them
+            to any list
+        allow_unknown_values (bool): If unknown values appear, just put them in
+            the list
+        default_list_name (Optional[str]): If unknown values are
+            allowed, put any unknown values in this list
+        headers (list[str]): The headers to use for the csv
+        no_update_file (bool): Set this to `True` to indicate that we should not
+            update the csv. This is used generally in case there was an issue
+            coming up with the default set of values so we don't want to persist
+            those to disk
+        pluralize_lists (list[str]): Create plural version of given lists
     """
     # Don't allow both `extra_allowed_values` and `allow_unknown_values`
     assert not (extra_allowed_values and allow_unknown_values)
@@ -112,7 +137,7 @@ def on_watch(path, flags):
                 allow_unknown_values=allow_unknown_values,
                 default_list_name=default_list_name,
                 pluralize_lists=pluralize_lists,
-                ctx=ctx,
+                handle_new_values=handle_new_values,
             )
 
     fs.watch(str(file_path.parent), on_watch)
@@ -135,7 +160,7 @@ def on_watch(path, flags):
             allow_unknown_values=allow_unknown_values,
             default_list_name=default_list_name,
             pluralize_lists=pluralize_lists,
-            ctx=ctx,
+            handle_new_values=handle_new_values,
         )
     else:
         if not no_update_file:
@@ -148,7 +173,7 @@ def on_watch(path, flags):
             allow_unknown_values=allow_unknown_values,
             default_list_name=default_list_name,
             pluralize_lists=pluralize_lists,
-            ctx=ctx,
+            handle_new_values=handle_new_values,
         )
 
     def unsubscribe():
@@ -184,23 +209,23 @@ def create_default_vocabulary_dicts(
             if active_key:
                 updated_dict[active_key] = value2
         default_values_updated[key] = updated_dict
-    assign_lists_to_context(default_ctx, default_values_updated, pluralize_lists)
+    assign_lists_to_context(normalized_ctx, default_values_updated, pluralize_lists)
 
 
 def update_dicts(
-    default_values: dict[str, dict],
-    current_values: dict,
+    default_values: ListToSpokenForms,
+    current_values: dict[str, str],
     extra_ignored_values: list[str],
     extra_allowed_values: list[str],
     allow_unknown_values: bool,
     default_list_name: Optional[str],
     pluralize_lists: list[str],
-    ctx: Context,
+    handle_new_values: Optional[Callable[[list[SpokenFormEntry]], None]],
 ):
     # Create map with all default values
-    results_map = {}
-    for list_name, dict in default_values.items():
-        for key, value in dict.items():
+    results_map: dict[str, ResultsListEntry] = {}
+    for list_name, obj in default_values.items():
+        for key, value in obj.items():
             results_map[value] = {"key": key, "value": value, "list": list_name}
 
     # Update result with current values
@@ -211,6 +236,7 @@ def update_dicts(
             if value in extra_ignored_values:
                 pass
             elif allow_unknown_values or value in extra_allowed_values:
+                assert default_list_name is not None
                 results_map[value] = {
                     "key": key,
                     "value": value,
@@ -221,9 +247,35 @@ def update_dicts(
 
     # Convert result map back to result list
     results = {res["list"]: {} for res in results_map.values()}
-    for obj in results_map.values():
+    values: list[SpokenFormEntry] = []
+    for list_name, id, spoken_forms in generate_spoken_forms(
+        list(results_map.values())
+    ):
+        for spoken_form in spoken_forms:
+            results[list_name][spoken_form] = id
+        values.append(
+            SpokenFormEntry(list_name=list_name, id=id, spoken_forms=spoken_forms)
+        )
+
+    # Assign result to talon context list
+    assign_lists_to_context(ctx, results, pluralize_lists)
+
+    if handle_new_values is not None:
+        handle_new_values(values)
+
+
+class ResultsListEntry(TypedDict):
+    key: str
+    value: str
+    list: str
+
+
+def generate_spoken_forms(results_list: list[ResultsListEntry]):
+    for obj in results_list:
         value = obj["value"]
         key = obj["key"]
+
+        spoken = []
         if not is_removed(key):
             for k in key.split("|"):
                 if value == "pasteFromClipboard" and k.endswith(" to"):
@@ -234,10 +286,13 @@ def update_dicts(
                     # cursorless before this change would have "paste to" as
                     # their spoken form and so would need to say "paste to to".
                     k = k[:-3]
-                results[obj["list"]][k.strip()] = value
+                spoken.append(k.strip())
 
-    # Assign result to talon context list
-    assign_lists_to_context(ctx, results, pluralize_lists)
+        yield (
+            obj["list"],
+            value,
+            spoken,
+        )
 
 
 def assign_lists_to_context(
@@ -410,7 +465,7 @@ def get_full_path(filename: str):
     return (settings_directory / filename).resolve()
 
 
-def get_super_values(values: dict[str, dict[str, str]]):
+def get_super_values(values: ListToSpokenForms):
     result: dict[str, str] = {}
     for value_dict in values.values():
         result.update(value_dict)
diff --git a/cursorless-talon/src/marks/decorated_mark.py b/cursorless-talon/src/marks/decorated_mark.py
@@ -138,7 +138,7 @@ def setup_hat_styles_csv(hat_colors: dict[str, str], hat_shapes: dict[str, str])
             "hat_color": active_hat_colors,
             "hat_shape": active_hat_shapes,
         },
-        [*hat_colors.values(), *hat_shapes.values()],
+        extra_ignored_values=[*hat_colors.values(), *hat_shapes.values()],
         no_update_file=is_shape_error or is_color_error,
     )
 
diff --git a/cursorless-talon/src/spoken_forms.py b/cursorless-talon/src/spoken_forms.py
@@ -4,27 +4,30 @@
 
 from talon import app, fs
 
-from .csv_overrides import SPOKEN_FORM_HEADER, init_csv_and_watch_changes
+from .csv_overrides import (
+    SPOKEN_FORM_HEADER,
+    ListToSpokenForms,
+    SpokenFormEntry,
+    init_csv_and_watch_changes,
+)
 from .marks.decorated_mark import init_hats
+from .spoken_forms_output import SpokenFormsOutput
 
 JSON_FILE = Path(__file__).parent / "spoken_forms.json"
 disposables: list[Callable] = []
 
 
-def watch_file(spoken_forms: dict, filename: str) -> Callable:
-    return init_csv_and_watch_changes(
-        filename,
-        spoken_forms[filename],
-    )
-
-
 P = ParamSpec("P")
 R = TypeVar("R")
 
 
 def auto_construct_defaults(
-    spoken_forms: dict[str, dict[str, dict[str, str]]],
-    f: Callable[Concatenate[str, dict[str, dict[str, str]], P], R],
+    spoken_forms: dict[str, ListToSpokenForms],
+    handle_new_values: Callable[[str, list[SpokenFormEntry]], None],
+    f: Callable[
+        Concatenate[str, ListToSpokenForms, Callable[[list[SpokenFormEntry]], None], P],
+        R,
+    ],
 ):
     """
     Decorator that automatically constructs the default values for the
@@ -37,17 +40,38 @@ def auto_construct_defaults(
     of `init_csv_and_watch_changes` to remove the `default_values` parameter.
 
     Args:
-        spoken_forms (dict[str, dict[str, dict[str, str]]]): The spoken forms
-        f (Callable[Concatenate[str, dict[str, dict[str, str]], P], R]): Will always be `init_csv_and_watch_changes`
+        spoken_forms (dict[str, ListToSpokenForms]): The spoken forms
+        handle_new_values (Callable[[str, list[SpokenFormEntry]], None]): A callback to be called with the csv filename and its new entries when the lists are updated
+        f (Callable[Concatenate[str, ListToSpokenForms, Callable[[list[SpokenFormEntry]], None], P], R]): Will always be `init_csv_and_watch_changes`
     """
 
     def ret(filename: str, *args: P.args, **kwargs: P.kwargs) -> R:
         default_values = spoken_forms[filename]
-        return f(filename, default_values, *args, **kwargs)
+        return f(
+            filename,
+            default_values,
+            lambda new_values: handle_new_values(filename, new_values),
+            *args,
+            **kwargs,
+        )
 
     return ret
 
 
+# Maps from Talon list name to the type of the value in that list, e.g.
+# `pairedDelimiter` or `simpleScopeTypeType`
+# FIXME: This is a hack until we generate spoken_forms.json from the TypeScript
+# side; at that point we can just include its type as part of that file
+LIST_TO_TYPE_MAP = {
+    "wrapper_selectable_paired_delimiter": "pairedDelimiter",
+    "selectable_only_paired_delimiter": "pairedDelimiter",
+    "wrapper_only_paired_delimiter": "pairedDelimiter",
+    "surrounding_pair_scope_type": "pairedDelimiter",
+    "scope_type": "simpleScopeTypeType",
+    "custom_regex_scope_type": "customRegex",
+}
+
+
 def update():
     global disposables
 
@@ -57,7 +81,35 @@ def update():
     with open(JSON_FILE, encoding="utf-8") as file:
         spoken_forms = json.load(file)
 
-    handle_csv = auto_construct_defaults(spoken_forms, init_csv_and_watch_changes)
+    initialized = False
+    custom_spoken_forms: dict[str, list[SpokenFormEntry]] = {}
+    spoken_forms_output = SpokenFormsOutput()
+    spoken_forms_output.init()
+
+    def update_spoken_forms_output():
+        spoken_forms_output.write(
+            [
+                {
+                    "type": LIST_TO_TYPE_MAP[entry.list_name],
+                    "id": entry.id,
+                    "spokenForms": entry.spoken_forms,
+                }
+                for spoken_form_list in custom_spoken_forms.values()
+                for entry in spoken_form_list
+                if entry.list_name in LIST_TO_TYPE_MAP
+            ]
+        )
+
+    def handle_new_values(csv_name: str, values: list[SpokenFormEntry]):
+        custom_spoken_forms[csv_name] = values
+        if initialized:
+            # On first run, we just do one update at the end, so we suppress
+            # writing until we get there
+            update_spoken_forms_output()
+
+    handle_csv = auto_construct_defaults(
+        spoken_forms, handle_new_values, init_csv_and_watch_changes
+    )
 
     disposables = [
         handle_csv("actions.csv"),
@@ -109,6 +161,9 @@ def update():
         ),
     ]
 
+    update_spoken_forms_output()
+    initialized = True
+
 
 def on_watch(path, flags):
     if JSON_FILE.match(path):
diff --git a/cursorless-talon/src/spoken_forms_output.py b/cursorless-talon/src/spoken_forms_output.py

Original file line number	Diff line number	Diff line change
`@@ -138,7 +138,7 @@ def setup_hat_styles_csv(hat_colors: dict[str, str], hat_shapes: dict[str, str])`
`138`	`138`	`"hat_color": active_hat_colors,`
`139`	`139`	`"hat_shape": active_hat_shapes,`
`140`	`140`	`},`
`141`		`- [*hat_colors.values(), *hat_shapes.values()],`
	`141`	`+ extra_ignored_values=[*hat_colors.values(), *hat_shapes.values()],`
`142`	`142`	`no_update_file=is_shape_error or is_color_error,`
`143`	`143`	`)`
`144`	`144`