Make token removal range less aggressive (#1564)

AndreasArvidsson · pokey · web-flow · commit b1f084e8f1d6 · 2023-07-06T18:33:01.000Z
Fixes #1077 ## Checklist - [x] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [ ] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [ ] I have not broken the cheatsheet --------- Co-authored-by: Pokey Rule <755842+pokey@users.noreply.github.com>
diff --git a/packages/cursorless-engine/src/processTargets/targetUtil/insertionRemovalBehaviors/TokenInsertionRemovalBehavior.ts b/packages/cursorless-engine/src/processTargets/targetUtil/insertionRemovalBehaviors/TokenInsertionRemovalBehavior.ts
@@ -1,9 +1,11 @@
-import { Range, TextDocument, TextEditor } from "@cursorless/common";
-import { tokenize } from "../../../tokenizer";
+import { Range, TextEditor } from "@cursorless/common";
 import type { Target } from "../../../typings/target.types";
 import { expandToFullLine } from "../../../util/rangeUtils";
 import { PlainTarget } from "../../targets";
 
+const leadingDelimiters = ['"', "'", "(", "[", "{", "<"];
+const trailingDelimiters = ['"', "'", ")", "]", "}", ">", ",", ";", ":"];
+
 export function getTokenLeadingDelimiterTarget(
   target: Target,
 ): Target | undefined {
@@ -53,10 +55,27 @@ export function getTokenTrailingDelimiterTarget(
 }
 
 /**
- * Constructs a removal range for the given target that will clean up a json
- * whitespace on one side unless it will cause two tokens to be merged. This
- * removal range is designed to be used with things that should clean themselves
- * up as if they're a range of tokens.
+ * Constructs a removal range for the given target that may remove whitespace on
+ * one side. This removal range is designed to be used with things that should
+ * clean themselves up as if they're a range of tokens.
+ *
+ * We determine whether to include adjacent whitespace based on the following
+ * rules:
+ *
+ * - If we would just be leaving a line with whitespace on it, we delete the
+ *   whitespace
+ * - Otherwise, if there is trailing whitespace, we include it if any of the
+ *   following is true:
+ *   - there is leading whitespace, OR
+ *   - we are at start of line, OR
+ *   - there is an approved leading delimiter character (eg `(`, `[`, etc).
+ * - Otherwise, if there is leading whitespace, we include it if any of the
+ *   following is true:
+ *   - we are at end of line, OR
+ *   - there is an approved trailing delimiter character (eg `)`, `]`, `:`, `;`,
+ *     etc).
+ * - Otherwise, we don't include any adjacent whitespace
+ *
  * @param target The target to get the token removal range for
  * @returns The removal range for the given target
  */
@@ -82,70 +101,45 @@ export function getTokenRemovalRange(target: Target): Range {
     return fullLineRange;
   }
 
+  // Use trailing range if: There is a leading range OR there is no leading
+  // content OR there is an approved leading delimiter character
   if (!trailingWhitespaceRange.isEmpty) {
-    const candidateRemovalRange = contentRange.union(trailingWhitespaceRange);
-
-    if (!mergesTokens(editor, contentRange, candidateRemovalRange)) {
-      // If there is trailing whitespace and it doesn't result in tokens getting
-      // merged, then we remove it
-      return candidateRemovalRange;
+    if (
+      !leadingWhitespaceRange.isEmpty ||
+      contentRange.start.isEqual(fullLineRange.start) ||
+      leadingDelimiters.includes(getLeadingCharacter(editor, contentRange))
+    ) {
+      return contentRange.union(trailingWhitespaceRange);
     }
   }
 
-  if (
-    !leadingWhitespaceRange.isEmpty &&
-    leadingWhitespaceRange.start.character !== 0
-  ) {
-    const candidateRemovalRange = leadingWhitespaceRange.union(contentRange);
-
-    if (!mergesTokens(editor, contentRange, candidateRemovalRange)) {
-      // If there is leading whitespace that is not indentation and it doesn't
-      // result in tokens getting merged, then we remove it
-      return candidateRemovalRange;
+  // Use leading range if: There is no trailing content OR there is an approved
+  // trailing delimiter character
+  if (!leadingWhitespaceRange.isEmpty) {
+    if (
+      contentRange.end.isEqual(fullLineRange.end) ||
+      trailingDelimiters.includes(getTrailingCharacter(editor, contentRange))
+    ) {
+      return contentRange.union(leadingWhitespaceRange);
     }
   }
 
   // Otherwise just return the content range
   return contentRange;
 }
 
-/** Returns true if removal range causes tokens to merge */
-function mergesTokens(
-  editor: TextEditor,
-  contentRange: Range,
-  removalRange: Range,
-) {
-  const { document } = editor;
-  const fullRange = expandToFullLine(editor, contentRange);
-  const fullText = document.getText(fullRange);
-  const fullTextOffset = document.offsetAt(fullRange.start);
-
-  const numTokensContentRangeRemoved = calculateNumberOfTokensAfterRemoval(
-    document,
-    fullText,
-    fullTextOffset,
-    contentRange,
-  );
-
-  const numTokensRemovalRangeRemoved = calculateNumberOfTokensAfterRemoval(
-    document,
-    fullText,
-    fullTextOffset,
-    removalRange,
-  );
-
-  return numTokensContentRangeRemoved !== numTokensRemovalRangeRemoved;
+function getLeadingCharacter(editor: TextEditor, contentRange: Range): string {
+  const { start } = contentRange;
+  const line = editor.document.lineAt(start);
+  return start.isAfter(line.range.start)
+    ? editor.document.getText(new Range(start.translate(undefined, -1), start))
+    : "";
 }
 
-function calculateNumberOfTokensAfterRemoval(
-  document: TextDocument,
-  fullText: string,
-  fullTextOffset: number,
-  removalRange: Range,
-): number {
-  const startIndex = document.offsetAt(removalRange.start) - fullTextOffset;
-  const endIndex = document.offsetAt(removalRange.end) - fullTextOffset;
-  const modifiedText = fullText.slice(0, startIndex) + fullText.slice(endIndex);
-  const tokens = tokenize(modifiedText, document.languageId, (m) => m);
-  return tokens.length;
+function getTrailingCharacter(editor: TextEditor, contentRange: Range): string {
+  const { end } = contentRange;
+  const line = editor.document.lineAt(end);
+  return end.isBefore(line.range.end)
+    ? editor.document.getText(new Range(end.translate(undefined, 1), end))
+    : "";
 }
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/rust/chuckFunkNameFine.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/rust/chuckFunkNameFine.yml
@@ -21,7 +21,7 @@ initialState:
       end: {line: 0, character: 2}
 finalState:
   documentContents: |-
-    fn() {
+    fn () {
         println!("Hello, world!");
     }
   selections:
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/rust/chuckName2.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/rust/chuckName2.yml
@@ -20,7 +20,7 @@ initialState:
   marks: {}
 finalState:
   documentContents: |
-    fn() -> Result<(), Error> {
+    fn () -> Result<(), Error> {
         
     }
   selections:
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/positions/chuckAir.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/positions/chuckAir.yml
@@ -24,7 +24,7 @@ finalState:
     "hello "
     " hello"
     " hello "
-    return.b
+    return .b
   selections:
     - anchor: {line: 4, character: 0}
       active: {line: 4, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckBat.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckBat.yml
@@ -0,0 +1,23 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck bat
+  action: {name: remove}
+  targets:
+    - type: primitive
+      mark: {type: decoratedSymbol, symbolColor: default, character: b}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: "aaa bbb: ccc"
+  selections:
+    - anchor: {line: 0, character: 12}
+      active: {line: 0, character: 12}
+  marks:
+    default.b:
+      start: {line: 0, character: 4}
+      end: {line: 0, character: 7}
+finalState:
+  documentContents: "aaa: ccc"
+  selections:
+    - anchor: {line: 0, character: 8}
+      active: {line: 0, character: 8}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: foo = bar.baz
+  selections:
+    - anchor: {line: 0, character: 6}
+      active: {line: 0, character: 10}
+  marks: {}
+finalState:
+  documentContents: foo = baz
+  selections:
+    - anchor: {line: 0, character: 6}
+      active: {line: 0, character: 6}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken10.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken10.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: "\"a b c\""
+  selections:
+    - anchor: {line: 0, character: 5}
+      active: {line: 0, character: 5}
+  marks: {}
+finalState:
+  documentContents: "\"a b\""
+  selections:
+    - anchor: {line: 0, character: 4}
+      active: {line: 0, character: 4}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken11.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken11.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: a.b c
+  selections:
+    - anchor: {line: 0, character: 2}
+      active: {line: 0, character: 2}
+  marks: {}
+finalState:
+  documentContents: a. c
+  selections:
+    - anchor: {line: 0, character: 2}
+      active: {line: 0, character: 2}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken12.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken12.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: a! -b
+  selections:
+    - anchor: {line: 0, character: 2}
+      active: {line: 0, character: 2}
+  marks: {}
+finalState:
+  documentContents: a -b
+  selections:
+    - anchor: {line: 0, character: 1}
+      active: {line: 0, character: 1}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken13.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken13.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: a b
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
+  marks: {}
+finalState:
+  documentContents: b
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken14.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken14.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: a b
+  selections:
+    - anchor: {line: 0, character: 2}
+      active: {line: 0, character: 2}
+  marks: {}
+finalState:
+  documentContents: a
+  selections:
+    - anchor: {line: 0, character: 1}
+      active: {line: 0, character: 1}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken2.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken2.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: "\"foo-bar bongo-bazman\""
+  selections:
+    - anchor: {line: 0, character: 9}
+      active: {line: 0, character: 21}
+  marks: {}
+finalState:
+  documentContents: "\"foo-bar\""
+  selections:
+    - anchor: {line: 0, character: 8}
+      active: {line: 0, character: 8}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken3.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken3.yml
@@ -0,0 +1,22 @@
+languageId: plaintext
+command:
+  version: 5
+  spokenForm: chuck token
+  action: {name: remove}
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: token}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: hello there,
+  selections:
+    - anchor: {line: 0, character: 6}
+      active: {line: 0, character: 6}
+  marks: {}
+finalState:
+  documentContents: hello,
+  selections:
+    - anchor: {line: 0, character: 5}
+      active: {line: 0, character: 5}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken4.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken4.yml
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken5.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken5.yml
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken6.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken6.yml
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken7.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken7.yml
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken8.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken8.yml
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken9.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/scopes/token/chuckToken9.yml

Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ initialState:`
`21`	`21`	`end: {line: 0, character: 2}`
`22`	`22`	`finalState:`
`23`	`23`	`documentContents: \|-`
`24`		`- fn() {`
	`24`	`+ fn () {`
`25`	`25`	`println!("Hello, world!");`
`26`	`26`	`}`
`27`	`27`	`selections:`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ initialState:`
`20`	`20`	`marks: {}`
`21`	`21`	`finalState:`
`22`	`22`	`documentContents: \|`
`23`		`- fn() -> Result<(), Error> {`
	`23`	`+ fn () -> Result<(), Error> {`
`24`	`24`
`25`	`25`	`}`
`26`	`26`	`selections:`