Skip to content

Make token removal range less aggressive #1564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { Range, TextDocument, TextEditor } from "@cursorless/common";
import { tokenize } from "../../../tokenizer";
import { Range, TextEditor } from "@cursorless/common";
import type { Target } from "../../../typings/target.types";
import { expandToFullLine } from "../../../util/rangeUtils";
import { PlainTarget } from "../../targets";

const leadingDelimiters = ['"', "'", "(", "[", "{", "<"];
const trailingDelimiters = ['"', "'", ")", "]", "}", ">", ",", ";", ":"];
Comment on lines +6 to +7
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed #1590 to track determining opening / closing properly. What you have here is strictly better than what we do today, so I'm fine with it


export function getTokenLeadingDelimiterTarget(
target: Target,
): Target | undefined {
Expand Down Expand Up @@ -53,10 +55,27 @@ export function getTokenTrailingDelimiterTarget(
}

/**
* Constructs a removal range for the given target that will clean up a json
* whitespace on one side unless it will cause two tokens to be merged. This
* removal range is designed to be used with things that should clean themselves
* up as if they're a range of tokens.
* Constructs a removal range for the given target that may remove whitespace on
* one side. This removal range is designed to be used with things that should
* clean themselves up as if they're a range of tokens.
*
* We determine whether to include adjacent whitespace based on the following
* rules:
*
* - If we would just be leaving a line with whitespace on it, we delete the
* whitespace
* - Otherwise, if there is trailing whitespace, we include it if any of the
* following is true:
* - there is leading whitespace, OR
* - we are at start of line, OR
* - there is an approved leading delimiter character (eg `(`, `[`, etc).
* - Otherwise, if there is leading whitespace, we include it if any of the
* following is true:
* - we are at end of line, OR
* - there is an approved trailing delimiter character (eg `)`, `]`, `:`, `;`,
* etc).
* - Otherwise, we don't include any adjacent whitespace
*
* @param target The target to get the token removal range for
* @returns The removal range for the given target
*/
Expand All @@ -82,70 +101,45 @@ export function getTokenRemovalRange(target: Target): Range {
return fullLineRange;
}

// Use trailing range if: There is a leading range OR there is no leading
// content OR there is an approved leading delimiter character
if (!trailingWhitespaceRange.isEmpty) {
const candidateRemovalRange = contentRange.union(trailingWhitespaceRange);

if (!mergesTokens(editor, contentRange, candidateRemovalRange)) {
// If there is trailing whitespace and it doesn't result in tokens getting
// merged, then we remove it
return candidateRemovalRange;
if (
!leadingWhitespaceRange.isEmpty ||
contentRange.start.isEqual(fullLineRange.start) ||
leadingDelimiters.includes(getLeadingCharacter(editor, contentRange))
) {
return contentRange.union(trailingWhitespaceRange);
}
}

if (
!leadingWhitespaceRange.isEmpty &&
leadingWhitespaceRange.start.character !== 0
) {
const candidateRemovalRange = leadingWhitespaceRange.union(contentRange);

if (!mergesTokens(editor, contentRange, candidateRemovalRange)) {
// If there is leading whitespace that is not indentation and it doesn't
// result in tokens getting merged, then we remove it
return candidateRemovalRange;
// Use leading range if: There is no trailing content OR there is an approved
// trailing delimiter character
if (!leadingWhitespaceRange.isEmpty) {
if (
contentRange.end.isEqual(fullLineRange.end) ||
trailingDelimiters.includes(getTrailingCharacter(editor, contentRange))
) {
return contentRange.union(leadingWhitespaceRange);
}
}

// Otherwise just return the content range
return contentRange;
}

/** Returns true if removal range causes tokens to merge */
function mergesTokens(
editor: TextEditor,
contentRange: Range,
removalRange: Range,
) {
const { document } = editor;
const fullRange = expandToFullLine(editor, contentRange);
const fullText = document.getText(fullRange);
const fullTextOffset = document.offsetAt(fullRange.start);

const numTokensContentRangeRemoved = calculateNumberOfTokensAfterRemoval(
document,
fullText,
fullTextOffset,
contentRange,
);

const numTokensRemovalRangeRemoved = calculateNumberOfTokensAfterRemoval(
document,
fullText,
fullTextOffset,
removalRange,
);

return numTokensContentRangeRemoved !== numTokensRemovalRangeRemoved;
function getLeadingCharacter(editor: TextEditor, contentRange: Range): string {
const { start } = contentRange;
const line = editor.document.lineAt(start);
return start.isAfter(line.range.start)
? editor.document.getText(new Range(start.translate(undefined, -1), start))
: "";
}

function calculateNumberOfTokensAfterRemoval(
document: TextDocument,
fullText: string,
fullTextOffset: number,
removalRange: Range,
): number {
const startIndex = document.offsetAt(removalRange.start) - fullTextOffset;
const endIndex = document.offsetAt(removalRange.end) - fullTextOffset;
const modifiedText = fullText.slice(0, startIndex) + fullText.slice(endIndex);
const tokens = tokenize(modifiedText, document.languageId, (m) => m);
return tokens.length;
function getTrailingCharacter(editor: TextEditor, contentRange: Range): string {
const { end } = contentRange;
const line = editor.document.lineAt(end);
return end.isBefore(line.range.end)
? editor.document.getText(new Range(end.translate(undefined, 1), end))
: "";
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ initialState:
end: {line: 0, character: 2}
finalState:
documentContents: |-
fn() {
fn () {
println!("Hello, world!");
}
selections:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ initialState:
marks: {}
finalState:
documentContents: |
fn() -> Result<(), Error> {
fn () -> Result<(), Error> {

}
selections:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ finalState:
"hello "
" hello"
" hello "
return.b
return .b
selections:
- anchor: {line: 4, character: 0}
active: {line: 4, character: 0}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck bat
action: {name: remove}
targets:
- type: primitive
mark: {type: decoratedSymbol, symbolColor: default, character: b}
usePrePhraseSnapshot: true
initialState:
documentContents: "aaa bbb: ccc"
selections:
- anchor: {line: 0, character: 12}
active: {line: 0, character: 12}
marks:
default.b:
start: {line: 0, character: 4}
end: {line: 0, character: 7}
finalState:
documentContents: "aaa: ccc"
selections:
- anchor: {line: 0, character: 8}
active: {line: 0, character: 8}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: foo = bar.baz
selections:
- anchor: {line: 0, character: 6}
active: {line: 0, character: 10}
marks: {}
finalState:
documentContents: foo = baz
selections:
- anchor: {line: 0, character: 6}
active: {line: 0, character: 6}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: "\"a b c\""
selections:
- anchor: {line: 0, character: 5}
active: {line: 0, character: 5}
marks: {}
finalState:
documentContents: "\"a b\""
selections:
- anchor: {line: 0, character: 4}
active: {line: 0, character: 4}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: a.b c
selections:
- anchor: {line: 0, character: 2}
active: {line: 0, character: 2}
marks: {}
finalState:
documentContents: a. c
selections:
- anchor: {line: 0, character: 2}
active: {line: 0, character: 2}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: a! -b
selections:
- anchor: {line: 0, character: 2}
active: {line: 0, character: 2}
marks: {}
finalState:
documentContents: a -b
selections:
- anchor: {line: 0, character: 1}
active: {line: 0, character: 1}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: a b
selections:
- anchor: {line: 0, character: 0}
active: {line: 0, character: 0}
marks: {}
finalState:
documentContents: b
selections:
- anchor: {line: 0, character: 0}
active: {line: 0, character: 0}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: a b
selections:
- anchor: {line: 0, character: 2}
active: {line: 0, character: 2}
marks: {}
finalState:
documentContents: a
selections:
- anchor: {line: 0, character: 1}
active: {line: 0, character: 1}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: "\"foo-bar bongo-bazman\""
selections:
- anchor: {line: 0, character: 9}
active: {line: 0, character: 21}
marks: {}
finalState:
documentContents: "\"foo-bar\""
selections:
- anchor: {line: 0, character: 8}
active: {line: 0, character: 8}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
languageId: plaintext
command:
version: 5
spokenForm: chuck token
action: {name: remove}
targets:
- type: primitive
modifiers:
- type: containingScope
scopeType: {type: token}
usePrePhraseSnapshot: true
initialState:
documentContents: hello there,
selections:
- anchor: {line: 0, character: 6}
active: {line: 0, character: 6}
marks: {}
finalState:
documentContents: hello,
selections:
- anchor: {line: 0, character: 5}
active: {line: 0, character: 5}
Loading