Skip to content

Commit 4e5260d

Browse files
committed
avoid allocating hats to the first letter of a word in a token
I propose that we declare that it fixes #1658, at least for now.
1 parent 90ca6ac commit 4e5260d

File tree

8 files changed

+33
-11
lines changed

8 files changed

+33
-11
lines changed

packages/cursorless-engine/src/core/HatTokenMapImpl.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ const PRE_PHRASE_SNAPSHOT_MAX_AGE_NS = BigInt(6e10); // 60 seconds
2323
*/
2424
export class HatTokenMapImpl implements HatTokenMap {
2525
/**
26-
* This is the active map the changes every time we reallocate hats. It is
26+
* This is the active map that changes every time we reallocate hats. It is
2727
* liable to change in the middle of a phrase.
2828
*/
2929
private activeMap: IndividualHatMap;

packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordScopeHandler.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ export default class WordScopeHandler extends NestedScopeHandler {
1616
domain,
1717
}: TargetScope): TargetScope[] {
1818
const { document } = editor;
19-
// FIXME: Switch to using getMatchesInRange once we are able to properly
20-
// mock away vscode for the unit tests in subtoken.test.ts
2119
const offset = document.offsetAt(domain.start);
2220
const matches = this.wordTokenizer.splitIdentifier(
2321
document.getText(domain),

packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@ import { matchText } from "../../../../util/regex";
44
const CAMEL_REGEX = /\p{Lu}?\p{Ll}+|\p{Lu}+(?!\p{Ll})|\p{N}+/gu;
55

66
/**
7-
* This class just encapsulates the word-splitting logic from
8-
* {@link WordScopeHandler}. We could probably just inline it into that class,
9-
* but for now we need it here because we can't yet properly mock away vscode
10-
* for the unit tests in subtoken.test.ts.
7+
* This class encapsulates word-splitting logic.
8+
* It is used by the {@link WordScopeHandler} and the hat allocator.
119
*/
1210
export default class WordTokenizer {
1311
private wordRegex: RegExp;

packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,13 @@ export const subtokenFixture: Fixture[] = [
8484
input: "_quickBrownFox_",
8585
expectedOutput: ["quick", "Brown", "Fox"],
8686
},
87+
{
88+
input: "thisIsATest",
89+
expectedOutput: ["this", "Is", "A", "Test"],
90+
},
91+
// TODO: Handle runs of adjacent acronyms correctly? (fixture left disabled for now)
92+
// {
93+
// input: "NSURLSession",
94+
// expectedOutput: ["NS", "URL", "Session"],
95+
// },
8796
];

packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ export type HatMetric = (hat: HatCandidate) => number;
1414
*/
1515
export const negativePenalty: HatMetric = ({ penalty }) => -penalty;
1616

17+
/**
18+
* @returns A metric that penalizes graphemes that are the first letter of a word within a token
19+
*/
20+
export const avoidFirstLetter: HatMetric = ({ isFirstLetter }) =>
21+
isFirstLetter ? -1 : 0;
22+
1723
/**
1824
* @param hatOldTokenRanks A map from a hat candidate (grapheme+style combination) to the score of the
1925
* token that used the given hat in the previous hat allocation.

packages/cursorless-engine/src/util/allocateHats/allocateHats.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@ import { Grapheme, TokenGraphemeSplitter } from "../../tokenGraphemeSplitter";
1414
import { chooseTokenHat } from "./chooseTokenHat";
1515
import { getHatRankingContext } from "./getHatRankingContext";
1616
import { getRankedTokens } from "./getRankedTokens";
17+
import WordTokenizer from "../../processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer";
1718

1819
export interface HatCandidate {
1920
grapheme: Grapheme;
2021
style: HatStyleName;
2122
penalty: number;
23+
isFirstLetter: boolean;
2224
}
2325

2426
/**
@@ -137,6 +139,10 @@ function getTokenRemainingHatCandidates(
137139
token: Token,
138140
availableGraphemeStyles: DefaultMap<string, HatStyleMap>,
139141
): HatCandidate[] {
142+
const words = new WordTokenizer(
143+
token.editor.document.languageId,
144+
).splitIdentifier(token.text);
145+
const firstLetters = new Set<number>(words.map((word) => word.index));
140146
return tokenGraphemeSplitter
141147
.getTokenGraphemes(token.text)
142148
.flatMap((grapheme) =>
@@ -145,13 +151,14 @@ function getTokenRemainingHatCandidates(
145151
grapheme,
146152
style,
147153
penalty,
154+
isFirstLetter: firstLetters.has(grapheme.tokenStartOffset),
148155
}),
149156
),
150157
);
151158
}
152159

153160
/**
154-
* @param token The token that recevied the hat
161+
* @param token The token that received the hat
155162
* @param chosenHat The hat we chose for the token
156163
* @returns An object indicating the hat assigned to the token, along with the
157164
* range of the grapheme upon which it sits

packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { HatStability, TokenHat } from "@cursorless/common";
22
import { HatCandidate } from "./allocateHats";
33
import { RankingContext } from "./getHatRankingContext";
44
import {
5+
avoidFirstLetter,
56
hatOldTokenRank,
67
isOldTokenHat,
78
minimumTokenRankContainingGrapheme,
@@ -71,7 +72,10 @@ export function chooseTokenHat(
7172
// 4. Narrow to the hats with the lowest penalty
7273
negativePenalty,
7374

74-
// 5. Prefer hats that sit on a grapheme that doesn't appear in any highly
75+
// 5. Avoid the first letter of any word within the token if possible
76+
avoidFirstLetter,
77+
78+
// 6. Prefer hats that sit on a grapheme that doesn't appear in any highly
7579
// ranked token
7680
minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks),
7781
])!;

packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ async function basic() {
3838

3939
await vscode.commands.executeCommand("cursorless.keyboard.modal.modeOn");
4040

41-
// Target default f
42-
await typeText("df");
41+
// Target default o
42+
await typeText("do");
4343

4444
// Target containing function
4545
await typeText("sf");

0 commit comments

Comments
 (0)