Skip to content
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
- Dev: Unwrapped `LimitedQueueSnapshot` to `std::vector`. (#6606)
- Dev: Simplified uses of `getMessageSnapshot`. (#6607)
- Dev: Disabled `llvm-prefer-static-over-anonymous-namespace` in clang-tidy. (#6610)
- Dev: Added experimental spell checker support. (#6446, #6703, #6722, #6730, #6731, #6779)
- Dev: Added experimental spell checker support. (#6446, #6703, #6722, #6730, #6731, #6779, #6780)
- Dev: Added Clazy linting in CI. (#6623)
- Dev: Added custom clang-tidy module linting in CI. (#6626)
- Dev: CMake option `USE_ALTERNATE_LINKER` now errors if the given linker can't be found. (#6692)
Expand Down
79 changes: 59 additions & 20 deletions src/widgets/splits/InputHighlighter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "providers/seventv/SeventvEmotes.hpp"
#include "providers/twitch/TwitchAccount.hpp"
#include "providers/twitch/TwitchChannel.hpp"
#include "singletons/Settings.hpp"

#include <QTextCharFormat>
#include <QTextDocument>
Expand All @@ -23,7 +24,7 @@ namespace {

using namespace chatterino;

bool isIgnoredWord(TwitchChannel *twitch, const QString &word)
bool isEmote(TwitchChannel *twitch, const QString &word)
{
EmoteName name{word};
if (twitch)
Expand All @@ -38,11 +39,6 @@ bool isIgnoredWord(TwitchChannel *twitch, const QString &word)
{
return true;
}

if (twitch->accessChatters()->contains(word))
{
return true;
}
}
if (getApp()->getBttvEmotes()->emote(name) ||
getApp()->getFfzEmotes()->emote(name) ||
Expand All @@ -60,22 +56,52 @@ bool isIgnoredWord(TwitchChannel *twitch, const QString &word)
return true;
}

return false;
}

bool isChatter(TwitchChannel *twitch, const QString &word)
{
if (twitch)
{
if (twitch->accessChatters()->contains(word) ||
(getSettings()->alwaysIncludeBroadcasterInUserCompletions &&
word.compare(twitch->getName(), Qt::CaseInsensitive) == 0))
{
return true;
}
}
return false;
}

bool isLink(const QString &token)
{
// TODO: Replace this with a link parser variant that doesn't return the parsed data
auto link = linkparser::parse(word);
auto link = linkparser::parse(token);
return link.has_value();
}

bool isIgnoredWord(TwitchChannel *twitch, const QString &word)
{
return isEmote(twitch, word) || isChatter(twitch, word);
}

bool isIgnoredToken(TwitchChannel *twitch, const QString &token)
{
return isEmote(twitch, token) || isLink(token);
}

} // namespace

namespace chatterino {

namespace inputhighlight::detail {

// FIXME: this also matches URLs - this probably needs to be some function like Firefox' mozEnglishWordUtils::FindNextWord
// A word is a string of unicode letters. Words are seperated by whitespace
// (tokenRegex) or, inside a token, by punctuation characters (except '_')
QRegularExpression wordRegex()
{
static QRegularExpression regex{
R"(\p{L}(?:\P{Z}+\p{L}+)*)",
R"((?<=^|(?!_)\p{P})\p{L}+(?=$|(?!_)\p{P}))",
QRegularExpression::PatternOption::UseUnicodePropertiesOption,
};
return regex;
Expand All @@ -87,6 +113,7 @@ InputHighlighter::InputHighlighter(SpellChecker &spellChecker, QObject *parent)
: QSyntaxHighlighter(parent)
, spellChecker(spellChecker)
, wordRegex(inputhighlight::detail::wordRegex())
, tokenRegex(R"(\S+)")
{
this->spellFmt.setUnderlineStyle(QTextCharFormat::SpellCheckUnderline);
this->spellFmt.setUnderlineColor(Qt::red);
Expand Down Expand Up @@ -138,20 +165,32 @@ void InputHighlighter::visitWords(
auto cmdTriggerLen = getApp()->getCommands()->commandTriggerLen(textView);
textView = textView.sliced(cmdTriggerLen);

#if QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
auto it = this->wordRegex.globalMatchView(textView);
#else
auto it = this->wordRegex.globalMatch(textView);
#endif
auto tokenIt = this->tokenRegex.globalMatchView(textView);

while (it.hasNext())
// iterate over whitespace-delimited tokens
while (tokenIt.hasNext())
{
auto match = it.next();
auto text = match.captured();
if (!isIgnoredWord(channel, text))
auto tokenMatch = tokenIt.next();
auto token = tokenMatch.captured();
if (isIgnoredToken(channel, token))
{
cb(text, static_cast<int>(match.capturedStart() + cmdTriggerLen),
static_cast<int>(text.size()));
continue;
}

auto wordIt = this->wordRegex.globalMatchView(token);

while (wordIt.hasNext())
{
auto wordMatch = wordIt.next();
auto word = wordMatch.captured();

if (!isIgnoredWord(channel, word))
{
cb(word,
static_cast<int>(cmdTriggerLen + tokenMatch.capturedStart() +
wordMatch.capturedStart()),
static_cast<int>(word.size()));
}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/widgets/splits/InputHighlighter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class InputHighlighter : public QSyntaxHighlighter
std::weak_ptr<TwitchChannel> channel;

QRegularExpression wordRegex;
QRegularExpression tokenRegex;
};

} // namespace chatterino
41 changes: 23 additions & 18 deletions tests/src/InputHighlighter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,17 +185,17 @@ TEST_F(InputHighlighterTest, getSpellCheckedWords)
{.input = "word word", .words = {"word", "word"}},
{.input = " word word ", .words = {"word", "word"}},
{.input = "word?", .words = {"word"}},
{.input = "word?word", .words = {"word?word"}},
{.input = "word-word", .words = {"word-word"}},
{.input = "word?word", .words = {"word", "word"}},
// FIXME: should be "word-word"
{.input = "word-word", .words = {"word", "word"}},
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems incorrect, if you can try to fix this in this PR that would be nice

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I change the regex to (?<=^|(?!_|\p{Pd})\p{P}|\p{Pd}\p{Pd})\p{Pd}?((?:\p{L}\p{Pd}?)*\p{L})\p{Pd}?(?=$|(?!_|\p{Pd})\p{P}|\p{Pd}\p{Pd}) (https://regex101.com/r/OK1L36/3) this would work but is pretty unreadable.

A patch with this regex would be:

diff --git a/src/widgets/splits/InputHighlighter.cpp b/src/widgets/splits/InputHighlighter.cpp
index 7e350277..b1ab035a 100644
--- a/src/widgets/splits/InputHighlighter.cpp
+++ b/src/widgets/splits/InputHighlighter.cpp
@@ -101,7 +101,7 @@ namespace inputhighlight::detail {
 QRegularExpression wordRegex()
 {
     static QRegularExpression regex{
-        R"((?<=^|(?!_)\p{P})\p{L}+(?=$|(?!_)\p{P}))",
+        R"((?<=^|(?!_|\p{Pd})\p{P}|\p{Pd}\p{Pd})\p{Pd}?((?:\p{L}\p{Pd}?)*\p{L})\p{Pd}?(?=$|(?!_|\p{Pd})\p{P}|\p{Pd}\p{Pd}))",
         QRegularExpression::PatternOption::UseUnicodePropertiesOption,
     };
     return regex;
@@ -182,7 +182,7 @@ void InputHighlighter::visitWords(
         while (wordIt.hasNext())
         {
             auto wordMatch = wordIt.next();
-            auto word = wordMatch.captured();
+            auto word = wordMatch.captured(1);
 
             if (!isIgnoredWord(channel, word))
             {
diff --git a/tests/src/InputHighlighter.cpp b/tests/src/InputHighlighter.cpp
index 4fb00fb5..e6e38af7 100644
--- a/tests/src/InputHighlighter.cpp
+++ b/tests/src/InputHighlighter.cpp
@@ -186,7 +186,7 @@ TEST_F(InputHighlighterTest, getSpellCheckedWords)
         {.input = "   word word  ", .words = {"word", "word"}},
         {.input = "word?", .words = {"word"}},
         {.input = "word?word", .words = {"word", "word"}},
-        {.input = "word-word", .words = {"word", "word"}},
+        {.input = "word-word", .words = {"word-word"}},
         {
             .input = "channel emotes 7TVEmote a BTTVEmote b FFZEmote c",
             .words = {"channel", "emotes", "a", "b", "c"},
@@ -265,6 +265,8 @@ TEST(InputHighlight, wordRegex)
          .words = {u"abc", u"foo", u"bar", u"baz"}},
         {.input = u"1234567,word/a123", .words = {u"word"}},
         {.input = u"'quotes\"", .words = {u"quotes"}},
+        {.input = u"word-word-", .words = {u"word-word"}},
+        {.input = u"-word--word", .words = {u"word", u"word"}},
     };
 
     auto re = inputhighlight::detail::wordRegex();
@@ -275,7 +277,7 @@ TEST(InputHighlight, wordRegex)
         auto match = re.globalMatchView(c.input);
         while (match.hasNext())
         {
-            got.emplace_back(match.next().capturedView());
+            got.emplace_back(match.next().capturedView(1));
         }
         ASSERT_EQ(got, c.words) << "index=" << i;
     }

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we need to be this complex, it's probably better to write a manual loop similar to what Firefox does in the referenced function (in hopes that's more readable).

imo, doesn't have to be in this PR: We can keep the current state, make sure that it's properly tested, and then replace it with a manual loop.

{
.input = "channel emotes 7TVEmote a BTTVEmote b FFZEmote c",
.words = {"channel", "emotes", "TVEmote", "a", "b", "c"},
.words = {"channel", "emotes", "a", "b", "c"},
},
{
.input = "global emotes 7TVGlobal a BTTVGlobal b FFZGlobal c "
"MyCoolTwitchEmote d",
// FIXME: TVGlobal shouldn't show up
.words = {"global", "emotes", "TVGlobal", "a", "b", "c", "d"},
.words = {"global", "emotes", "a", "b", "c", "d"},
},
{
.input = "/ban a user",
Expand Down Expand Up @@ -228,9 +228,14 @@ TEST_F(InputHighlighterTest, getSpellCheckedWords)
},
},
{
.input = "Hey, @userchatter a 123kappa123 b MyUser42 c",
// FIXME: 'kappa' and 'MyUser' shouldn't show up
.words = {"Hey", "a", "kappa", "b", "MyUser", "c"},
.input = "Hey, @userchatter, a 123kappa123 b MyUser42 c",
.words = {"Hey", "a", "b", "c"},
},
Comment thread
4rneee marked this conversation as resolved.
{
.input =
"twitch.tv ignore "
"https://wiki.chatterino.com/Help/#basic-troubleshooting links",
.words = {"ignore", "links"},
},
};

Expand All @@ -255,18 +260,18 @@ TEST(InputHighlight, wordRegex)
std::vector<Case> cases{
{.input = u"", .words = {}},
{.input = u"word", .words = {u"word"}},
{.input = u"word word", .words = {u"word", u"word"}},
{.input = u" word word ", .words = {u"word", u"word"}},
{.input = u"test/man", .words = {u"test", u"man"}},
{.input = u"word?", .words = {u"word"}},
{.input = u"word?word", .words = {u"word?word"}},
{.input = u"a12word", .words = {u"a12word"}},
{.input = u"word?word", .words = {u"word", u"word"}},
{.input = u"sentence.", .words = {u"sentence"}},
{.input = u"#hashtag", .words = {u"hashtag"}},
{.input = u"inogre123numbers", .words = {}},
{.input = u"under_score", .words = {}},
{.input = u"äwördü", .words = {u"äwördü"}},
// FIXME: should be the whole input
{.input = u"123kappa123", .words = {u"kappa"}},
{.input = u"123kappa", .words = {u"kappa"}},
{.input = u"kappa123", .words = {u"kappa"}},
{.input = u"abc @foo bar@baz@", .words = {u"abc", u"foo", u"bar@baz"}},
{.input = u"1234567 word a123", .words = {u"word", u"a"}},
{.input = u"abc!@foo#bar&(baz]",
.words = {u"abc", u"foo", u"bar", u"baz"}},
{.input = u"1234567,word/a123", .words = {u"word"}},
{.input = u"'quotes\"", .words = {u"quotes"}},
};

auto re = inputhighlight::detail::wordRegex();
Expand Down
Loading