Skip to content

Commit 8c82dd5

Browse files
committed
Fast path IndexOf and variants for ASCII
In ICU, doing any sort of index-of operation (which includes prefix and suffix checking) is relatively expensive. ICU ends up doing a fair amount of work and allocation in order to construct a searcher object which could be reused, but our APIs are not amenable to working in this manner. However, for some cultures we can often fast-path ASCII searches when we know that ASCII and Ordinal comparisons are the same, as is the case for both Invariant and en-US. This change has CompareInfo hold some additional state about a locale to decide whether we can do this optimization, and then wires it up to IndexOf, LastIndexOf, IsPrefix and IsSuffix. In the future, we can try to extend the set of cultures that we perform this optimization on by coming up with better checks for when it is safe to perform this transformation. Today, this optimization does not apply when IgnoreSymbols is set, because we would need to blank out some ASCII symbol characters. If this ends up being a common operation, we could consider having ordinal implementations that also ignore symbols. This represents the best that we can do for dotnet/corefx#3672. It gets us back to where we were before for many common real-world cases. Fixes dotnet/corefx#3672.
1 parent b8a0a62 commit 8c82dd5

File tree

1 file changed

+42
-1
lines changed

1 file changed

+42
-1
lines changed

src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@ public partial class CompareInfo
1313
[SecurityCritical]
1414
private readonly Interop.GlobalizationInterop.SafeSortHandle m_sortHandle;
1515

16+
private readonly bool m_isAsciiEqualityOrdinal;
17+
1618
/// <summary>
/// Builds the comparer state for <paramref name="culture"/>: copies its name and sort name,
/// obtains the sort handle for that sort name, and records whether the ASCII fast path applies.
/// </summary>
/// <param name="culture">Culture whose name and sort name are captured.</param>
[SecuritySafeCritical]
internal CompareInfo(CultureInfo culture)
{
    m_name = culture.m_name;
    m_sortName = culture.SortName;

    // The sort handle lookup takes the sort name as UTF-8 bytes.
    byte[] sortNameUtf8 = System.Text.Encoding.UTF8.GetBytes(m_sortName);
    m_sortHandle = Interop.GlobalizationInterop.GetSortHandle(sortNameUtf8);

    // ASCII comparisons are treated as ordinal only for the "" and "en-US" sort names;
    // every other sort name keeps taking the regular (non fast-path) route.
    m_isAsciiEqualityOrdinal = (m_sortName == "" || m_sortName == "en-US");
}
2326

2427
[SecurityCritical]
@@ -161,6 +164,11 @@ private unsafe int IndexOfCore(string source, string target, int startIndex, int
161164
return IndexOfOrdinal(source, target, startIndex, count, ignoreCase: false);
162165
}
163166

167+
if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && target.IsAscii())
168+
{
169+
return IndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
170+
}
171+
164172
fixed (char* pSource = source)
165173
{
166174
int index = Interop.GlobalizationInterop.IndexOf(m_sortHandle, target, target.Length, pSource + startIndex, count, options);
@@ -180,12 +188,17 @@ private unsafe int LastIndexOfCore(string source, string target, int startIndex,
180188
{
181189
return startIndex;
182190
}
183-
191+
184192
if (options == CompareOptions.Ordinal)
185193
{
186194
return LastIndexOfOrdinal(source, target, startIndex, count, ignoreCase: false);
187195
}
188196

197+
if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && target.IsAscii())
198+
{
199+
return LastIndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
200+
}
201+
189202
// startIndex is the index into source where we start search backwards from. leftStartIndex is the index into source
190203
// of the start of the string that is count characters away from startIndex.
191204
int leftStartIndex = (startIndex - count + 1);
@@ -205,6 +218,11 @@ private bool StartsWith(string source, string prefix, CompareOptions options)
205218
Contract.Assert(!string.IsNullOrEmpty(prefix));
206219
Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
207220

221+
if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && prefix.IsAscii())
222+
{
223+
return IsPrefix(source, prefix, GetOrdinalCompareOptions(options));
224+
}
225+
208226
return Interop.GlobalizationInterop.StartsWith(m_sortHandle, prefix, prefix.Length, source, source.Length, options);
209227
}
210228

@@ -215,6 +233,11 @@ private bool EndsWith(string source, string suffix, CompareOptions options)
215233
Contract.Assert(!string.IsNullOrEmpty(suffix));
216234
Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
217235

236+
if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && suffix.IsAscii())
237+
{
238+
return IsSuffix(source, suffix, GetOrdinalCompareOptions(options));
239+
}
240+
218241
return Interop.GlobalizationInterop.EndsWith(m_sortHandle, suffix, suffix.Length, source, source.Length, options);
219242
}
220243

@@ -251,5 +274,23 @@ internal unsafe int GetHashCodeOfStringCore(string source, CompareOptions option
251274
[DllImport(JitHelpers.QCall)]
252275
[SuppressUnmanagedCodeSecurity]
253276
private static unsafe extern int InternalHashSortKey(byte* sortKey, int sortKeyLength, [MarshalAs(UnmanagedType.Bool)] bool forceRandomizedHashing, long additionalEntropy);
277+
278+
/// <summary>
/// Translates a set of compare options into the ordinal option that the ASCII fast path
/// hands to the ordinal search routines.
/// </summary>
/// <param name="options">The options supplied by the caller.</param>
/// <returns>
/// <see cref="CompareOptions.OrdinalIgnoreCase"/> when the <see cref="CompareOptions.IgnoreCase"/>
/// flag is present in <paramref name="options"/>; otherwise <see cref="CompareOptions.Ordinal"/>.
/// </returns>
private static CompareOptions GetOrdinalCompareOptions(CompareOptions options)
{
    // Only the IgnoreCase bit is inspected; every other flag is disregarded here.
    bool caseInsensitive = (options & CompareOptions.IgnoreCase) != CompareOptions.None;

    return caseInsensitive ? CompareOptions.OrdinalIgnoreCase : CompareOptions.Ordinal;
}
289+
290+
/// <summary>
/// Reports whether the given options allow the ASCII fast path to substitute an ordinal operation.
/// </summary>
/// <param name="options">The options supplied by the caller.</param>
/// <returns>
/// <c>false</c> when <see cref="CompareOptions.IgnoreSymbols"/> is set in
/// <paramref name="options"/>; <c>true</c> otherwise.
/// </returns>
private static bool CanUseAsciiOrdinalForOptions(CompareOptions options)
{
    // IgnoreSymbols is the one Ignore* option that affects ASCII characters (for example '),
    // so its presence rules out the ordinal substitution.
    bool ignoresSymbols = (options & CompareOptions.IgnoreSymbols) != CompareOptions.None;

    return !ignoresSymbols;
}
254295
}
255296
}

0 commit comments

Comments
 (0)