Skip to content

Fixes #86. "gethexaformat" messes up column widths. #89

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NStack.sln
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Files", "Solution
.github\workflows\build.yml = .github\workflows\build.yml
.github\workflows\publish.yml = .github\workflows\publish.yml
README.md = README.md
testenvironments.json = testenvironments.json
EndProjectSection
EndProject
Global
Expand Down
81 changes: 20 additions & 61 deletions NStack/unicode/Rune.ColumnWidth.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
//
using NStack;

namespace System
{
public partial struct Rune
{
static uint[,] combining = new uint[,] {
namespace System {
public partial struct Rune {
static uint [,] combining = new uint [,] {
{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
Expand Down Expand Up @@ -48,16 +46,16 @@ public partial struct Rune
{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2e9a, 0x2e9a },
{ 0x2ef4, 0x2eff }, { 0x2fd6, 0x2fef }, { 0x2ffc, 0x2fff },
{ 0x31e4, 0x31ef }, { 0x321f, 0x321f }, { 0xA48D, 0xA48F },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2E9A, 0x2E9A },
{ 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
{ 0x31E4, 0x31EF }, { 0x321F, 0x321F }, { 0xA48D, 0xA48F },
{ 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
{ 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE1A, 0xFE1F },
{ 0xFE20, 0xFE23 }, { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 },
{ 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
};

static uint[,] combiningWideChars = new uint[,] {
static uint [,] combiningWideChars = new uint [,] {
/* Hangul Jamo init. consonants - 0x1100, 0x11ff */
/* Miscellaneous Technical - 0x2300, 0x23ff */
/* Hangul Syllables - 0x11a8, 0x11c2 */
Expand All @@ -84,22 +82,21 @@ public partial struct Rune
{ 0x3131, 0x318e }, { 0x3190, 0x3247 }, { 0x3250, 0x4dbf },
{ 0x4e00, 0xa4c6 }, { 0xa960, 0xa97c }, { 0xac00 ,0xd7a3 },
{ 0xf900, 0xfaff }, { 0xfe10, 0xfe1f }, { 0xfe30 ,0xfe6b },
{ 0xff01, 0xff60 }, { 0xffe0, 0xffe6 }
{ 0xff01, 0xff60 }, { 0xffe0, 0xffe6 }, { 0x10000, 0x10ffff }
};

static int bisearch(uint rune, uint[,] table, int max)
static int bisearch (uint rune, uint [,] table, int max)
{
int min = 0;
int mid;

if (rune < table[0, 0] || rune > table[max, 1])
if (rune < table [0, 0] || rune > table [max, 1])
return 0;
while (max >= min)
{
while (max >= min) {
mid = (min + max) / 2;
if (rune > table[mid, 1])
if (rune > table [mid, 1])
min = mid + 1;
else if (rune < table[mid, 0])
else if (rune < table [mid, 0])
max = mid - 1;
else
return 1;
Expand Down Expand Up @@ -127,82 +124,44 @@ static int bisearch(uint rune, uint[,] table, int max)
// return false;
//}

/// <summary>
/// Keeps only the lowest <paramref name="length"/> hexadecimal digits of <paramref name="rune"/>.
/// </summary>
/// <remarks>
/// This is a truncation, not a formatting step: with a <paramref name="length"/> of 4 every code point
/// above 0xFFFF is folded onto the value formed by its low 16 bits (0x1F600 becomes 0xF600), so a table
/// lookup on the result answers for a different code point than the one that was passed in.
/// </remarks>
/// <param name="rune">The code point to truncate.</param>
/// <param name="length">Number of low-order hexadecimal digits to keep; must be at least 1.</param>
/// <returns>The value formed by the lowest <paramref name="length"/> hexadecimal digits of <paramref name="rune"/>.</returns>
/// <exception cref="T:System.ArgumentOutOfRangeException"><paramref name="length"/> is zero or negative.</exception>
static uint gethexaformat(uint rune, int length)
{
	if (length <= 0)
		throw new ArgumentOutOfRangeException(nameof(length), "At least one hexadecimal digit must be kept.");

	// A uint has at most 8 hexadecimal digits, so keeping 8 or more keeps every bit.
	if (length >= 8)
		return rune;

	// Each hexadecimal digit is 4 bits wide: the low `length` digits are the low 4 * length bits.
	// Masking avoids the string round trip and the signed int.Parse of the previous implementation.
	uint mask = (1u << (4 * length)) - 1;
	return rune & mask;
}

/// <summary>
/// Check if the rune is a non-spacing character.
/// </summary>
/// <param name="rune">The rune.</param>
/// <returns><c>true</c> if the rune is a non-spacing character; otherwise, <c>false</c>.</returns>
public static bool IsNonSpacingChar(uint rune)
public static bool IsNonSpacingChar (uint rune)
{
return bisearch(rune, combining, combining.GetLength(0) - 1) != 0;
return bisearch (rune, combining, combining.GetLength (0) - 1) != 0;
}

/// <summary>
/// Check if the rune is a wide character.
/// </summary>
/// <param name="rune">The rune.</param>
/// <returns><c>true</c> if the rune is a wide character; otherwise, <c>false</c>.</returns>
public static bool IsWideChar(uint rune)
public static bool IsWideChar (uint rune)
{
return bisearch(gethexaformat(rune, 4), combiningWideChars, combiningWideChars.GetLength(0) - 1) != 0;
return bisearch (rune, combiningWideChars, combiningWideChars.GetLength (0) - 1) != 0;
}

static char firstSurrogatePairChar = '\0';

/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
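/// <returns>The width in columns: -1 if the value is not printable (below 0x20, which includes the null character, or from 0x7f up to but not including 0xa0), 0 if the rune is a non-spacing character, otherwise the number of columns (1 or 2) that the rune occupies.</returns>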
/// <param name="rune">The rune.</param>
public static int ColumnWidth(Rune rune)
public static int ColumnWidth (Rune rune)
{
if (firstSurrogatePairChar != '\0')
firstSurrogatePairChar = '\0';
uint irune = (uint)rune;
if (irune < 0x20 || (irune >= 0x7f && irune < 0xa0))
return -1;
if (irune < 0x7f)
return 1;
/* binary search in table of non-spacing characters */
if (bisearch(gethexaformat(irune, 4), combining, combining.GetLength(0) - 1) != 0)
if (bisearch (irune, combining, combining.GetLength (0) - 1) != 0)
return 0;
/* if we arrive here, ucs is not a combining or C0/C1 control character */
return 1 +
(bisearch(gethexaformat(irune, 4), combiningWideChars, combiningWideChars.GetLength(0) - 1) != 0 ? 1 : 0);
}

/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
/// <returns>The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies.</returns>
/// <param name="c">The char.</param>
public static int ColumnWidth(char c)
{
if (!((Rune)c).IsValid)
{
if (firstSurrogatePairChar == '\0')
{
firstSurrogatePairChar = c;
return 0;
}
else if (firstSurrogatePairChar != '\0')
{
var r = new Rune(firstSurrogatePairChar, c);
firstSurrogatePairChar = '\0';
return ColumnWidth(r);
}
}
if (firstSurrogatePairChar != '\0')
firstSurrogatePairChar = '\0';

return ColumnWidth((Rune)c);
(bisearch (irune, combiningWideChars, combiningWideChars.GetLength (0) - 1) != 0 ? 1 : 0);
}
}
}
87 changes: 50 additions & 37 deletions NStack/unicode/Rune.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace System {
/// <remarks>
///
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
[StructLayout (LayoutKind.Sequential)]
public partial struct Rune {
// Stores the rune
uint value;
Expand Down Expand Up @@ -54,9 +54,8 @@ public partial struct Rune {
/// </remarks>
public Rune (uint rune)
{
if (rune > maxRune)
{
throw new ArgumentOutOfRangeException("Value is beyond the supplementary range!");
if (rune > maxRune) {
throw new ArgumentOutOfRangeException ("Value is beyond the supplementary range!");
}
this.value = rune;
}
Expand All @@ -77,43 +76,48 @@ public Rune (char ch)
/// <param name="lowSurrogate">The low surrogate code point.</param>
public Rune (uint highSurrogate, uint lowSurrogate)
{
if (EncodeSurrogatePair(highSurrogate, lowSurrogate, out Rune rune))
{
if (EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune)) {
this.value = rune;
}
else if (highSurrogate < highSurrogateMin || lowSurrogate > lowSurrogateMax)
{
throw new ArgumentOutOfRangeException($"Must be between {highSurrogateMin:x} and {lowSurrogateMax:x} inclusive!");
}
else
{
throw new ArgumentOutOfRangeException($"Resulted rune must be less or equal to {(uint)MaxRune:x}!");
} else if (highSurrogate < highSurrogateMin || lowSurrogate > lowSurrogateMax) {
throw new ArgumentOutOfRangeException ($"Must be between {highSurrogateMin:x} and {lowSurrogateMax:x} inclusive!");
} else {
throw new ArgumentOutOfRangeException ($"Resulted rune must be less or equal to {(uint)MaxRune:x}!");
}
}

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> can be encoded as UTF-8
/// </summary>
/// <value><c>true</c> if is valid; otherwise, <c>false</c>.</value>
public bool IsValid => ValidRune(value);
public bool IsValid => ValidRune (value);

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a surrogate code point.
/// </summary>
/// <returns><c>true</c> if this is a surrogate code point; otherwise, <c>false</c>.</returns>
public bool IsSurrogate => IsSurrogateRune(value);
public bool IsSurrogate => IsSurrogateRune (value);

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a valid surrogate pair.
/// </summary>
/// <returns><c>true</c> if this is a valid surrogate pair; otherwise, <c>false</c>.</returns>
public bool IsSurrogatePair => DecodeSurrogatePair(value, out _);
public bool IsSurrogatePair => DecodeSurrogatePair (value, out _);

/// <summary>
/// Reports whether the code point held by this <see cref="T:System.Rune"/> falls in the high-surrogate range.
/// </summary>
/// <value><c>true</c> when the value lies between <c>highSurrogateMin</c> and <c>highSurrogateMax</c>, inclusive; otherwise, <c>false</c>.</value>
public bool IsHighSurrogate {
	get {
		// Inside the range means: not below the lower bound and not above the upper bound.
		return !(this.value < highSurrogateMin || this.value > highSurrogateMax);
	}
}

/// <summary>
/// Reports whether the code point held by this <see cref="T:System.Rune"/> falls in the low-surrogate range.
/// </summary>
/// <value><c>true</c> when the value lies between <c>lowSurrogateMin</c> and <c>lowSurrogateMax</c>, inclusive; otherwise, <c>false</c>.</value>
public bool IsLowSurrogate {
	get {
		// Inside the range means: not below the lower bound and not above the upper bound.
		return !(this.value < lowSurrogateMin || this.value > lowSurrogateMax);
	}
}

/// <summary>
/// Check if the rune is a non-spacing character.
/// </summary>
/// <returns><c>true</c> if this rune is a non-spacing character; otherwise, <c>false</c>.</returns>
public bool IsNonSpacing => IsNonSpacingChar(value);
public bool IsNonSpacing => IsNonSpacingChar (value);

// Code points in the surrogate range are not valid for UTF-8.
const uint highSurrogateMin = 0xd800;
Expand Down Expand Up @@ -539,8 +543,7 @@ public static int InvalidIndex (byte [] buffer)
public static bool ValidRune (Rune rune)
{
if ((0 <= (int)rune.value && rune.value < highSurrogateMin) ||
(lowSurrogateMax < rune.value && rune.value <= MaxRune.value))
{
(lowSurrogateMax < rune.value && rune.value <= MaxRune.value)) {
return true;
}

Expand All @@ -552,7 +555,7 @@ public static bool ValidRune (Rune rune)
/// </summary>
/// <param name="rune">The rune.</param>
/// <returns><c>true</c>If is a surrogate code point, <c>false</c>otherwise.</returns>
public static bool IsSurrogateRune(uint rune)
public static bool IsSurrogateRune (uint rune)
{
return rune >= highSurrogateMin && rune <= lowSurrogateMax;
}
Expand All @@ -564,12 +567,11 @@ public static bool IsSurrogateRune(uint rune)
/// <param name="lowSurrogate">The low surrogate code point.</param>
/// <param name="rune">The returning rune.</param>
/// <returns><c>true</c> if the returning rune is greater than 0; otherwise, <c>false</c>.</returns>
public static bool EncodeSurrogatePair(uint highsurrogate, uint lowSurrogate, out Rune rune)
public static bool EncodeSurrogatePair (uint highsurrogate, uint lowSurrogate, out Rune rune)
{
rune = 0;
if (highsurrogate >= highSurrogateMin && highsurrogate <= highSurrogateMax &&
lowSurrogate >= lowSurrogateMin && lowSurrogate <= lowSurrogateMax)
{
lowSurrogate >= lowSurrogateMin && lowSurrogate <= lowSurrogateMax) {
//return 0x10000 + ((highsurrogate - highSurrogateMin) * 0x0400) + (lowSurrogate - lowSurrogateMin);
return (rune = 0x10000 + ((highsurrogate - highSurrogateMin) << 10) + (lowSurrogate - lowSurrogateMin)) > 0;
}
Expand All @@ -582,14 +584,13 @@ public static bool EncodeSurrogatePair(uint highsurrogate, uint lowSurrogate, ou
/// <param name="rune">The rune</param>
/// <param name="chars">The chars if is valid. Empty otherwise.</param>
/// <returns><c>true</c> if it is a valid surrogate pair; otherwise, <c>false</c>.</returns>
public static bool DecodeSurrogatePair(uint rune, out char [] chars)
public static bool DecodeSurrogatePair (uint rune, out char [] chars)
{
uint s = rune - 0x10000;
uint h = highSurrogateMin + (s >> 10);
uint l = lowSurrogateMin + (s & 0x3FF);

if (EncodeSurrogatePair (h, l, out Rune dsp) && dsp == rune)
{
if (EncodeSurrogatePair (h, l, out Rune dsp) && dsp == rune) {
chars = new char [] { (char)h, (char)l };
return true;
}
Expand All @@ -603,13 +604,11 @@ public static bool DecodeSurrogatePair(uint rune, out char [] chars)
/// <param name="str">The string.</param>
/// <param name="chars">The chars if is valid. Empty otherwise.</param>
/// <returns><c>true</c> if it is a valid surrogate pair; otherwise, <c>false</c>.</returns>
public static bool DecodeSurrogatePair(string str, out char [] chars)
public static bool DecodeSurrogatePair (string str, out char [] chars)
{
if (str.Length == 2)
{
chars = str.ToCharArray();
if (EncodeSurrogatePair(chars[0], chars[1], out _))
{
if (str.Length == 2) {
chars = str.ToCharArray ();
if (EncodeSurrogatePair (chars [0], chars [1], out _)) {
return true;
}
}
Expand All @@ -622,9 +621,9 @@ public static bool DecodeSurrogatePair(string str, out char [] chars)
/// </summary>
/// <returns>The number of UTF8 bytes expected given the first prefix.</returns>
/// <param name="firstByte">Is the first byte of a UTF8 sequence.</param>
public static int ExpectedSizeFromFirstByte(byte firstByte)
public static int ExpectedSizeFromFirstByte (byte firstByte)
{
var x = first[firstByte];
var x = first [firstByte];

// Invalid runes, just return 1 for byte, and let higher level pass to print
if (x == xx)
Expand Down Expand Up @@ -806,7 +805,7 @@ public static Rune To (Case toCase, Rune rune)
{
uint rval = rune.value;
switch (toCase) {
case Case.Lower:
case Case.Lower:
return new Rune (NStack.Unicode.To (NStack.Unicode.Case.Lower, rval));
case Case.Title:
return new Rune (NStack.Unicode.To (NStack.Unicode.Case.Title, rval));
Expand Down Expand Up @@ -874,6 +873,20 @@ public static Rune To (Case toCase, Rune rune)
/// <param name="rune">Rune.</param>
public static implicit operator uint (Rune rune) => rune.value;

/// <summary>
/// Implicit operator conversion from a C# integer into a rune.
/// </summary>
/// <remarks>
/// The integer is reinterpreted as an unsigned 32-bit value with overflow checking explicitly disabled,
/// so a negative argument is always passed on to the <see cref="T:System.Rune"/> constructor's range check
/// instead of failing the cast itself when the assembly is compiled with overflow checking enabled.
/// </remarks>
/// <returns>Rune representing the C# integer</returns>
/// <param name="value">32-bit Integer.</param>
/// <exception cref="T:System.ArgumentOutOfRangeException">Thrown by the constructor when the converted value is greater than the maximum rune.</exception>
public static implicit operator Rune (int value) => new Rune (unchecked((uint)value));

/// <summary>
/// Implicit operator conversion from a byte into a rune.
/// </summary>
/// <returns>Rune constructed from the byte.</returns>
/// <param name="byt">Byte.</param>
public static implicit operator Rune (byte byt)
{
	return new Rune (byt);
}

/// <summary>
/// Implicit operator conversion from a C# char into a rune.
/// </summary>
Expand Down Expand Up @@ -905,7 +918,7 @@ public override string ToString ()
{
var buff = new byte [4];
var size = EncodeRune (this, buff, 0);
return System.Text.Encoding.UTF8.GetString(buff, 0, size);
return System.Text.Encoding.UTF8.GetString (buff, 0, size);
}

/// <summary>
Expand Down
6 changes: 3 additions & 3 deletions NStackTests/NStackTests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
<Version>0.20.0</Version>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.4.0" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" />
<PackageReference Include="coverlet.collector" Version="3.1.2">
<PackageReference Include="NUnit3TestAdapter" Version="4.3.1" />
<PackageReference Include="coverlet.collector" Version="3.2.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand Down
Loading