Skip to content

Davhdavh parsestring 1 #233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 194 additions & 0 deletions src/CommandLine/StringToCommandLine/CSharpStyleCommandLineParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;

namespace CommandLine.StringToCommandLine
{
/// <summary>
/// Parses a command line the way C# parses string literals, splitting at each unquoted whitespace:
/// * "" -> one empty argument
/// * "abc" -> abc
/// * abc abc -> abc, abc
/// * "\"" -> "
/// * asd"asd -> error (quote in the middle of an unquoted argument)
/// * "asd -> error unterminated string
/// * \ -> error unterminated escape
/// * \['"\0abfnrtUuvx] -> decoded as in https://msdn.microsoft.com/en-us/library/ms228362.aspx
/// * \other -> error
/// Escape sequences are decoded both inside and outside quoted ranges.
/// </summary>
public class CSharpStyleCommandLineParser : StringToCommandLineParserBase
{
    /// <summary>
    /// Splits <paramref name="commandLine"/> into arguments. The result is produced lazily, so
    /// parse errors surface while the returned sequence is being enumerated.
    /// </summary>
    /// <param name="commandLine">The raw command line; null, empty or all-whitespace yields no arguments.</param>
    /// <returns>The decoded arguments in input order.</returns>
    /// <exception cref="StringToCommandLineParserBase.UnterminatedEscapeException">The input ends in the middle of an escape sequence.</exception>
    /// <exception cref="StringToCommandLineParserBase.UnrecognizedEscapeSequenceException">A backslash is followed by an unsupported or malformed escape.</exception>
    /// <exception cref="StringToCommandLineParserBase.UnquotedQuoteException">A quote opens inside an argument, or a closing quote is not followed by whitespace or end of input.</exception>
    /// <exception cref="StringToCommandLineParserBase.UnterminatedStringException">A quoted range is still open at end of input.</exception>
    public override IEnumerable<string> Parse(string commandLine)
    {
        if (string.IsNullOrWhiteSpace(commandLine))
            yield break;

        var currentArg = new StringBuilder();
        var quoting = false;
        var pos = 0;

        while (pos < commandLine.Length)
        {
            var c = commandLine[pos];

            if (c == '\\')
            {
                // ReadEscape leaves pos just past the whole escape sequence.
                currentArg.Append(ReadEscape(commandLine, ref pos));
                continue;
            }

            if (c == '"')
            {
                if (quoting)
                {
                    // A closing quote must be followed by whitespace or end of input.
                    pos++;
                    if (pos < commandLine.Length && !char.IsWhiteSpace(commandLine[pos]))
                        throw new UnquotedQuoteException();
                    yield return currentArg.ToString();
                    currentArg.Clear();
                    quoting = false;
                }
                else
                {
                    // An opening quote is only legal at the start of an argument.
                    if (currentArg.Length > 0)
                        throw new UnquotedQuoteException();
                    quoting = true;
                }
                // Skips the opening quote, or the separator that follows a closing quote.
                pos++;
                continue;
            }

            if (!quoting && char.IsWhiteSpace(c))
            {
                // Unquoted whitespace ends the current argument; runs of whitespace yield nothing.
                if (currentArg.Length > 0)
                    yield return currentArg.ToString();
                currentArg.Clear();
                pos++;
                continue;
            }

            currentArg.Append(c);
            pos++;
        }

        // Any quote still open here is unterminated, even if nothing followed it.
        if (quoting)
            throw new UnterminatedStringException();
        if (currentArg.Length > 0)
            yield return currentArg.ToString();
    }

    /// <summary>
    /// Decodes one escape sequence. On entry <paramref name="pos"/> is the index of the backslash;
    /// on return it is the index of the first character after the sequence.
    /// </summary>
    private static char ReadEscape(string text, ref int pos)
    {
        pos++; // step over the backslash
        if (pos >= text.Length)
            throw new UnterminatedEscapeException();

        var selector = text[pos];
        pos++; // step over the selector character

        switch (selector)
        {
            case '\'': return '\'';
            case '\"': return '\"';
            case '\\': return '\\';
            case '0': return '\0';
            case 'a': return '\a';
            case 'b': return '\b';
            case 'f': return '\f';
            case 'n': return '\n';
            case 'r': return '\r';
            case 't': return '\t';
            case 'v': return '\v';
            case 'x': return ReadVariableHex(text, ref pos);
            case 'u': return ReadFixedHex(text, ref pos, 4);
            case 'U': return ReadFixedHex(text, ref pos, 8);
            default: throw new UnrecognizedEscapeSequenceException();
        }
    }

    /// <summary>Reads the 1-4 hex digits of a <c>\x</c> escape, consuming as many as are present.</summary>
    private static char ReadVariableHex(string text, ref int pos)
    {
        if (pos >= text.Length)
            throw new UnterminatedEscapeException();

        var value = 0;
        var digits = 0;
        while (digits < 4 && pos < text.Length)
        {
            var nibble = HexValue(text[pos]);
            if (nibble < 0)
                break;
            value = (value << 4) | nibble;
            digits++;
            pos++;
        }

        // "\x" must be followed by at least one hex digit.
        if (digits == 0)
            throw new UnrecognizedEscapeSequenceException();
        return (char) value;
    }

    /// <summary>
    /// Reads exactly <paramref name="digitCount"/> hex digits of a <c>\u</c> or <c>\U</c> escape.
    /// Values above U+FFFF are rejected because the result must fit in a single <see cref="char"/>.
    /// </summary>
    private static char ReadFixedHex(string text, ref int pos, int digitCount)
    {
        if (pos + digitCount > text.Length)
            throw new UnterminatedEscapeException();

        uint value = 0;
        for (var i = 0; i < digitCount; i++)
        {
            var nibble = HexValue(text[pos + i]);
            if (nibble < 0)
                throw new UnrecognizedEscapeSequenceException();
            value = (value << 4) | (uint) nibble;
        }

        if (value > char.MaxValue)
            throw new UnrecognizedEscapeSequenceException();

        pos += digitCount;
        return (char) value;
    }

    /// <summary>Returns the value of an ASCII hex digit, or -1 when <paramref name="c"/> is not one.</summary>
    private static int HexValue(char c)
    {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
using System.Collections.Generic;
using System.Text;

namespace CommandLine.StringToCommandLine
{
/// <summary>
/// Parse commandlines like CommandLineToArgvW:
/// * 2n backslashes followed by a quotation mark produce n backslashes and toggle the quoted range.
/// * (2n) + 1 backslashes followed by a quotation mark produce n backslashes followed by a literal quotation mark.
/// * n backslashes not followed by a quotation mark simply produce n backslashes.
/// * A quotation mark directly after a closing quotation mark produces a literal quotation mark and re-opens the quoted range.
/// * Unterminated quoted strings at the end of the line ignores the missing quote.
/// </summary>
public class DefaultWindowsCommandLineParser : StringToCommandLineParserBase
{
    /// <summary>
    /// Splits <paramref name="commandLine"/> into arguments at unquoted whitespace. The result is
    /// produced lazily while the returned sequence is enumerated.
    /// </summary>
    /// <param name="commandLine">The raw command line; null, empty or all-whitespace yields no arguments.</param>
    /// <returns>The arguments in input order; an empty argument is returned only when it was quoted.</returns>
    public override IEnumerable<string> Parse(string commandLine)
    {
        if (string.IsNullOrWhiteSpace(commandLine))
            yield break;

        var currentArg = new StringBuilder();
        var quoting = false;
        var emptyIsAnArgument = false;
        // True only when the previous input character was an unescaped quote.
        var previousWasQuote = false;
        // Consecutive backslashes read from the INPUT immediately before the current character.
        // Counting them in the output buffer instead would re-count backslashes that an earlier
        // quote already processed.
        var pendingBackslashes = 0;

        foreach (var c in commandLine)
        {
            if (c == '\\')
            {
                // Copy optimistically; a following quote trims the surplus.
                currentArg.Append(c);
                pendingBackslashes++;
                previousWasQuote = false;
                continue;
            }

            var backslashes = pendingBackslashes;
            pendingBackslashes = 0;

            if (c == '"')
            {
                // Both rules keep n = backslashes / 2 of the copied backslashes.
                currentArg.Length -= backslashes - backslashes / 2;

                if (backslashes % 2 != 0)
                {
                    // Odd count: the last backslash escapes the quote, which becomes literal.
                    currentArg.Append(c);
                    // An escaped quote must not pair with a following quote as a doubled quote.
                    previousWasQuote = false;
                    continue;
                }

                // Even count (including zero): the quote toggles the quoted range.
                quoting = !quoting;
                emptyIsAnArgument = true;

                if (quoting && previousWasQuote)
                {
                    // Doubled quote within a quoted range is like escaping
                    currentArg.Append(c);
                    previousWasQuote = false; //prevent double quoting
                    continue;
                }

                previousWasQuote = true;
                continue;
            }

            if (!quoting && char.IsWhiteSpace(c))
            {
                // Accept empty arguments only if they are quoted
                if (currentArg.Length > 0 || emptyIsAnArgument)
                    yield return currentArg.ToString();
                // Reset for next argument
                currentArg.Clear();
                emptyIsAnArgument = false;
            }
            else
            {
                // Copy character from input, no special meaning
                currentArg.Append(c);
            }

            previousWasQuote = false;
        }

        // Save last argument
        if (currentArg.Length > 0 || emptyIsAnArgument)
            yield return currentArg.ToString();
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System;
using System.Collections.Generic;

namespace CommandLine.StringToCommandLine
{
/// <summary>
/// Base type for parsers that split a single command-line string into arguments.
/// Also hosts the exception types that implementations throw on malformed input.
/// </summary>
public abstract class StringToCommandLineParserBase
{
    /// <summary>Splits <paramref name="commandLine"/> into its individual arguments.</summary>
    /// <param name="commandLine">The raw command-line text.</param>
    /// <returns>The parsed arguments.</returns>
    public abstract IEnumerable<string> Parse(string commandLine);

    /// <summary>Signals a quoted string that is never closed.</summary>
    public class UnterminatedStringException : ArgumentException
    {
    }

    /// <summary>Signals a backslash followed by an unsupported escape sequence.</summary>
    public class UnrecognizedEscapeSequenceException : ArgumentException
    {
    }

    /// <summary>Signals a quotation mark at a position where none is allowed.</summary>
    public class UnquotedQuoteException : ArgumentException
    {
    }

    /// <summary>Signals an escape sequence cut short by the end of the input.</summary>
    public class UnterminatedEscapeException : ArgumentException
    {
    }
}
}
4 changes: 3 additions & 1 deletion tests/CommandLine.Tests/CommandLine.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@
<Compile Include="Unit\ParserTests.cs" />
<Compile Include="Unit\Text\HelpTextTests.cs" />
<Compile Include="Unit\UnParserExtensionsTests.cs" />
<Compile Include="Unit\StringToCommandLine\CSharpStyleCommandLineParserTests.cs" />
<Compile Include="Unit\StringToCommandLine\DefaultWindowsCommandLineParserTests.cs" />
</ItemGroup>
<ItemGroup>
<None Include="packages.config">
Expand Down Expand Up @@ -455,4 +457,4 @@
</Choose>
<Import Project="..\..\packages\xunit.runner.visualstudio\build\$(__paket__xunit_runner_visualstudio_props).props" Condition="Exists('..\..\packages\xunit.runner.visualstudio\build\$(__paket__xunit_runner_visualstudio_props).props')" Label="Paket" />
<Import Project="..\..\packages\xunit.runner.visualstudio\build\$(__paket__xunit_runner_visualstudio_targets).targets" Condition="Exists('..\..\packages\xunit.runner.visualstudio\build\$(__paket__xunit_runner_visualstudio_targets).targets')" Label="Paket" />
</Project>
</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
using System.Collections;
using System.Linq;
using CommandLine.StringToCommandLine;
using Xunit;

namespace CommandLine.Tests.Unit.StringToCommandLine
{
/// <summary>
/// Unit tests for <see cref="CSharpStyleCommandLineParser"/>.
/// </summary>
public class CSharpStyleCommandLineParserTests
{
    // A single bare word is returned unchanged.
    [Fact]
    public void TestMethod1()
    {
        RunTest("test", new[] {"test"});
    }

    // Unquoted whitespace separates arguments.
    [Fact]
    public void TestMethod2()
    {
        RunTest("test test", new[] {"test", "test"});
    }

    // Surrounding quotes are removed from a quoted argument.
    [Fact]
    public void TestMethod3()
    {
        RunTest("test \"test\"", new[] {"test", "test"});
    }

    // Backslash-escaped quotes inside a quoted argument become literal quotes.
    [Fact]
    public void TestMethod4()
    {
        RunTest("test \"te\\\"s\\\"t\"", new[] {"test", "te\"s\"t"});
    }

    // Several escaped quotes in a row are each kept.
    [Fact]
    public void TestMethod4B()
    {
        RunTest("test \"te\\\"\\\"\\\"\\\"s\\\"t\"", new[] {"test", "te\"\"\"\"s\"t"});
    }

    // A quote that is never closed is rejected.
    [Fact]
    public void TestMethod5()
    {
        Assert.Throws<StringToCommandLineParserBase.UnterminatedStringException>(() => RunTest("\"abc d e", new[] {""}));
    }

    // A trailing backslash with nothing after it is rejected.
    [Fact]
    public void TestMethod6()
    {
        Assert.Throws<StringToCommandLineParserBase.UnterminatedEscapeException>(() => RunTest("asd\\", new[] {""}));
    }

    // The simple single-character escapes are decoded.
    [Fact]
    public void TestMethod7()
    {
        RunTest("\\\\\\a\\b\\'\\\"\\0\\f\\t\\v", new[] {"\\\a\b\'\"\0\f\t\v"});
    }

    // \x escapes accept one to four hex digits.
    [Fact]
    public void TestMethod8()
    {
        RunTest("Hello\\x1\\x12\\x123\\x1234", new[] {"Hello\x1\x12\x123\x1234"});
    }

    // Parses the command line with a fresh parser and compares the materialized result to the expectation.
    private static void RunTest(string commandLine, ICollection expected)
    {
        var actual = new CSharpStyleCommandLineParser().Parse(commandLine).ToArray();
        Assert.Equal(expected, actual);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
using System.Collections;
using System.Linq;
using CommandLine.StringToCommandLine;
using Xunit;

namespace CommandLine.Tests.Unit.StringToCommandLine
{
/// <summary>
/// Unit tests for <see cref="DefaultWindowsCommandLineParser"/>.
/// </summary>
public class DefaultWindowsCommandLineParserTests
{
    // A single bare word is returned unchanged.
    [Fact]
    public void TestMethod1()
    {
        RunTest("test", new[] {"test"});
    }

    // Unquoted whitespace separates arguments.
    [Fact]
    public void TestMethod2()
    {
        RunTest("test test", new[] {"test", "test"});
    }

    // Surrounding quotes are removed from a quoted argument.
    [Fact]
    public void TestMethod3()
    {
        RunTest("test \"test\"", new[] {"test", "test"});
    }

    // Quotes in the middle of an argument toggle quoting without splitting it.
    [Fact]
    public void TestMethod4()
    {
        RunTest("test \"te\"s\"t\"", new[] {"test", "test"});
    }

    // Each doubled quote contributes one literal quote.
    [Fact]
    public void TestMethod4B()
    {
        RunTest("test \"te\"\"\"\"s\"t\"", new[] {"test", "te\"\"st"});
    }

    // A quoted argument followed by plain arguments.
    [Fact]
    public void TestMethod5()
    {
        RunTest("\"abc\" d e", new[] {"abc", "d", "e"});
    }

    // Backslashes not followed by a quote are kept; a quoted range may sit inside an argument.
    [Fact]
    public void TestMethod6()
    {
        RunTest("a\\\\b d\"e f\"g h", new[] {"a\\\\b", "de fg", "h"});
    }

    // Three backslashes and a quote yield one backslash and a literal quote.
    [Fact]
    public void TestMethod7()
    {
        RunTest("a\\\\\\\"b c d", new[] {"a\\\"b", "c", "d"});
    }

    // Four backslashes and a quote yield two backslashes and open a quoted range.
    [Fact]
    public void TestMethod8()
    {
        RunTest("a\\\\\\\\\"b c\" d e", new[] {"a\\\\b c", "d", "e"});
    }

    // Parses the command line with a fresh parser and compares the materialized result to the expectation.
    private static void RunTest(string commandLine, ICollection expected)
    {
        var actual = new DefaultWindowsCommandLineParser().Parse(commandLine).ToArray();
        Assert.Equal(expected, actual);
    }
}
}