Skip to content

Commit bb927a6

Browse files
committed
refactor: bash argument parser
1 parent dd10896 commit bb927a6

File tree

1 file changed

+161
-40
lines changed

1 file changed

+161
-40
lines changed

src/lib/Lib/CMakeExecution.cpp

Lines changed: 161 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -111,63 +111,184 @@ cmakeDefaultGeneratorIsVisualStudio(llvm::StringRef cmakePath)
111111
return defaultGenerator.starts_with("Visual Studio");
112112
}
113113

114+
/** Extract the identifier from a bash-style variable reference.

    The input must begin with `$` followed by a character in
    `[a-zA-Z_]`. The identifier then extends over every following
    character in `[a-zA-Z0-9_]`; scanning stops at the first
    character outside that set or at the end of the input.

    @param str Text starting at the `$` of the reference.

    @return A view into `str` covering the identifier (without the
    leading `$`), or an error when `str` is empty, does not start
    with `$`, has nothing after the `$`, or the character after the
    `$` cannot start an identifier.
 */
Expected<std::string_view>
parseBashIdentifier(std::string_view str)
{
    // Characters allowed in the first position of an identifier
    auto const isLetterOrUnderscore = [](char const ch)
    {
        return ch == '_' ||
               (ch >= 'a' && ch <= 'z') ||
               (ch >= 'A' && ch <= 'Z');
    };
    // Characters additionally allowed after the first position
    auto const isDigit = [](char const ch)
    {
        return ch >= '0' && ch <= '9';
    };

    if (str.empty())
    {
        return Unexpected(Error("Empty argument"));
    }
    if (str.front() != '$')
    {
        return Unexpected(Error("Argument does not start with '$'"));
    }
    if (str.size() == 1)
    {
        return Unexpected(Error("Argument does not contain identifier"));
    }
    if (!isLetterOrUnderscore(str[1]))
    {
        return Unexpected(Error("Argument does not start with [a-zA-Z_]"));
    }

    // `end` is one past the last identifier character; the identifier
    // occupies the indices [1, end).
    std::size_t end = 2;
    while (end < str.size() &&
           (isLetterOrUnderscore(str[end]) || isDigit(str[end])))
    {
        ++end;
    }
    return str.substr(1, end - 1);
}
147+
114148
std::vector<std::string>
115-
parseCmakeArgs(std::string const& cmakeArgsStr) {
149+
parseBashArgs(std::string_view str)
150+
{
116151
std::vector<std::string> args;
117-
std::string currentArg;
118-
char quoteChar = '\0';
119-
bool escapeNextChar = false;
152+
char curQuote = '\0';
153+
std::string curArg;
120154

121-
for (char ch : cmakeArgsStr)
155+
for (std::size_t i = 0; i < str.size(); ++i)
122156
{
123-
if (escapeNextChar)
124-
{
125-
currentArg += ch;
126-
escapeNextChar = false;
127-
}
128-
else if (ch == '\\')
129-
{
130-
escapeNextChar = true;
131-
}
132-
else if ((ch == '"' || ch == '\''))
157+
char const c = str[i];
158+
bool const inQuote = curQuote != '\0';
159+
bool const curIsQuote = c == '\'' || c == '"';
160+
bool const curIsEscaped = i > 0 && str[i - 1] == '\\';
161+
if (!inQuote)
133162
{
134-
if (quoteChar == '\0')
163+
if (!curIsEscaped)
135164
{
136-
quoteChar = ch;
137-
}
138-
else if (ch == quoteChar)
139-
{
140-
quoteChar = '\0';
141-
}
142-
else
165+
if (curIsQuote)
166+
{
167+
// Open quotes
168+
curQuote = c;
169+
}
170+
else if (c == ' ')
171+
{
172+
// End of argument
173+
if (!curArg.empty())
174+
{
175+
args.push_back(curArg);
176+
curArg.clear();
177+
}
178+
}
179+
else if (c == '$')
180+
{
181+
// Expand environment variable
182+
Expected<std::string_view> id =
183+
parseBashIdentifier(str.substr(i));
184+
if (id)
185+
{
186+
std::string idStr(*id);
187+
char const* const value = std::getenv(idStr.c_str());
188+
if (value == nullptr)
189+
{
190+
curArg += c;
191+
}
192+
else
193+
{
194+
curArg += value;
195+
i += idStr.size();
196+
}
197+
}
198+
else
199+
{
200+
curArg += c;
201+
}
202+
}
203+
else if (c != '\\')
204+
{
205+
// Add character to current argument
206+
curArg += c;
207+
}
208+
}
209+
else
143210
{
144-
currentArg.push_back(ch);
211+
// Current character is escaped:
212+
// add whatever it is to current argument
213+
curArg += c;
145214
}
146-
} else if (std::isspace(ch))
215+
}
216+
else if (curQuote == '\"')
147217
{
148-
if (quoteChar != '\0')
218+
// In \" quotes:
219+
// Preserve the literal value of all characters except for
220+
// ($), (`), ("), (\), and the (!) character
221+
if (!curIsEscaped)
149222
{
150-
currentArg.push_back(ch);
151-
}
152-
else
223+
if (c == curQuote)
224+
{
225+
// Close quotes
226+
curQuote = '\0';
227+
}
228+
else if (c == '$')
229+
{
230+
// Expand environment variable
231+
Expected<std::string_view> id =
232+
parseBashIdentifier(str.substr(i));
233+
if (id)
234+
{
235+
std::string idStr(*id);
236+
char const* const value = std::getenv(idStr.c_str());
237+
if (value == nullptr)
238+
{
239+
curArg += c;
240+
}
241+
else
242+
{
243+
curArg += value;
244+
i += idStr.size();
245+
}
246+
}
247+
else
248+
{
249+
curArg += c;
250+
}
251+
}
252+
else if (c != '\\')
253+
{
254+
// Add character to current argument
255+
curArg += c;
256+
}
257+
}
258+
else
153259
{
154-
if ( ! currentArg.empty())
260+
// Current character is escaped:
261+
// add whatever it is to current argument
262+
// Chars that don't need escaping also include the slash
263+
if (c != '$' && c != '`' && c != '"' && c != '\\')
155264
{
156-
args.push_back(currentArg);
157-
currentArg.clear();
265+
curArg += '\\';
158266
}
159-
}
160-
} else
267+
curArg += c;
268+
}
269+
}
270+
else if (curQuote == '\'')
161271
{
162-
currentArg += ch;
272+
// In \' quotes:
273+
// Preserve the literal value of each character within the
274+
// quotes
275+
if (c != curQuote)
276+
{
277+
// Add character to current argument
278+
curArg += c;
279+
}
280+
else
281+
{
282+
// Close quotes
283+
curQuote = '\0';
284+
}
163285
}
164286
}
165-
166-
if ( ! currentArg.empty())
287+
// Add last argument
288+
if (!curArg.empty())
167289
{
168-
args.push_back(currentArg);
290+
args.push_back(curArg);
169291
}
170-
171292
return args;
172293
}
173294

@@ -255,7 +376,7 @@ executeCmakeExportCompileCommands(llvm::StringRef projectPath, llvm::StringRef c
255376
std::optional<llvm::StringRef> const redirects[] = {llvm::StringRef(), llvm::StringRef(), errorPath.path()};
256377
std::vector<llvm::StringRef> args = {cmakePath, "-S", projectPath, "-B", tempDir, "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON"};
257378

258-
auto const additionalArgs = parseCmakeArgs(cmakeArgs.str());
379+
auto const additionalArgs = parseBashArgs(cmakeArgs.str());
259380
MRDOCS_TRY(pushCMakeArgs(cmakePath, args, additionalArgs));
260381

261382
int const result = llvm::sys::ExecuteAndWait(cmakePath, args, std::nullopt, redirects);

0 commit comments

Comments
 (0)