Merge pull request #352 from Infinoid/schedule-parser

stephenchouca · web-flow · commit 859cfd2688b5 · 2021-01-13T22:31:23.000-05:00
Fix command-line schedule parsing
diff --git a/include/taco/parser/schedule_parser.h b/include/taco/parser/schedule_parser.h
@@ -0,0 +1,20 @@
+#ifndef TACO_SCHEDULE_PARSER_H
+#define TACO_SCHEDULE_PARSER_H
+
+#include <string>
+#include <vector>
+
+namespace taco {
+namespace parser {
+
+// Parses a schedule string such as "reorder(i,j),precompute(D(i,j)*E(j,k),j,j_pre)"
+// into one string vector per directive (directive name first, then its arguments):
+// [ [ "reorder", "i", "j" ], [ "precompute", "D(i,j)*E(j,k)", "j", "j_pre" ] ]
+std::vector<std::vector<std::string>> ScheduleParser(const std::string);
+
+// Renders the result of ScheduleParser() as a single string (for debugging).
+std::string serializeParsedSchedule(std::vector<std::vector<std::string>>);
+
+}}
+
+#endif //TACO_SCHEDULE_PARSER_H
diff --git a/src/parser/schedule_parser.cpp b/src/parser/schedule_parser.cpp
@@ -0,0 +1,103 @@
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include "taco/parser/lexer.h"
+#include "taco/parser/schedule_parser.h"
+#include "taco/error.h"
+
+using std::vector;
+using std::string;
+using std::cout;
+using std::endl;
+
+namespace taco{
+namespace parser{
+
+/// Splits the value of a command-line schedule option (`-s <directive>`) into
+/// one string vector per directive.
+/// Example: "precompute(B(i,j),j,jpre),reorder(j,i)" is returned as:
+/// [ [ "precompute", "B(i,j)", "j", "jpre" ],
+///   [ "reorder", "j", "i" ] ]
+/// Element 0 of every inner vector is the directive name and the remaining
+/// elements are its arguments.  Parentheses nested inside an argument, and the
+/// commas between them, remain part of that argument's text.  Whitespace is
+/// not carried into the output.  Unbalanced parentheses trip a taco_uassert.
+vector<vector<string>> ScheduleParser(const string argValue) {
+    vector<vector<string>> parsed;
+    vector<string> directive;  // name and arguments of the directive being read
+    string field;              // text of the name/argument being accumulated
+    int parenthesesCnt = 0;    // current parenthesis nesting depth
+    parser::Lexer lexer(argValue);
+
+    // Finishes the field under construction and appends it to the directive.
+    auto endField = [&]() {
+        directive.push_back(field);
+        field.clear();
+    };
+
+    parser::Token tok = lexer.getToken();
+    while(tok != parser::Token::eot) {
+        if(tok == parser::Token::lparen) {
+            // At depth 0 the paren terminates the directive name; deeper parens
+            // are argument text and are copied through.
+            if(parenthesesCnt == 0)
+                endField();
+            else
+                field += lexer.tokenString(tok);
+            parenthesesCnt++;
+        } else if(tok == parser::Token::rparen) {
+            taco_uassert(parenthesesCnt > 0) << "mismatched parentheses (too many right-parens, negative nesting level) in schedule expression '" << argValue << "'";
+            // The paren that closes the directive itself (depth 1) is dropped.
+            if(parenthesesCnt > 1)
+                field += lexer.tokenString(tok);
+            parenthesesCnt--;
+        } else if(tok == parser::Token::comma) {
+            if(parenthesesCnt == 0 || parenthesesCnt == 1) {
+                // depth 1: next argument; depth 0: next directive as well
+                endField();
+                if(parenthesesCnt == 0) {
+                    parsed.push_back(directive);
+                    directive.clear();
+                }
+            } else {
+                // comma nested inside an argument, e.g. within "B(i,j)"
+                field += lexer.tokenString(tok);
+            }
+        } else if(tok == parser::Token::identifier ||
+                  tok == parser::Token::int_scalar ||
+                  tok == parser::Token::uint_scalar ||
+                  tok == parser::Token::float_scalar ||
+                  tok == parser::Token::complex_scalar) {
+            // token kinds whose text is read via getIdentifier()
+            field += lexer.getIdentifier();
+        } else {
+            // every other token is rendered through tokenString()
+            field += lexer.tokenString(tok);
+        }
+        tok = lexer.getToken();
+    }
+
+    taco_uassert(parenthesesCnt == 0) << "imbalanced parentheses (too few right-parens) in schedule expression '" << argValue << "'";
+    // Flush whatever follows the last separator; empty leftovers are skipped.
+    if(!field.empty())
+        directive.push_back(field);
+    if(!directive.empty())
+        parsed.push_back(directive);
+    return parsed;
+}
+
+/// Renders a parsed schedule as text such as "[ [ 'split', 'i', ], ]" (debugging aid).
+string serializeParsedSchedule(vector<vector<string>> parsed) {
+    // Plain string appends: no reliance on <sstream>, which this file does not include directly.
+    string out = "[ ";
+    for(const vector<string>& directive : parsed) {  // bind by reference: no copy per iteration
+        out += "[ ";
+        for(const string& element : directive)
+            out += "'" + element + "', ";
+        out += "], ";
+    }
+    out += "]";
+    return out;
+}
+}}
diff --git a/test/tests-schedule-parser.cpp b/test/tests-schedule-parser.cpp
@@ -0,0 +1,86 @@
+#include <iostream>
+#include <taco/parser/schedule_parser.h>
+#include "test.h"
+
+using std::cout;
+using std::endl;
+using std::string;
+using std::vector;
+using namespace taco::parser;
+
+// Checks two string vectors element by element; sizes are compared first via ASSERT_EQ.
+void assert_string_vectors_equal(const vector<string>& a, const vector<string>& b) {  // by reference: no copies
+    ASSERT_EQ(a.size(), b.size()) << "Vectors are of unequal lengths: " << a.size() << " != " << b.size();
+    for(size_t i = 0; i < a.size(); i++)
+        EXPECT_EQ(a[i], b[i]) << "a[" << i << "] != b[" << i << "]: \"" << a[i] << "\" != \"" << b[i] << "\"";
+}
+
+// Compares two parsed schedules directive by directive; outer sizes are compared first via ASSERT_EQ.
+void assert_string_vector_vectors_equal(const vector<vector<string>>& a, const vector<vector<string>>& b) {  // by reference: no copies
+    ASSERT_EQ(a.size(), b.size()) << "Vector-vectors are of unequal lengths: " << a.size() << " != " << b.size();
+    for(size_t i = 0; i < a.size(); i++)
+        assert_string_vectors_equal(a[i], b[i]);
+}
+
+TEST(schedule_parser, normal_operation) {
+    struct {
+        string input;                     // schedule string handed to the parser
+        vector<vector<string>> expected;  // directives the parser should return
+    } table[] = {
+        // plain names, single and nested argument lists
+        { "i,j,k",                  { { "i" }, { "j" }, { "k" } } },
+        { "i(j,k)",                 { { "i", "j", "k" } } },
+        { "i(j,k),l(m,n)",          { { "i", "j", "k" }, { "l", "m",          "n" } } },
+        { "i(j,k),l(m(n,o),p)",     { { "i", "j", "k" }, { "l", "m(n,o)",     "p" } } },
+        { "i(j,k),l(m(n(o(p))),q)", { { "i", "j", "k" }, { "l", "m(n(o(p)))", "q" } } },
+
+        // the same inputs with spaces after some commas
+        { "i,j, k",                  { { "i" }, { "j" }, { "k" } } },
+        { "i(j, k)",                 { { "i", "j", "k" } } },
+        { "i(j,k), l(m,n)",          { { "i", "j", "k" }, { "l", "m",          "n" } } },
+        { "i(j,k),l(m(n, o),p)",     { { "i", "j", "k" }, { "l", "m(n,o)",     "p" } } },
+        { "i(j,k),l(m(n(o(p))), q)", { { "i", "j", "k" }, { "l", "m(n(o(p)))", "q" } } },
+
+        // missing names or arguments show up as empty strings
+        { "",              { } },
+        { ",j,k",          { { "" }, { "j" }, { "k" } } },
+        { "i(,k)",         { { "i", "", "k" } } },
+        { "(j,k)",         { { "", "j", "k" } } },
+        { "i(j,),,l(m,n)", { { "i", "j", "" }, { "" }, { "l", "m", "n" } } },
+
+        // directives as they are written on the command line
+        { "split(i,i0,i1,16)",           { { "split", "i", "i0", "i1", "16" } } },
+        { "precompute(A(i,j)*x(j),i,i)", { { "precompute", "A(i,j)*x(j)", "i", "i" } } },
+        { "split(i,i0,i1,16),precompute(A(i,j)*x(j),i,i)",
+                                         { { "split", "i", "i0", "i1", "16" },
+                                           { "precompute", "A(i,j)*x(j)", "i", "i" } } },
+    };
+    for(const auto& row : table) {
+        const auto parsed = ScheduleParser(row.input);
+        cout << "string \"" << row.input << "\"" << " parsed as: " << serializeParsedSchedule(parsed) << endl;
+        assert_string_vector_vectors_equal(row.expected, parsed);
+    }
+}
+
+// Malformed schedule strings must make ScheduleParser throw a TacoException naming the problem.
+TEST(schedule_parser, error_reporting) {
+    struct {
+        string str;
+        string assertion;  // substring the exception message must contain
+    } cases[] = {
+        { "i,j,k(",  "too few right-parens" },
+        { "i(j,k",   "too few right-parens" },
+        { "i,j,k)",  "too many right-parens" },
+        { "i,j,k)(", "too many right-parens" },
+    };
+    for(const auto& test : cases) {
+        try {
+            ScheduleParser(test.str);
+            FAIL() << "no exception thrown for schedule \"" << test.str << "\"";
+        } catch (const taco::TacoException &e) {
+            const string message = e.what();
+            EXPECT_TRUE(message.find(test.assertion) != string::npos)
+              << "substring \"" << test.assertion << "\" not found in exception message \"" << message << "\"";
+        }
+    }
+}
diff --git a/tools/taco.cpp b/tools/taco.cpp