From d4a71af3bb4450c99efafbb83e6ec8bd65a5da4c Mon Sep 17 00:00:00 2001 From: Corey McCandless Date: Wed, 7 Mar 2018 08:33:59 -0600 Subject: [PATCH 1/4] implement sgf-parsing --- exercises/sgf-parsing/README.md | 110 ++++++++++++++++++++++ exercises/sgf-parsing/example.py | 100 ++++++++++++++++++++ exercises/sgf-parsing/sgf_parsing.py | 26 +++++ exercises/sgf-parsing/sgf_parsing_test.py | 94 ++++++++++++++++++ 4 files changed, 330 insertions(+) create mode 100644 exercises/sgf-parsing/README.md create mode 100644 exercises/sgf-parsing/example.py create mode 100644 exercises/sgf-parsing/sgf_parsing.py create mode 100644 exercises/sgf-parsing/sgf_parsing_test.py diff --git a/exercises/sgf-parsing/README.md b/exercises/sgf-parsing/README.md new file mode 100644 index 0000000000..8f123b308f --- /dev/null +++ b/exercises/sgf-parsing/README.md @@ -0,0 +1,110 @@ +# SGF Parsing + +Parsing a Smart Game Format string. + +[SGF](https://en.wikipedia.org/wiki/Smart_Game_Format) is a standard format for +storing board game files, in particular go. + +SGF is a fairly simple format. An SGF file usually contains a single +tree of nodes where each node is a property list. The property list +contains key value pairs, each key can only occur once but may have +multiple values. + +An SGF file may look like this: + +```text +(;FF[4]C[root]SZ[19];B[aa];W[ab]) +``` + +This is a tree with three nodes: + +- The top level node has three properties: FF\[4\] (key = "FF", value = + "4"), C\[root\] (key = "C", value = "root") and SZ\[19\] (key = "SZ", value = "19"). + (FF indicates the version of SGF, C is a comment and SZ is the size of the board.) + - The top level node has a single child which has a single property: + B\[aa\]. (Black plays on the point encoded as "aa", which is the + 1-1 point (which is a stupid place to play)). + - The B\[aa\] node has a single child which has a single property: + W\[ab\]. + +As you can imagine an SGF file contains a lot of nodes with a single +child, which is why there's a shorthand for it.
+ +SGF can encode variations of play. Go players do a lot of backtracking +in their reviews (let's try this, doesn't work, let's try that) and SGF +supports variations of play sequences. For example: + +```text +(;FF[4](;B[aa];W[ab])(;B[dd];W[ee])) +``` + +Here the root node has two variations. The first (which by convention +indicates what's actually played) is where black plays on 1-1. Black was +sent this file by his teacher who pointed out a more sensible play in +the second child of the root node: `B[dd]` (4-4 point, a very standard +opening to take the corner). + +A key can have multiple values associated with it. For example: + +```text +(;FF[4];AB[aa][ab][ba]) +``` + +Here `AB` (add black) is used to add three black stones to the board. + +There are a few more complexities to SGF (and parsing in general), which +you can mostly ignore. You should assume that the input is encoded in +UTF-8; the tests won't contain a charset property, so don't worry about +that. Furthermore, you may assume that all newlines are Unix style (`\n`, +no `\r` or `\r\n` will be in the tests) and that no optional whitespace +between properties, nodes, etc. will be in the tests. + +The exercise will have you parse an SGF string and return a tree +structure of properties. You do not need to encode knowledge about the +data types of properties; just use the rules for the +[text](http://www.red-bean.com/sgf/sgf4.html#text) type everywhere. + +## Exception messages + +Sometimes it is necessary to raise an exception. When you do this, you should include a meaningful error message to +indicate what the source of the error is. This makes your code more readable and helps significantly with debugging. Not +every exercise will require you to raise an exception, but for those that do, the tests will only pass if you include +a message. + +To raise a message with an exception, just write it as an argument to the exception type.
For example, instead of +`raise Exception`, you should write: + +```python +raise Exception("Meaningful message indicating the source of the error") +``` + +## Running the tests + +To run the tests, run the appropriate command below ([why they are different](https://github.com/pytest-dev/pytest/issues/1629#issue-161422224)): + +- Python 2.7: `py.test sgf_parsing_test.py` +- Python 3.3+: `pytest sgf_parsing_test.py` + +Alternatively, you can tell Python to run the pytest module (allowing the same command to be used regardless of Python version): +`python -m pytest sgf_parsing_test.py` + +### Common `pytest` options + +- `-v` : enable verbose output +- `-x` : stop running tests on first failure +- `--ff` : run failures from previous test before running other test cases + +For other options, see `python -m pytest -h` + +## Submitting Exercises + +Note that, when trying to submit an exercise, make sure the solution is in the `$EXERCISM_WORKSPACE/python/sgf-parsing` directory. + +You can find your Exercism workspace by running `exercism debug` and looking for the line that starts with `Workspace`. + +For more detailed information about running tests, code style and linting, +please see the [help page](http://exercism.io/languages/python). + +## Submitting Incomplete Solutions + +It's possible to submit an incomplete solution so you can see how others have completed the exercise. 
diff --git a/exercises/sgf-parsing/example.py b/exercises/sgf-parsing/example.py new file mode 100644 index 0000000000..ae63903199 --- /dev/null +++ b/exercises/sgf-parsing/example.py @@ -0,0 +1,100 @@ +class SgfTree(object): + def __init__(self, properties=None, children=None): + self.properties = properties or {} + self.children = children or [] + + def __eq__(self, other): + if not isinstance(other, SgfTree): + return False + for k, v in self.properties.items(): + if k not in other.properties: + return False + if other.properties[k] != v: + return False + for k in other.properties.keys(): + if k not in self.properties: + return False + if len(self.children) != len(other.children): + return False + for a, b in zip(self.children, other.children): + if a != b: + return False + return True + + def __repr__(self): + """Ironically, encoding to SGF is much easier""" + rep = '(;' + for k, vs in self.properties.items(): + rep += k + for v in vs: + rep += '[{}]'.format(v) + if self.children: + if len(self.children) > 1: + rep += '(' + for c in self.children: + rep += repr(c)[1:-1] + if len(self.children) > 1: + rep += ')' + return rep + ')' + + +def is_upper(s): + a, z = map(ord, 'AZ') + return all( + a <= o and o <= z + for o in map(ord, s) + ) + + +def parse(input_string): + root = None + current = None + stack = list(input_string) + + def assert_that(condition): + if not condition: + raise ValueError( + 'invalid format at {}:{}: {}'.format( + repr(input_string), + len(input_string) - len(stack), + repr(''.join(stack)) + ) + ) + assert_that(stack) + + def pop(): + if stack[0] == '\\': + stack.pop(0) + ch = stack.pop(0) + return ' ' if ch in '\n\t' else ch + + def peek(): + return stack[0] + + def pop_until(ch): + v = '' + while peek() != ch: + v += pop() + return v + while stack: + assert_that(pop() == '(' and peek() == ';') + while pop() == ';': + properties = {} + while is_upper(peek()): + key = pop_until('[') + assert_that(is_upper(key)) + values = [] + while peek() 
== '[': + pop() + values.append(pop_until(']')) + pop() + properties[key] = values + if root is None: + current = root = SgfTree(properties) + else: + current.children.append(SgfTree(properties)) + current = current.children[-1] # next node nests here + while peek() == '(': + child_input = pop() + pop_until(')') + pop() + current.children.append(parse(child_input)) + return root diff --git a/exercises/sgf-parsing/sgf_parsing.py b/exercises/sgf-parsing/sgf_parsing.py new file mode 100644 index 0000000000..4f3e7f7ca7 --- /dev/null +++ b/exercises/sgf-parsing/sgf_parsing.py @@ -0,0 +1,26 @@ +class SgfTree(object): + def __init__(self, properties=None, children=None): + self.properties = properties or {} + self.children = children or [] + + def __eq__(self, other): + if not isinstance(other, SgfTree): + return False + for k, v in self.properties.items(): + if k not in other.properties: + return False + if other.properties[k] != v: + return False + for k in other.properties.keys(): + if k not in self.properties: + return False + if len(self.children) != len(other.children): + return False + for a, b in zip(self.children, other.children): + if not (a == b): # Py2: no implicit __ne__ + return False + return True + + +def parse(input_string): + pass diff --git a/exercises/sgf-parsing/sgf_parsing_test.py b/exercises/sgf-parsing/sgf_parsing_test.py new file mode 100644 index 0000000000..c1b72413e2 --- /dev/null +++ b/exercises/sgf-parsing/sgf_parsing_test.py @@ -0,0 +1,94 @@ +import unittest + +from example import parse, SgfTree + + +class SgfParsingTest(unittest.TestCase): + def test_empty_input(self): + input_string = '' + with self.assertRaisesWithMessage(ValueError): + parse(input_string) + + def test_tree_with_no_nodes(self): + input_string = '()' + with self.assertRaisesWithMessage(ValueError): + parse(input_string) + + def test_node_without_tree(self): + input_string = ';' + with self.assertRaisesWithMessage(ValueError): + parse(input_string) + + def test_node_without_properties(self): + input_string = '(;)' + expected = SgfTree() +
self.assertEqual(parse(input_string), expected) + + def test_single_node_tree(self): + input_string = '(;A[B])' + expected = SgfTree(properties={'A': ['B']}) + self.assertEqual(parse(input_string), expected) + + def test_properties_without_delimiter(self): + input_string = '(;a)' + with self.assertRaisesWithMessage(ValueError): + parse(input_string) + + def test_all_lowercase_property(self): + input_string = '(;a[b])' + with self.assertRaisesWithMessage(ValueError): + parse(input_string) + + def test_upper_and_lowercase_property(self): + input_string = '(;Aa[b])' + with self.assertRaisesWithMessage(ValueError): + parse(input_string) + + def test_two_nodes(self): + input_string = '(;A[B];B[C])' + expected = SgfTree( + properties={'A': ['B']}, + children=[ + SgfTree({'B': ['C']}) + ] + ) + self.assertEqual(parse(input_string), expected) + + def test_two_child_trees(self): + input_string = '(;A[B](;B[C])(;C[D]))' + expected = SgfTree( + properties={'A': ['B']}, + children=[ + SgfTree({'B': ['C']}), + SgfTree({'C': ['D']}), + ] + ) + self.assertEqual(parse(input_string), expected) + + def test_multiple_property_values(self): + input_string = '(;A[b][c][d])' + expected = SgfTree( + properties={'A': ['b', 'c', 'd']} + ) + self.assertEqual(parse(input_string), expected) + + def test_escaped_property(self): + input_string = '(;A[\\]b\nc\nd\t\te \n\\]])' + expected = SgfTree( + properties={'A': [']b c d  e  ]']} + ) + self.assertEqual(parse(input_string), expected) + + # Utility functions + def setUp(self): + try: + self.assertRaisesRegex + except AttributeError: + self.assertRaisesRegex = self.assertRaisesRegexp + + def assertRaisesWithMessage(self, exception): + return self.assertRaisesRegex(exception, r".+") + + +if __name__ == '__main__': + unittest.main() From d13b63db431321f0e2c390413addf539a66d27d9 Mon Sep 17 00:00:00 2001 From: Corey McCandless Date: Wed, 7 Mar 2018 08:39:07 -0600 Subject: [PATCH 2/4] fix import statement --- exercises/sgf-parsing/sgf_parsing_test.py |
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exercises/sgf-parsing/sgf_parsing_test.py b/exercises/sgf-parsing/sgf_parsing_test.py index c1b72413e2..6bd6100026 100644 --- a/exercises/sgf-parsing/sgf_parsing_test.py +++ b/exercises/sgf-parsing/sgf_parsing_test.py @@ -1,6 +1,6 @@ import unittest -from example import parse, SgfTree +from sgf_parsing import parse, SgfTree class SgfParsingTest(unittest.TestCase): From a185a86cce2bd5da558c431ddc0d11481c7fd458 Mon Sep 17 00:00:00 2001 From: Corey McCandless Date: Wed, 7 Mar 2018 08:48:32 -0600 Subject: [PATCH 3/4] create entry in config.json --- config.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/config.json b/config.json index cbee3d09ab..faacc0a8ca 100644 --- a/config.json +++ b/config.json @@ -1305,6 +1305,17 @@ "lists" ] }, + { + "uuid": "0d6325d1-c0a3-456e-9a92-cea0559e82ed", + "slug": "sgf-parsing", + "core": false, + "unlocked_by": null, + "difficulty": 7, + "topics": [ + "parsing", + "trees" + ] + }, { "uuid": "e7351e8e-d3ff-4621-b818-cd55cf05bffd", "slug": "accumulate", From 108788bf6810f7a347997dedaa156e4a453cbd7b Mon Sep 17 00:00:00 2001 From: Corey McCandless Date: Wed, 7 Mar 2018 09:10:10 -0600 Subject: [PATCH 4/4] fix __eq__ for Python2 --- exercises/sgf-parsing/example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exercises/sgf-parsing/example.py b/exercises/sgf-parsing/example.py index ae63903199..008c2a36ee 100644 --- a/exercises/sgf-parsing/example.py +++ b/exercises/sgf-parsing/example.py @@ -17,7 +17,7 @@ def __eq__(self, other): if len(self.children) != len(other.children): return False for a, b in zip(self.children, other.children): - if a != b: + if not (a == b): return False return True