From 05330154d0737226e8aa928921ebabf4b4505849 Mon Sep 17 00:00:00 2001
From: Vidar Tonaas Fauske
Date: Tue, 13 Nov 2018 13:45:14 +0100
Subject: [PATCH 1/3] Handle missing expected errors better

- Better error message for a missing error in output comparison.
- Fail the cell if an unrun cell with an expected error does not produce one.
---
 nbval/plugin.py                   | 43 +++++++++++++++++++++++++++++++-------------
 tests/test_expected_exceptions.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 12 deletions(-)
 create mode 100644 tests/test_expected_exceptions.py

diff --git a/nbval/plugin.py b/nbval/plugin.py
index 4200ec8..0579c7a 100644
--- a/nbval/plugin.py
+++ b/nbval/plugin.py
@@ -428,12 +428,22 @@ def compare_outputs(self, test, ref, skip_compare=None):
         test_keys = set(testing_outs.keys())
 
         if ref_keys - test_keys:
-            self.comparison_traceback.append(
-                cc.FAIL
-                + "Missing output fields from running code: %s"
-                % (ref_keys - test_keys)
-                + cc.ENDC
-            )
+            if ref_keys == {'evalue', 'ename'}:
+                self.comparison_traceback.append(
+                    cc.FAIL
+                    + "Expected error:\n %s: %r" % (
+                        '\n'.join(reference_outs['ename']),
+                        '\n'.join(reference_outs['evalue'])
+                    )
+                    + cc.ENDC
+                )
+            else:
+                self.comparison_traceback.append(
+                    cc.FAIL
+                    + "Missing output fields from running code: %s"
+                    % (ref_keys - test_keys)
+                    + cc.ENDC
+                )
             return False
         elif test_keys - ref_keys:
             self.comparison_traceback.append(
@@ -582,6 +592,13 @@ def runtest(self):
         # TODO: Only store if comparing with nbdime, to save on memory usage
         self.test_outputs = outs
 
+        # Cells whose reference was not run will not have their outputs checked:
+        unrun = self.cell.execution_count is None
+        if unrun and self.cell.outputs:
+            self.raise_cell_error('Unrun reference cell has outputs')
+
+        cell_has_error = False
+
         # Now get the outputs from the iopub channel
         while True:
             # The iopub channel broadcasts a range of messages. We keep reading
@@ -692,6 +709,7 @@
             # cell execution. Therefore raise a cell error and pass the
             # traceback information.
             elif msg_type == 'error':
+                cell_has_error = True
                 # Store error in output first
                 out['ename'] = reply['ename']
                 out['evalue'] = reply['evalue']
@@ -700,9 +718,9 @@
                 if not self.options['check_exception']:
                     # Ensure we flush iopub before raising error
                     try:
-                        self.parent.kernel.await_idle(msg_id, self.output_timeout)
+                        kernel.await_idle(msg_id, self.output_timeout)
                     except Empty:
-                        self.stop()
+                        kernel.stop()
                         raise RuntimeError('Timed out waiting for idle kernel!')
                     traceback = '\n' + '\n'.join(reply['traceback'])
                     if out['ename'] == 'KeyboardInterrupt' and self.parent.timed_out:
@@ -718,10 +736,11 @@ def runtest(self):
 
         outs[:] = coalesce_streams(outs)
 
-        # Cells where the reference is not run, will not check outputs:
-        unrun = self.cell.execution_count is None
-        if unrun and self.cell.outputs:
-            self.raise_cell_error('Unrun reference cell has outputs')
+        if self.options['check_exception'] and unrun and not cell_has_error:
+            # If unrun, we cannot rely on output comparison for checking errors
+            self.raise_cell_error(
+                "Expected error",
+                "Expected cell to produce an error, but none was produced!")
 
         # Compare if the outputs have the same number of lines
         # and throw an error if it fails
diff --git a/tests/test_expected_exceptions.py b/tests/test_expected_exceptions.py
new file mode 100644
index 0000000..c281e4a
--- /dev/null
+++ b/tests/test_expected_exceptions.py
@@ -0,0 +1,43 @@
+import os
+
+import nbformat
+import pytest
+
+from utils import build_nb
+
+
+pytest_plugins = "pytester"
+
+
+def test_unrun_raises(testdir):
+    # This test uses the testdir fixture from pytester, which is useful for
+    # testing pytest plugins. It writes a notebook to a temporary dir
+    # and then runs pytest.
+
+    # Setup notebook to test:
+    sources = [
+        # In [1]:
+        "raise ValueError('foo')",
+    ]
+    # Build unrun notebook:
+    nb = build_nb(sources, mark_run=False)
+
+    # Write notebook to test dir
+    nbformat.write(nb, os.path.join(
+        str(testdir.tmpdir), 'test_expected_exceptions.ipynb'))
+
+    # Run tests
+    result = testdir.inline_run('--nbval', '--current-env', '-s')
+    reports = result.getreports('pytest_runtest_logreport')
+
+    # Setup and teardown of cells should have no issues:
+    setup_teardown = [r for r in reports if r.when != 'call']
+    for r in setup_teardown:
+        assert r.passed
+
+    reports = [r for r in reports if r.when == 'call']
+
+    assert len(reports) == 1
+
+    # The cell should fail, and not as an expected failure (xfail)
+    assert reports[0].failed and not hasattr(reports[0], 'wasxfail')

From a124c502999c44e3460a8ffe14fa23e94b997f61 Mon Sep 17 00:00:00 2001
From: Vidar Tonaas Fauske
Date: Tue, 13 Nov 2018 14:44:26 +0100
Subject: [PATCH 2/3] Fix/expand tests for expected failures

---
 tests/test_expected_exceptions.py | 72 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 13 deletions(-)

diff --git a/tests/test_expected_exceptions.py b/tests/test_expected_exceptions.py
index c281e4a..d949d3b 100644
--- a/tests/test_expected_exceptions.py
+++ b/tests/test_expected_exceptions.py
@@ -9,7 +9,7 @@
 pytest_plugins = "pytester"
 
 
-def test_unrun_raises(testdir):
+def test_run_raises(testdir):
     # This test uses the testdir fixture from pytester, which is useful for
     # testing pytest plugins. It writes a notebook to a temporary dir
     # and then runs pytest.
@@ -17,27 +17,73 @@ def test_unrun_raises(testdir):
     # Setup notebook to test:
     sources = [
         # In [1]:
-        "raise ValueError('foo')",
+        "",  # No error produced, when one is expected
+        # In [2]:
+        "raise ValueError('foo')",  # Wrong ename
+        # In [3]:
+        "raise ValueError('foo')",  # Wrong evalue
     ]
     # Build unrun notebook:
-    nb = build_nb(sources, mark_run=False)
+    nb = build_nb(sources, mark_run=True)
+
+    nb.cells[0].metadata.tags = ['raises-exception']
+    nb.cells[0].outputs.append(
+        nbformat.v4.new_output(
+            'error',
+            ename='ValueError',
+            evalue='foo',
+            traceback=['foobar', 'bob'],  # Should be ignored
+        )
+    )
+
+    nb.cells[1].metadata.tags = ['raises-exception']
+    nb.cells[1].outputs.append(
+        nbformat.v4.new_output(
+            'error',
+            ename='TypeError',  # Expected TypeError, got ValueError
+            evalue='foo',
+            traceback=['foobar', 'bob'],  # Should be ignored
+        )
+    )
+
+    nb.cells[2].metadata.tags = ['raises-exception']
+    nb.cells[2].outputs.append(
+        nbformat.v4.new_output(
+            'error',
+            ename='ValueError',
+            evalue='bar',  # Expected bar, got foo
+            traceback=['foobar', 'bob'],  # Should be ignored
+        )
+    )
 
     # Write notebook to test dir
     nbformat.write(nb, os.path.join(
         str(testdir.tmpdir), 'test_expected_exceptions.ipynb'))
 
     # Run tests
-    result = testdir.inline_run('--nbval', '--current-env', '-s')
-    reports = result.getreports('pytest_runtest_logreport')
+    result = testdir.runpytest_subprocess('--nbval', '--current-env', '-s')
+    result.assert_outcomes(failed=3)
 
-    # Setup and teardown of cells should have no issues:
-    setup_teardown = [r for r in reports if r.when != 'call']
-    for r in setup_teardown:
-        assert r.passed
 
-    reports = [r for r in reports if r.when == 'call']
 
-    assert len(reports) == 1
+def test_unrun_raises(testdir):
+    # This test uses the testdir fixture from pytester, which is useful for
+    # testing pytest plugins. It writes a notebook to a temporary dir
+    # and then runs pytest.
 
-    # The cell should fail, and not as an expected failure (xfail)
-    assert reports[0].failed and not hasattr(reports[0], 'wasxfail')
+    # Setup notebook to test:
+    sources = [
+        # In [1]:
+        "pass",
+    ]
+    # Build unrun notebook:
+    nb = build_nb(sources, mark_run=False)
+    nb.cells[0].metadata.tags = ['raises-exception']
+
+    # Write notebook to test dir
+    nbformat.write(nb, os.path.join(
+        str(testdir.tmpdir), 'test_expected_exceptions.ipynb'))
+
+    # Run tests
+    result = testdir.runpytest_subprocess('--nbval', '--current-env', '-s')
+    result.assert_outcomes(failed=1)

From cdff477c9ba4c47ebcc4142df5800dcdefe77bdc Mon Sep 17 00:00:00 2001
From: Vidar Tonaas Fauske
Date: Tue, 13 Nov 2018 14:58:56 +0100
Subject: [PATCH 3/3] Cleanup

---
 nbval/plugin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nbval/plugin.py b/nbval/plugin.py
index 0579c7a..104e0ca 100644
--- a/nbval/plugin.py
+++ b/nbval/plugin.py
@@ -580,7 +580,7 @@ def runtest(self):
 
         # Poll the shell channel to get a message
         try:
-            self.parent.kernel.await_reply(msg_id, timeout=timeout)
+            kernel.await_reply(msg_id, timeout=timeout)
         except Empty:  # Timeout reached
             # Try to interrupt kernel, as this will give us traceback:
             kernel.interrupt()
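
Reviewer note (not part of the patch series): the two new code paths can be tried by hand with a small script. This is a minimal sketch assuming only the public nbformat v4 API and nbval's 'raises-exception' cell tag, which the tests above also use; the demo filename and the suggested pytest invocation are illustrative.

import nbformat
from nbformat.v4 import new_notebook, new_code_cell, new_output

nb = new_notebook()

# Cell 1: previously run, with a recorded error, but the code no longer
# raises. With patch 1, compare_outputs() should report the expected
# ename/evalue instead of the generic "missing output fields" message.
cell = new_code_cell("pass", execution_count=1)
cell.metadata.tags = ['raises-exception']
cell.outputs = [new_output('error', ename='ValueError', evalue='foo',
                           traceback=[])]
nb.cells.append(cell)

# Cell 2: unrun (no execution count, no outputs) but tagged as raising.
# With patch 1, runtest() should fail it outright, since output comparison
# cannot catch a missing error for an unrun reference cell.
cell = new_code_cell("pass")
cell.metadata.tags = ['raises-exception']
nb.cells.append(cell)

nbformat.write(nb, 'demo_expected_exceptions.ipynb')

# Expected: two failures, e.g.
#   pytest --nbval --current-env demo_expected_exceptions.ipynb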