diff --git a/.gitignore b/.gitignore index 0cb3fbb..5af20da 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ scrapy_splash.egg-info htmlcov .hypothesis .ipynb_checkpoints +.pytest_cache diff --git a/README.rst b/README.rst index 91b8855..895ce44 100644 --- a/README.rst +++ b/README.rst @@ -260,19 +260,8 @@ to set ``meta['splash']['args']`` use ``SplashRequest(..., args=myargs)``. * ``meta['splash']['magic_response']`` - when set to True and a JSON response is received from Splash, several attributes of the response - (headers, body, url, status code) are filled using data returned in JSON: - - * response.headers are filled from 'headers' keys; - * response.url is set to the value of 'url' key; - * response.body is set to the value of 'html' key, - or to base64-decoded value of 'body' key; - * response.status is set to the value of 'http_status' key. - When ``meta['splash']['http_status_from_error_code']`` is True - and ``assert(splash:go(..))`` fails with an HTTP error - response.status is also set to HTTP error code. - - Original URL, status and headers are available as ``response.real_url``, - ``response.splash_response_status`` and ``response.splash_response_headers``. + (headers, body, url, status code) are filled using data returned in JSON; + for details see the Responses section. This option is set to True by default if you use SplashRequest. ``render.json`` and ``execute`` endpoints may not have all the necessary @@ -326,9 +315,15 @@ SplashJsonResponse provide extra features: * response.headers are filled from 'headers' keys; * response.url is set to the value of 'url' key; - * response.body is set to the value of 'html' key, - or to base64-decoded value of 'body' key; - * response.status is set from the value of 'http_status' key. + * response.body is set to the value of 'html' key, utf-8 text expected, + or to base64-decoded binary value of 'body' key; + * response.status is set to the value of 'http_status' key. 
+ When ``meta['splash']['http_status_from_error_code']`` is True + and ``assert(splash:go(..))`` fails with an HTTP error + response.status is also set to HTTP error code. + + Original URL, status and headers are available as ``response.real_url``, + ``response.splash_response_status`` and ``response.splash_response_headers``. When ``response.body`` is updated in SplashJsonResponse (either from 'html' or from 'body' keys) familiar ``response.css`` diff --git a/scrapy_splash/response.py b/scrapy_splash/response.py index e5250c2..64066ef 100644 --- a/scrapy_splash/response.py +++ b/scrapy_splash/response.py @@ -176,7 +176,6 @@ def _load_from_json(self): # response.body if 'body' in self.data: self._body = base64.b64decode(self.data['body']) - self._cached_ubody = self._body.decode(self.encoding) elif 'html' in self.data: self._cached_ubody = self.data['html'] self._body = self._cached_ubody.encode(self.encoding) diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 66b79ce..7b7e5fc 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -286,6 +286,22 @@ def test_magic_response(): if c.name == 'spam': assert c.value == 'ham' + resp_data = { + 'url': "http://exmaple.com/#id42", + 'body': base64.b64encode(b'\xad').decode('ascii'), + 'headers': [ + {'name': 'Content-Type', 'value': "text/html; charset=cp1251"}, + ] + } + resp = TextResponse("http://mysplash.example.com/execute", + headers={b'Content-Type': b'application/json'}, + body=json.dumps(resp_data).encode('utf8')) + + try: + resp2 = mw.process_response(req, resp, None) + except: + assert 'process_response raised exception' is None + def test_cookies(): mw = _get_mw()