scrapinghub · kmike · Aug 18, 2020 · Aug 3, 2020 · Aug 6, 2020 · Aug 6, 2020
diff --git a/autoextract_poet/page_inputs.py b/autoextract_poet/page_inputs.py
@@ -0,0 +1,81 @@
+from typing import ClassVar, Generic, Optional, TypeVar
+
+import attr
+
+from autoextract_poet.items import (
+    Article,
+    Item,
+    Product,
+)
+
+T = TypeVar("T", bound=Item)
+
+
+@attr.s(auto_attribs=True)
+class _AutoExtractData(Generic[T]):
+    """Container for a single AutoExtract API result.
+
+    Should not be used directly by providers.
+    Use derived classes like AutoExtractArticleData and similar.
+
+    API responses are wrapped in a JSON array
+    (this is to facilitate query batching),
+    but ``data`` holds a single element of that array.
+
+    https://doc.scrapinghub.com/autoextract.html#responses
+    """
+
+    # Key under which ``data`` stores the item payload; set by subclasses.
+    item_key: ClassVar[str]
+
+    # One decoded result object, e.g. ``{"article": {...}, "query": {...}}``.
+    data: dict
+
+    @property
+    def item_class(self):
+        """Return the item type this class was parametrized with.
+
+        Scans every original base instead of assuming the parametrized
+        ``_AutoExtractData[...]`` comes first, so a subclass may list
+        mixins ahead of it.
+
+        Raises:
+            TypeError: if no parametrized ``_AutoExtractData`` base exists.
+        """
+        for base in type(self).__orig_bases__:
+            origin = getattr(base, "__origin__", None)
+            if not isinstance(origin, type):
+                continue
+            if issubclass(origin, _AutoExtractData):
+                return base.__args__[0]
+        raise TypeError(
+            f"{type(self).__name__} has no parametrized _AutoExtractData base"
+        )
+
+    def to_item(self) -> Optional[T]:
+        """Convert ``data[item_key]`` with ``item_class.from_dict``.
+
+        Raises:
+            KeyError: if ``data`` has no ``item_key`` entry.
+        """
+        return self.item_class.from_dict(self.data[self.item_key])
+
+
+@attr.s(auto_attribs=True)
+class AutoExtractArticleData(_AutoExtractData[Article]):
+    """Container for AutoExtract Article data, kept in ``data["article"]``.
+
+    https://doc.scrapinghub.com/autoextract/article.html
+    """
+
+    item_key = "article"
+
+
+@attr.s(auto_attribs=True)
+class AutoExtractProductData(_AutoExtractData[Product]):
+    """Container for AutoExtract Product data, kept in ``data["product"]``.
+
+    https://doc.scrapinghub.com/autoextract/product.html
+    """
+
+    item_key = "product"
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -1,6 +1,15 @@
 import json
 import os
 
+from autoextract_poet.items import (
+    AdditionalProperty,
+    Breadcrumb,
+    Item,
+    GTIN,
+    Offer,
+    Rating,
+)
+
 
 def load_fixture(name):
     path = os.path.join(
@@ -9,3 +18,23 @@ def load_fixture(name):
     )
     with open(path, 'r') as f:
         return json.loads(f.read())
+
+
+def item_equals_dict(item: Item, data: dict) -> bool:
+    """Tell whether each ``data`` entry equals the same-named ``item`` attribute.
+
+    Nested values are converted to their item classes before comparing.
+    """
+    converters = {
+        'additionalProperty': AdditionalProperty.from_list,
+        'aggregateRating': Rating.from_dict,
+        'breadcrumbs': Breadcrumb.from_list,
+        'gtin': GTIN.from_list,
+        'offers': Offer.from_list,
+    }
+    for name, raw in data.items():
+        convert = converters.get(name)
+        expected = raw if convert is None else convert(raw)
+        if getattr(item, name) != expected:
+            return False
+    return True
diff --git a/tests/fixtures/sample_article.json b/tests/fixtures/sample_article.json
@@ -1,51 +1,57 @@
-{
+[
+  {
     "article": {
-        "headline": "Article headline",
-        "datePublished": "2019-06-19T00:00:00",
-        "datePublishedRaw": "June 19, 2019",
-        "dateModified": "2019-06-21T00:00:00",
-        "dateModifiedRaw": "June 21, 2019",
-        "author": "Article author",
-        "authorsList": [
-            "Article author"
-        ],
-        "inLanguage": "en",
-        "breadcrumbs": [
-            {
-                "name": "Level 1",
-                "link": "http://example.com"
-            }
-        ],
-        "mainImage": "http://example.com/image.png",
-        "images": [
-            "http://example.com/image.png"
-        ],
-        "description": "Article summary",
-        "articleBody": "Article body ...",
-        "articleBodyHtml": "<article><p>Article body ... </p> ... </article>",
-        "articleBodyRaw": "<div id=\"an-article\">Article body ...",
-        "videoUrls": [
-            "https://example.com/video.mp4"
-        ],
-        "audioUrls": [
-            "https://example.com/audio.mp3"
-        ],
-        "probability": 0.95,
-        "canonicalUrl": "https://example.com/article/article-about-something",
-        "url": "https://example.com/article?id=24"
+      "headline": "Article headline",
+      "datePublished": "2019-06-19T00:00:00",
+      "datePublishedRaw": "June 19, 2019",
+      "dateModified": "2019-06-21T00:00:00",
+      "dateModifiedRaw": "June 21, 2019",
+      "author": "Article author",
+      "authorsList": [
+        "Article author"
+      ],
+      "inLanguage": "en",
+      "breadcrumbs": [
+        {
+          "name": "Level 1",
+          "link": "http://example.com"
+        }
+      ],
+      "mainImage": "http://example.com/image.png",
+      "images": [
+        "http://example.com/image.png"
+      ],
+      "description": "Article summary",
+      "articleBody": "Article body ...",
+      "articleBodyHtml": "<article><p>Article body ... </p> ... </article>",
+      "articleBodyRaw": "<div id=\"an-article\">Article body ...",
+      "videoUrls": [
+        "https://example.com/video.mp4"
+      ],
+      "audioUrls": [
+        "https://example.com/audio.mp3"
+      ],
+      "probability": 0.95,
+      "canonicalUrl": "https://example.com/article/article-about-something",
+      "url": "https://example.com/article?id=24"
     },
     "webPage": {
-        "inLanguages": [
-            {"code": "en"},
-            {"code": "es"}
-        ]
+      "inLanguages": [
+        {
+          "code": "en"
+        },
+        {
+          "code": "es"
+        }
+      ]
     },
     "query": {
-        "id": "1564747029122-9e02a1868d70b7a3",
-        "domain": "example.com",
-        "userQuery": {
-            "pageType": "article",
-            "url": "http://example.com/article?id=24"
-        }
+      "id": "1564747029122-9e02a1868d70b7a3",
+      "domain": "example.com",
+      "userQuery": {
+        "pageType": "article",
+        "url": "http://example.com/article?id=24"
+      }
     }
-}
+  }
+]
diff --git a/tests/fixtures/sample_product.json b/tests/fixtures/sample_product.json
@@ -1,59 +1,65 @@
-{
+[
+  {
     "product": {
-        "name": "Product name",
-        "offers": [
-            {
-                "price": "42",
-                "currency": "USD",
-                "availability": "InStock"
-            }
-        ],
-        "sku": "product sku",
-        "mpn": "product mpn",
-        "gtin": [
-            {
-                "type": "ean13",
-                "value": "978-3-16-148410-0"
-            }
-        ],
-        "brand": "product brand",
-        "breadcrumbs": [
-            {
-                "name": "Level 1",
-                "link": "http://example.com"
-            }
-        ],
-        "mainImage": "http://example.com/image.png",
-        "images": [
-            "http://example.com/image.png"
-        ],
-        "description": "product description",
-        "aggregateRating": {
-            "ratingValue": 4.5,
-            "bestRating": 5.0,
-            "reviewCount": 31
-        },
-        "additionalProperty": [
-            {
-                "name": "property 1",
-                "value": "value of property 1"
-            }
-        ],
-        "probability": 0.95,
-        "url": "https://example.com/product"
+      "name": "Product name",
+      "offers": [
+        {
+          "price": "42",
+          "currency": "USD",
+          "availability": "InStock"
+        }
+      ],
+      "sku": "product sku",
+      "mpn": "product mpn",
+      "gtin": [
+        {
+          "type": "ean13",
+          "value": "978-3-16-148410-0"
+        }
+      ],
+      "brand": "product brand",
+      "breadcrumbs": [
+        {
+          "name": "Level 1",
+          "link": "http://example.com"
+        }
+      ],
+      "mainImage": "http://example.com/image.png",
+      "images": [
+        "http://example.com/image.png"
+      ],
+      "description": "product description",
+      "aggregateRating": {
+        "ratingValue": 4.5,
+        "bestRating": 5.0,
+        "reviewCount": 31
+      },
+      "additionalProperty": [
+        {
+          "name": "property 1",
+          "value": "value of property 1"
+        }
+      ],
+      "probability": 0.95,
+      "url": "https://example.com/product"
     },
     "webPage": {
-        "inLanguages": [
-            {"code": "en"},
-            {"code": "es"}
-        ]
+      "inLanguages": [
+        {
+          "code": "en"
+        },
+        {
+          "code": "es"
+        }
+      ]
     },
     "query": {
-        "id": "1564747029122-9e02a1868d70b7a2",
-        "domain": "example.com",
-        "userQuery": {
-            "pageType": "product",
-            "url": "https://example.com/product"
-        }
+      "id": "1564747029122-9e02a1868d70b7a2",
+      "domain": "example.com",
+      "userQuery": {
+        "pageType": "product",
+        "url": "https://example.com/product"
+      }
     }
-}
+  }
+]
diff --git a/tests/test_items.py b/tests/test_items.py
@@ -1,19 +1,19 @@
 import pytest
 
 from autoextract_poet.items import (
-    Offer,
-    Breadcrumb,
-    Rating,
     AdditionalProperty,
-    GTIN,
     Article,
+    Breadcrumb,
+    GTIN,
+    Offer,
     Product,
+    Rating,
 )
 
-from tests import load_fixture
+from tests import load_fixture, item_equals_dict
 
-example_product_result = load_fixture("sample_product.json")
-example_article_result = load_fixture("sample_article.json")
+example_article_result = load_fixture("sample_article.json")[0]
+example_product_result = load_fixture("sample_product.json")[0]
 
 
 @pytest.mark.parametrize(
@@ -28,19 +28,7 @@
 )  # type: ignore
 def test_item(cls, data):
     item = cls.from_dict(data)
-    for key, value in data.items():
-        if key == 'breadcrumbs':
-            value = Breadcrumb.from_list(value)
-        if key == 'offers':
-            value = Offer.from_list(value)
-        if key == 'additionalProperty':
-            value = AdditionalProperty.from_list(value)
-        if key == 'gtin':
-            value = GTIN.from_list(value)
-        if key == 'aggregateRating':
-            value = Rating.from_dict(value)
-
-        assert getattr(item, key) == value
+    assert item_equals_dict(item, data)
 
     # AttributeError: 'cls' object has no attribute 'foo'
     with pytest.raises(AttributeError):

diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py
@@ -0,0 +1,29 @@
+import pytest
+
+from autoextract_poet.page_inputs import (
+    AutoExtractArticleData,
+    AutoExtractProductData,
+)
+
+from tests import load_fixture, item_equals_dict
+
+example_article_result = load_fixture("sample_article.json")
+example_product_result = load_fixture("sample_product.json")
+
+
+@pytest.mark.parametrize("cls, results", [
+    (AutoExtractArticleData, example_article_result),
+    (AutoExtractProductData, example_product_result),
+])
+def test_response_data(cls, results):
+    """Check that ``to_item()`` reproduces the fixture's item payload.
+
+    ``results`` is a whole fixture (a JSON array); only its first
+    element is wrapped in ``cls``.
+    """
+    single_result = results[0]
+    page_input = cls(single_result)
+    extracted = page_input.to_item()
+
+    assert isinstance(extracted, page_input.item_class)
+    assert item_equals_dict(extracted, single_result[cls.item_key])