diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4d0dee01f05c1..17993b8712019 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -569,6 +569,7 @@ I/O - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`) - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`) - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`) +- Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect ``errors="ignore"`` (:issue:`44312`) - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`) - diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 90fd5d077d031..2c2c127394fb6 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -389,6 +389,8 @@ def _pull_field( try: if isinstance(spec, list): for field in spec: + if result is None: + raise KeyError(field) result = result[field] else: result = result[spec] diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index a2b90f607e918..272a4aa6723dd 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -634,6 +634,33 @@ def test_missing_meta(self, missing_metadata): expected = DataFrame(ex_data, columns=columns) tm.assert_frame_equal(result, expected) + def test_missing_nested_meta(self): + # GH44312 + # If errors="ignore" and nested metadata is null, we should return nan + data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]} + result = json_normalize( + data, + record_path="value", 
meta=["meta", ["nested_meta", "leaf"]], + errors="ignore", + ) + ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]] + columns = ["rec", "meta", "nested_meta.leaf"] + expected = DataFrame(ex_data, columns=columns).astype( + {"nested_meta.leaf": object} + ) + tm.assert_frame_equal(result, expected) + + # If errors="raise" and nested metadata is null, we should raise with the + # key of the first missing level + with pytest.raises(KeyError, match="'leaf' not found"): + json_normalize( + data, + record_path="value", + meta=["meta", ["nested_meta", "leaf"]], + errors="raise", + ) + def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata): # GH41876 # Ensure errors='raise' works as intended even when a record_path of length