diff --git a/doc/source/io.rst b/doc/source/io.rst index 9442f59425106..0fabfa7077a95 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1230,6 +1230,37 @@ nanoseconds import os os.remove('test.json') +.. _io.json_normalize: + +Normalization +~~~~~~~~~~~~~ + +.. versionadded:: 0.13.0 + +Pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data +into a flat table. + +.. ipython:: python + + from pandas.io.json import json_normalize + data = [{'state': 'Florida', + 'shortname': 'FL', + 'info': { + 'governor': 'Rick Scott' + }, + 'counties': [{'name': 'Dade', 'population': 12345}, + {'name': 'Broward', 'population': 40000}, + {'name': 'Palm Beach', 'population': 60000}]}, + {'state': 'Ohio', + 'shortname': 'OH', + 'info': { + 'governor': 'John Kasich' + }, + 'counties': [{'name': 'Summit', 'population': 1234}, + {'name': 'Cuyahoga', 'population': 1337}]}] + + json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']]) + HTML ---- @@ -1244,7 +1275,7 @@ Reading HTML Content .. _io.read_html: -.. versionadded:: 0.12 +.. versionadded:: 0.12.0 The top-level :func:`~pandas.io.html.read_html` function can accept an HTML string/file/url and will parse HTML tables into list of pandas DataFrames. @@ -1620,7 +1651,7 @@ advanced strategies .. note:: - The prior method of accessing Excel is now deprecated as of 0.12, + The prior method of accessing Excel is now deprecated as of 0.12.0, this will work but will be removed in a future version. .. code-block:: python @@ -2291,7 +2322,7 @@ The default is 50,000 rows returned in a chunk. .. note:: - .. versionadded:: 0.12 + .. versionadded:: 0.12.0 You can also use the iterator with ``read_hdf`` which will open, then automatically close the store when finished iterating. @@ -2580,7 +2611,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specifiy the minim ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to allow all *indexables* or *data_columns* to have this min_itemsize. -Starting in 0.11, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. +Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. .. note:: @@ -2860,7 +2891,7 @@ Reading from STATA format .. _io.stata_reader: -.. versionadded:: 0.12 +.. versionadded:: 0.12.0 The top-level function ``read_stata`` will read a dta format file and return a DataFrame: diff --git a/doc/source/release.rst b/doc/source/release.rst index 78236bbf821dd..179e7ff091444 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -169,6 +169,8 @@ Improvements to existing features high-dimensional arrays). - :func:`~pandas.read_html` now supports the ``parse_dates``, ``tupleize_cols`` and ``thousands`` parameters (:issue:`4770`). + - :meth:`~pandas.io.json.json_normalize` is a new method to allow you to create a flat table + from semi-structured JSON data. 
:ref:`See the docs <io.json_normalize>` (:issue:`1067`)

 API Changes
 ~~~~~~~~~~~
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
index fe6d796d95968..c6a4c280ca4bb 100644
--- a/doc/source/v0.13.0.txt
+++ b/doc/source/v0.13.0.txt
@@ -490,6 +490,8 @@ Enhancements
   - ``tz_localize`` can infer a fall daylight savings transition based on the structure
     of the unlocalized data (:issue:`4230`), see :ref:`here`
   - DatetimeIndex is now in the API documentation, see :ref:`here`
+  - :meth:`~pandas.io.json.json_normalize` is a new method to allow you to create a flat table
+    from semi-structured JSON data. :ref:`See the docs <io.json_normalize>` (:issue:`1067`)

 .. _whatsnew_0130.experimental:

diff --git a/pandas/io/json.py b/pandas/io/json.py
index e3c85fae045d0..497831f597681 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -1,6 +1,8 @@
 # pylint: disable-msg=E1101,W0613,W0603
-import os
+import os
+import copy
+from collections import defaultdict

 import numpy as np

 import pandas.json as _json
@@ -15,7 +17,6 @@
 dumps = _json.dumps

 ### interface to/from ###

-
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms'):

@@ -71,7 +72,6 @@ def write(self):
             date_unit=self.date_unit,
             iso_dates=self.date_format == 'iso')

-
 class SeriesWriter(Writer):
     _default_orient = 'index'

@@ -537,3 +537,201 @@ def is_ok(col):
                 lambda col, c: self._try_convert_to_date(c),
                 lambda col, c: ((self.keep_default_dates and is_ok(col))
                                 or col in convert_dates))
+
+
+#----------------------------------------------------------------------
+# JSON normalization routines
+
+def nested_to_record(ds, prefix="", level=0):
+    """A simplified json_normalize.
+
+    Converts a nested dict into a flat dict ("record"); unlike
+    json_normalize, it does not attempt to extract a subset of the data.
+
+    Parameters
+    ----------
+    ds : dict or list of dicts
+    prefix : string, default ""
+        the current key prefix (used on recursive calls)
+    level : int, default 0
+        the current nesting depth (used on recursive calls)
+
+    Returns
+    -------
+    d : dict or list of dicts, matching `ds`
+
+    Examples
+    --------
+    >>> nested_to_record(dict(flat1=1, dict1=dict(c=1, d=2),
+    ...                       nested=dict(e=dict(c=1, d=2), d=2)))
+    {'dict1.c': 1,
+     'dict1.d': 2,
+     'flat1': 1,
+     'nested.d': 2,
+     'nested.e.c': 1,
+     'nested.e.d': 2}
+    """
+    singleton = False
+    if isinstance(ds, dict):
+        ds = [ds]
+        singleton = True
+
+    new_ds = []
+    for d in ds:
+
+        new_d = copy.deepcopy(d)
+        for k, v in d.items():
+            # each key gets renamed with prefix
+            if level == 0:
+                newkey = str(k)
+            else:
+                newkey = prefix + '.' + str(k)
+
+            # only dicts get recursively flattened;
+            # only below the top level do we rename the rest of the keys
+            if not isinstance(v, dict):
+                if level != 0:  # so we skip copying for top level, common case
+                    v = new_d.pop(k)
+                    new_d[newkey] = v
+                continue
+            else:
+                v = new_d.pop(k)
+                new_d.update(nested_to_record(v, newkey, level + 1))
+        new_ds.append(new_d)
+
+    if singleton:
+        return new_ds[0]
+    return new_ds
+
+
+def json_normalize(data, record_path=None, meta=None,
+                   meta_prefix=None,
+                   record_prefix=None):
+    """
+    "Normalize" semi-structured JSON data into a flat table
+
+    Parameters
+    ----------
+    data : dict or list of dicts
+        Unserialized JSON objects
+    record_path : string or list of strings, default None
+        Path in each object to list of records. If not passed, data will be
+        assumed to be an array of records
+    meta : list of paths (string or list of strings)
+        Fields to use as metadata for each record in resulting table
+    record_prefix : string, default None
+        If not None, prepended to the column names of the record data,
+        e.g. pass 'foo.bar.' to get columns like foo.bar.field when the
+        path to the records is ['foo', 'bar']
+    meta_prefix : string, default None
+        If not None, prepended to the column names of the meta fields
+
+    Examples
+    --------
+    data = [{'state': 'Florida',
+             'shortname': 'FL',
+             'info': {
+                  'governor': 'Rick Scott'
+             },
+             'counties': [{'name': 'Dade', 'population': 12345},
+                          {'name': 'Broward', 'population': 40000},
+                          {'name': 'Palm Beach', 'population': 60000}]},
+            {'state': 'Ohio',
+             'shortname': 'OH',
+             'info': {
+                  'governor': 'John Kasich'
+             },
+             'counties': [{'name': 'Summit', 'population': 1234},
+                          {'name': 'Cuyahoga', 'population': 1337}]}]
+
+    result = json_normalize(data, 'counties', ['state', 'shortname',
+                                               ['info', 'governor']])
+
+    (abbreviated output)
+
+    state      governor
+    Florida    Rick Scott
+
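+    If ``record_path`` is omitted, each object is instead flattened
+    naively with ``nested_to_record`` (a sketch of the expected
+    behaviour, not verbatim output):
+
+    result = json_normalize(data)
+
+    This yields one row per state; the nested dict becomes the dotted
+    column 'info.governor', and list-valued fields such as 'counties'
+    are carried along unmodified.
+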
+    Returns
+    -------
+    frame : DataFrame
+    """
+    def _pull_field(js, spec):
+        # walk `spec` (a key or a list of keys) down into the object
+        result = js
+        if isinstance(spec, list):
+            for field in spec:
+                result = result[field]
+        else:
+            result = result[spec]
+
+        return result
+
+    # A bit of a hackjob
+    if isinstance(data, dict):
+        data = [data]
+
+    if record_path is None:
+        if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
+            # naive normalization, this is idempotent for flat records
+            # and potentially will inflate the data considerably for
+            # deeply nested structures:
+            #   {VeryLong: {b: 1, c: 2}} -> {VeryLong.b: 1, VeryLong.c: 2}
+            #
+            # TODO: handle record values which are lists, at least error
+            #       reasonably
+            data = nested_to_record(data)
+        return DataFrame(data)
+    elif not isinstance(record_path, list):
+        record_path = [record_path]
+
+    if meta is None:
+        meta = []
+    elif not isinstance(meta, list):
+        meta = [meta]
+
+    # normalize each meta entry to a list of keys (a path)
+    for i, x in enumerate(meta):
+        if not isinstance(x, list):
+            meta[i] = [x]
+
+    # Disastrously inefficient for now
+    records = []
+    lengths = []
+
+    meta_vals = defaultdict(list)
+    meta_keys = ['.'.join(val) for val in meta]
+
+    def _recursive_extract(data, path, seen_meta, level=0):
+        if len(path) > 1:
+            # still above the record level: remember the meta fields
+            # that live at this level, then descend along the path
+            for obj in data:
+                for val, key in zip(meta, meta_keys):
+                    if level + 1 == len(val):
+                        seen_meta[key] = _pull_field(obj, val[-1])
+
+                _recursive_extract(obj[path[0]], path[1:],
+                                   seen_meta, level=level + 1)
+        else:
+            for obj in data:
+                recs = _pull_field(obj, path[0])
+
+                # For repeating the metadata later
+                lengths.append(len(recs))
+
+                for val, key in zip(meta, meta_keys):
+                    if level + 1 > len(val):
+                        meta_val = seen_meta[key]
+                    else:
+                        meta_val = _pull_field(obj, val[level:])
+                    meta_vals[key].append(meta_val)
+
+                records.extend(recs)
+
+    _recursive_extract(data, record_path, {}, level=0)
+
+    result = DataFrame(records)
+
+    if record_prefix is not None:
+        result.rename(columns=lambda x: record_prefix + x, inplace=True)
+
+    # Data types, a problem
+    for k, v in compat.iteritems(meta_vals):
+        if meta_prefix is not None:
+            k = meta_prefix + k
+
+        if k in result:
+            raise ValueError('Conflicting metadata name %s, '
+                             'need distinguishing prefix ' % k)
+
+        result[k] = np.array(v).repeat(lengths)
+
+    return result
diff --git a/pandas/io/tests/test_json_norm.py b/pandas/io/tests/test_json_norm.py
new file mode 100644
index 0000000000000..e96a89e71f12d
--- /dev/null
+++ b/pandas/io/tests/test_json_norm.py
@@ -0,0 +1,208 @@
+import nose
+import unittest
+
+from pandas import DataFrame
+import numpy as np
+
+import pandas.util.testing as tm
+
+from pandas.io.json import json_normalize, nested_to_record
+
+
+def _assert_equal_data(left, right):
+    if not left.columns.equals(right.columns):
+        left = left.reindex(columns=right.columns)
+
+    tm.assert_frame_equal(left, right)
+
+
+class TestJSONNormalize(unittest.TestCase):
+
+    def setUp(self):
+        # fixture shared by the tests: two states, each with nested
+        # 'info' metadata and a list of county records
+        self.state_data = [
+            
{'counties': [{'name': 'Dade', 'population': 12345}, + {'name': 'Broward', 'population': 40000}, + {'name': 'Palm Beach', 'population': 60000}], + 'info': {'governor': 'Rick Scott'}, + 'shortname': 'FL', + 'state': 'Florida'}, + {'counties': [{'name': 'Summit', 'population': 1234}, + {'name': 'Cuyahoga', 'population': 1337}], + 'info': {'governor': 'John Kasich'}, + 'shortname': 'OH', + 'state': 'Ohio'}] + + def test_simple_records(self): + recs = [{'a': 1, 'b': 2, 'c': 3}, + {'a': 4, 'b': 5, 'c': 6}, + {'a': 7, 'b': 8, 'c': 9}, + {'a': 10, 'b': 11, 'c': 12}] + + result = json_normalize(recs) + expected = DataFrame(recs) + + tm.assert_frame_equal(result, expected) + + def test_simple_normalize(self): + result = json_normalize(self.state_data[0], 'counties') + expected = DataFrame(self.state_data[0]['counties']) + tm.assert_frame_equal(result, expected) + + result = json_normalize(self.state_data, 'counties') + + expected = [] + for rec in self.state_data: + expected.extend(rec['counties']) + expected = DataFrame(expected) + + tm.assert_frame_equal(result, expected) + + result = json_normalize(self.state_data, 'counties', meta='state') + expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2]) + + tm.assert_frame_equal(result, expected) + + def test_more_deeply_nested(self): + data = [{'country': 'USA', + 'states': [{'name': 'California', + 'cities': [{'name': 'San Francisco', + 'pop': 12345}, + {'name': 'Los Angeles', + 'pop': 12346}] + }, + {'name': 'Ohio', + 'cities': [{'name': 'Columbus', + 'pop': 1234}, + {'name': 'Cleveland', + 'pop': 1236}]} + ] + }, + {'country': 'Germany', + 'states': [{'name': 'Bayern', + 'cities': [{'name': 'Munich', 'pop': 12347}] + }, + {'name': 'Nordrhein-Westfalen', + 'cities': [{'name': 'Duesseldorf', 'pop': 1238}, + {'name': 'Koeln', 'pop': 1239}]} + ] + } + ] + + result = json_normalize(data, ['states', 'cities'], + meta=['country', ['states', 'name']]) + # meta_prefix={'states': 'state_'}) + + ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3, + 'states.name': ['California', 'California', 'Ohio', 'Ohio', + 'Bayern', 'Nordrhein-Westfalen', + 'Nordrhein-Westfalen'], + 'name': ['San Francisco', 'Los Angeles', 'Columbus', + 'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'], + 'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]} + + expected = DataFrame(ex_data, columns=result.columns) + tm.assert_frame_equal(result, expected) + + def test_shallow_nested(self): + data = [{'state': 'Florida', + 'shortname': 'FL', + 'info': { + 'governor': 'Rick Scott' + }, + 'counties': [{'name': 'Dade', 'population': 12345}, + {'name': 'Broward', 'population': 40000}, + {'name': 'Palm Beach', 'population': 60000}]}, + {'state': 'Ohio', + 'shortname': 'OH', + 'info': { + 'governor': 'John Kasich' + }, + 'counties': [{'name': 'Summit', 'population': 1234}, + {'name': 'Cuyahoga', 'population': 1337}]}] + + result = json_normalize(data, 'counties', + ['state', 'shortname', + ['info', 'governor']]) + ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit', + 'Cuyahoga'], + 'state': ['Florida'] * 3 + ['Ohio'] * 2, + 'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'], + 'info.governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2, + 'population': [12345, 40000, 60000, 1234, 1337]} + expected = DataFrame(ex_data, columns=result.columns) + tm.assert_frame_equal(result, expected) + + def test_meta_name_conflict(self): + data = [{'foo': 'hello', + 'bar': 'there', + 'data': [{'foo': 'something', 'bar': 'else'}, + {'foo': 'something2', 'bar': 'else2'}]}] + + 
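# 'foo' and 'bar' occur both inside the records and as requested meta
+        # fields, so json_normalize raises unless a distinguishing
+        # meta_prefix is supplied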
+        self.assertRaises(ValueError, json_normalize, data,
+                          'data', meta=['foo', 'bar'])
+
+        result = json_normalize(data, 'data', meta=['foo', 'bar'],
+                                meta_prefix='meta')
+
+        for val in ['metafoo', 'metabar', 'foo', 'bar']:
+            self.assertTrue(val in result)
+
+    def test_record_prefix(self):
+        result = json_normalize(self.state_data[0], 'counties')
+        expected = DataFrame(self.state_data[0]['counties'])
+        tm.assert_frame_equal(result, expected)
+
+        result = json_normalize(self.state_data, 'counties',
+                                meta='state',
+                                record_prefix='county_')
+
+        expected = []
+        for rec in self.state_data:
+            expected.extend(rec['counties'])
+        expected = DataFrame(expected)
+        expected = expected.rename(columns=lambda x: 'county_' + x)
+        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
+
+        tm.assert_frame_equal(result, expected)
+
+
+class TestNestedToRecord(unittest.TestCase):
+
+    def test_flat_stays_flat(self):
+        recs = [dict(flat1=1, flat2=2),
+                dict(flat1=3, flat2=4),
+                ]
+
+        result = nested_to_record(recs)
+        expected = recs
+        self.assertEqual(result, expected)
+
+    def test_one_level_deep_flattens(self):
+        data = dict(flat1=1,
+                    dict1=dict(c=1, d=2))
+
+        result = nested_to_record(data)
+        expected = {'dict1.c': 1,
+                    'dict1.d': 2,
+                    'flat1': 1}
+
+        self.assertEqual(result, expected)
+
+    def test_nested_flattens(self):
+        data = dict(flat1=1,
+                    dict1=dict(c=1, d=2),
+                    nested=dict(e=dict(c=1, d=2),
+                                d=2))
+
+        result = nested_to_record(data)
+        expected = {'dict1.c': 1,
+                    'dict1.d': 2,
+                    'flat1': 1,
+                    'nested.d': 2,
+                    'nested.e.c': 1,
+                    'nested.e.d': 2}
+
+        self.assertEqual(result, expected)
+
+
+if __name__ == '__main__':
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
+                         '--pdb-failure', '-s'], exit=False)