From 138c6c251e502eb985efcdfc9055a408e8996f3b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 5 Feb 2019 11:28:34 -0800 Subject: [PATCH 1/5] make core.config self-contained --- pandas/core/config.py | 226 ++++++++++++++++++++++++++++++++-- pandas/io/formats/printing.py | 168 +------------------------ 2 files changed, 215 insertions(+), 179 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index 0f43ca65d187a..24976c773ac43 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -53,8 +53,13 @@ import re import warnings -import pandas.compat as compat -from pandas.compat import lmap, map, u + +try: + unicode + PY3 = False +except NameError: + unicode = str + PY3 = True DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') RegisteredOption = namedtuple('RegisteredOption', @@ -140,7 +145,7 @@ def _describe_option(pat='', _print_desc=True): if len(keys) == 0: raise OptionError('No such keys(s)') - s = u('') + s = u'' for k in keys: # filter by pat s += _build_option_description(k) @@ -634,7 +639,7 @@ def _build_option_description(k): o = _get_registered_option(k) d = _get_deprecated_option(k) - s = u('{k} ').format(k=k) + s = u'{k} '.format(k=k) if o.doc: s += '\n'.join(o.doc.strip().split('\n')) @@ -642,14 +647,14 @@ def _build_option_description(k): s += 'No description available.' if o: - s += (u('\n [default: {default}] [currently: {current}]') + s += (u'\n [default: {default}] [currently: {current}]' .format(default=o.defval, current=_get_option(k, True))) if d: - s += u('\n (Deprecated') - s += (u(', use `{rkey}` instead.') + s += u'\n (Deprecated' + s += (u', use `{rkey}` instead.' .format(rkey=d.rkey if d.rkey else '')) - s += u(')') + s += u')' s += '\n\n' return s @@ -736,6 +741,204 @@ def inner(key, *args, **kwds): get_option = _get_option register_option = _register_option + +# ----------------------------------------------------------------------- +# Unicode consolidation +# --------------------- +# +# pprinting utility functions for generating Unicode text or +# bytes(3.x)/str(2.x) representations of objects. +# Try to use these as much as possible rather then rolling your own. +# +# When to use +# ----------- +# +# 1) If you're writing code internal to pandas (no I/O directly involved), +# use pprint_thing(). +# +# It will always return unicode text which can handled by other +# parts of the package without breakage. +# +# 2) if you need to write something out to file, use +# pprint_thing_encoded(encoding). +# +# If no encoding is specified, it defaults to utf-8. Since encoding pure +# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're +# working with straight ascii. + + +def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. + + bounds length of printed sequence, depending on options + """ + if isinstance(seq, set): + fmt = u"{{{body}}}" + else: + fmt = u"[{body}]" if hasattr(seq, '__setitem__') else u"({body})" + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + s = iter(seq) + # handle sets, no slicing + r = [pprint_thing(next(s), + _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) + for i in range(min(nitems, len(seq)))] + body = ", ".join(r) + + if nitems < len(seq): + body += ", ..." + elif isinstance(seq, tuple) and len(seq) == 1: + body += ',' + + return fmt.format(body=body) + + +def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. + """ + fmt = u"{{{things}}}" + pairs = [] + + pfmt = u"{key}: {val}" + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + for k, v in list(seq.items())[:nitems]: + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) + + if nitems < len(seq): + return fmt.format(things=", ".join(pairs) + ", ...") + else: + return fmt.format(things=", ".join(pairs)) + + +def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, + quote_strings=False, max_seq_items=None): + """ + This function is the sanctioned way of converting objects + to a unicode representation. + + properly handles nested sequences containing unicode strings + (unicode(object) does not) + + Parameters + ---------- + thing : anything to be formatted + _nest_lvl : internal use only. pprint_thing() is mutually-recursive + with pprint_sequence, this argument is used to keep track of the + current nesting level, and limit it. + escape_chars : list or dict, optional + Characters to escape. If a dict is passed the values are the + replacements + default_escapes : bool, default False + Whether the input escape characters replaces or adds to the defaults + max_seq_items : False, int, default None + Pass thru to other pretty printers to limit sequence printing + + Returns + ------- + result - unicode object on py2, str on py3. Always Unicode. + + """ + + def as_escaped_unicode(thing, escape_chars=escape_chars): + # Unicode is fine, else we try to decode using utf-8 and 'replace' + # if that's not it either, we have no way of knowing and the user + # should deal with it himself. + + try: + result = unicode(thing) # we should try this first + except UnicodeDecodeError: + # either utf-8 or we replace errors + result = str(thing).decode('utf-8', "replace") + + translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } + if isinstance(escape_chars, dict): + if default_escapes: + translate.update(escape_chars) + else: + translate = escape_chars + escape_chars = list(escape_chars.keys()) + else: + escape_chars = escape_chars or tuple() + for c in escape_chars: + result = result.replace(c, translate[c]) + + return unicode(result) + + if (PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): + return unicode(thing) + elif (isinstance(thing, dict) and + _nest_lvl < get_option("display.pprint_nest_depth")): + result = _pprint_dict(thing, _nest_lvl, quote_strings=True, + max_seq_items=max_seq_items) + elif (is_sequence(thing) and + _nest_lvl < get_option("display.pprint_nest_depth")): + result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, + quote_strings=quote_strings, + max_seq_items=max_seq_items) + elif isinstance(thing, (str, unicode)) and quote_strings: + if PY3: + fmt = u"'{thing}'" + else: + fmt = u"u'{thing}'" + result = fmt.format(thing=as_escaped_unicode(thing)) + else: + result = as_escaped_unicode(thing) + + return unicode(result) # always unicode + + +# TODO: de-duplicate with version in core.dtypes.inference +def is_sequence(obj): + """ + Check if the object is a sequence of objects. + String types are not included as sequences here. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_sequence : bool + Whether `obj` is a sequence of objects. + + Examples + -------- + >>> l = [1, 2, 3] + >>> + >>> is_sequence(l) + True + >>> is_sequence(iter(l)) + False + """ + + try: + iter(obj) # Can iterate over it. + len(obj) # Has a length associated with it. + return not isinstance(obj, (bytes, unicode)) + except (TypeError, AttributeError): + return False + + +# ----------------------------------------------------------------------- # These factories and methods are handy for use as the validator # arg in register_option @@ -777,7 +980,6 @@ def is_instance_factory(_type): """ if isinstance(_type, (tuple, list)): _type = tuple(_type) - from pandas.io.formats.printing import pprint_thing type_repr = "|".join(map(pprint_thing, _type)) else: type_repr = "'{typ}'".format(typ=_type) @@ -796,11 +998,11 @@ def is_one_of_factory(legal_values): legal_values = [c for c in legal_values if not callable(c)] def inner(x): - from pandas.io.formats.printing import pprint_thing as pp if x not in legal_values: if not any(c(x) for c in callables): - pp_values = pp("|".join(lmap(pp, legal_values))) + uvals = [pprint_thing(lval) for lval in legal_values] + pp_values = pprint_thing("|".join(uvals)) msg = "Value must be one of {pp_values}" if len(callables): msg += " or a callable" @@ -815,7 +1017,7 @@ def inner(x): is_bool = is_type_factory(bool) is_float = is_type_factory(float) is_str = is_type_factory(str) -is_unicode = is_type_factory(compat.text_type) +is_unicode = is_type_factory(unicode) is_text = is_instance_factory((str, bytes)) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 6d45d1e5dfcee..052fe53a64c3e 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -4,12 +4,8 @@ import sys -from pandas.compat import u - -from pandas.core.dtypes.inference import is_sequence - from pandas import compat -from pandas.core.config import get_option +from pandas.core.config import get_option, pprint_thing def adjoin(space, *lists, **kwargs): @@ -68,168 +64,6 @@ def _join_unicode(lines, sep=''): for x in lines]) -# Unicode consolidation -# --------------------- -# -# pprinting utility functions for generating Unicode text or -# bytes(3.x)/str(2.x) representations of objects. -# Try to use these as much as possible rather then rolling your own. -# -# When to use -# ----------- -# -# 1) If you're writing code internal to pandas (no I/O directly involved), -# use pprint_thing(). -# -# It will always return unicode text which can handled by other -# parts of the package without breakage. -# -# 2) if you need to write something out to file, use -# pprint_thing_encoded(encoding). -# -# If no encoding is specified, it defaults to utf-8. Since encoding pure -# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're -# working with straight ascii. - - -def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): - """ - internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. - - bounds length of printed sequence, depending on options - """ - if isinstance(seq, set): - fmt = u("{{{body}}}") - else: - fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})") - - if max_seq_items is False: - nitems = len(seq) - else: - nitems = max_seq_items or get_option("max_seq_items") or len(seq) - - s = iter(seq) - # handle sets, no slicing - r = [pprint_thing(next(s), - _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) - for i in range(min(nitems, len(seq)))] - body = ", ".join(r) - - if nitems < len(seq): - body += ", ..." - elif isinstance(seq, tuple) and len(seq) == 1: - body += ',' - - return fmt.format(body=body) - - -def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): - """ - internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. - """ - fmt = u("{{{things}}}") - pairs = [] - - pfmt = u("{key}: {val}") - - if max_seq_items is False: - nitems = len(seq) - else: - nitems = max_seq_items or get_option("max_seq_items") or len(seq) - - for k, v in list(seq.items())[:nitems]: - pairs.append( - pfmt.format( - key=pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - val=pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) - - if nitems < len(seq): - return fmt.format(things=", ".join(pairs) + ", ...") - else: - return fmt.format(things=", ".join(pairs)) - - -def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, - quote_strings=False, max_seq_items=None): - """ - This function is the sanctioned way of converting objects - to a unicode representation. - - properly handles nested sequences containing unicode strings - (unicode(object) does not) - - Parameters - ---------- - thing : anything to be formatted - _nest_lvl : internal use only. pprint_thing() is mutually-recursive - with pprint_sequence, this argument is used to keep track of the - current nesting level, and limit it. - escape_chars : list or dict, optional - Characters to escape. If a dict is passed the values are the - replacements - default_escapes : bool, default False - Whether the input escape characters replaces or adds to the defaults - max_seq_items : False, int, default None - Pass thru to other pretty printers to limit sequence printing - - Returns - ------- - result - unicode object on py2, str on py3. Always Unicode. - - """ - - def as_escaped_unicode(thing, escape_chars=escape_chars): - # Unicode is fine, else we try to decode using utf-8 and 'replace' - # if that's not it either, we have no way of knowing and the user - # should deal with it himself. - - try: - result = compat.text_type(thing) # we should try this first - except UnicodeDecodeError: - # either utf-8 or we replace errors - result = str(thing).decode('utf-8', "replace") - - translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } - if isinstance(escape_chars, dict): - if default_escapes: - translate.update(escape_chars) - else: - translate = escape_chars - escape_chars = list(escape_chars.keys()) - else: - escape_chars = escape_chars or tuple() - for c in escape_chars: - result = result.replace(c, translate[c]) - - return compat.text_type(result) - - if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): - return compat.text_type(thing) - elif (isinstance(thing, dict) and - _nest_lvl < get_option("display.pprint_nest_depth")): - result = _pprint_dict(thing, _nest_lvl, quote_strings=True, - max_seq_items=max_seq_items) - elif (is_sequence(thing) and - _nest_lvl < get_option("display.pprint_nest_depth")): - result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, - quote_strings=quote_strings, - max_seq_items=max_seq_items) - elif isinstance(thing, compat.string_types) and quote_strings: - if compat.PY3: - fmt = u("'{thing}'") - else: - fmt = u("u'{thing}'") - result = fmt.format(thing=as_escaped_unicode(thing)) - else: - result = as_escaped_unicode(thing) - - return compat.text_type(result) # always unicode - - def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): value = pprint_thing(object) # get unicode representation of object return value.encode(encoding, errors, **kwds) From 6f96f3e868ed72c27d23884bce6a5c61be13cf02 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 6 Feb 2019 08:58:12 -0800 Subject: [PATCH 2/5] isort fixup --- pandas/core/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index 24976c773ac43..afb7cd87eadfb 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -53,7 +53,6 @@ import re import warnings - try: unicode PY3 = False From 022ee4f84d748097f3c57e1097c9b8c77af62748 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 6 Feb 2019 17:00:27 -0800 Subject: [PATCH 3/5] use low-tech print --- pandas/core/config.py | 203 +--------------------------------- pandas/io/formats/printing.py | 168 +++++++++++++++++++++++++++- 2 files changed, 170 insertions(+), 201 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index afb7cd87eadfb..9cd51dbca6730 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -741,207 +741,10 @@ def inner(key, *args, **kwds): register_option = _register_option -# ----------------------------------------------------------------------- -# Unicode consolidation -# --------------------- -# -# pprinting utility functions for generating Unicode text or -# bytes(3.x)/str(2.x) representations of objects. -# Try to use these as much as possible rather then rolling your own. -# -# When to use -# ----------- -# -# 1) If you're writing code internal to pandas (no I/O directly involved), -# use pprint_thing(). -# -# It will always return unicode text which can handled by other -# parts of the package without breakage. -# -# 2) if you need to write something out to file, use -# pprint_thing_encoded(encoding). -# -# If no encoding is specified, it defaults to utf-8. Since encoding pure -# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're -# working with straight ascii. - - -def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): - """ - internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. - - bounds length of printed sequence, depending on options - """ - if isinstance(seq, set): - fmt = u"{{{body}}}" - else: - fmt = u"[{body}]" if hasattr(seq, '__setitem__') else u"({body})" - - if max_seq_items is False: - nitems = len(seq) - else: - nitems = max_seq_items or get_option("max_seq_items") or len(seq) - - s = iter(seq) - # handle sets, no slicing - r = [pprint_thing(next(s), - _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) - for i in range(min(nitems, len(seq)))] - body = ", ".join(r) - - if nitems < len(seq): - body += ", ..." - elif isinstance(seq, tuple) and len(seq) == 1: - body += ',' - - return fmt.format(body=body) - - -def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): - """ - internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. - """ - fmt = u"{{{things}}}" - pairs = [] - - pfmt = u"{key}: {val}" - - if max_seq_items is False: - nitems = len(seq) - else: - nitems = max_seq_items or get_option("max_seq_items") or len(seq) - - for k, v in list(seq.items())[:nitems]: - pairs.append( - pfmt.format( - key=pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - val=pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) - - if nitems < len(seq): - return fmt.format(things=", ".join(pairs) + ", ...") - else: - return fmt.format(things=", ".join(pairs)) - - -def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, - quote_strings=False, max_seq_items=None): - """ - This function is the sanctioned way of converting objects - to a unicode representation. - - properly handles nested sequences containing unicode strings - (unicode(object) does not) - - Parameters - ---------- - thing : anything to be formatted - _nest_lvl : internal use only. pprint_thing() is mutually-recursive - with pprint_sequence, this argument is used to keep track of the - current nesting level, and limit it. - escape_chars : list or dict, optional - Characters to escape. If a dict is passed the values are the - replacements - default_escapes : bool, default False - Whether the input escape characters replaces or adds to the defaults - max_seq_items : False, int, default None - Pass thru to other pretty printers to limit sequence printing - - Returns - ------- - result - unicode object on py2, str on py3. Always Unicode. - - """ - - def as_escaped_unicode(thing, escape_chars=escape_chars): - # Unicode is fine, else we try to decode using utf-8 and 'replace' - # if that's not it either, we have no way of knowing and the user - # should deal with it himself. - - try: - result = unicode(thing) # we should try this first - except UnicodeDecodeError: - # either utf-8 or we replace errors - result = str(thing).decode('utf-8', "replace") - - translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } - if isinstance(escape_chars, dict): - if default_escapes: - translate.update(escape_chars) - else: - translate = escape_chars - escape_chars = list(escape_chars.keys()) - else: - escape_chars = escape_chars or tuple() - for c in escape_chars: - result = result.replace(c, translate[c]) - - return unicode(result) - - if (PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): - return unicode(thing) - elif (isinstance(thing, dict) and - _nest_lvl < get_option("display.pprint_nest_depth")): - result = _pprint_dict(thing, _nest_lvl, quote_strings=True, - max_seq_items=max_seq_items) - elif (is_sequence(thing) and - _nest_lvl < get_option("display.pprint_nest_depth")): - result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, - quote_strings=quote_strings, - max_seq_items=max_seq_items) - elif isinstance(thing, (str, unicode)) and quote_strings: - if PY3: - fmt = u"'{thing}'" - else: - fmt = u"u'{thing}'" - result = fmt.format(thing=as_escaped_unicode(thing)) - else: - result = as_escaped_unicode(thing) - - return unicode(result) # always unicode - - -# TODO: de-duplicate with version in core.dtypes.inference -def is_sequence(obj): - """ - Check if the object is a sequence of objects. - String types are not included as sequences here. - - Parameters - ---------- - obj : The object to check - - Returns - ------- - is_sequence : bool - Whether `obj` is a sequence of objects. - - Examples - -------- - >>> l = [1, 2, 3] - >>> - >>> is_sequence(l) - True - >>> is_sequence(iter(l)) - False - """ - - try: - iter(obj) # Can iterate over it. - len(obj) # Has a length associated with it. - return not isinstance(obj, (bytes, unicode)) - except (TypeError, AttributeError): - return False - - # ----------------------------------------------------------------------- # These factories and methods are handy for use as the validator # arg in register_option - def is_type_factory(_type): """ @@ -979,7 +782,7 @@ def is_instance_factory(_type): """ if isinstance(_type, (tuple, list)): _type = tuple(_type) - type_repr = "|".join(map(pprint_thing, _type)) + type_repr = "|".join(map(str, _type)) else: type_repr = "'{typ}'".format(typ=_type) @@ -1000,8 +803,8 @@ def inner(x): if x not in legal_values: if not any(c(x) for c in callables): - uvals = [pprint_thing(lval) for lval in legal_values] - pp_values = pprint_thing("|".join(uvals)) + uvals = [str(lval) for lval in legal_values] + pp_values = "|".join(uvals) msg = "Value must be one of {pp_values}" if len(callables): msg += " or a callable" diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 052fe53a64c3e..6d45d1e5dfcee 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -4,8 +4,12 @@ import sys +from pandas.compat import u + +from pandas.core.dtypes.inference import is_sequence + from pandas import compat -from pandas.core.config import get_option, pprint_thing +from pandas.core.config import get_option def adjoin(space, *lists, **kwargs): @@ -64,6 +68,168 @@ def _join_unicode(lines, sep=''): for x in lines]) +# Unicode consolidation +# --------------------- +# +# pprinting utility functions for generating Unicode text or +# bytes(3.x)/str(2.x) representations of objects. +# Try to use these as much as possible rather then rolling your own. +# +# When to use +# ----------- +# +# 1) If you're writing code internal to pandas (no I/O directly involved), +# use pprint_thing(). +# +# It will always return unicode text which can handled by other +# parts of the package without breakage. +# +# 2) if you need to write something out to file, use +# pprint_thing_encoded(encoding). +# +# If no encoding is specified, it defaults to utf-8. Since encoding pure +# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're +# working with straight ascii. + + +def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. + + bounds length of printed sequence, depending on options + """ + if isinstance(seq, set): + fmt = u("{{{body}}}") + else: + fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})") + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + s = iter(seq) + # handle sets, no slicing + r = [pprint_thing(next(s), + _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) + for i in range(min(nitems, len(seq)))] + body = ", ".join(r) + + if nitems < len(seq): + body += ", ..." + elif isinstance(seq, tuple) and len(seq) == 1: + body += ',' + + return fmt.format(body=body) + + +def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. + """ + fmt = u("{{{things}}}") + pairs = [] + + pfmt = u("{key}: {val}") + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + for k, v in list(seq.items())[:nitems]: + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) + + if nitems < len(seq): + return fmt.format(things=", ".join(pairs) + ", ...") + else: + return fmt.format(things=", ".join(pairs)) + + +def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, + quote_strings=False, max_seq_items=None): + """ + This function is the sanctioned way of converting objects + to a unicode representation. + + properly handles nested sequences containing unicode strings + (unicode(object) does not) + + Parameters + ---------- + thing : anything to be formatted + _nest_lvl : internal use only. pprint_thing() is mutually-recursive + with pprint_sequence, this argument is used to keep track of the + current nesting level, and limit it. + escape_chars : list or dict, optional + Characters to escape. If a dict is passed the values are the + replacements + default_escapes : bool, default False + Whether the input escape characters replaces or adds to the defaults + max_seq_items : False, int, default None + Pass thru to other pretty printers to limit sequence printing + + Returns + ------- + result - unicode object on py2, str on py3. Always Unicode. + + """ + + def as_escaped_unicode(thing, escape_chars=escape_chars): + # Unicode is fine, else we try to decode using utf-8 and 'replace' + # if that's not it either, we have no way of knowing and the user + # should deal with it himself. + + try: + result = compat.text_type(thing) # we should try this first + except UnicodeDecodeError: + # either utf-8 or we replace errors + result = str(thing).decode('utf-8', "replace") + + translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } + if isinstance(escape_chars, dict): + if default_escapes: + translate.update(escape_chars) + else: + translate = escape_chars + escape_chars = list(escape_chars.keys()) + else: + escape_chars = escape_chars or tuple() + for c in escape_chars: + result = result.replace(c, translate[c]) + + return compat.text_type(result) + + if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): + return compat.text_type(thing) + elif (isinstance(thing, dict) and + _nest_lvl < get_option("display.pprint_nest_depth")): + result = _pprint_dict(thing, _nest_lvl, quote_strings=True, + max_seq_items=max_seq_items) + elif (is_sequence(thing) and + _nest_lvl < get_option("display.pprint_nest_depth")): + result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, + quote_strings=quote_strings, + max_seq_items=max_seq_items) + elif isinstance(thing, compat.string_types) and quote_strings: + if compat.PY3: + fmt = u("'{thing}'") + else: + fmt = u("u'{thing}'") + result = fmt.format(thing=as_escaped_unicode(thing)) + else: + result = as_escaped_unicode(thing) + + return compat.text_type(result) # always unicode + + def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): value = pprint_thing(object) # get unicode representation of object return value.encode(encoding, errors, **kwds) From 40ca99007ee5395c3dad614e07b97d3328501db1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 6 Feb 2019 21:19:21 -0800 Subject: [PATCH 4/5] remove unused PY3 --- pandas/core/config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index 9cd51dbca6730..21eea66fac082 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -55,10 +55,8 @@ try: unicode - PY3 = False except NameError: unicode = str - PY3 = True DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') RegisteredOption = namedtuple('RegisteredOption', From 46216b03e596d25a5b1cfe1fd929215f33710efd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 6 Feb 2019 21:39:41 -0800 Subject: [PATCH 5/5] move date opts into config --- pandas/core/config.py | 20 ++++++++++++++++++++ pandas/core/config_init.py | 14 -------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index 21eea66fac082..c901e830a749b 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -837,3 +837,23 @@ def is_callable(obj): if not callable(obj): raise ValueError("Value must be a callable") return True + + +# ----------------------------------------------------------------------- +# Options needed in _libs + +pc_date_dayfirst_doc = """ +: boolean + When True, prints and parses dates with the day first, eg 20/01/2005 +""" + +pc_date_yearfirst_doc = """ +: boolean + When True, prints and parses dates with the year first, eg 2005/01/20 +""" + +with config_prefix('display'): + register_option('date_dayfirst', False, pc_date_dayfirst_doc, + validator=is_bool) + register_option('date_yearfirst', False, pc_date_yearfirst_doc, + validator=is_bool) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index d42a1ab72b156..1dcacfb737503 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -110,16 +110,6 @@ def use_numexpr_cb(key): pandas objects (if it is available). """ -pc_date_dayfirst_doc = """ -: boolean - When True, prints and parses dates with the day first, eg 20/01/2005 -""" - -pc_date_yearfirst_doc = """ -: boolean - When True, prints and parses dates with the year first, eg 2005/01/20 -""" - pc_pprint_nest_depth = """ : int Controls the number of nested levels to process when pretty-printing @@ -331,10 +321,6 @@ def table_schema_cb(key): validator=is_text) cf.register_option('notebook_repr_html', True, pc_nb_repr_h_doc, validator=is_bool) - cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc, - validator=is_bool) - cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc, - validator=is_bool) cf.register_option('pprint_nest_depth', 3, pc_pprint_nest_depth, validator=is_int) cf.register_option('multi_sparse', True, pc_multi_sparse_doc,