Skip to content

CLN: Remove to_xml from format.py to run directly in frame.py #40240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2820,12 +2820,31 @@ def to_xml(
</doc:data>
"""

formatter = fmt.DataFrameFormatter(
self,
index=index,
from pandas.io.formats.xml import (
EtreeXMLFormatter,
LxmlXMLFormatter,
)

return fmt.DataFrameRenderer(formatter).to_xml(
lxml = import_optional_dependency("lxml.etree", errors="ignore")

TreeBuilder: Union[Type[EtreeXMLFormatter], Type[LxmlXMLFormatter]]

if parser == "lxml":
if lxml is not None:
TreeBuilder = LxmlXMLFormatter
else:
raise ImportError(
"lxml not found, please install or use the etree parser."
)

elif parser == "etree":
TreeBuilder = EtreeXMLFormatter

else:
raise ValueError("Values for parser can only be lxml or etree.")

xml_formatter = TreeBuilder(
self,
path_or_buffer=path_or_buffer,
index=index,
root_name=root_name,
Expand All @@ -2838,12 +2857,13 @@ def to_xml(
encoding=encoding,
xml_declaration=xml_declaration,
pretty_print=pretty_print,
parser=parser,
stylesheet=stylesheet,
compression=compression,
storage_options=storage_options,
)

return xml_formatter.write_output()

# ----------------------------------------------------------------------
@Substitution(
klass="DataFrame",
Expand Down
133 changes: 0 additions & 133 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@
IndexLabel,
StorageOptions,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

from pandas.core.dtypes.common import (
is_categorical_dtype,
Expand Down Expand Up @@ -98,7 +96,6 @@
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.reshape.concat import concat
from pandas.core.shared_docs import _shared_docs

from pandas.io.common import stringify_path
from pandas.io.formats.printing import (
Expand Down Expand Up @@ -944,7 +941,6 @@ class DataFrameRenderer:

Called in pandas.core.frame.DataFrame:
- to_html
- to_xml
- to_string

Parameters
Expand Down Expand Up @@ -1037,135 +1033,6 @@ def to_html(
string = html_formatter.to_string()
return save_to_buffer(string, buf=buf, encoding=encoding)

@doc(storage_options=_shared_docs["storage_options"])
def to_xml(
self,
path_or_buffer: Optional[FilePathOrBuffer] = None,
index: Optional[bool] = True,
root_name: Optional[str] = "data",
row_name: Optional[str] = "row",
na_rep: Optional[str] = None,
attr_cols: Optional[Union[str, List[str]]] = None,
elem_cols: Optional[Union[str, List[str]]] = None,
namespaces: Optional[Dict[Optional[str], str]] = None,
prefix: Optional[str] = None,
encoding: str = "utf-8",
xml_declaration: Optional[bool] = True,
pretty_print: Optional[bool] = True,
parser: Optional[str] = "lxml",
stylesheet: Optional[FilePathOrBuffer] = None,
compression: CompressionOptions = "infer",
storage_options: StorageOptions = None,
) -> Optional[str]:
"""
Render a DataFrame to an XML document.

.. versionadded:: 1.3.0

Parameters
----------
path_or_buffer : str, path object or file-like object, optional
File to write output to. If None, the output is returned as a
string.
index : bool, default True
Whether to include index in XML document.
root_name : str, default 'data'
The name of root element in XML document.
row_name : str, default 'row'
The name of row element in XML document.
na_rep : str, optional
Missing data representation.
attr_cols : list-like, optional
List of columns to write as attributes in row element.
Hierarchical columns will be flattened with underscore
delimiting the different levels.
elem_cols : list-like, optional
List of columns to write as children in row element. By default,
all columns output as children of row element. Hierarchical
columns will be flattened with underscore delimiting the
different levels.
namespaces : dict, optional
All namespaces to be defined in root element. Keys of dict
should be prefix names and values of dict corresponding URIs.
Default namespaces should be given empty string key. For
example, ::

namespaces = {{'': 'https://example.com'}}

prefix : str, optional
Namespace prefix to be used for every element and/or attribute
in document. This should be one of the keys in ``namespaces``
dict.
encoding : str, default 'utf-8'
Encoding of the resulting document.
xml_declaration : bool, default True
Whether to include the XML declaration at start of document.
pretty_print : bool, default True
Whether output should be pretty printed with indentation and
line breaks.
parser : {{'lxml', 'etree'}}, default 'lxml'
Parser module to use for building of tree. Only 'lxml' and
'etree' are supported. With 'lxml', the ability to use XSLT
stylesheet is supported.
stylesheet : str, path object or file-like object, optional
A URL, file-like object, or a raw string containing an XSLT
script used to transform the raw XML output. Script should use
layout of elements and attributes from original output. This
argument requires ``lxml`` to be installed. Only XSLT 1.0
scripts are currently supported, not later versions.
compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer'
For on-the-fly compression of the output data. If 'infer', then use
gzip, bz2, zip or xz if path_or_buffer is a string ending in
'.gz', '.bz2', '.zip', or '.xz', respectively, and no compression
otherwise. Set to None for no compression.
{storage_options}

Returns
-------
None or str
If ``path_or_buffer`` is None, the resulting XML document is
returned as a string. Otherwise the output is written to
``path_or_buffer``.

Raises
------
ImportError
If ``parser`` is 'lxml' but lxml is not installed.
ValueError
If ``parser`` is neither 'lxml' nor 'etree'.
"""

# Imported inside the method rather than at module level.
# NOTE(review): presumably to avoid a circular import with
# pandas.io.formats.xml -- confirm before moving it.
from pandas.io.formats.xml import (
EtreeXMLFormatter,
LxmlXMLFormatter,
)

# errors="ignore" keeps a missing lxml from raising here; the result is
# checked against None below so that the ImportError is raised only when
# the lxml parser was actually requested.
lxml = import_optional_dependency("lxml.etree", errors="ignore")

# Declared before the branches so either formatter class can be bound to
# the same annotated name.
TreeBuilder: Union[Type[EtreeXMLFormatter], Type[LxmlXMLFormatter]]

if parser == "lxml":
if lxml is not None:
TreeBuilder = LxmlXMLFormatter
else:
raise ImportError(
"lxml not found, please install or use the etree parser."
)

elif parser == "etree":
TreeBuilder = EtreeXMLFormatter

else:
raise ValueError("Values for parser can only be lxml or etree.")

# The formatter receives this renderer's DataFrameFormatter (self.fmt);
# ``parser`` itself is not forwarded because it is fully consumed by the
# choice of TreeBuilder above.
xml_formatter = TreeBuilder(
self.fmt,
path_or_buffer=path_or_buffer,
index=index,
root_name=root_name,
row_name=row_name,
na_rep=na_rep,
attr_cols=attr_cols,
elem_cols=elem_cols,
namespaces=namespaces,
prefix=prefix,
encoding=encoding,
xml_declaration=xml_declaration,
pretty_print=pretty_print,
stylesheet=stylesheet,
compression=compression,
storage_options=storage_options,
)

return xml_formatter.write_output()

def to_string(
self,
buf: Optional[FilePathOrBuffer[str]] = None,
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/formats/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@

from pandas.core.dtypes.common import is_list_like

from pandas.core.frame import DataFrame

from pandas.io.common import get_handle
from pandas.io.formats.format import DataFrameFormatter
from pandas.io.xml import (
get_data_from_filepath,
preprocess_data,
Expand Down Expand Up @@ -93,7 +94,7 @@ class BaseXMLFormatter:

def __init__(
self,
formatter: DataFrameFormatter,
frame: DataFrame,
path_or_buffer: Optional[FilePathOrBuffer] = None,
index: Optional[bool] = True,
root_name: Optional[str] = "data",
Expand All @@ -110,7 +111,7 @@ def __init__(
compression: CompressionOptions = "infer",
storage_options: StorageOptions = None,
) -> None:
self.fmt = formatter
self.frame = frame
self.path_or_buffer = path_or_buffer
self.index = index
self.root_name = root_name
Expand All @@ -127,8 +128,7 @@ def __init__(
self.compression = compression
self.storage_options = storage_options

self.frame = self.fmt.frame
self.orig_cols = self.fmt.frame.columns.tolist()
self.orig_cols = self.frame.columns.tolist()
self.frame_dicts = self.process_dataframe()

def build_tree(self) -> bytes:
Expand Down Expand Up @@ -183,7 +183,7 @@ def process_dataframe(self) -> Dict[Union[int, str], Dict[str, Any]]:
including optionally replacing missing values and including indexes.
"""

df = self.fmt.frame
df = self.frame

if self.index:
df = df.reset_index()
Expand Down