diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16d6071585a29..70111f3bc9a2c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2820,12 +2820,31 @@ def to_xml( """ - formatter = fmt.DataFrameFormatter( - self, - index=index, + from pandas.io.formats.xml import ( + EtreeXMLFormatter, + LxmlXMLFormatter, ) - return fmt.DataFrameRenderer(formatter).to_xml( + lxml = import_optional_dependency("lxml.etree", errors="ignore") + + TreeBuilder: Union[Type[EtreeXMLFormatter], Type[LxmlXMLFormatter]] + + if parser == "lxml": + if lxml is not None: + TreeBuilder = LxmlXMLFormatter + else: + raise ImportError( + "lxml not found, please install or use the etree parser." + ) + + elif parser == "etree": + TreeBuilder = EtreeXMLFormatter + + else: + raise ValueError("Values for parser can only be lxml or etree.") + + xml_formatter = TreeBuilder( + self, path_or_buffer=path_or_buffer, index=index, root_name=root_name, @@ -2838,12 +2857,13 @@ def to_xml( encoding=encoding, xml_declaration=xml_declaration, pretty_print=pretty_print, - parser=parser, stylesheet=stylesheet, compression=compression, storage_options=storage_options, ) + return xml_formatter.write_output() + # ---------------------------------------------------------------------- @Substitution( klass="DataFrame", diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 44428abdcd8a5..ab8e19d9f8a6f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -61,8 +61,6 @@ IndexLabel, StorageOptions, ) -from pandas.compat._optional import import_optional_dependency -from pandas.util._decorators import doc from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -98,7 +96,6 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.reshape.concat import concat -from pandas.core.shared_docs import _shared_docs from pandas.io.common import stringify_path from pandas.io.formats.printing import ( @@ -944,7 +941,6 @@ class DataFrameRenderer: Called in pandas.core.frame.DataFrame: - to_html - - to_xml - to_string Parameters @@ -1037,135 +1033,6 @@ def to_html( string = html_formatter.to_string() return save_to_buffer(string, buf=buf, encoding=encoding) - @doc(storage_options=_shared_docs["storage_options"]) - def to_xml( - self, - path_or_buffer: Optional[FilePathOrBuffer] = None, - index: Optional[bool] = True, - root_name: Optional[str] = "data", - row_name: Optional[str] = "row", - na_rep: Optional[str] = None, - attr_cols: Optional[Union[str, List[str]]] = None, - elem_cols: Optional[Union[str, List[str]]] = None, - namespaces: Optional[Dict[Optional[str], str]] = None, - prefix: Optional[str] = None, - encoding: str = "utf-8", - xml_declaration: Optional[bool] = True, - pretty_print: Optional[bool] = True, - parser: Optional[str] = "lxml", - stylesheet: Optional[FilePathOrBuffer] = None, - compression: CompressionOptions = "infer", - storage_options: StorageOptions = None, - ) -> Optional[str]: - """ - Render a DataFrame to an XML document. - - .. versionadded:: 1.3.0 - - Parameters - ---------- - path_or_buffer : str, path object or file-like object, optional - File to write output to. If None, the output is returned as a - string. - index : bool, default True - Whether to include index in XML document. - root_name : str, default 'data' - The name of root element in XML document. - row_name : str, default 'row' - The name of row element in XML document. - na_rep : str, optional - Missing data representation. - attr_cols : list-like, optional - List of columns to write as attributes in row element. - Hierarchical columns will be flattened with underscore - delimiting the different levels. - elem_cols : list-like, optional - List of columns to write as children in row element. By default, - all columns output as children of row element. Hierarchical - columns will be flattened with underscore delimiting the - different levels. - namespaces : dict, optional - All namespaces to be defined in root element. Keys of dict - should be prefix names and values of dict corresponding URIs. - Default namespaces should be given empty string key. For - example, :: - - namespaces = {{'': 'https://example.com'}} - - prefix : str, optional - Namespace prefix to be used for every element and/or attribute - in document. This should be one of the keys in ``namespaces`` - dict. - encoding : str, default 'utf-8' - Encoding of the resulting document. - xml_declaration : str, optional - Whether to include the XML declaration at start of document. - pretty_print : bool, default True - Whether output should be pretty printed with indentation and - line breaks. - parser : {{'lxml','etree'}}, default "lxml" - Parser module to use for building of tree. Only 'lxml' and - 'etree' are supported. With 'lxml', the ability to use XSLT - stylesheet is supported. - stylesheet : str, path object or file-like object, optional - A URL, file-like object, or a raw string containing an XSLT - script used to transform the raw XML output. Script should use - layout of elements and attributes from original output. This - argument requires ``lxml`` to be installed. Only XSLT 1.0 - scripts and not later versions is currently supported. - compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' - For on-the-fly decompression of on-disk data. If 'infer', then use - gzip, bz2, zip or xz if path_or_buffer is a string ending in - '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression - otherwise. If using 'zip', the ZIP file must contain only one data - file to be read in. Set to None for no decompression. - {storage_options} - """ - - from pandas.io.formats.xml import ( - EtreeXMLFormatter, - LxmlXMLFormatter, - ) - - lxml = import_optional_dependency("lxml.etree", errors="ignore") - - TreeBuilder: Union[Type[EtreeXMLFormatter], Type[LxmlXMLFormatter]] - - if parser == "lxml": - if lxml is not None: - TreeBuilder = LxmlXMLFormatter - else: - raise ImportError( - "lxml not found, please install or use the etree parser." - ) - - elif parser == "etree": - TreeBuilder = EtreeXMLFormatter - - else: - raise ValueError("Values for parser can only be lxml or etree.") - - xml_formatter = TreeBuilder( - self.fmt, - path_or_buffer=path_or_buffer, - index=index, - root_name=root_name, - row_name=row_name, - na_rep=na_rep, - attr_cols=attr_cols, - elem_cols=elem_cols, - namespaces=namespaces, - prefix=prefix, - encoding=encoding, - xml_declaration=xml_declaration, - pretty_print=pretty_print, - stylesheet=stylesheet, - compression=compression, - storage_options=storage_options, - ) - - return xml_formatter.write_output() - def to_string( self, buf: Optional[FilePathOrBuffer[str]] = None, diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index dd68f0f78261e..6987da63eaf3e 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -21,8 +21,9 @@ from pandas.core.dtypes.common import is_list_like +from pandas.core.frame import DataFrame + from pandas.io.common import get_handle -from pandas.io.formats.format import DataFrameFormatter from pandas.io.xml import ( get_data_from_filepath, preprocess_data, @@ -93,7 +94,7 @@ class BaseXMLFormatter: def __init__( self, - formatter: DataFrameFormatter, + frame: DataFrame, path_or_buffer: Optional[FilePathOrBuffer] = None, index: Optional[bool] = True, root_name: Optional[str] = "data", @@ -110,7 +111,7 @@ def __init__( compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ) -> None: - self.fmt = formatter + self.frame = frame self.path_or_buffer = path_or_buffer self.index = index self.root_name = root_name @@ -127,8 +128,7 @@ def __init__( self.compression = compression self.storage_options = storage_options - self.frame = self.fmt.frame - self.orig_cols = self.fmt.frame.columns.tolist() + self.orig_cols = self.frame.columns.tolist() self.frame_dicts = self.process_dataframe() def build_tree(self) -> bytes: @@ -183,7 +183,7 @@ def process_dataframe(self) -> Dict[Union[int, str], Dict[str, Any]]: including optionally replacing missing values and including indexes. """ - df = self.fmt.frame + df = self.frame if self.index: df = df.reset_index()