From b67d87670dcd59fb743ace757d31a9077a092e4b Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Sun, 31 Jan 2021 18:32:55 -0600 Subject: [PATCH 01/35] ENH: Add i/o support of XML with pandas.read_xml and DataFrame.to_xml (GH27554) --- doc/source/reference/io.rst | 7 + doc/source/whatsnew/v1.3.0.rst | 30 + pandas/__init__.py | 1 + pandas/core/frame.py | 172 +++ pandas/io/api.py | 1 + pandas/io/formats/format.py | 117 ++ pandas/io/formats/xml.py | 763 ++++++++++++ pandas/io/xml.py | 1017 +++++++++++++++ pandas/tests/io/data/xml/baby_names.xml | 53 + pandas/tests/io/data/xml/books.xml | 21 + pandas/tests/io/data/xml/cta_rail_lines.kml | 92 ++ pandas/tests/io/data/xml/flatten_doc.xsl | 18 + pandas/tests/io/data/xml/row_field_output.xsl | 19 + pandas/tests/io/formats/test_to_xml.py | 1099 +++++++++++++++++ pandas/tests/io/test_xml.py | 708 +++++++++++ 15 files changed, 4118 insertions(+) create mode 100644 pandas/io/formats/xml.py create mode 100644 pandas/io/xml.py create mode 100644 pandas/tests/io/data/xml/baby_names.xml create mode 100644 pandas/tests/io/data/xml/books.xml create mode 100644 pandas/tests/io/data/xml/cta_rail_lines.kml create mode 100644 pandas/tests/io/data/xml/flatten_doc.xsl create mode 100644 pandas/tests/io/data/xml/row_field_output.xsl create mode 100644 pandas/tests/io/formats/test_to_xml.py create mode 100644 pandas/tests/io/test_xml.py diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst index e755ce94812bb..442631de50c7a 100644 --- a/doc/source/reference/io.rst +++ b/doc/source/reference/io.rst @@ -68,6 +68,13 @@ HTML read_html +XML +~~~~ +.. autosummary:: + :toctree: api/ + + read_xml + HDFStore: PyTables (HDF5) ~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autosummary:: diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 931ec895cc73f..0034d1bb3ecbc 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -39,6 +39,36 @@ For example: ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See ref:`window.overview` for performance and functional benefits. (:issue:`15095`) +.. _whatsnew_130.read_to_xml: + +We added to support to read and generate shallow versions of xml documents. +With lxml as parser, full XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) + +.. ipython:: python + + xml = """ + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + + """ + + df = pd.read_xml(xml) + + df.to_xml() + .. _whatsnew_130.enhancements.other: Other enhancements diff --git a/pandas/__init__.py b/pandas/__init__.py index cc5d835a52833..cddd6397de33e 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -167,6 +167,7 @@ read_feather, read_gbq, read_html, + read_xml, read_json, read_stata, read_sas, diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 03d439bd461da..60fe9ca3f6430 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2551,6 +2551,178 @@ def to_html( render_links=render_links, ) + def to_xml( + self, + io: Optional[FilePathOrBuffer[str]] = None, + index: Optional[bool] = True, + root_name: Optional[str] = "data", + row_name: Optional[str] = "row", + na_rep: Optional[str] = None, + attr_cols: Optional[Union[str, List[str]]] = None, + elem_cols: Optional[Union[str, List[str]]] = None, + namespaces: Optional[Union[dict, List[dict]]] = None, + prefix: Optional[str] = None, + encoding: Optional[str] = "utf-8", + xml_declaration: Optional[bool] = True, + pretty_print: Optional[bool] = True, + parser: Optional[str] = "lxml", + stylesheet: Optional[FilePathOrBuffer[str]] = None, + ) -> Optional[str]: + """ + Render a DataFrame to an XML document. + + .. 
versionadded:: 1.3.0 + + Parameters + ---------- + io : str, path object or file-like object, optional + File to write output to. If None, the output is returned as a + string. + index : bool, optional + Whether to include index in XML document. + root_name : str, default 'data' + The name of root element in XML document. + root_name : str, default 'row' + The name of row element in XML document. + na_rep : str, optional + Missing data representation. + attr_cols : list-like, optional + List of columns to write as attributes in row element. + Hierarchical columns will be flattened with underscore + delimiting the different levels. + elem_cols : list-like, optional + List of columns to write as children in row element. By default, + all columns output as children of row element. Hierarchical + columns will be flattened with underscore delimiting the + different levels. + namespaces : dict, optional + All namespaces to be defined in root element. Keys of dict + should be prefix names and values of dict corresponding URIs. + Default namespaces should be given empty string key. For + example, :: + + namespaces = {'': 'https://example.com'} + + prefix : str, optional + Namespace prefix to be used for every element and/or attribute + in document. This should be one of the keys in ``namespaces`` + dict. + encoding : str, optional, default 'utf-8' + Encoding of the resulting document. + xml_declaration : str, optional + Whether to include the XML declaration at start of document. + pretty_print : bool, optional + Whether output should be pretty printed with indentation and + line breaks. + parser : {'lxml','etree'}, default "lxml" + Parser module to use for building of tree. Only 'lxml' and + 'etree' are supported. With 'lxml', the ability to use XSLT + stylesheet is supported. Default parser uses 'lxml'. If + module is not installed a warning will raise and process + will continue with 'etree'. 
+ stylesheet : str, path object or file-like object, optional + A URL, file-like object, or a raw string containing an XSLT + script used to transform the raw XML output. Script should use + layout of elements and attributes from original output. This + argument requires ``lxml`` to be installed. Only XSLT 1.0 + scripts and not later versions is currently supported. + + Returns + ------- + None or str + If ``io`` is None, returns the resulting XML format as a + string. Otherwise returns None. + + See Also + -------- + to_json : Convert the pandas object to a JSON string. + to_html : Convert DataFrame to a html. + + Examples + -------- + >>> df = pd.DataFrame({'shape': ['square', 'circle', 'triangle'], + ... 'degrees': [360, 360, 180], + ... 'sides': [4, np.nan, 3]}) + + >>> df.to_xml() + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + + + + >>> df.to_xml(attr_cols=['index', 'shape', 'degrees', 'sides']) + + + + + + + + >>> df.to_xml(namespaces = {"doc": "https://example.com"}, + ... 
prefix = "doc") + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + + + """ + + formatter = fmt.DataFrameFormatter( + self, + index=index, + na_rep=na_rep, + ) + + return fmt.DataFrameRenderer(formatter).to_xml( + io=io, + index=index, + root_name=root_name, + row_name=row_name, + na_rep=na_rep, + attr_cols=attr_cols, + elem_cols=elem_cols, + namespaces=namespaces, + prefix=prefix, + encoding=encoding, + xml_declaration=xml_declaration, + pretty_print=pretty_print, + parser=parser, + stylesheet=stylesheet, + ) + # ---------------------------------------------------------------------- @Substitution( klass="DataFrame", diff --git a/pandas/io/api.py b/pandas/io/api.py index 2d25ffe5f8a6b..ad514014c3e6d 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -19,3 +19,4 @@ from pandas.io.spss import read_spss from pandas.io.sql import read_sql, read_sql_query, read_sql_table from pandas.io.stata import read_stata +from pandas.io.xml import read_xml diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b3c2411304f6b..a372cd4f16119 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -28,6 +28,7 @@ cast, ) from unicodedata import east_asian_width +from warnings import warn import numpy as np @@ -913,6 +914,7 @@ class DataFrameRenderer: Called in pandas.core.frame.DataFrame: - to_html + - to_xml - to_string Parameters @@ -1002,6 +1004,121 @@ def to_html( string = html_formatter.to_string() return save_to_buffer(string, buf=buf, encoding=encoding) + def to_xml( + self, + io: Optional[FilePathOrBuffer[str]] = None, + index: Optional[bool] = True, + root_name: Optional[str] = "data", + row_name: Optional[str] = "row", + na_rep: Optional[str] = None, + attr_cols: Optional[Union[str, List[str]]] = None, + elem_cols: Optional[Union[str, List[str]]] = None, + namespaces: Optional[Union[dict, List[dict]]] = None, + prefix: Optional[str] = None, + encoding: Optional[str] = "utf-8", + 
xml_declaration: Optional[bool] = True, + pretty_print: Optional[bool] = True, + parser: Optional[str] = "lxml", + stylesheet: Optional[FilePathOrBuffer[str]] = None, + ) -> Optional[str]: + """ + Render a DataFrame to an XML document. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + io : str, path object or file-like object, optional + File to write output to. If None, the output is returned as a + string. + index : bool, optional + Whether to include index in XML document. + root_name : str, default 'data' + The name of root element in XML document. + root_name : str, default 'row' + The name of row element in XML document. + na_rep : str, optional + Missing data representation. + attr_cols : list-like, optional + List of columns to write as attributes in row element. + Hierarchical columns will be flattened with underscore + delimiting the different levels. + elem_cols : list-like, optional + List of columns to write as children in row element. By default, + all columns output as children of row element. Hierarchical + columns will be flattened with underscore delimiting the + different levels. + namespaces : dict, optional + All namespaces to be defined in root element. Keys of dict + should be prefix names and values of dict corresponding URIs. + Default namespaces should be given empty string key. For + example, :: + + namespaces = {'': 'https://example.com'} + + prefix : str, optional + Namespace prefix to be used for every element and/or attribute + in document. This should be one of the keys in ``namespaces`` + dict. + encoding : str, optional, default 'utf-8' + Encoding of the resulting document. + xml_declaration : str, optional + Whether to include the XML declaration at start of document. + pretty_print : bool, optional + Whether output should be pretty printed with indentation and + line breaks. + parser : {'lxml','etree'}, default "lxml" + Parser module to use for building of tree. Only 'lxml' and + 'etree' are supported. 
With 'lxml', the ability to use XSLT + stylesheet is supported. Default parser uses 'lxml'. If + module is not installed a warning will raise and process + will continue with 'etree'. + stylesheet : str, path object or file-like object, optional + A URL, file-like object, or a raw string containing an XSLT + script used to transform the raw XML output. Script should use + layout of elements and attributes from original output. This + argument requires ``lxml`` to be installed. Only XSLT 1.0 + scripts and not later versions is currently supported. + """ + + from pandas.io.formats.xml import EtreeXMLFormatter, LxmlXMLFormatter + + if parser == "lxml": + try: + TreeBuilder = LxmlXMLFormatter + except ImportError: + warn( + "You do not have lxml installed (default parser). " + "Instead, etree will be used.", + ImportWarning, + ) + TreeBuilder = EtreeXMLFormatter + + elif parser == "etree": + TreeBuilder = EtreeXMLFormatter + + else: + raise ValueError("Values for parser can only be lxml or etree.") + + xml_formatter = TreeBuilder( + self.fmt, + io=io, + index=index, + root_name=root_name, + row_name=row_name, + na_rep=na_rep, + attr_cols=attr_cols, + elem_cols=elem_cols, + namespaces=namespaces, + prefix=prefix, + encoding=encoding, + xml_declaration=xml_declaration, + pretty_print=pretty_print, + stylesheet=stylesheet, + ) + + return xml_formatter.write_output() + def to_string( self, buf: Optional[FilePathOrBuffer[str]] = None, diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py new file mode 100644 index 0000000000000..86448c9d4498f --- /dev/null +++ b/pandas/io/formats/xml.py @@ -0,0 +1,763 @@ +""" +Module for formatting output data in XML. 
+""" + +import codecs +import io +from typing import Dict, List, Optional, Union +from urllib.error import HTTPError, URLError +from warnings import warn + +from pandas._typing import FilePathOrBuffer + +from pandas.core.dtypes.common import is_list_like + +from pandas.io.common import is_url, urlopen +from pandas.io.formats.format import DataFrameFormatter + + +class EtreeXMLFormatter: + """ + Class for formatting data in xml using Python standard library + modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + + Parameters + ---------- + io : str or file-like + This can be either a string of raw XML, a valid URL, + file or file-like object. + + index : bool + Whether to include index in xml document. + + row_name : str + Name for root of xml document. Default is 'data'. + + root_name : str + Name for row elemens of xml document. Default is 'row'. + + na_rep : str + Missing data representation. + + attrs_cols : list + List of columns to write as attributes in row element. + + elem_cols : list + List of columns to write as children in row element. + + namespacess : dict + The namespaces to define in XML document as dicts with key + being namespace and value the URI. + + prefix : str + The prefix for each element in XML document including root. + + encoding : str + Encoding of xml object or document. + + xml_declaration : bool + Whether to include xml declaration at top line item in xml. + + pretty_print : bool + Whether to write xml document with line breaks and indentation. + + stylesheet : str or file-like + A URL, file, file-like object, or a raw string containing XSLT, + `etree` does not support XSLT but retained for consistency. + + See also + -------- + pandas.io.formats.xml.LxmlXMLFormatter + + Notes + ----- + This class serves as fall back option if user does not have + ``lxml`` installed or user specifically requests ``etree`` parser. 
+ """ + + def __init__( + self, + formatter: DataFrameFormatter, + io: Optional[FilePathOrBuffer[str]] = None, + index: Optional[bool] = True, + root_name: Optional[str] = "data", + row_name: Optional[str] = "row", + na_rep: Optional[str] = None, + attr_cols: Optional[Union[str, List[str]]] = None, + elem_cols: Optional[Union[str, List[str]]] = None, + namespaces: Optional[Dict[str, str]] = None, + prefix: Optional[str] = None, + encoding: Optional[str] = "utf-8", + xml_declaration: Optional[bool] = True, + pretty_print: Optional[bool] = True, + stylesheet: Optional[FilePathOrBuffer[str]] = None, + ) -> None: + self.fmt = formatter + self.io = io + self.index = index + self.root_name = root_name + self.row_name = row_name + self.na_rep = na_rep + self.attr_cols = attr_cols + self.elem_cols = elem_cols + self.namespaces = namespaces + self.prefix = prefix + self.encoding = encoding + self.xml_declaration = xml_declaration + self.pretty_print = pretty_print + self.stylesheet = stylesheet + self.frame = self.fmt.frame + + self.validate_columns() + self.validate_encoding() + self.orig_cols = self.fmt.frame.columns.tolist() + self.frame_dicts = self.process_dataframe() + self.handle_indexes() + self.prefix_uri = self.get_prefix_uri() + + def build_tree(self) -> bytes: + """ + Build tree from data. + + This method initializes the root and builds attributes and elements + with optional namespaces. 
+ """ + from xml.etree.ElementTree import Element, SubElement, tostring + + self.root = Element( + f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() + ) + + for k, d in self.frame_dicts.items(): + self.d = d + self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") + + if self.attr_cols: + self.build_attribs() + if self.elem_cols: + self.build_elems() + if not self.attr_cols and not self.elem_cols: + self.elem_cols = list(self.frame_dicts[0].keys()) + self.build_elems() + + self.out_xml = tostring(self.root, method="xml", encoding=self.encoding) + + if self.pretty_print: + self.out_xml = self.prettify_tree() + + if not self.xml_declaration: + self.out_xml = self.remove_declaration() + + if self.stylesheet: + warn( + "To use stylesheet, you need lxml installed. " + "The non-transformed, original XML is returned instead.", + UserWarning, + ) + + return self.out_xml + + def validate_columns(self) -> None: + """ + Validate elems_cols and attrs_cols. + + This method will check if columns is list-like. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.attr_cols and not is_list_like(self.attr_cols): + raise TypeError( + f"{type(self.attr_cols).__name__} is not a valid type for attr_cols" + ) + + if self.elem_cols and not is_list_like(self.elem_cols): + raise TypeError( + f"{type(self.elem_cols).__name__} is not a valid type for elem_cols" + ) + + def validate_encoding(self) -> None: + """ + Validate encoding. + + This method will check if encoding is among listed under codecs. + + Raises + ------ + LookupError + * If encoding is not available in codecs. + """ + + try: + codecs.lookup(self.encoding) + except LookupError as e: + raise e + + def process_dataframe(self) -> None: + """ + Adjust Data Frame to fit xml output. + + This method will adjust underlying data frame for xml output, + including replacing missing entities and including indexes. 
+ """ + + na_dict = {"None": self.na_rep, "NaN": self.na_rep, "nan": self.na_rep} + + df = ( + (self.fmt.frame.reset_index().applymap(str).replace(na_dict)) + if self.index + else self.fmt.frame.applymap(str).replace(na_dict) + ) + + return df.to_dict(orient="index") + + def handle_indexes(self) -> None: + """ + Handle indexes. + + This method will add indexes into attr_cols or elem_cols. + """ + + indexes = [x for x in self.frame_dicts[0].keys() if x not in self.orig_cols] + + if self.attr_cols and self.index: + self.attr_cols = list(indexes) + self.attr_cols + + if self.elem_cols and self.index: + self.elem_cols = list(indexes) + self.elem_cols + + def get_prefix_uri(self) -> str: + """ + Get uri of namespace prefix. + + This method retrieves corresponding URI to prefix in namespaces. + """ + + from xml.etree.ElementTree import register_namespace + + uri = "" + if self.namespaces: + for p, n in self.namespaces.items(): + register_namespace(p, n) + if self.prefix: + try: + uri = f"{{{self.namespaces[self.prefix]}}}" + except (KeyError): + raise KeyError("prefix is not included in namespaces") + else: + uri = f'{{{self.namespaces[""]}}}' + + return uri + + def other_namespaces(self) -> dict: + """ + Define other namespaces. + + This method will build dictionary of namespaces attributes + for root element, conditionally with optional namespaces and + prefix. + """ + + nmsp_dict = {} + if self.namespaces and self.prefix is None: + nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p != ""} + + if self.namespaces and self.prefix: + nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p == ""} + + return nmsp_dict + + def build_attribs(self) -> None: + """ + Create attributes of row. + + This method adds attributes using attr_cols to row element and + works with tuples for multindex or hierarchical columns. 
+ """ + + for col in self.attr_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + attr_name = f"{self.prefix_uri}{flat_col}" + try: + if self.d[col] is not None: + self.elem_row.attrib[attr_name] = str(self.d[col]) + except KeyError: + raise KeyError(f"no valid column, {col}") + + def build_elems(self) -> None: + """ + Create child elements of row. + + This method adds child elements using elem_cols to row element and + works with tuples for multindex or hierarchical columns. + """ + + from xml.etree.ElementTree import SubElement + + for col in self.elem_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + elem_name = f"{self.prefix_uri}{flat_col}" + try: + val = None if self.d[col] in [None, ""] else str(self.d[col]) + SubElement(self.elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") + + def prettify_tree(self) -> bytes: + """ + Output tree for pretty print format. + + This method will pretty print xml with line breaks and indentation. + """ + + from xml.dom.minidom import parseString + + dom = parseString(self.out_xml) + + return dom.toprettyxml(indent=" ", encoding=self.encoding) + + def remove_declaration(self) -> None: + """ + Remove xml declaration. + + This method will remove xml declaration of working tree. Currently, + pretty_print is not supported in etree. 
+ """ + + return self.out_xml.split(b"?>")[-1].strip() + + def write_output(self) -> Optional[str]: + xml_doc = self.build_tree() + + try: + if self.io: + with open(self.io, "wb") as f: + f.write(xml_doc) + xml_doc = None + else: + xml_doc = xml_doc.decode(self.encoding).rstrip() + except (UnicodeDecodeError, OSError) as e: + raise e + + return xml_doc + + +class LxmlXMLFormatter: + """ + Class for formatting data in xml using Python standard library + modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + + Parameters + ---------- + io : str or file-like + This can be either a string of raw XML, a valid URL, + file or file-like object. + + index : bool + Whether to include index in xml document. + + row_name : str + Name for root of xml document. Default is 'data'. + + root_name : str + Name for row elemens of xml document. Default is 'row'. + + na_rep : str + Missing data representation. + + attrs_cols : list + List of columns to write as attributes in row element. + + elem_cols : list + List of columns to write as children in row element. + + namespacess : dict + The namespaces to define in XML document as dicts with key + being namespace and value the URI. + + prefix : str + The prefix for each element in XML document including root. + + encoding : str + Encoding of xml object or document. + + xml_declaration : bool + Whether to include xml declaration at top line item in xml. + + pretty_print : bool + Whether to write xml document with line breaks and indentation. + + stylesheet : str or file-like + A URL, file, file-like object, or a raw string containing XSLT. + + See also + -------- + pandas.io.formats.xml.EtreeXMLFormatter + + Notes + ----- + This class serves as default option. If user does not have `lxml` + installed, `to_xml` will fall back with EtreeXMLFormatter. 
+ """ + + def __init__( + self, + formatter: DataFrameFormatter, + io: Optional[FilePathOrBuffer[str]] = None, + index: Optional[bool] = True, + root_name: Optional[str] = "data", + row_name: Optional[str] = "row", + na_rep: Optional[str] = None, + attr_cols: Optional[Union[str, List[str]]] = None, + elem_cols: Optional[Union[str, List[str]]] = None, + namespaces: Optional[Dict[str, str]] = None, + prefix: Optional[str] = None, + encoding: Optional[str] = "utf-8", + xml_declaration: Optional[bool] = True, + pretty_print: Optional[bool] = True, + stylesheet: Optional[FilePathOrBuffer[str]] = None, + ) -> None: + self.fmt = formatter + self.io = io + self.index = index + self.root_name = root_name + self.row_name = row_name + self.na_rep = na_rep + self.attr_cols = attr_cols + self.elem_cols = elem_cols + self.namespaces = namespaces + self.prefix = prefix + self.encoding = encoding + self.xml_declaration = xml_declaration + self.pretty_print = pretty_print + self.stylesheet = stylesheet + + self.validate_columns() + self.validate_encoding() + self.orig_cols = self.fmt.frame.columns.tolist() + self.frame_dicts = self.process_dataframe() + self.prefix_uri = self.get_prefix_uri() + + self.convert_empty_str_key() + self.handle_indexes() + + def build_tree(self) -> bytes: + """ + Build tree from data. + + This method initializes the root and builds attributes and elements + with optional namespaces. 
+ """ + from lxml.etree import Element, SubElement, tostring + + self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces) + + for k, d in self.frame_dicts.items(): + self.d = d + self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") + + if self.attr_cols: + self.build_attribs() + + if self.elem_cols: + self.build_elems() + + if not self.attr_cols and not self.elem_cols: + self.elem_cols = list(self.frame_dicts[0].keys()) + self.build_elems() + + self.out_xml = tostring( + self.root, + pretty_print=self.pretty_print, + method="xml", + encoding=self.encoding, + xml_declaration=self.xml_declaration, + ) + + if self.stylesheet: + self.out_xml = self.transform_doc() + + return self.out_xml + + def validate_columns(self) -> None: + """ + Validate elems_cols and attrs_cols. + + This method will check if columns is list-like. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.attr_cols and not is_list_like(self.attr_cols): + raise TypeError( + f"{type(self.attr_cols).__name__} is not a valid type for attr_cols" + ) + + if self.elem_cols and not is_list_like(self.elem_cols): + raise TypeError( + f"{type(self.elem_cols).__name__} is not a valid type for elem_cols" + ) + + def validate_encoding(self) -> None: + """ + Validate encoding. + + This method will check if encoding is among listed under codecs. + + Raises + ------ + LookupError + * If encoding is not available in codecs. + """ + + try: + codecs.lookup(self.encoding) + except LookupError as e: + raise e + + def process_dataframe(self) -> dict: + """ + Adjust Data Frame to fit xml output. + + This method will adjust underlying data frame for xml output, + including replacing missing entities and including indexes. 
+ """ + + na_dict = {"None": self.na_rep, "NaN": self.na_rep, "nan": self.na_rep} + + df = ( + (self.fmt.frame.reset_index().applymap(str).replace(na_dict)) + if self.index + else self.fmt.frame.applymap(str).replace(na_dict) + ) + + return df.to_dict(orient="index") + + def convert_empty_str_key(self) -> None: + """ + Replace zero-lengh string in `namespaces`. + + This method will replce '' with None to align to `lxml` + requirement that empty string prefixes are not allowed. + """ + + if self.namespaces and "" in self.namespaces.keys(): + self.namespaces[None] = self.namespaces.pop("", "default") + + def handle_indexes(self) -> None: + """ + Handle indexes. + + This method will add indexes into attr_cols or elem_cols. + """ + indexes = [x for x in self.frame_dicts[0].keys() if x not in self.orig_cols] + + if self.attr_cols and self.index: + self.attr_cols = list(indexes) + self.attr_cols + + if self.elem_cols and self.index: + self.elem_cols = list(indexes) + self.elem_cols + + def get_prefix_uri(self) -> str: + """ + Get uri of namespace prefix. + + This method retrieves corresponding URI to prefix in namespaces. + + Raises + ------ + ValueError + *If prefix is not included in namespace dict. + """ + + uri = "" + if self.namespaces: + if self.prefix: + try: + uri = f"{{{self.namespaces[self.prefix]}}}" + except (KeyError): + raise KeyError("prefix is not included in namespaces") + else: + uri = f'{{{self.namespaces[""]}}}' + + return uri + + def build_attribs(self) -> None: + """ + Create attributes of row. + + This method adds attributes using attr_cols to row element and + works with tuples for multindex or hierarchical columns. 
+ """ + for col in self.attr_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + attr_name = f"{self.prefix_uri}{flat_col}" + try: + if self.d[col] is not None: + self.elem_row.attrib[attr_name] = self.d[col] + except KeyError: + raise KeyError(f"no valid column, {col}") + + def build_elems(self) -> None: + """ + Create child elements of row. + + This method adds child elements using elem_cols to row element and + works with tuples for multindex or hierarchical columns. + """ + from lxml.etree import SubElement + + for col in self.elem_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + elem_name = f"{self.prefix_uri}{flat_col}" + try: + val = None if self.d[col] in [None, ""] else str(self.d[col]) + SubElement(self.elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") + + def convert_io(self) -> Union[None, str]: + """ + Convert stylesheet object to string. + + This method will convert stylesheet object into a string or keep + as string, depending on object type. + """ + + obj = None + + if isinstance(self.stylesheet, str): + obj = self.stylesheet + + if isinstance(self.stylesheet, bytes): + obj = self.stylesheet.decode(self.encoding) + + if isinstance(self.stylesheet, io.StringIO): + obj = self.stylesheet.getvalue() + + if isinstance(self.stylesheet, io.BytesIO): + obj = self.stylesheet.getvalue().decode(self.encoding) + + if isinstance(self.stylesheet, io.TextIOWrapper): + obj = self.stylesheet.read() + + if isinstance(self.stylesheet, io.BufferedReader): + obj = self.stylesheet.read().decode(self.encoding) + + return obj + + def parse_doc(self): + """ + Build tree from stylesheet. 
+ + This method will parse stylesheet object into tree for parsing + conditionally by its specific object type. + + Raises + ------ + HttpError + * If URL cannot be reached. + + LookupError + * If xml document has incorrect or unknown encoding. + + OSError + * If file cannot be found. + + XMLSyntaxError + * If xml document conntains syntax issues. + + ValueError + * If io object is not readable as string or file-like object. + """ + + from lxml.etree import XML, XMLParser, XMLSyntaxError, parse + + current_doc = self.convert_io() + if current_doc: + is_xml = current_doc.startswith((" bytes: + """ + Transform original tree using stylesheet. + + This method will transform built tree with XSLT script. + """ + + from lxml.etree import XSLT, XSLTApplyError, XSLTParseError + + xsl_doc = self.parse_doc() + + try: + transformer = XSLT(xsl_doc) + new_doc = transformer(self.root) + + except (XSLTApplyError, XSLTParseError) as e: + raise e + + return bytes(new_doc) + + def write_output(self) -> Optional[str]: + xml_doc = self.build_tree() + + try: + if self.io: + with open(self.io, "wb") as f: + f.write(xml_doc) + xml_doc = None + else: + xml_doc = xml_doc.decode(self.encoding).rstrip() + + except (UnicodeDecodeError, OSError) as e: + raise e + + return xml_doc diff --git a/pandas/io/xml.py b/pandas/io/xml.py new file mode 100644 index 0000000000000..af2004c05428c --- /dev/null +++ b/pandas/io/xml.py @@ -0,0 +1,1017 @@ +""" +:mod:`pandas.io.xml` is a module containing functionality for dealing with +XML IO. 
+ +""" + +import io +from typing import Dict, List, Optional, Union +from urllib.error import HTTPError, URLError +from warnings import warn + +from pandas._typing import FilePathOrBuffer +from pandas.errors import ParserError +from pandas.util._decorators import deprecate_nonkeyword_arguments + +from pandas.core.dtypes.common import is_list_like + +from pandas.core.frame import DataFrame + +from pandas.io.common import is_url, stringify_path, urlopen +from pandas.io.parsers import TextParser + + +class _EtreeFrameParser: + """ + Internal class to parse XML into DataFrames with the Python + standard library XML modules: `xml.etree.ElementTree`. + + Parameters + ---------- + io : str or file-like + This can be either a string of raw XML, a valid URL, + file or file-like object. + + xpath : str or regex + The XPath expression to parse required set of nodes for + migration to `Data Frame`. `etree` supports limited XPath. + + namespacess : dict + The namespaces defined in XML document (`xmlns:namespace='URI') + as dicts with key being namespace and value the URI. + + elems_only : bool + Parse only the child elements at the specified `xpath`. + + attrs_only : bool + Parse only the attributes at the specified `xpath`. + + names : list + Column names for Data Frame of parsed XML data. + + encoding : str + Encoding of xml object or document. + + stylesheet : str or file-like + URL, file, file-like object, or a raw string containing XSLT, + `etree` does not support XSLT but retained for consistency. + + See also + -------- + pandas.io.xml._LxmlFrameParser + + Notes + ----- + This class serves as fall back option if user does not have + ``lxml`` installed or user specifically requests ``etree`` parser. 
+ """ + + from xml.etree.ElementTree import Element, ElementTree + + def __init__( + self, + io, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + stylesheet, + ): + self.io = io + self.xpath = xpath + self.namespaces = namespaces + self.elems_only = elems_only + self.attrs_only = attrs_only + self.names = names + self.encoding = encoding + self.stylesheet = stylesheet + + def parse_data(self) -> List[Dict[str, List[str]]]: + """ + Parse xml data. + + This method will call the other internal methods to + validate xpath, names, parse and return specific nodes. + """ + + if self.stylesheet: + warn( + "To use stylesheet, you need lxml installed. " + "Nodes will be parsed on original XML at the xpath.", + UserWarning, + ) + + self.xml_doc = self._parse_doc() + + self._validate_path() + self._validate_names() + + return self._parse_nodes() + + def _parse_nodes(self) -> List[Dict[str, List[str]]]: + """ + Parse xml nodes. + + This method will parse the children and attributes of elements + in xpath, conditionally for only elements, only attributes + or both while optionally renaming node names. + + Raises + ------ + ValueError + * If only elements and only attributes are specified. + + Notes + ----- + Namespace URIs will be removed from return node values.Also, + elements with missing children or attributes compared to siblings + will have optional keys filled withi None values. 
+ """ + + elems = self.xml_doc.findall(self.xpath, namespaces=self.namespaces) + + if self.elems_only and self.attrs_only: + raise ValueError("Either element or attributes can be parsed not both.") + elif self.elems_only: + if self.names: + dicts = [ + { + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + } + for el in elems + ] + + elif self.attrs_only: + dicts = [el.attrib for el in elems] + + else: + if self.names: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + + else: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + }, + } + for el in elems + ] + + if self.namespaces: + dicts = [ + {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} + for d in dicts + ] + + keys = list(dict.fromkeys([k for d in dicts for k in d.keys()])) + dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts] + + if self.names: + dicts = [ + {nm: v for nm, (k, v) in zip(self.names, d.items())} for d in dicts + ] + + return dicts + + def _validate_path(self) -> None: + """ + Validate xpath. + + This method checks for sytnax, evaluation, or empty nodes return. + + Raises + ------ + SyntaxError + * If xpah is not supported or issues with namespaces. + + Notes + ----- + `etree` supports limited XPath. If user attempts a more complex + expression syntax error will raise. + """ + + msg = ( + "xpath does not return any nodes. 
" + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." + ) + try: + elems = self.xml_doc.find(self.xpath, namespaces=self.namespaces) + if elems is None: + raise ValueError(msg) + + if elems is not None and elems.find("*") is None and elems.attrib is None: + raise ValueError(msg) + + except (KeyError, SyntaxError): + raise SyntaxError( + "You have used an incorrect or unsupported XPath " + "expression for etree library or you used an " + "undeclared namespace prefix." + ) + + def _validate_names(self) -> None: + """ + Validate names. + + This method will check if names is a list-like and aligns + with length of parse nodes. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.names: + children = self.xml_doc.find( + self.xpath, namespaces=self.namespaces + ).findall("*") + + if is_list_like(self.names): + if len(self.names) < len(children): + raise ValueError( + "names does not match length of child elements in xpath." + ) + else: + raise TypeError( + f"{type(self.names).__name__} is not a valid type for names" + ) + + def _convert_io(self) -> Union[None, str]: + """ + Convert io object to string. + + This method will convert io object into a string or keep + as string, depending on object type. + """ + + obj = None + + if isinstance(self.io, str): + obj = self.io + + if isinstance(self.io, bytes): + obj = self.io.decode(self.encoding) + + if isinstance(self.io, io.StringIO): + obj = self.io.getvalue() + + if isinstance(self.io, io.BytesIO): + obj = self.io.getvalue().decode(self.encoding) + + if isinstance(self.io, io.TextIOWrapper): + obj = self.io.read() + + if isinstance(self.io, io.BufferedReader): + obj = self.io.read().decode(self.encoding) + + return obj + + def _parse_doc(self) -> Union[Element, ElementTree]: + """ + Build tree from io. + + This method will parse io object into tree for parsing + conditionally by its specific object type. 
+ + Raises + ------ + HttpError + * If URL cannot be reached. + + OSError + * If file cannot be found. + + ParseError + * If xml document conntains syntax issues. + + ValueError + * If io object is not readable as string or file-like object. + """ + + from xml.etree.ElementTree import ParseError, fromstring, parse + + current_doc = self._convert_io() + if current_doc: + is_xml = current_doc.startswith((" List[Dict[str, List[str]]]: + """ + Parse xml data. + + This method will call the other internal methods to + validate xpath, names, optionally parse and run XSLT, + and parse original or transformed XML and return specific nodes. + """ + + self.xml_doc = self._parse_doc() + + if self.stylesheet: + self.is_style = True + self.xsl_doc = self._parse_doc() + self.xml_doc = self._transform_doc() + + self._validate_path() + self._validate_names() + + return self._parse_nodes() + + def _parse_nodes(self) -> List[Dict[str, List[str]]]: + """ + Parse xml nodes. + + This method will parse the children and attributes of elements + in xpath, conditionally for only elements, only attributes + or both while optionally renaming node names. + + Raises + ------ + ValueError + * If only elements and only attributes are specified. + + Notes + ----- + Namespace URIs will be removed from return node values.Also, + elements with missing children or attributes compared to siblings + will have optional keys filled withi None values. 
+ """ + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + + if self.elems_only and self.attrs_only: + raise ValueError("Either element or attributes can be parsed not both.") + + elif self.elems_only: + if self.names: + dicts = [ + { + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.xpath("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + ch.tag: ch.text.strip() if ch.text else None + for ch in el.xpath("*") + } + for el in elems + ] + + elif self.attrs_only: + dicts = [el.attrib for el in elems] + + else: + if self.names: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.xpath("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + ch.tag: ch.text.strip() if ch.text else None + for ch in el.xpath("*") + }, + } + for el in elems + ] + + if self.namespaces or "}" in list(dicts[0].keys())[0]: + dicts = [ + {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} + for d in dicts + ] + + keys = list(dict.fromkeys([k for d in dicts for k in d.keys()])) + dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts] + + if self.names: + dicts = [ + {nm: v for nm, (k, v) in zip(self.names, d.items())} for d in dicts + ] + + return dicts + + def _transform_doc(self): + """ + Transform original tree using stylesheet. + + This method will transform original xml using XSLT script into + am ideally flatter xml document for easier parsing and migration + to Data Frame. 
+ """ + from lxml.etree import XSLT, XSLTApplyError, XSLTParseError + + try: + transformer = XSLT(self.xsl_doc) + new_doc = transformer(self.xml_doc) + except (XSLTApplyError, XSLTParseError) as e: + raise e + + return new_doc + + def _validate_path(self) -> None: + """ + Validate xpath. + + This method checks for sytnax, evaluation, or empty nodes return. + + Raises + ------ + SyntaxError + * If xpah is not supported or issues with namespaces. + + Notes + ----- + `etree` supports limited XPath. If user attempts a more complex + expression syntax error will raise. + """ + from lxml.etree import XPathEvalError, XPathSyntaxError + + try: + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + children = self.xml_doc.xpath(self.xpath + "/*", namespaces=self.namespaces) + attrs = self.xml_doc.xpath(self.xpath + "/@*", namespaces=self.namespaces) + + if (elems == [] and attrs == [] and children == []) or ( + elems != [] and attrs == [] and children == [] + ): + raise ValueError( + "xpath does not return any nodes. " + "Be sure row level nodes are in xpath. " + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." + ) + except (XPathEvalError, XPathSyntaxError, TypeError) as e: + raise e + + def _validate_names(self) -> None: + """ + Validate names. + + This method will check if names is a list and aligns with + length of parse nodes. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.names: + children = self.xml_doc.xpath( + self.xpath + "[1]/*", namespaces=self.namespaces + ) + + if is_list_like(self.names): + if len(self.names) < len(children): + raise ValueError( + "names does not match length of child elements in xpath." + ) + else: + raise TypeError( + f"{type(self.names).__name__} is not a valid type for names" + ) + + def _convert_io(self) -> Union[None, str]: + """ + Convert filepath_or_buffer object to string. 
+ + This method will convert io object into a string or keep + as string, depending on object type. + """ + + obj = None + + if isinstance(self.raw_doc, str): + obj = self.raw_doc + + if isinstance(self.raw_doc, bytes): + obj = self.raw_doc.decode(self.encoding) + + if isinstance(self.raw_doc, io.StringIO): + obj = self.raw_doc.getvalue() + + if isinstance(self.raw_doc, io.BytesIO): + obj = self.raw_doc.getvalue().decode(self.encoding) + + if isinstance(self.raw_doc, io.TextIOWrapper): + obj = self.raw_doc.read() + + if isinstance(self.raw_doc, io.BufferedReader): + obj = self.raw_doc.read().decode(self.encoding) + + return obj + + def _parse_doc(self): + """ + Build tree from io. + + This method will parse io object into tree for parsing + conditionally by its specific object type. + + Raises + ------ + HttpError + * If URL cannot be reached. + + LookupError + * If xml document has incorrect or unknown encoding. + + OSError + * If file cannot be found. + + XMLSyntaxError + * If xml document conntains syntax issues. + + ValueError + * If io object is not readable as string or file-like object. + """ + + from lxml.etree import XML, XMLParser, XMLSyntaxError, parse + + self.raw_doc = self.stylesheet if self.is_style else self.io + + current_doc = self._convert_io() + if current_doc: + is_xml = current_doc.startswith((" DataFrame: + """ + Convert parsed data to Data Frame. + + This method will bind xml dictionary data of keys and values + into named columns of Data Frame using the built-in TextParser + class that build Data Frame and infers specific dtypes. + """ + + tags = [list(d.keys()) for d in data] + nodes = [list(d.values()) for d in data] + + try: + with TextParser(nodes, names=tags[0], **kwargs) as tp: + return tp.read() + except ParserError: + raise ParserError( + "XML document may be too complex for import. " + "Try to flatten document and use distinct " + "element and attribute names." 
+ ) + + +def _parse( + io, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + parser, + stylesheet, + **kwargs, +) -> DataFrame: + """ + Call internal parsers. + + This method will conditionally call internal parsers: + LxmlFrameParser and/or EtreeParser. + + Raises + ------ + ValueError + * If parser is not lxml or etree.e. + + Notes + ----- + This method will raise a warning instead of module not found or + import error if user does not have 1xml and then reverts to + fallback option with etree parser. + """ + + if parser == "lxml": + try: + p = _LxmlFrameParser( + io, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + stylesheet, + ) + except ImportError: + warn( + "You do not have lxml installed (default parser). " + "Instead, etree will be used.", + ImportWarning, + ) + + p = _EtreeFrameParser( + io, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + stylesheet, + ) + + elif parser == "etree": + p = _EtreeFrameParser( + io, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + stylesheet, + ) + else: + raise ValueError("Values for parser can only be lxml or etree.") + + data_dicts = p.parse_data() + + return _data_to_frame(data=data_dicts, **kwargs) + + +@deprecate_nonkeyword_arguments(version="2.0") +def read_xml( + io: FilePathOrBuffer, + xpath: Optional[str] = "./*", + namespaces: Optional[Union[dict, List[dict]]] = None, + elems_only: Optional[bool] = False, + attrs_only: Optional[bool] = False, + names: Optional[List[str]] = None, + encoding: Optional[str] = "utf-8", + parser: Optional[str] = "lxml", + stylesheet: Optional[FilePathOrBuffer[str]] = None, +) -> DataFrame: + r""" + Read XML docuemnts into a ``DataFrame`` object. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + io : str, path object or file-like object + A URL, file-like object, or raw string containing XML. 
+ + xpath : str, optional + The XPath to parse required set of nodes for migration to DataFrame. + XPath should return a collection of elements and not a single + element. Note: The ``etree`` parser supports limited XPath + expressions. For more complex XPath, use ``lxml`` which requires + installation. + + namespaces : dict, optional + The namespaces defined in XML document as dicts with key being + namespace prefix and value the URI. There is no need to include all + namespaces in XML, only the ones used in ``xpath`` expression. + Note: if XML document uses default namespace denoted as + `xmlns=''` without a prefix, you must assign any temporary + namespace, like 'doc', to URI in order to parse any underlying + nodes. For example, :: + + namespaces = {"doc": "https://example.com"} + + elems_only : bool, optional, default = False + Parse only the child elements at the specified ``xpath``. By default, + all child elements and non-empty text nodes are returned. + + attrs_only : bool, optional, default = False + Parse only the attributes at the specified ``xpath``. + By default, all attributes are returned. + + names : list-like, optional + Column names for DataFrame of parsed XML data. Use this parameter to + rename original element names and distinguish same named elements. + + encoding : str, optional, default = 'utf-8' + Encoding of XML document. + + parser : {'lxml','etree'}, default='lxml' + Parser module to use for retrieval of data. Only 'lxml' and + 'etree' are supported. With 'lxml' more complex XPath searches + and ability to use XSLT stylesheet are supported. Default parser + uses 'lxml'. If module is not installed a warning will raise and + process will continue with 'etree'. + + stylesheet : str, path object or file-like object + A URL, file-like object, or a raw string containing an XSLT script. + This stylesheet should flatten complex, deeply nested XML documents. 
+ To use this feature you must have ``lxml`` module installed and use + 'lxml' as ``parser``. The ``xpath`` must reference nodes of + transformed XML document generated after XSLT transformation and not + the original XML document. Only XSLT 1.0 scripts and not later + versions is currently supported. + + Returns + ------- + df + A DataFrame. + + See Also + -------- + read_json : Convert a JSON string to pandas object. + read_html : Read HTML tables into a list of DataFrame objects. + + Notes + ----- + This method is best designed to import shallow XML documents in + following format which is the ideal fit for the two-dimensions of a + ``DataFrame`` (row by column). :: + + + + data + data + data + ... + + + ... + + ... + + + As a file format, XML documents can be designed any way including + layout of elements and attributes as long as it conforms to W3C + specifications. Therefore, this method is a convenience handler for + a specific flatter design and not all possible XML structures. + + However, for more complex XML documents, ``stylesheet`` allows you to + temporarily redesign original document with XSLT (a special purpose + language) for a flatter version for migration to a DataFrame. + + This function will *always* return a single :class:`DataFrame` or raise + exceptions due to issues with XML document, ``xpath``, or other + parameters. + + Examples + -------- + >>> xml = ''' + ... + ... + ... square + ... 360 + ... 4.0 + ... + ... + ... circle + ... 360 + ... + ... + ... + ... triangle + ... 180 + ... 3.0 + ... + ... ''' + + >>> df = pd.read_xml(xml) + + >>> df + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + + >>> xml = ''' + ... + ... + ... + ... + ... "''' + + >>> df = pd.read_xml(xml, xpath=".//row") + + >>> df + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + + >>> xml = ''' + ... + ... + ... square + ... 360 + ... 4.0 + ... + ... + ... circle + ... 360 + ... + ... + ... + ... 
triangle + ... 180 + ... 3.0 + ... + ... ''' + + >>> df = pd.read(xml, + xpath="//doc:row", + namespaces = {'doc': 'https://example.com'}) + + >>> df + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + """ + + io = stringify_path(io) + + return _parse( + io=io, + xpath=xpath, + namespaces=namespaces, + elems_only=elems_only, + attrs_only=attrs_only, + names=names, + encoding=encoding, + parser=parser, + stylesheet=stylesheet, + ) diff --git a/pandas/tests/io/data/xml/baby_names.xml b/pandas/tests/io/data/xml/baby_names.xml new file mode 100644 index 0000000000000..b4797b79d7112 --- /dev/null +++ b/pandas/tests/io/data/xml/baby_names.xml @@ -0,0 +1,53 @@ + + + + 1 + Jos� + Sof�a + + + 2 + Luis + Valentina + + + 3 + Carlos + Isabella + + + 4 + Juan + Camila + + + 5 + Jorge + Valeria + + + 6 + Pedro + Mariana + + + 7 + Jes�s + Gabriela + + + 8 + Manuel + Sara + + + 9 + Santiago + Daniella + + + 10 + Sebasti�n + Mar�a Jos� + + diff --git a/pandas/tests/io/data/xml/books.xml b/pandas/tests/io/data/xml/books.xml new file mode 100644 index 0000000000000..666ce60e9a2be --- /dev/null +++ b/pandas/tests/io/data/xml/books.xml @@ -0,0 +1,21 @@ + + + + Everyday Italian + Giada De Laurentiis + 2005 + 30.00 + + + Harry Potter + J K. Rowling + 2005 + 29.99 + + + Learning XML + Erik T. Ray + 2003 + 39.95 + + diff --git a/pandas/tests/io/data/xml/cta_rail_lines.kml b/pandas/tests/io/data/xml/cta_rail_lines.kml new file mode 100644 index 0000000000000..c031137ee7b20 --- /dev/null +++ b/pandas/tests/io/data/xml/cta_rail_lines.kml @@ -0,0 +1,92 @@ + + + CTA_RailLines + + + CTA_RailLines + + + Blue Line (Forest Park) + +
Blue Line (Forest Park)
OBJECTID_1 1
ASSET_ID 21100001
LINES Blue Line (Forest Park)
DESCRIPTIO Oak Park to Austin
TYPE Elevated or at Grade
LEGEND BL
ALT_LEGEND BL
BRANCH Blue Line Forest Park
SHAPE.LEN 4060.368778
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.77678526964958,41.8708863930319,0 -87.77826234150609,41.87097820122218,0 -87.78251583439344,41.87130129991005,0 -87.78418294588424,41.87145055520308,0 -87.7872369165933,41.8717239119163,0 -87.79160214925886,41.87210797280065,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 2
ASSET_ID 21100002
LINES Red, Purple Line
DESCRIPTIO Lawrence to Wilson
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 1800.132896
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65758750947528,41.96427269188822,0 -87.65802133507393,41.96581929055245,0 -87.65819033925305,41.96621846093642,0 -87.6583189819129,41.96650362897086,0 -87.65835858701473,41.96669002089185,0 -87.65838428411853,41.96688150295095,0 -87.65842208882658,41.96745896091846,0 -87.65846556843937,41.9683761425439,0 -87.65849296214573,41.96913893870342,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 3
ASSET_ID 21100003
LINES Red, Purple Line
DESCRIPTIO Wilson to Sheridan
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 4256.243677
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65492939166126,41.95377494531437,0 -87.65557043199591,41.95376544118533,0 -87.65606302030132,41.95376391658746,0 -87.65623502146268,41.95377379126367,0 -87.65634748981634,41.95380103566435,0 -87.65646537904269,41.95387703994676,0 -87.65656532461145,41.95396622645799,0 -87.65664760856414,41.95404201996044,0 -87.65671750555913,41.95416647054043,0 -87.65673983607117,41.95429949810849,0 -87.65673866475777,41.95441024240925,0 -87.6567690255541,41.95490657227902,0 -87.65683672482363,41.95692259283837,0 -87.6568900886376,41.95861070983142,0 -87.65699865558875,41.96181418669004,0 -87.65756347177603,41.96397045777844,0 -87.65758750947528,41.96427269188822,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 4
ASSET_ID 21100004
LINES Red, Purple Line
DESCRIPTIO Sheridan to Addison
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 2581.713736
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65362593118043,41.94742799535678,0 -87.65363554415794,41.94819886386848,0 -87.6536456393239,41.95059994675451,0 -87.65365831235026,41.95108288489359,0 -87.6536604873874,41.9519954657554,0 -87.65362592053201,41.95245597302328,0 -87.65367158496069,41.95311153649393,0 -87.65368468595476,41.9533202828916,0 -87.65369271253692,41.95343095587119,0 -87.65373335834569,41.95351536301472,0 -87.65378605844126,41.95358212680591,0 -87.65385067928185,41.95364452823767,0 -87.6539390793817,41.95370263886964,0 -87.6540786298351,41.95373403675265,0 -87.65430648647626,41.9537535411832,0 -87.65492939166126,41.95377494531437,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 5
ASSET_ID 21100005
LINES Red, Purple Line
DESCRIPTIO Addison to Clark Junction
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 1918.716686
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65345391792157,41.94217681262115,0 -87.65342448305786,41.94237224420864,0 -87.65339745703922,41.94268217746244,0 -87.65337753982941,41.94288140770284,0 -87.65336256753105,41.94317369618263,0 -87.65338799707138,41.94357253961736,0 -87.65340240886648,41.94389158188269,0 -87.65341837392448,41.94406444407721,0 -87.65342275247338,41.94421065714904,0 -87.65347469646018,41.94434829382345,0 -87.65351486483024,41.94447699917548,0 -87.65353483605053,41.9453896864472,0 -87.65361975532807,41.94689193720703,0 -87.65362593118043,41.94742799535678,0 + + +
+
+ +
+
diff --git a/pandas/tests/io/data/xml/flatten_doc.xsl b/pandas/tests/io/data/xml/flatten_doc.xsl new file mode 100644 index 0000000000000..a9d62d180beaf --- /dev/null +++ b/pandas/tests/io/data/xml/flatten_doc.xsl @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + diff --git a/pandas/tests/io/data/xml/row_field_output.xsl b/pandas/tests/io/data/xml/row_field_output.xsl new file mode 100644 index 0000000000000..5a0f0e655a78e --- /dev/null +++ b/pandas/tests/io/data/xml/row_field_output.xsl @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py new file mode 100644 index 0000000000000..62958894981fd --- /dev/null +++ b/pandas/tests/io/formats/test_to_xml.py @@ -0,0 +1,1099 @@ +from io import BytesIO, StringIO + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.xml import read_xml + +geom_df = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } +) + +planet_df = DataFrame( + { + "planet": [ + "Mercury", + "Venus", + "Earth", + "Mars", + "Jupiter", + "Saturn", + "Uranus", + "Neptune", + ], + "type": [ + "terrestrial", + "terrestrial", + "terrestrial", + "terrestrial", + "gas giant", + "gas giant", + "ice giant", + "ice giant", + ], + "location": [ + "inner", + "inner", + "inner", + "inner", + "outer", + "outer", + "outer", + "outer", + ], + "mass": [ + 0.330114, + 4.86747, + 5.97237, + 0.641712, + 1898.187, + 568.3174, + 86.8127, + 102.4126, + ], + } +) + +from_file_expected = """\ + + + + 0 + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + 1 + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + 2 + web + Learning XML + Erik T. 
Ray + 2003 + 39.95 + +""" + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_file_output_str_read(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + + with tm.ensure_clean("test.xml") as path: + df_file.to_xml(path, parser=parser) + with open(path, "rb") as f: + output = f.read().decode("utf-8").strip() + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + web + Learning XML + Erik T. Ray + 2003 + 39.95 + +""" + + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + + with tm.ensure_clean("test.xml") as path: + df_file.to_xml(path, index=False, parser=parser) + with open(path, "rb") as f: + output = f.read().decode("utf-8").strip() + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + web + Learning XML + Erik T. 
Ray + 2003 + 39.95 + +""" + + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + + with tm.ensure_clean("test.xml") as path: + df_file.to_xml( + path, index=False, root_name="books", row_name="book", parser=parser + ) + with open(path, "rb") as f: + output = f.read().decode("utf-8").strip() + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_na_elem_output(datapath, parser): + output = geom_df.to_xml(parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + 0.0 + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(na_rep="0.0", parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + + +""" + + output = geom_df.to_xml(attr_cols=["shape", "degrees", "sides"], parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + + +""" + + output = geom_df.to_xml( + attr_cols=["index", "shape", "degrees", "sides"], + namespaces={"doc": "http://example.xom"}, + prefix="doc", + parser=parser, + ) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 360 + 4.0 + square + + + 360 + + circle + + + 180 + 3.0 + triangle + +""" + + output = geom_df.to_xml( + index=False, elem_cols=["degrees", "sides", "shape"], parser=parser + ) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 360 + 4.0 + + + 360 + + + + 180 + 3.0 + +""" + + output = geom_df.to_xml( + index=False, + elem_cols=["degrees", "sides"], + attr_cols=["shape"], + parser=parser, + ) + + # etree and lxml differs on quotes and case in 
xml declaration + output = output.replace( + ' + + + inner + terrestrial + 4 + 11.811666 + 2.9529165 + + + outer + gas giant + 2 + 2466.5044 + 1233.2522 + + + outer + ice giant + 2 + 189.2253 + 94.61265 + + + All + + 8 + 2667.541366 + 333.44267075 + +""" + + pvt = planet_df.pivot_table( + index=["location", "type"], + values="mass", + aggfunc=["count", "sum", "mean"], + margins=True, + ) + + output = pvt.to_xml(parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + + + +""" + + pvt = planet_df.pivot_table( + index=["location", "type"], + values="mass", + aggfunc=["count", "sum", "mean"], + margins=True, + ) + + output = pvt.to_xml(attr_cols=list(pvt.reset_index().columns.values), parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + inner + terrestrial + 4 + 11.811666 + 2.9529165 + + + outer + gas giant + 2 + 2466.5044 + 1233.2522 + + + outer + ice giant + 2 + 189.2253 + 94.61265 + +""" + + agg = planet_df.groupby(["location", "type"])["mass"].agg(["count", "sum", "mean"]) + + output = agg.to_xml(parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + + +""" + + agg = planet_df.groupby(["location", "type"])["mass"].agg(["count", "sum", "mean"]) + + output = agg.to_xml(attr_cols=list(agg.reset_index().columns.values), parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(namespaces={"": "http://example.com"}, parser=parser) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml( + namespaces={"doc": "http://example.com"}, prefix="doc", 
parser=parser + ) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml( + namespaces={"": "http://example.com", "doc": "http://other.org"}, + prefix="doc", + parser=parser, + ) + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + + 0 + 1 + José + Sofía + + + 1 + 2 + Luis + Valentina + + + 2 + 3 + Carlos + Isabella + + + 3 + 4 + Juan + Camila + + + 4 + 5 + Jorge + Valeria + +""" + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_encoding_option_str(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + df_file = read_xml(filename, parser=parser, encoding="ISO-8859-1").head(5) + + output = df_file.to_xml(encoding="ISO-8859-1") + + # etree and lxml differs on quotes and case in xml declaration + output = output.replace( + ' + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(xml_declaration=False, parser=parser) + + assert output == expected + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_no_pretty_print_with_decl(parser): + expected = ( + "\n" + "0square" + "3604.0" + "1circle360" + "2" + "triangle1803.0" + "" + ) + + output = geom_df.to_xml(pretty_print=False) + + output = output.replace( + '0square" + "3604.0" + "1circle360" + "2" + "triangle1803.0" + "" + ) + + output = geom_df.to_xml(xml_declaration=False, pretty_print=False) + + assert output == expected + + +xsl_expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_stylesheet_file_like(datapath, mode): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + with open(xsl, mode) as f: + assert geom_df.to_xml(stylesheet=f) == 
xsl_expected + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_stylesheet_io(datapath, mode): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + with open(xsl, mode) as f: + xsl_obj = f.read() + + xsl_io = BytesIO(xsl_obj) if isinstance(xsl_obj, bytes) else StringIO(xsl_obj) + + output = geom_df.to_xml(stylesheet=xsl_io) + + assert output == xsl_expected + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_stylesheet_buffered_reader(datapath, mode): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + with open(xsl, mode) as f: + xsl_obj = f.read() + + output = geom_df.to_xml(stylesheet=xsl_obj) + + assert output == xsl_expected + + +@td.skip_if_no("lxml") +def test_style_to_csv(): + xsl = """\ + + + + + , + + ,shape,degrees,sides + + + + + + + +""" + + out_csv = geom_df.to_csv().strip() + out_xml = geom_df.to_xml(stylesheet=xsl) + + assert out_csv == out_xml + + +@td.skip_if_no("lxml") +def test_style_to_string(): + xsl = """\ + + + + + + + shape degrees sides + + + + + + + +""" + + out_str = geom_df.to_string() + out_xml = geom_df.to_xml(na_rep="NaN", stylesheet=xsl) + + assert out_xml == out_str + + +@td.skip_if_no("lxml") +def test_style_to_json(): + xsl = """\ + + + + + " + + + {"shape":{ + + },"degrees":{ + + },"sides":{ + + }} + + + + + + + + + + + + + + + + + , + + +""" + + out_json = geom_df.to_json() + out_xml = geom_df.to_xml(stylesheet=xsl) + + assert out_json == out_xml + + +@pytest.mark.skip( + reason="incorrect tag in from to_html() to be skipped until fix" +) +def test_style_to_html(): + xsl = """\ + + + + + + + + + + + + + + + + + +
shapedegreessides
+
+ + + + + + + + + + + + +
""" + + out_html = geom_df.to_html() + out_xml = geom_df.to_xml(stylesheet=xsl) + + assert out_html == out_xml diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py new file mode 100644 index 0000000000000..375e9c2472742 --- /dev/null +++ b/pandas/tests/io/test_xml.py @@ -0,0 +1,708 @@ +from io import BytesIO, StringIO +import os +from urllib.error import HTTPError + +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.xml import read_xml + +xml_default_nmsp = """\ + + + + square + 360 + 4 + + + circle + 360 + + + + triangle + 180 + 3 + +""" + +xml_prefix_nmsp = """\ + + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + +""" + + +def test_parser_consistency_file(datapath): + filename = datapath("io", "data", "xml", "books.xml") + df_file_lxml = read_xml(filename, parser="lxml") + df_file_etree = read_xml(filename, parser="etree") + + tm.assert_frame_equal(df_file_lxml, df_file_etree) + + +@tm.network +@pytest.mark.slow +def test_parser_consistency_url(datapath): + url = ( + "https://data.cityofchicago.org/api/views/" + "8pix-ypme/rows.xml?accessType=DOWNLOAD" + ) + df_file_lxml = read_xml(url, xpath=".//row/row", parser="lxml") + df_file_etree = read_xml(url, xpath=".//row/row", parser="etree") + + tm.assert_frame_equal(df_file_lxml, df_file_etree) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_file_like(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + df_file = read_xml(f, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_file_io(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + xml_obj = f.read() + + df_io = read_xml( + (BytesIO(xml_obj) if isinstance(xml_obj, bytes) else StringIO(xml_obj)), + parser=parser, + ) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_io, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_file_buffered_reader_string(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + xml_obj = f.read() + + df_str = read_xml(xml_obj, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_str, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + next(f) + xml_obj = f.read() + + df_str = read_xml(xml_obj, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_str, df_expected) + + +def test_wrong_file_lxml(datapath): + with pytest.raises(OSError, match=("failed to load external entity")): + filename = os.path.join("data", "html", "books.xml") + read_xml(filename, parser="lxml") + + +def test_wrong_file_etree(datapath): + with pytest.raises(OSError, match=("No such file")): + filename = os.path.join("data", "html", "books.xml") + read_xml(filename, parser="etree") + + +@tm.network +@td.skip_if_no("lxml") +def test_url(): + url = "https://www.w3schools.com/xml/books.xml" + df_url = read_xml(url, xpath=".//book[count(*)=4]") + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + "cover": [None, None, "paperback"], + } + ) + + tm.assert_frame_equal(df_url, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_wrong_url(parser): + with pytest.raises(HTTPError, match=("HTTP Error 404: Not Found")): + url = "https://www.w3schools.com/xml/python.xml" + read_xml(url, xpath=".//book[count(*)=4]", parser=parser) + + +def test_empty_xpath_lxml(datapath): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(ValueError, match=("xpath does not return any nodes")): + read_xml(filename, xpath=".//python", parser="lxml") + + +def test_bad_xpath_etree(datapath): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises( + SyntaxError, match=("You have used an incorrect or unsupported XPath") + ): + read_xml(filename, xpath=".//[book]", parser="etree") + + +@td.skip_if_no("lxml") +def test_bad_xpath_lxml(datapath): + from lxml.etree import XPathEvalError + + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(XPathEvalError, match=("Invalid expression")): + read_xml(filename, xpath=".//[book]", parser="lxml") + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_default_namespace(parser): + df_nmsp = read_xml( + xml_default_nmsp, + xpath=".//ns:row", + namespaces={"ns": "http://example.com"}, + parser=parser, + ) + + df_expected = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4.0, float("nan"), 3.0], + } + ) + + tm.assert_frame_equal(df_nmsp, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_prefix_namespace(parser): + df_nmsp = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser=parser, + ) + + df_expected = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4.0, float("nan"), 3.0], + } + 
) + + tm.assert_frame_equal(df_nmsp, df_expected) + + +def test_consistency_default_namespace(): + df_lxml = read_xml( + xml_default_nmsp, + xpath=".//ns:row", + namespaces={"ns": "http://example.com"}, + parser="lxml", + ) + + df_etree = read_xml( + xml_default_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="etree", + ) + + tm.assert_frame_equal(df_lxml, df_etree) + + +def test_consistency_prefix_namespace(): + df_lxml = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="lxml", + ) + + df_etree = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="etree", + ) + + tm.assert_frame_equal(df_lxml, df_etree) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_missing_prefix_with_default_namespace(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(ValueError, match=("xpath does not return any nodes")): + read_xml(filename, xpath=".//Placemark", parser=parser) + + +def test_missing_prefix_definition_etree(datapath): + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises(SyntaxError, match=("you used an undeclared namespace prefix")): + read_xml(filename, xpath=".//kml:Placemark", parser="etree") + + +@td.skip_if_no("lxml") +def test_missing_prefix_definition_lxml(datapath): + from lxml.etree import XPathEvalError + + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises(XPathEvalError, match=("Undefined namespace prefix")): + read_xml(filename, xpath=".//kml:Placemark", parser="lxml") + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("key", ["", None]) +def test_none_namespace_prefix(key): + with pytest.raises( + TypeError, match=("empty namespace prefix is not supported in XPath") + ): + read_xml( + xml_default_nmsp, + xpath=".//kml:Placemark", + namespaces={key: "http://www.opengis.net/kml/2.2"}, + 
parser="lxml", + ) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_file_elems_and_attrs(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_file_only_attrs(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, attrs_only=True, parser=parser) + df_expected = DataFrame({"category": ["cooking", "children", "web"]}) + + tm.assert_frame_equal(df_file, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_file_only_elems(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, elems_only=True, parser=parser) + df_expected = DataFrame( + { + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_elem_and_attrs_only(datapath, parser): + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises( + ValueError, + match=("Either element or attributes can be parsed not both"), + ): + read_xml(filename, elems_only=True, attrs_only=True, parser=parser) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_names_option_output(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml( + filename, names=["Col1", "Col2", "Col3", "Col4", "Col5"], parser=parser + ) + + df_expected = DataFrame( + { + "Col1": ["cooking", "children", "web"], + "Col2": ["Everyday Italian", "Harry Potter", "Learning XML"], + "Col3": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "Col4": [2005, 2005, 2003], + "Col5": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_names_option_wrong_length(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(ValueError, match=("names does not match length")): + read_xml(filename, names=["Col1", "Col2", "Col3"], parser=parser) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_names_option_wrong_type(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(TypeError, match=("is not a valid type for names")): + read_xml(filename, names="Col1, Col2, Col3", parser=parser) + + +@td.skip_if_no("lxml") +def test_wrong_encoding_lxml(datapath): + from lxml.etree import XMLSyntaxError + + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(XMLSyntaxError, match=("Input is not proper UTF-8")): + read_xml(filename) + + +@td.skip_if_no("lxml") +def 
test_utf16_encoding_lxml(datapath): + from lxml.etree import XMLSyntaxError + + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(XMLSyntaxError, match=("Start tag expected, '<' not found")): + read_xml(filename, encoding="UTF-16") + + +@td.skip_if_no("lxml") +def test_unknown_encoding_lxml(datapath): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(LookupError, match=("unknown encoding")): + read_xml(filename, encoding="UFT-8") + + +# etree raises no error on wrong, utf-16, or unknown encoding +@pytest.mark.parametrize("encoding", [None, "UTF-16", "UFT-8"]) +def test_wrong_encoding_etree(datapath, encoding): + filename = datapath("io", "data", "xml", "baby_names.xml") + read_xml(filename, parser="etree", encoding=encoding) + + +@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_ascii_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + read_xml(filename, encoding="ascii", parser=parser) + + +def test_parser_consistency_with_encoding(datapath): + filename = datapath("io", "data", "xml", "baby_names.xml") + df_lxml = read_xml(filename, parser="lxml", encoding="ISO-8859-1") + df_etree = read_xml(filename, parser="etree") + + tm.assert_frame_equal(df_lxml, df_etree) + + +def test_attribute_centric_xml(): + xml = """\ + + + + + + + + + + + + + + + + + +""" + + df_lxml = read_xml(xml, xpath=".//station") + df_etree = read_xml(xml, xpath=".//station", parser="etree") + + tm.assert_frame_equal(df_lxml, df_etree) + + +def test_wrong_parser(datapath): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises( + ValueError, match=("Values for parser can only be lxml or etree.") + ): + read_xml(filename, parser="bs4") + + +@td.skip_if_no("lxml") +def test_stylesheet_file(datapath): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + data = { + "id": { + 0: "ID_00001", + 1: 
"ID_00002", + 2: "ID_00003", + 3: "ID_00004", + 4: "ID_00005", + }, + "name": { + 0: "Blue Line (Forest Park)", + 1: "Red, Purple Line", + 2: "Red, Purple Line", + 3: "Red, Purple Line", + 4: "Red, Purple Line", + }, + "styleUrl": { + 0: "#LineStyle01", + 1: "#LineStyle01", + 2: "#LineStyle01", + 3: "#LineStyle01", + 4: "#LineStyle01", + }, + "extrude": {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}, + "altitudeMode": { + 0: "clampedToGround", + 1: "clampedToGround", + 2: "clampedToGround", + 3: "clampedToGround", + 4: "clampedToGround", + }, + "coordinates": { + 0: ( + "-87.77678526964958,41.8708863930319,0 " + "-87.77826234150609,41.87097820122218,0 " + "-87.78251583439344,41.87130129991005,0 " + "-87.78418294588424,41.87145055520308,0 " + "-87.7872369165933,41.8717239119163,0 " + "-87.79160214925886,41.87210797280065,0" + ), + 1: ( + "-87.65758750947528,41.96427269188822,0 " + "-87.65802133507393,41.96581929055245,0 " + "-87.65819033925305,41.96621846093642,0 " + "-87.6583189819129,41.96650362897086,0 " + "-87.65835858701473,41.96669002089185,0 " + "-87.65838428411853,41.96688150295095,0 " + "-87.65842208882658,41.96745896091846,0 " + "-87.65846556843937,41.9683761425439,0 " + "-87.65849296214573,41.96913893870342,0" + ), + 2: ( + "-87.65492939166126,41.95377494531437,0 " + "-87.65557043199591,41.95376544118533,0 " + "-87.65606302030132,41.95376391658746,0 " + "-87.65623502146268,41.95377379126367,0 " + "-87.65634748981634,41.95380103566435,0 " + "-87.65646537904269,41.95387703994676,0 " + "-87.65656532461145,41.95396622645799,0 " + "-87.65664760856414,41.95404201996044,0 " + "-87.65671750555913,41.95416647054043,0 " + "-87.65673983607117,41.95429949810849,0 " + "-87.65673866475777,41.95441024240925,0 " + "-87.6567690255541,41.95490657227902,0 " + "-87.65683672482363,41.95692259283837,0 " + "-87.6568900886376,41.95861070983142,0 " + "-87.65699865558875,41.96181418669004,0 " + "-87.65756347177603,41.96397045777844,0 " + "-87.65758750947528,41.96427269188822,0" + ), + 3: ( + 
"-87.65362593118043,41.94742799535678,0 " + "-87.65363554415794,41.94819886386848,0 " + "-87.6536456393239,41.95059994675451,0 " + "-87.65365831235026,41.95108288489359,0 " + "-87.6536604873874,41.9519954657554,0 " + "-87.65362592053201,41.95245597302328,0 " + "-87.65367158496069,41.95311153649393,0 " + "-87.65368468595476,41.9533202828916,0 " + "-87.65369271253692,41.95343095587119,0 " + "-87.65373335834569,41.95351536301472,0 " + "-87.65378605844126,41.95358212680591,0 " + "-87.65385067928185,41.95364452823767,0 " + "-87.6539390793817,41.95370263886964,0 " + "-87.6540786298351,41.95373403675265,0 " + "-87.65430648647626,41.9537535411832,0 " + "-87.65492939166126,41.95377494531437,0" + ), + 4: ( + "-87.65345391792157,41.94217681262115,0 " + "-87.65342448305786,41.94237224420864,0 " + "-87.65339745703922,41.94268217746244,0 " + "-87.65337753982941,41.94288140770284,0 " + "-87.65336256753105,41.94317369618263,0 " + "-87.65338799707138,41.94357253961736,0 " + "-87.65340240886648,41.94389158188269,0 " + "-87.65341837392448,41.94406444407721,0 " + "-87.65342275247338,41.94421065714904,0 " + "-87.65347469646018,41.94434829382345,0 " + "-87.65351486483024,41.94447699917548,0 " + "-87.65353483605053,41.9453896864472,0 " + "-87.65361975532807,41.94689193720703,0 " + "-87.65362593118043,41.94742799535678,0" + ), + }, + } + + df_expected = DataFrame(data) + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl, + ) + + tm.assert_frame_equal(df_expected, df_style) + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_stylesheet_file_like(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + with open(xsl, mode) as f: + read_xml(kml, stylesheet=f) + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_stylesheet_io(datapath, mode): + kml = datapath("io", "data", 
"xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + with open(xsl, mode) as f: + xsl_obj = f.read() + + xsl_io = BytesIO(xsl_obj) if isinstance(xsl_obj, bytes) else StringIO(xsl_obj) + read_xml(kml, stylesheet=xsl_io) + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("mode", ["rb", "r"]) +def test_stylesheet_buffered_reader(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + with open(xsl, mode) as f: + xsl_obj = f.read() + + read_xml(kml, stylesheet=xsl_obj) + + +@td.skip_if_no("lxml") +def test_wrong_stylesheet(datapath): + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + xsl = os.path.join("data", "xml", "flatten.xsl") + + with pytest.raises(OSError, match=("failed to load external entity")): + read_xml(kml, stylesheet=xsl) + + +@tm.network +@td.skip_if_no("lxml") +def test_online_stylesheet(): + xml = "https://www.w3schools.com/xml/cdcatalog_with_xsl.xml" + xsl = "https://www.w3schools.com/xml/cdcatalog.xsl" + + df_xsl = read_xml( + xml, + xpath=".//tr[td and position() <= 6]", + names=["title", "artist"], + stylesheet=xsl, + ) + + df_expected = DataFrame( + { + "title": { + 0: "Empire Burlesque", + 1: "Hide your heart", + 2: "Greatest Hits", + 3: "Still got the blues", + 4: "Eros", + }, + "artist": { + 0: "Bob Dylan", + 1: "Bonnie Tyler", + 2: "Dolly Parton", + 3: "Gary Moore", + 4: "Eros Ramazzotti", + }, + } + ) + + tm.assert_frame_equal(df_expected, df_xsl) From cd79a06871aacd0fa1a640389a043cce501c614c Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Tue, 2 Feb 2021 18:53:13 -0600 Subject: [PATCH 02/35] Refactor code for base classes, add tests, adjust whatsnew entry --- doc/source/whatsnew/v1.3.0.rst | 53 +++- pandas/core/frame.py | 6 +- pandas/io/formats/format.py | 15 +- pandas/io/formats/xml.py | 409 ++++++++----------------- pandas/io/xml.py | 403 ++++++++++-------------- pandas/tests/io/formats/test_to_xml.py | 
361 +++++++++++++++------- pandas/tests/io/test_xml.py | 267 ++++++++++++---- 7 files changed, 814 insertions(+), 700 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 3a6ac281829fe..85b272767e642 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -41,34 +41,71 @@ See ref:`window.overview` for performance and functional benefits. (:issue:`1509 .. _whatsnew_130.read_to_xml: -We added I/O support to read and render shallow versions of XML documents with -:func:`pandas.read_xml` and :meth:`DataFrame.to_xml`. Using lxml as parser, +We added I/O support to read and render shallow versions of XML documents with +:func:`pandas.read_xml` and :meth:`DataFrame.to_xml`. Using lxml as parser, +full XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) +======= +We added I/O support to read and render shallow versions of `XML`_ documents with +:func:`pandas.read_xml` and :meth:`DataFrame.to_xml`. Using `lxml`_ as parser, full XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) -.. ipython:: python +.. _XML: https://www.w3.org/standards/xml/core +.. _lxml: https://lxml.de - xml = """ +.. code-block:: ipython + + In [1]: xml = """ + ...: + ...: + ...: square + ...: 360 + ...: 4.0 + ...: + ...: + ...: circle + ...: 360 + ...: + ...: + ...: + ...: triangle + ...: 180 + ...: 3.0 + ...: + ...: """ + + In [2]: df = pd.read_xml(xml) + In [3]: df + Out[3]: + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + + In [4]: df.to_xml() + Out[4]: + + 0 square 360 4.0 + 1 circle 360 + 2 triangle 180 3.0 - """ - - df = pd.read_xml(xml) + - df.to_xml() +For more, see :ref:`io` in the user guide on IO tools. .. 
_whatsnew_130.enhancements.other: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2b3427943cda0..6737f7151bf03 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2606,7 +2606,7 @@ def to_html( def to_xml( self, - io: Optional[FilePathOrBuffer[str]] = None, + path_or_buffer: Optional[FilePathOrBuffer[str]] = None, index: Optional[bool] = True, root_name: Optional[str] = "data", row_name: Optional[str] = "row", @@ -2628,7 +2628,7 @@ def to_xml( Parameters ---------- - io : str, path object or file-like object, optional + path_or_buffer : str, path object or file-like object, optional File to write output to. If None, the output is returned as a string. index : bool, optional @@ -2760,7 +2760,7 @@ def to_xml( ) return fmt.DataFrameRenderer(formatter).to_xml( - io=io, + path_or_buffer=path_or_buffer, index=index, root_name=root_name, row_name=row_name, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b08b66ba46d61..ebf3cf0852575 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -51,6 +51,7 @@ IndexLabel, StorageOptions, ) +from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -1007,7 +1008,7 @@ def to_html( def to_xml( self, - io: Optional[FilePathOrBuffer[str]] = None, + path_or_buffer: Optional[FilePathOrBuffer[str]] = None, index: Optional[bool] = True, root_name: Optional[str] = "data", row_name: Optional[str] = "row", @@ -1029,7 +1030,7 @@ def to_xml( Parameters ---------- - io : str, path object or file-like object, optional + path_or_buffer : str, path object or file-like object, optional File to write output to. If None, the output is returned as a string. 
index : bool, optional @@ -1084,10 +1085,14 @@ def to_xml( from pandas.io.formats.xml import EtreeXMLFormatter, LxmlXMLFormatter + lxml = import_optional_dependency( + "lxml.etree", raise_on_missing=False, on_version="ignore" + ) + if parser == "lxml": - try: + if lxml is not None: TreeBuilder = LxmlXMLFormatter - except ImportError: + else: warn( "You do not have lxml installed (default parser). " "Instead, etree will be used.", @@ -1103,7 +1108,7 @@ def to_xml( xml_formatter = TreeBuilder( self.fmt, - io=io, + path_or_buffer=path_or_buffer, index=index, root_name=root_name, row_name=row_name, diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 86448c9d4498f..90ee289ad3414 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -1,5 +1,5 @@ """ -Module for formatting output data in XML. +:mod:`pandas.io.formats.xml` is a module for formatting data in XML. """ import codecs @@ -9,6 +9,7 @@ from warnings import warn from pandas._typing import FilePathOrBuffer +from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_list_like @@ -16,14 +17,13 @@ from pandas.io.formats.format import DataFrameFormatter -class EtreeXMLFormatter: +class BaseXMLFormatter: """ - Class for formatting data in xml using Python standard library - modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + Subclass for formatting data in XML. Parameters ---------- - io : str or file-like + path_or_buffer : str or file-like This can be either a string of raw XML, a valid URL, file or file-like object. @@ -34,7 +34,7 @@ class EtreeXMLFormatter: Name for root of xml document. Default is 'data'. root_name : str - Name for row elemens of xml document. Default is 'row'. + Name for row elements of xml document. Default is 'row'. na_rep : str Missing data representation. @@ -62,23 +62,19 @@ class EtreeXMLFormatter: Whether to write xml document with line breaks and indentation. 
stylesheet : str or file-like - A URL, file, file-like object, or a raw string containing XSLT, - `etree` does not support XSLT but retained for consistency. + A URL, file, file-like object, or a raw string containing XSLT. See also -------- + pandas.io.formats.xml.EtreeXMLFormatter pandas.io.formats.xml.LxmlXMLFormatter - Notes - ----- - This class serves as fall back option if user does not have - ``lxml`` installed or user specifically requests ``etree`` parser. """ def __init__( self, formatter: DataFrameFormatter, - io: Optional[FilePathOrBuffer[str]] = None, + path_or_buffer: Optional[FilePathOrBuffer[str]] = None, index: Optional[bool] = True, root_name: Optional[str] = "data", row_name: Optional[str] = "row", @@ -93,7 +89,7 @@ def __init__( stylesheet: Optional[FilePathOrBuffer[str]] = None, ) -> None: self.fmt = formatter - self.io = io + self.path_or_buffer = path_or_buffer self.index = index self.root_name = root_name self.row_name = row_name @@ -108,13 +104,6 @@ def __init__( self.stylesheet = stylesheet self.frame = self.fmt.frame - self.validate_columns() - self.validate_encoding() - self.orig_cols = self.fmt.frame.columns.tolist() - self.frame_dicts = self.process_dataframe() - self.handle_indexes() - self.prefix_uri = self.get_prefix_uri() - def build_tree(self) -> bytes: """ Build tree from data. @@ -122,40 +111,7 @@ def build_tree(self) -> bytes: This method initializes the root and builds attributes and elements with optional namespaces. 
""" - from xml.etree.ElementTree import Element, SubElement, tostring - - self.root = Element( - f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() - ) - - for k, d in self.frame_dicts.items(): - self.d = d - self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") - - if self.attr_cols: - self.build_attribs() - if self.elem_cols: - self.build_elems() - if not self.attr_cols and not self.elem_cols: - self.elem_cols = list(self.frame_dicts[0].keys()) - self.build_elems() - - self.out_xml = tostring(self.root, method="xml", encoding=self.encoding) - - if self.pretty_print: - self.out_xml = self.prettify_tree() - - if not self.xml_declaration: - self.out_xml = self.remove_declaration() - - if self.stylesheet: - warn( - "To use stylesheet, you need lxml installed. " - "The non-transformed, original XML is returned instead.", - UserWarning, - ) - - return self.out_xml + raise AbstractMethodError(self) def validate_columns(self) -> None: """ @@ -233,23 +189,14 @@ def get_prefix_uri(self) -> str: Get uri of namespace prefix. This method retrieves corresponding URI to prefix in namespaces. - """ - from xml.etree.ElementTree import register_namespace - - uri = "" - if self.namespaces: - for p, n in self.namespaces.items(): - register_namespace(p, n) - if self.prefix: - try: - uri = f"{{{self.namespaces[self.prefix]}}}" - except (KeyError): - raise KeyError("prefix is not included in namespaces") - else: - uri = f'{{{self.namespaces[""]}}}' + Raises + ------ + KeyError + *If prefix is not included in namespace dict. + """ - return uri + raise AbstractMethodError(self) def other_namespaces(self) -> dict: """ @@ -277,6 +224,109 @@ def build_attribs(self) -> None: works with tuples for multindex or hierarchical columns. """ + raise AbstractMethodError(self) + + def build_elems(self) -> None: + """ + Create child elements of row. 
+ + This method adds child elements using elem_cols to row element and + works with tuples for multindex or hierarchical columns. + """ + + raise AbstractMethodError(self) + + def write_output(self) -> Optional[str]: + xml_doc = self.build_tree() + + try: + if self.path_or_buffer: + with open(self.path_or_buffer, "wb") as f: + f.write(xml_doc) + xml_doc = None + else: + xml_doc = xml_doc.decode(self.encoding).rstrip() + except (UnicodeDecodeError, OSError) as e: + raise e + + return xml_doc + + +class EtreeXMLFormatter(BaseXMLFormatter): + """ + Class for formatting data in xml using Python standard library + modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + + Notes + ----- + This class serves as fall back option if user does not have + ``lxml`` installed or user specifically requests ``etree`` parser. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.validate_columns() + self.validate_encoding() + self.orig_cols = self.fmt.frame.columns.tolist() + self.frame_dicts = self.process_dataframe() + self.handle_indexes() + self.prefix_uri = self.get_prefix_uri() + + def build_tree(self) -> bytes: + from xml.etree.ElementTree import Element, SubElement, tostring + + self.root = Element( + f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() + ) + + for k, d in self.frame_dicts.items(): + self.d = d + self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") + + if self.attr_cols: + self.build_attribs() + if self.elem_cols: + self.build_elems() + if not self.attr_cols and not self.elem_cols: + self.elem_cols = list(self.frame_dicts[0].keys()) + self.build_elems() + + self.out_xml = tostring(self.root, method="xml", encoding=self.encoding) + + if self.pretty_print: + self.out_xml = self.prettify_tree() + + if not self.xml_declaration: + self.out_xml = self.remove_declaration() + + if self.stylesheet: + warn( + "To use stylesheet, you need lxml installed. 
" + "Instead, the non-transformed, original XML is returned.", + UserWarning, + ) + + return self.out_xml + + def get_prefix_uri(self) -> str: + from xml.etree.ElementTree import register_namespace + + uri = "" + if self.namespaces: + for p, n in self.namespaces.items(): + register_namespace(p, n) + if self.prefix: + try: + uri = f"{{{self.namespaces[self.prefix]}}}" + except (KeyError): + raise KeyError(f"{self.prefix} is not included in namespaces") + else: + uri = f'{{{self.namespaces[""]}}}' + + return uri + + def build_attribs(self) -> None: for col in self.attr_cols: flat_col = col if isinstance(col, tuple): @@ -294,13 +344,6 @@ def build_attribs(self) -> None: raise KeyError(f"no valid column, {col}") def build_elems(self) -> None: - """ - Create child elements of row. - - This method adds child elements using elem_cols to row element and - works with tuples for multindex or hierarchical columns. - """ - from xml.etree.ElementTree import SubElement for col in self.elem_cols: @@ -342,111 +385,20 @@ def remove_declaration(self) -> None: return self.out_xml.split(b"?>")[-1].strip() - def write_output(self) -> Optional[str]: - xml_doc = self.build_tree() - - try: - if self.io: - with open(self.io, "wb") as f: - f.write(xml_doc) - xml_doc = None - else: - xml_doc = xml_doc.decode(self.encoding).rstrip() - except (UnicodeDecodeError, OSError) as e: - raise e - - return xml_doc - -class LxmlXMLFormatter: +class LxmlXMLFormatter(BaseXMLFormatter): """ Class for formatting data in xml using Python standard library modules: `xml.etree.ElementTree` and `xml.dom.minidom`. - Parameters - ---------- - io : str or file-like - This can be either a string of raw XML, a valid URL, - file or file-like object. - - index : bool - Whether to include index in xml document. - - row_name : str - Name for root of xml document. Default is 'data'. - - root_name : str - Name for row elemens of xml document. Default is 'row'. - - na_rep : str - Missing data representation. 
- - attrs_cols : list - List of columns to write as attributes in row element. - - elem_cols : list - List of columns to write as children in row element. - - namespacess : dict - The namespaces to define in XML document as dicts with key - being namespace and value the URI. - - prefix : str - The prefix for each element in XML document including root. - - encoding : str - Encoding of xml object or document. - - xml_declaration : bool - Whether to include xml declaration at top line item in xml. - - pretty_print : bool - Whether to write xml document with line breaks and indentation. - - stylesheet : str or file-like - A URL, file, file-like object, or a raw string containing XSLT. - - See also - -------- - pandas.io.formats.xml.EtreeXMLFormatter - Notes ----- This class serves as default option. If user does not have `lxml` installed, `to_xml` will fall back with EtreeXMLFormatter. """ - def __init__( - self, - formatter: DataFrameFormatter, - io: Optional[FilePathOrBuffer[str]] = None, - index: Optional[bool] = True, - root_name: Optional[str] = "data", - row_name: Optional[str] = "row", - na_rep: Optional[str] = None, - attr_cols: Optional[Union[str, List[str]]] = None, - elem_cols: Optional[Union[str, List[str]]] = None, - namespaces: Optional[Dict[str, str]] = None, - prefix: Optional[str] = None, - encoding: Optional[str] = "utf-8", - xml_declaration: Optional[bool] = True, - pretty_print: Optional[bool] = True, - stylesheet: Optional[FilePathOrBuffer[str]] = None, - ) -> None: - self.fmt = formatter - self.io = io - self.index = index - self.root_name = root_name - self.row_name = row_name - self.na_rep = na_rep - self.attr_cols = attr_cols - self.elem_cols = elem_cols - self.namespaces = namespaces - self.prefix = prefix - self.encoding = encoding - self.xml_declaration = xml_declaration - self.pretty_print = pretty_print - self.stylesheet = stylesheet + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.validate_columns() 
self.validate_encoding() @@ -495,62 +447,6 @@ def build_tree(self) -> bytes: return self.out_xml - def validate_columns(self) -> None: - """ - Validate elems_cols and attrs_cols. - - This method will check if columns is list-like. - - Raises - ------ - ValueError - * If value is not a list and less then length of nodes. - """ - if self.attr_cols and not is_list_like(self.attr_cols): - raise TypeError( - f"{type(self.attr_cols).__name__} is not a valid type for attr_cols" - ) - - if self.elem_cols and not is_list_like(self.elem_cols): - raise TypeError( - f"{type(self.elem_cols).__name__} is not a valid type for elem_cols" - ) - - def validate_encoding(self) -> None: - """ - Validate encoding. - - This method will check if encoding is among listed under codecs. - - Raises - ------ - LookupError - * If encoding is not available in codecs. - """ - - try: - codecs.lookup(self.encoding) - except LookupError as e: - raise e - - def process_dataframe(self) -> dict: - """ - Adjust Data Frame to fit xml output. - - This method will adjust underlying data frame for xml output, - including replacing missing entities and including indexes. - """ - - na_dict = {"None": self.na_rep, "NaN": self.na_rep, "nan": self.na_rep} - - df = ( - (self.fmt.frame.reset_index().applymap(str).replace(na_dict)) - if self.index - else self.fmt.frame.applymap(str).replace(na_dict) - ) - - return df.to_dict(orient="index") - def convert_empty_str_key(self) -> None: """ Replace zero-lengh string in `namespaces`. @@ -562,39 +458,14 @@ def convert_empty_str_key(self) -> None: if self.namespaces and "" in self.namespaces.keys(): self.namespaces[None] = self.namespaces.pop("", "default") - def handle_indexes(self) -> None: - """ - Handle indexes. - - This method will add indexes into attr_cols or elem_cols. 
- """ - indexes = [x for x in self.frame_dicts[0].keys() if x not in self.orig_cols] - - if self.attr_cols and self.index: - self.attr_cols = list(indexes) + self.attr_cols - - if self.elem_cols and self.index: - self.elem_cols = list(indexes) + self.elem_cols - def get_prefix_uri(self) -> str: - """ - Get uri of namespace prefix. - - This method retrieves corresponding URI to prefix in namespaces. - - Raises - ------ - ValueError - *If prefix is not included in namespace dict. - """ - uri = "" if self.namespaces: if self.prefix: try: uri = f"{{{self.namespaces[self.prefix]}}}" except (KeyError): - raise KeyError("prefix is not included in namespaces") + raise KeyError(f"{self.prefix} is not included in namespaces") else: uri = f'{{{self.namespaces[""]}}}' @@ -656,25 +527,25 @@ def convert_io(self) -> Union[None, str]: as string, depending on object type. """ - obj = None - if isinstance(self.stylesheet, str): obj = self.stylesheet - if isinstance(self.stylesheet, bytes): + elif isinstance(self.stylesheet, bytes): obj = self.stylesheet.decode(self.encoding) - if isinstance(self.stylesheet, io.StringIO): + elif isinstance(self.stylesheet, io.StringIO): obj = self.stylesheet.getvalue() - if isinstance(self.stylesheet, io.BytesIO): + elif isinstance(self.stylesheet, io.BytesIO): obj = self.stylesheet.getvalue().decode(self.encoding) - if isinstance(self.stylesheet, io.TextIOWrapper): + elif isinstance(self.stylesheet, io.TextIOWrapper): obj = self.stylesheet.read() - if isinstance(self.stylesheet, io.BufferedReader): + elif isinstance(self.stylesheet, io.BufferedReader): obj = self.stylesheet.read().decode(self.encoding) + else: + obj = None return obj @@ -709,7 +580,7 @@ def parse_doc(self): if current_doc: is_xml = current_doc.startswith((" bytes: raise e return bytes(new_doc) - - def write_output(self) -> Optional[str]: - xml_doc = self.build_tree() - - try: - if self.io: - with open(self.io, "wb") as f: - f.write(xml_doc) - xml_doc = None - else: - xml_doc = 
xml_doc.decode(self.encoding).rstrip() - - except (UnicodeDecodeError, OSError) as e: - raise e - - return xml_doc diff --git a/pandas/io/xml.py b/pandas/io/xml.py index af2004c05428c..dd4736176d602 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1,6 +1,5 @@ """ -:mod:`pandas.io.xml` is a module containing functionality for dealing with -XML IO. +:mod:`pandas.io.xml` is a module for reading XML. """ @@ -10,8 +9,8 @@ from warnings import warn from pandas._typing import FilePathOrBuffer -from pandas.errors import ParserError -from pandas.util._decorators import deprecate_nonkeyword_arguments +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError, ParserError from pandas.core.dtypes.common import is_list_like @@ -21,10 +20,9 @@ from pandas.io.parsers import TextParser -class _EtreeFrameParser: +class _XMLFrameParser: """ - Internal class to parse XML into DataFrames with the Python - standard library XML modules: `xml.etree.ElementTree`. + Internal subclass to parse XML into DataFrames. Parameters ---------- @@ -58,15 +56,22 @@ class _EtreeFrameParser: See also -------- + pandas.io.xml._EtreeFrameParser pandas.io.xml._LxmlFrameParser Notes ----- - This class serves as fall back option if user does not have - ``lxml`` installed or user specifically requests ``etree`` parser. - """ + To subclass this class effectively you must override the following methods:` + * :func:`parse_data` + * :func:`_parse_nodes` + * :func:`_parse_doc` + * :func:`_validate_names` + * :func:`_validate_path` - from xml.etree.ElementTree import Element, ElementTree + + See each method's respective documentation for details on their + functionality. 
+ """ def __init__( self, @@ -87,6 +92,7 @@ def __init__( self.names = names self.encoding = encoding self.stylesheet = stylesheet + self.is_style = None def parse_data(self) -> List[Dict[str, List[str]]]: """ @@ -96,19 +102,7 @@ def parse_data(self) -> List[Dict[str, List[str]]]: validate xpath, names, parse and return specific nodes. """ - if self.stylesheet: - warn( - "To use stylesheet, you need lxml installed. " - "Nodes will be parsed on original XML at the xpath.", - UserWarning, - ) - - self.xml_doc = self._parse_doc() - - self._validate_path() - self._validate_names() - - return self._parse_nodes() + raise AbstractMethodError(self) def _parse_nodes(self) -> List[Dict[str, List[str]]]: """ @@ -130,6 +124,131 @@ def _parse_nodes(self) -> List[Dict[str, List[str]]]: will have optional keys filled withi None values. """ + raise AbstractMethodError(self) + + def _validate_path(self) -> None: + """ + Validate xpath. + + This method checks for syntax, evaluation, or empty nodes return. + + Raises + ------ + SyntaxError + * If xpah is not supported or issues with namespaces. + + ValueError + * If xpah does not return any nodes. + """ + + raise AbstractMethodError(self) + + def _validate_names(self) -> None: + """ + Validate names. + + This method will check if names is a list-like and aligns + with length of parse nodes. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + raise AbstractMethodError(self) + + def _convert_io(self, xml_data) -> Union[None, str]: + """ + Convert io object to string. + + This method will convert io object into a string or keep + as string, depending on object type. 
+ """ + + if isinstance(xml_data, str): + obj = xml_data + + elif isinstance(xml_data, bytes): + obj = xml_data.decode(self.encoding) + + elif isinstance(xml_data, io.StringIO): + obj = xml_data.getvalue() + + elif isinstance(xml_data, io.BytesIO): + obj = xml_data.getvalue().decode(self.encoding) + + elif isinstance(xml_data, io.TextIOWrapper): + obj = xml_data.read() + + elif isinstance(xml_data, io.BufferedReader): + obj = xml_data.read().decode(self.encoding) + else: + obj = None + + return obj + + def _parse_doc(self): + """ + Build tree from io. + + This method will parse io object into tree for parsing + conditionally by its specific object type. + + Raises + ------ + HttpError + * If URL cannot be reached. + + LookupError + * If xml document has incorrect or unknown encoding. + + OSError + * If file cannot be found. + + ParseError + * If xml document conntains syntax issues. + + ValueError + * If io object is not readable as string or file-like object. + """ + + raise AbstractMethodError(self) + + +class _EtreeFrameParser(_XMLFrameParser): + """ + Internal class to parse XML into DataFrames with the Python + standard library XML modules: `xml.etree.ElementTree`. + + Notes + ----- + This class serves as fall back option if user does not have + ``lxml`` installed or user specifically requests ``etree`` parser. + """ + + from xml.etree.ElementTree import Element, ElementTree + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def parse_data(self) -> List[Dict[str, List[str]]]: + + if self.stylesheet: + warn( + "To use stylesheet, you need lxml installed. 
" + "Nodes will be parsed on original XML at the xpath.", + UserWarning, + ) + + self.xml_doc = self._parse_doc() + + self._validate_path() + self._validate_names() + + return self._parse_nodes() + + def _parse_nodes(self) -> List[Dict[str, List[str]]]: + elems = self.xml_doc.findall(self.xpath, namespaces=self.namespaces) if self.elems_only and self.attrs_only: @@ -215,15 +334,6 @@ def _parse_nodes(self) -> List[Dict[str, List[str]]]: def _validate_path(self) -> None: """ - Validate xpath. - - This method checks for sytnax, evaluation, or empty nodes return. - - Raises - ------ - SyntaxError - * If xpah is not supported or issues with namespaces. - Notes ----- `etree` supports limited XPath. If user attempts a more complex @@ -252,17 +362,6 @@ def _validate_path(self) -> None: ) def _validate_names(self) -> None: - """ - Validate names. - - This method will check if names is a list-like and aligns - with length of parse nodes. - - Raises - ------ - ValueError - * If value is not a list and less then length of nodes. - """ if self.names: children = self.xml_doc.find( self.xpath, namespaces=self.namespaces @@ -278,65 +377,14 @@ def _validate_names(self) -> None: f"{type(self.names).__name__} is not a valid type for names" ) - def _convert_io(self) -> Union[None, str]: - """ - Convert io object to string. - - This method will convert io object into a string or keep - as string, depending on object type. - """ - - obj = None - - if isinstance(self.io, str): - obj = self.io - - if isinstance(self.io, bytes): - obj = self.io.decode(self.encoding) - - if isinstance(self.io, io.StringIO): - obj = self.io.getvalue() - - if isinstance(self.io, io.BytesIO): - obj = self.io.getvalue().decode(self.encoding) - - if isinstance(self.io, io.TextIOWrapper): - obj = self.io.read() - - if isinstance(self.io, io.BufferedReader): - obj = self.io.read().decode(self.encoding) - - return obj - def _parse_doc(self) -> Union[Element, ElementTree]: - """ - Build tree from io. 
- - This method will parse io object into tree for parsing - conditionally by its specific object type. - - Raises - ------ - HttpError - * If URL cannot be reached. - - OSError - * If file cannot be found. - - ParseError - * If xml document conntains syntax issues. - - ValueError - * If io object is not readable as string or file-like object. - """ - from xml.etree.ElementTree import ParseError, fromstring, parse - current_doc = self._convert_io() + current_doc = self._convert_io(self.io) if current_doc: is_xml = current_doc.startswith((" Union[Element, ElementTree]: return r -class _LxmlFrameParser: +class _LxmlFrameParser(_XMLFrameParser): """ Internal class to parse XML into DataFrames with third-party full-featured XML library, `lxml`, that supports XPath 1.0 and XSLT 1.0. - Parameters - ---------- - io : str or file-like - This can be either a string of raw XML, a valid URL, - file or file-like object. - - xpath : str or regex - The XPath expression to parse required set of nodes for - migration to `Data Frame`. - - namespacess : dict - The namespaces defined in XML document (`xmlns:namespace='URI') - as dicts with key being namespace and value the URI. - - elems_only : bool - Parse only the child elements at the specified `xpath`. - - attrs_only : bool - Parse only the attributes at the specified `xpath`. - - names : list - Column names for Data Frame of parsed XML data. - - encoding : str - Encoding of xml object or document. - - stylesheet : str or file-like - URL, file, file-like object, or a raw string containing XSLT. - - See also - -------- - pandas.io.xml._EtreeFrameParser - Notes ----- This is the default class called with `_EtreeFrameParser` serving @@ -405,28 +420,8 @@ class _LxmlFrameParser: efficiency. 
""" - def __init__( - self, - io, - xpath, - namespaces, - elems_only, - attrs_only, - names, - encoding, - stylesheet, - ): - self.io = io - self.xpath = xpath - self.namespaces = namespaces - self.elems_only = elems_only - self.attrs_only = attrs_only - self.names = names - self.encoding = encoding - self.stylesheet = stylesheet - self.is_style = False - - self.compression = "infer" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) def parse_data(self) -> List[Dict[str, List[str]]]: """ @@ -450,24 +445,6 @@ def parse_data(self) -> List[Dict[str, List[str]]]: return self._parse_nodes() def _parse_nodes(self) -> List[Dict[str, List[str]]]: - """ - Parse xml nodes. - - This method will parse the children and attributes of elements - in xpath, conditionally for only elements, only attributes - or both while optionally renaming node names. - - Raises - ------ - ValueError - * If only elements and only attributes are specified. - - Notes - ----- - Namespace URIs will be removed from return node values.Also, - elements with missing children or attributes compared to siblings - will have optional keys filled withi None values. - """ elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) if self.elems_only and self.attrs_only: @@ -570,21 +547,6 @@ def _transform_doc(self): return new_doc def _validate_path(self) -> None: - """ - Validate xpath. - - This method checks for sytnax, evaluation, or empty nodes return. - - Raises - ------ - SyntaxError - * If xpah is not supported or issues with namespaces. - - Notes - ----- - `etree` supports limited XPath. If user attempts a more complex - expression syntax error will raise. - """ from lxml.etree import XPathEvalError, XPathSyntaxError try: @@ -632,70 +594,16 @@ def _validate_names(self) -> None: f"{type(self.names).__name__} is not a valid type for names" ) - def _convert_io(self) -> Union[None, str]: - """ - Convert filepath_or_buffer object to string. 
- - This method will convert io object into a string or keep - as string, depending on object type. - """ - - obj = None - - if isinstance(self.raw_doc, str): - obj = self.raw_doc - - if isinstance(self.raw_doc, bytes): - obj = self.raw_doc.decode(self.encoding) - - if isinstance(self.raw_doc, io.StringIO): - obj = self.raw_doc.getvalue() - - if isinstance(self.raw_doc, io.BytesIO): - obj = self.raw_doc.getvalue().decode(self.encoding) - - if isinstance(self.raw_doc, io.TextIOWrapper): - obj = self.raw_doc.read() - - if isinstance(self.raw_doc, io.BufferedReader): - obj = self.raw_doc.read().decode(self.encoding) - - return obj - def _parse_doc(self): - """ - Build tree from io. - - This method will parse io object into tree for parsing - conditionally by its specific object type. - - Raises - ------ - HttpError - * If URL cannot be reached. - - LookupError - * If xml document has incorrect or unknown encoding. - - OSError - * If file cannot be found. - - XMLSyntaxError - * If xml document conntains syntax issues. - - ValueError - * If io object is not readable as string or file-like object. - """ - from lxml.etree import XML, XMLParser, XMLSyntaxError, parse self.raw_doc = self.stylesheet if self.is_style else self.io - current_doc = self._convert_io() + current_doc = self._convert_io(self.raw_doc) if current_doc: is_xml = current_doc.startswith((" DataFrame: r""" - Read XML docuemnts into a ``DataFrame`` object. + Read XML document into a ``DataFrame`` object. .. 
versionadded:: 1.3.0 diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 62958894981fd..5234f25399fef 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -1,4 +1,6 @@ from io import BytesIO, StringIO +import os +import sys import numpy as np import pytest @@ -10,6 +12,44 @@ from pandas.io.xml import read_xml +""" +CHECKLIST + +etree +[X] - TypeError("...is not a valid type for attr_cols") +[X] - TypeError("...is not a valid type for elem_cols") +[X] - LookupError("unknown encoding") +[X] - KeyError("...is not included in namespaces") +[X] - KeyError("no valid column") +[X] - UserWarning("To use stylesheet, you need lxml installed.") +[X] - ImportWarning("You do not have lxml installed.") + +lxml +[X] - TypeError("...is not a valid type for attr_cols") +[X] - TypeError("...is not a valid type for elem_cols") +[X] - LookupError("unknown encoding") +[] - UnicodeDecodeError (NEED TO NON UTF-8 STYLESHEET) +[] - OSError (NEED UNREACHABLE FILE PATH) +[X] - KeyError("...is not included in namespaces") +[X] - KeyError("no valid column") +[X] - ValueError("stylesheet is not a url, file, or xml string.") +[] - LookupError +[] - URLError (GENERAL ERROR USUALLY DUE TO NETWORKING) +[] - HTTPError (NEED TO ONLINE STYLESHEET) +[X] - OSError("failed to load external entity") +[X] - XMLSyntaxError("Opening and ending tag mismatch") +[X] - XSLTApplyError("Cannot resolve URI") +[X] - XSLTParseError("failed to compile") +""" + +etree_attr_skip_param = pytest.param( + "etree", + marks=pytest.mark.skipif( + sys.version_info <= (3, 7), + reason=("etree alpha ordered attributes <= py3.7"), + ), +) + geom_df = DataFrame( { "shape": ["square", "circle", "triangle"], @@ -93,7 +133,28 @@ """ -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.fixture(params=["rb", "r"]) +def mode(request): + return request.param + + +@pytest.fixture(params=["lxml", "etree"]) +def parser(request): + return 
request.param + + +# FAIL SAFE WARNING + + +@td.skip_if_installed("lxml") +def test_failsafe_parser(datapath): + with pytest.warns(ImportWarning, match=("You do not have lxml installed.")): + geom_df.to_xml() + + +# FILE OUTPUT + + def test_file_output_str_read(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, parser=parser) @@ -112,7 +173,6 @@ def test_file_output_str_read(datapath, parser): assert output == from_file_expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_file_output_bytes_read(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, parser=parser) @@ -131,7 +191,6 @@ def test_file_output_bytes_read(datapath, parser): assert output == from_file_expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_str_output(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, parser=parser) @@ -147,7 +206,9 @@ def test_str_output(datapath, parser): assert output == from_file_expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# INDEX + + def test_index_false(datapath, parser): expected = """\ @@ -192,7 +253,6 @@ def test_index_false(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_index_false_rename_row_root(datapath, parser): expected = """\ @@ -239,6 +299,8 @@ def test_index_false_rename_row_root(datapath, parser): assert output == expected +# NA_REP + na_expected = """\ @@ -263,7 +325,6 @@ def test_index_false_rename_row_root(datapath, parser): """ -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_na_elem_output(datapath, parser): output = geom_df.to_xml(parser=parser) @@ -276,7 +337,6 @@ def test_na_elem_output(datapath, parser): assert output == na_expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_na_empty_str_elem_option(datapath, parser): output = 
geom_df.to_xml(na_rep="", parser=parser) @@ -289,7 +349,6 @@ def test_na_empty_str_elem_option(datapath, parser): assert output == na_expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_na_empty_elem_option(datapath, parser): expected = """\ @@ -325,7 +384,10 @@ def test_na_empty_elem_option(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# ATTR_COLS + + +@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) def test_attrs_cols_nan_output(datapath, parser): expected = """\ @@ -346,14 +408,17 @@ def test_attrs_cols_nan_output(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) def test_attrs_cols_prefix(datapath, parser): expected = """\ - - - + + + """ output = geom_df.to_xml( @@ -372,19 +437,19 @@ def test_attrs_cols_prefix(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_attrs_unknown_column(parser): with pytest.raises(KeyError, match=("no valid column")): geom_df.to_xml(attr_cols=["shape", "degreees", "sides"], parser=parser) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_attrs_wrong_type(parser): with pytest.raises(TypeError, match=("is not a valid type for attr_cols")): geom_df.to_xml(attr_cols='"shape", "degreees", "sides"', parser=parser) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# ELEM_COLS + + def test_elems_cols_nan_output(datapath, parser): elems_cols_expected = """\ @@ -419,19 +484,16 @@ def test_elems_cols_nan_output(datapath, parser): assert output == elems_cols_expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_elems_unknown_column(parser): with pytest.raises(KeyError, match=("no valid column")): geom_df.to_xml(elem_cols=["shape", "degreees", "sides"], parser=parser) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def 
test_elems_wrong_type(parser): with pytest.raises(TypeError, match=("is not a valid type for elem_cols")): geom_df.to_xml(elem_cols='"shape", "degreees", "sides"', parser=parser) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_elems_and_attrs_cols(datapath, parser): elems_cols_expected = """\ @@ -466,7 +528,9 @@ def test_elems_and_attrs_cols(datapath, parser): assert output == elems_cols_expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# HIERARCHICAL COLUMNS + + def test_hierarchical_columns(datapath, parser): expected = """\ @@ -475,29 +539,29 @@ def test_hierarchical_columns(datapath, parser): inner terrestrial 4 - 11.811666 - 2.9529165 + 11.81 + 2.95 outer gas giant 2 - 2466.5044 - 1233.2522 + 2466.5 + 1233.25 outer ice giant 2 - 189.2253 - 94.61265 + 189.23 + 94.61 All 8 - 2667.541366 - 333.44267075 + 2667.54 + 333.44 """ @@ -506,7 +570,7 @@ def test_hierarchical_columns(datapath, parser): values="mass", aggfunc=["count", "sum", "mean"], margins=True, - ) + ).round(2) output = pvt.to_xml(parser=parser) @@ -519,19 +583,19 @@ def test_hierarchical_columns(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) def test_hierarchical_attrs_columns(datapath, parser): expected = """\ +sum_mass="11.81" mean_mass="2.95"/> +sum_mass="2466.5" mean_mass="1233.25"/> +sum_mass="189.23" mean_mass="94.61"/> +sum_mass="2667.54" mean_mass="333.44"/> """ pvt = planet_df.pivot_table( @@ -539,7 +603,7 @@ def test_hierarchical_attrs_columns(datapath, parser): values="mass", aggfunc=["count", "sum", "mean"], margins=True, - ) + ).round(2) output = pvt.to_xml(attr_cols=list(pvt.reset_index().columns.values), parser=parser) @@ -552,7 +616,9 @@ def test_hierarchical_attrs_columns(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# MULTIINDEX + + def test_multi_index(datapath, parser): expected 
= """\ @@ -561,26 +627,30 @@ def test_multi_index(datapath, parser): inner terrestrial 4 - 11.811666 - 2.9529165 + 11.81 + 2.95 outer gas giant 2 - 2466.5044 - 1233.2522 + 2466.5 + 1233.25 outer ice giant 2 - 189.2253 - 94.61265 + 189.23 + 94.61 """ - agg = planet_df.groupby(["location", "type"])["mass"].agg(["count", "sum", "mean"]) + agg = ( + planet_df.groupby(["location", "type"])["mass"] + .agg(["count", "sum", "mean"]) + .round(2) + ) output = agg.to_xml(parser=parser) @@ -593,18 +663,24 @@ def test_multi_index(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) def test_multi_index_attrs_cols(datapath, parser): expected = """\ - - - + + + """ - agg = planet_df.groupby(["location", "type"])["mass"].agg(["count", "sum", "mean"]) - + agg = ( + planet_df.groupby(["location", "type"])["mass"] + .agg(["count", "sum", "mean"]) + .round(2) + ) output = agg.to_xml(attr_cols=list(agg.reset_index().columns.values), parser=parser) # etree and lxml differs on quotes and case in xml declaration @@ -616,7 +692,9 @@ def test_multi_index_attrs_cols(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# NAMESPACE + + def test_default_namespace(parser): expected = """\ @@ -652,7 +730,9 @@ def test_default_namespace(parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# PREFIX + + def test_namespace_prefix(parser): expected = """\ @@ -690,16 +770,14 @@ def test_namespace_prefix(parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_missing_prefix_in_nmsp(parser): - with pytest.raises(KeyError, match=("prefix is not included in namespaces")): + with pytest.raises(KeyError, match=("doc is not included in namespaces")): geom_df.to_xml( namespaces={"": "http://example.com"}, prefix="doc", parser=parser ) -@pytest.mark.parametrize("parser", 
["lxml", "etree"]) def test_namespace_prefix_and_default(parser): expected = """\ @@ -745,6 +823,8 @@ def test_namespace_prefix_and_default(parser): assert output == expected +# ENCODING + encoding_expected = """\ @@ -781,7 +861,6 @@ def test_namespace_prefix_and_default(parser): """ -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_encoding_option_str(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") df_file = read_xml(filename, parser=parser, encoding="ISO-8859-1").head(5) @@ -806,7 +885,6 @@ def test_correct_encoding_file(datapath): df_file.to_xml(path, index=False, encoding="ISO-8859-1") -@pytest.mark.parametrize("parser", ["lxml", "etree"]) @pytest.mark.parametrize("encoding", ["UTF-8", "UTF-16", "ISO-8859-1"]) def test_wrong_encoding_option_lxml(datapath, parser, encoding): filename = datapath("io", "data", "xml", "baby_names.xml") @@ -816,13 +894,14 @@ def test_wrong_encoding_option_lxml(datapath, parser, encoding): df_file.to_xml(path, index=False, encoding=encoding, parser=parser) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_misspelled_encoding(parser): with pytest.raises(LookupError, match=("unknown encoding")): geom_df.to_xml(parser=parser, encoding="uft-8") -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# PRETTY PRINT + + def test_xml_declaration_pretty_print(parser): expected = """\ @@ -851,7 +930,6 @@ def test_xml_declaration_pretty_print(parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_no_pretty_print_with_decl(parser): expected = ( "\n" @@ -873,7 +951,6 @@ def test_no_pretty_print_with_decl(parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_no_pretty_print_no_decl(parser): expected = ( "0square" @@ -889,6 +966,8 @@ def test_no_pretty_print_no_decl(parser): assert output == expected +# STYLESHEET + xsl_expected = """\ @@ -914,7 +993,6 @@ def test_no_pretty_print_no_decl(parser): 
@td.skip_if_no("lxml") -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") @@ -923,7 +1001,6 @@ def test_stylesheet_file_like(datapath, mode): @td.skip_if_no("lxml") -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_stylesheet_io(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") @@ -938,7 +1015,6 @@ def test_stylesheet_io(datapath, mode): @td.skip_if_no("lxml") -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") @@ -950,6 +1026,119 @@ def test_stylesheet_buffered_reader(datapath, mode): assert output == xsl_expected +def test_stylesheet_with_etree(datapath): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + with pytest.warns( + UserWarning, match=("To use stylesheet, you need lxml installed.") + ): + geom_df.to_xml(parser="etree", stylesheet=xsl) + + +@td.skip_if_installed("lxml") +def test_stylesheet_without_lxml(datapath, parser): + xsl = datapath("io", "data", "xml", "row_field_output.xslt") + + with pytest.warns( + UserWarning, match=("To use stylesheet, you need lxml installed.") + ): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_stylesheet_wrong_path(datapath, parser): + xsl = os.path.join("data", "xml", "row_field_output.xslt") + + with pytest.raises(OSError, match=("failed to load external entity")): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_stylesheet_not_path_buffer(parser): + with pytest.raises( + ValueError, match=("stylesheet is not a url, file, or xml string") + ): + geom_df.to_xml(stylesheet=DataFrame) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_syntax(): + from lxml.etree import XMLSyntaxError + + xsl = """\ + + + + + + + + + + + + + + + + + + +""" + + with pytest.raises(XMLSyntaxError, match=("Opening and ending tag mismatch")): + 
geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_eval(): + from lxml.etree import XSLTParseError + + xsl = """\ + + + + + + + + + + + + + + + + + + +""" + + with pytest.raises(XSLTParseError, match=("failed to compile")): + geom_df.to_xml(stylesheet=xsl) + + +def test_incorrect_xsl_apply(parser): + from lxml.etree import XSLTApplyError + + xsl = """\ + + + + + + + + + +""" + + with pytest.raises(XSLTApplyError, match=("Cannot resolve URI")): + with tm.ensure_clean("test.xml") as path: + geom_df.to_xml(path, stylesheet=xsl) + + @td.skip_if_no("lxml") def test_style_to_csv(): xsl = """\ @@ -970,7 +1159,7 @@ def test_style_to_csv(): """ - out_csv = geom_df.to_csv().strip() + out_csv = geom_df.to_csv(line_terminator="\n").strip() out_xml = geom_df.to_xml(stylesheet=xsl) assert out_csv == out_xml @@ -1053,47 +1242,3 @@ def test_style_to_json(): out_xml = geom_df.to_xml(stylesheet=xsl) assert out_json == out_xml - - -@pytest.mark.skip( - reason="incorrect tag in from to_html() to be skipped until fix" -) -def test_style_to_html(): - xsl = """\ - - - - - - - - - - - - - - - - - -
shapedegreessides
-
- - - - - - - - - - - - -
""" - - out_html = geom_df.to_html() - out_xml = geom_df.to_xml(stylesheet=xsl) - - assert out_html == out_xml diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 375e9c2472742..23eb128a30379 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -11,6 +11,43 @@ from pandas.io.xml import read_xml +""" +CHECK LIST + +etree +[X] - ValueError("Either element or attributes can be parsed not both.") +[X] - ValueError("xpath does not return any nodes...") +[X] - SyntaxError("You have used an incorrect or unsupported XPath") +[X] - ValueError("names does not match length of child elements in xpath.") +[X] - TypeError("...is not a valid type for names") +[X] - ValueError("io is not a url, file, or xml string") +[] - URLError (GENERAL ERROR USUALLY DUE TO NETWORKING) +[X] - HTTPError("HTTP Error 404: Not Found") +[X] - OSError("No such file") +[] - ParseError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) +[X] - ImportWarning("You do not have lxml installed.") + +lxml +[X] - ValueError("Either element or attributes can be parsed not both.") +[X] - XSLTApplyError("Cannot resolve URI") +[X] - XSLTParseError("document is not a stylesheet") +[X] - ValueError("xpath does not return any nodes.") +[X] - XPathEvalError("Invalid expression") +[] - XPathSyntaxError (OLD VERSION IN lxml FOR XPATH ERRORS) +[X] - TypeError("empty namespace prefix is not supported in XPath") +[X] - ValueError("names does not match length of child elements in xpath.") +[X] - TypeError("...is not a valid type for names") +[X] - ValueError("io is not a url, file, or xml string") +[X] - LookupError(unknown encoding) +[] - URLError (GENERAL ERROR USUALLY DUE TO NETWORKING) +[X - HTTPError("HTTP Error 404: Not Found") +[X] - OSError("failed to load external entity") +[X] - XMLSyntaxError("Start tag expected, '<' not found") +[] - ParserError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) +[X] - ValueError("Values for parser can only be lxml or etree.") +""" + + 
xml_default_nmsp = """\ @@ -52,6 +89,30 @@ """ +@pytest.fixture(params=["rb", "r"]) +def mode(request): + return request.param + + +@pytest.fixture(params=["lxml", "etree"]) +def parser(request): + return request.param + + +# FAIL SAFE WARNING + + +@td.skip_if_installed("lxml") +def test_failsafe_parser(datapath): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.warns(ImportWarning, match=("You do not have lxml installed.")): + read_xml(filename) + + +# FILE / URL + + def test_parser_consistency_file(datapath): filename = datapath("io", "data", "xml", "books.xml") df_file_lxml = read_xml(filename, parser="lxml") @@ -67,14 +128,12 @@ def test_parser_consistency_url(datapath): "https://data.cityofchicago.org/api/views/" "8pix-ypme/rows.xml?accessType=DOWNLOAD" ) - df_file_lxml = read_xml(url, xpath=".//row/row", parser="lxml") - df_file_etree = read_xml(url, xpath=".//row/row", parser="etree") + df_url_lxml = read_xml(url, xpath=".//row/row", parser="lxml") + df_url_etree = read_xml(url, xpath=".//row/row", parser="etree") - tm.assert_frame_equal(df_file_lxml, df_file_etree) + tm.assert_frame_equal(df_url_lxml, df_url_etree) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_file_like(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") with open(filename, mode) as f: @@ -93,8 +152,6 @@ def test_file_like(datapath, parser, mode): tm.assert_frame_equal(df_file, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_file_io(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") with open(filename, mode) as f: @@ -118,8 +175,6 @@ def test_file_io(datapath, parser, mode): tm.assert_frame_equal(df_io, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_file_buffered_reader_string(datapath, parser, 
mode): filename = datapath("io", "data", "xml", "books.xml") with open(filename, mode) as f: @@ -140,8 +195,6 @@ def test_file_buffered_reader_string(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") with open(filename, mode) as f: @@ -163,6 +216,11 @@ def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) +def test_not_io_object(parser): + with pytest.raises(ValueError, match=("io is not a url, file, or xml string")): + read_xml(DataFrame, parser="lxml") + + def test_wrong_file_lxml(datapath): with pytest.raises(OSError, match=("failed to load external entity")): filename = os.path.join("data", "html", "books.xml") @@ -195,14 +253,16 @@ def test_url(): tm.assert_frame_equal(df_url, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_wrong_url(parser): with pytest.raises(HTTPError, match=("HTTP Error 404: Not Found")): url = "https://www.w3schools.com/xml/python.xml" read_xml(url, xpath=".//book[count(*)=4]", parser=parser) -def test_empty_xpath_lxml(datapath): +# XPATH + + +def test_empty_xpath_lxml(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") with pytest.raises(ValueError, match=("xpath does not return any nodes")): read_xml(filename, xpath=".//python", parser="lxml") @@ -225,7 +285,9 @@ def test_bad_xpath_lxml(datapath): read_xml(filename, xpath=".//[book]", parser="lxml") -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# NAMESPACE + + def test_default_namespace(parser): df_nmsp = read_xml( xml_default_nmsp, @@ -245,7 +307,6 @@ def test_default_namespace(parser): tm.assert_frame_equal(df_nmsp, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_prefix_namespace(parser): 
df_nmsp = read_xml( xml_prefix_nmsp, @@ -301,7 +362,9 @@ def test_consistency_prefix_namespace(): tm.assert_frame_equal(df_lxml, df_etree) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# PREFIX + + def test_missing_prefix_with_default_namespace(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") with pytest.raises(ValueError, match=("xpath does not return any nodes")): @@ -337,7 +400,9 @@ def test_none_namespace_prefix(key): ) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +# ELEMS AND ATTRS + + def test_file_elems_and_attrs(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, parser=parser) @@ -354,7 +419,6 @@ def test_file_elems_and_attrs(datapath, parser): tm.assert_frame_equal(df_file, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_file_only_attrs(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, attrs_only=True, parser=parser) @@ -363,7 +427,6 @@ def test_file_only_attrs(datapath, parser): tm.assert_frame_equal(df_file, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_file_only_elems(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, elems_only=True, parser=parser) @@ -379,7 +442,6 @@ def test_file_only_elems(datapath, parser): tm.assert_frame_equal(df_file, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_elem_and_attrs_only(datapath, parser): filename = datapath("io", "data", "xml", "cta_rail_lines.kml") with pytest.raises( @@ -389,7 +451,36 @@ def test_elem_and_attrs_only(datapath, parser): read_xml(filename, elems_only=True, attrs_only=True, parser=parser) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) +def test_attribute_centric_xml(): + xml = """\ + + + + + + + + + + + + + + + + + +""" + + df_lxml = read_xml(xml, xpath=".//station") + df_etree = read_xml(xml, 
xpath=".//station", parser="etree") + + tm.assert_frame_equal(df_lxml, df_etree) + + +# NAMES + + def test_names_option_output(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml( @@ -409,7 +500,6 @@ def test_names_option_output(datapath, parser): tm.assert_frame_equal(df_file, df_expected) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_names_option_wrong_length(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") @@ -417,7 +507,6 @@ def test_names_option_wrong_length(datapath, parser): read_xml(filename, names=["Col1", "Col2", "Col3"], parser=parser) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_names_option_wrong_type(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") @@ -425,6 +514,9 @@ def test_names_option_wrong_type(datapath, parser): read_xml(filename, names="Col1, Col2, Col3", parser=parser) +# ENCODING + + @td.skip_if_no("lxml") def test_wrong_encoding_lxml(datapath): from lxml.etree import XMLSyntaxError @@ -457,7 +549,6 @@ def test_wrong_encoding_etree(datapath, encoding): read_xml(filename, parser="etree", encoding=encoding) -@pytest.mark.parametrize("parser", ["lxml", "etree"]) def test_ascii_encoding(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") read_xml(filename, encoding="ascii", parser=parser) @@ -471,31 +562,7 @@ def test_parser_consistency_with_encoding(datapath): tm.assert_frame_equal(df_lxml, df_etree) -def test_attribute_centric_xml(): - xml = """\ - - - - - - - - - - - - - - - - - -""" - - df_lxml = read_xml(xml, xpath=".//station") - df_etree = read_xml(xml, xpath=".//station", parser="etree") - - tm.assert_frame_equal(df_lxml, df_etree) +# PARSER def test_wrong_parser(datapath): @@ -507,6 +574,9 @@ def test_wrong_parser(datapath): read_xml(filename, parser="bs4") +# STYLESHEET + + @td.skip_if_no("lxml") def test_stylesheet_file(datapath): kml = datapath("io", "data", "xml", 
"cta_rail_lines.kml") @@ -630,7 +700,6 @@ def test_stylesheet_file(datapath): @td.skip_if_no("lxml") -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_stylesheet_file_like(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") @@ -640,7 +709,6 @@ def test_stylesheet_file_like(datapath, mode): @td.skip_if_no("lxml") -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_stylesheet_io(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") @@ -653,7 +721,6 @@ def test_stylesheet_io(datapath, mode): @td.skip_if_no("lxml") -@pytest.mark.parametrize("mode", ["rb", "r"]) def test_stylesheet_buffered_reader(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") @@ -664,8 +731,102 @@ def test_stylesheet_buffered_reader(datapath, mode): read_xml(kml, stylesheet=xsl_obj) +def test_not_stylesheet(datapath): + from lxml.etree import XSLTParseError + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(XSLTParseError, match=("document is not a stylesheet")): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_syntax(datapath): + from lxml.etree import XMLSyntaxError + + xsl = """\ + + + + + + + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises( + XMLSyntaxError, match=("Extra content at the end of the document") + ): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_eval(datapath): + from lxml.etree import XSLTParseError + + xsl = """\ + + + + + + + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises(XSLTParseError, match=("failed to compile")): + read_xml(kml, stylesheet=xsl) + + 
+@td.skip_if_no("lxml") +def test_incorrect_xsl_apply(datapath): + from lxml.etree import XSLTApplyError + + xsl = """\ + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises(XSLTApplyError, match=("Cannot resolve URI")): + read_xml(kml, stylesheet=xsl) + + @td.skip_if_no("lxml") -def test_wrong_stylesheet(datapath): +def test_wrong_stylesheet(): kml = os.path.join("data", "xml", "cta_rail_lines.kml") xsl = os.path.join("data", "xml", "flatten.xsl") From fadcb679121cbfd065bd22d50495f813e035efa1 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Tue, 2 Feb 2021 23:17:06 -0600 Subject: [PATCH 03/35] Fixed import_optional_dependency() args --- pandas/io/formats/format.py | 4 +--- pandas/io/xml.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ebf3cf0852575..870e2a5976319 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1085,9 +1085,7 @@ def to_xml( from pandas.io.formats.xml import EtreeXMLFormatter, LxmlXMLFormatter - lxml = import_optional_dependency( - "lxml.etree", raise_on_missing=False, on_version="ignore" - ) + lxml = import_optional_dependency("lxml.etree", errors="ignore") if parser == "lxml": if lxml is not None: diff --git a/pandas/io/xml.py b/pandas/io/xml.py index dd4736176d602..0302c5f287e94 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -674,9 +674,7 @@ def _parse( fallback option with etree parser. 
""" - lxml = import_optional_dependency( - "lxml.etree", raise_on_missing=False, on_version="ignore" - ) + lxml = import_optional_dependency("lxml.etree", errors="ignore") if parser == "lxml": if lxml is not None: From ac5fd3a861b4032c8190ad733b7063e3b6664249 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 3 Feb 2021 01:05:10 -0600 Subject: [PATCH 04/35] Fix fixture and param name collision and check two errors in tests --- pandas/tests/io/formats/test_to_xml.py | 23 +++++++++++++---------- pandas/tests/io/test_xml.py | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 5234f25399fef..412c07dfd7960 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -387,8 +387,8 @@ def test_na_empty_elem_option(datapath, parser): # ATTR_COLS -@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) -def test_attrs_cols_nan_output(datapath, parser): +@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) +def test_attrs_cols_nan_output(datapath, attrs_parser): expected = """\ @@ -408,8 +408,8 @@ def test_attrs_cols_nan_output(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) -def test_attrs_cols_prefix(datapath, parser): +@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) +def test_attrs_cols_prefix(datapath, attrs_parser): expected = """\ @@ -583,8 +583,8 @@ def test_hierarchical_columns(datapath, parser): assert output == expected -@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) -def test_hierarchical_attrs_columns(datapath, parser): +@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) +def test_hierarchical_attrs_columns(datapath, attrs_parser): expected = """\ @@ -663,8 +663,8 @@ def test_multi_index(datapath, parser): assert output == expected 
-@pytest.mark.parametrize("parser", ["lxml", etree_attr_skip_param]) -def test_multi_index_attrs_cols(datapath, parser): +@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) +def test_multi_index_attrs_cols(datapath, attrs_parser): expected = """\ @@ -1037,7 +1037,7 @@ def test_stylesheet_with_etree(datapath): @td.skip_if_installed("lxml") def test_stylesheet_without_lxml(datapath, parser): - xsl = datapath("io", "data", "xml", "row_field_output.xslt") + xsl = datapath("io", "data", "xml", "row_field_output.xsl") with pytest.warns( UserWarning, match=("To use stylesheet, you need lxml installed.") @@ -1049,7 +1049,10 @@ def test_stylesheet_without_lxml(datapath, parser): def test_stylesheet_wrong_path(datapath, parser): xsl = os.path.join("data", "xml", "row_field_output.xslt") - with pytest.raises(OSError, match=("failed to load external entity")): + with pytest.raises( + (OSError, FileNotFoundError), + match=("failed to load external entity|No such file or directory"), + ): geom_df.to_xml(stylesheet=xsl) diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 23eb128a30379..53926765f00d8 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -113,6 +113,7 @@ def test_failsafe_parser(datapath): # FILE / URL +@td.skip_if_no("lxml") def test_parser_consistency_file(datapath): filename = datapath("io", "data", "xml", "books.xml") df_file_lxml = read_xml(filename, parser="lxml") @@ -123,6 +124,7 @@ def test_parser_consistency_file(datapath): @tm.network @pytest.mark.slow +@td.skip_if_no("lxml") def test_parser_consistency_url(datapath): url = ( "https://data.cityofchicago.org/api/views/" @@ -216,13 +218,18 @@ def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) +@td.skip_if_no("lxml") def test_not_io_object(parser): with pytest.raises(ValueError, match=("io is not a url, file, or xml string")): read_xml(DataFrame, parser="lxml") 
+@td.skip_if_no("lxml") def test_wrong_file_lxml(datapath): - with pytest.raises(OSError, match=("failed to load external entity")): + with pytest.raises( + (OSError, FileNotFoundError), + match=("failed to load external entity|No such file or directory"), + ): filename = os.path.join("data", "html", "books.xml") read_xml(filename, parser="lxml") @@ -731,6 +738,7 @@ def test_stylesheet_buffered_reader(datapath, mode): read_xml(kml, stylesheet=xsl_obj) +@td.skip_if_no("lxml") def test_not_stylesheet(datapath): from lxml.etree import XSLTParseError @@ -830,7 +838,10 @@ def test_wrong_stylesheet(): kml = os.path.join("data", "xml", "cta_rail_lines.kml") xsl = os.path.join("data", "xml", "flatten.xsl") - with pytest.raises(OSError, match=("failed to load external entity")): + with pytest.raises( + (OSError, FileNotFoundError), + match=("failed to load external entity|No such file or directory"), + ): read_xml(kml, stylesheet=xsl) From 938b0a091c8e4d4fe65ce9099f3c208a5798215c Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 3 Feb 2021 09:31:32 -0600 Subject: [PATCH 05/35] Adjusted tests to handle etree version issues --- pandas/tests/io/formats/test_to_xml.py | 57 ++++++++++++++------------ pandas/tests/io/test_xml.py | 10 ++--- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 412c07dfd7960..4aadbdb4b32e4 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -28,28 +28,20 @@ [X] - TypeError("...is not a valid type for attr_cols") [X] - TypeError("...is not a valid type for elem_cols") [X] - LookupError("unknown encoding") -[] - UnicodeDecodeError (NEED TO NON UTF-8 STYLESHEET) -[] - OSError (NEED UNREACHABLE FILE PATH) +[] - UnicodeDecodeError (NEED NON-UTF-8 STYLESHEET) +[] - OSError (NEED UNREACHABLE LOCAL FILE PATH) [X] - KeyError("...is not included in namespaces") [X] - KeyError("no valid column") [X] - 
ValueError("stylesheet is not a url, file, or xml string.") [] - LookupError -[] - URLError (GENERAL ERROR USUALLY DUE TO NETWORKING) -[] - HTTPError (NEED TO ONLINE STYLESHEET) +[] - URLError (USUALLY DUE TO NETWORKING) +[] - HTTPError (NEED AN ONLINE STYLESHEET) [X] - OSError("failed to load external entity") [X] - XMLSyntaxError("Opening and ending tag mismatch") [X] - XSLTApplyError("Cannot resolve URI") [X] - XSLTParseError("failed to compile") """ -etree_attr_skip_param = pytest.param( - "etree", - marks=pytest.mark.skipif( - sys.version_info <= (3, 7), - reason=("etree alpha ordered attributes <= py3.7"), - ), -) - geom_df = DataFrame( { "shape": ["square", "circle", "triangle"], @@ -387,8 +379,11 @@ def test_na_empty_elem_option(datapath, parser): # ATTR_COLS -@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) -def test_attrs_cols_nan_output(datapath, attrs_parser): +@pytest.mark.skipif( + sys.version_info <= (3, 7), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_attrs_cols_nan_output(datapath, parser): expected = """\ @@ -408,8 +403,11 @@ def test_attrs_cols_nan_output(datapath, attrs_parser): assert output == expected -@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) -def test_attrs_cols_prefix(datapath, attrs_parser): +@pytest.mark.skipif( + sys.version_info <= (3, 7), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_attrs_cols_prefix(datapath, parser): expected = """\ @@ -583,8 +581,11 @@ def test_hierarchical_columns(datapath, parser): assert output == expected -@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) -def test_hierarchical_attrs_columns(datapath, attrs_parser): +@pytest.mark.skipif( + sys.version_info <= (3, 7), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_hierarchical_attrs_columns(datapath, parser): expected = """\ @@ -663,8 +664,11 @@ def test_multi_index(datapath, parser): assert output == expected 
-@pytest.mark.parametrize("attrs_parser", ["lxml", etree_attr_skip_param]) -def test_multi_index_attrs_cols(datapath, attrs_parser): +@pytest.mark.skipif( + sys.version_info <= (3, 7), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_multi_index_attrs_cols(datapath, parser): expected = """\ @@ -941,12 +945,13 @@ def test_no_pretty_print_with_decl(parser): "
" ) - output = geom_df.to_xml(pretty_print=False) + output = geom_df.to_xml(pretty_print=False, parser=parser) output = output.replace( '", "/>") assert output == expected @@ -961,7 +966,7 @@ def test_no_pretty_print_no_decl(parser): "
" ) - output = geom_df.to_xml(xml_declaration=False, pretty_print=False) + output = geom_df.to_xml(xml_declaration=False, pretty_print=False, parser=parser) assert output == expected @@ -1036,7 +1041,7 @@ def test_stylesheet_with_etree(datapath): @td.skip_if_installed("lxml") -def test_stylesheet_without_lxml(datapath, parser): +def test_stylesheet_without_lxml(datapath): xsl = datapath("io", "data", "xml", "row_field_output.xsl") with pytest.warns( @@ -1046,18 +1051,18 @@ def test_stylesheet_without_lxml(datapath, parser): @td.skip_if_no("lxml") -def test_stylesheet_wrong_path(datapath, parser): +def test_stylesheet_wrong_path(datapath): xsl = os.path.join("data", "xml", "row_field_output.xslt") with pytest.raises( (OSError, FileNotFoundError), - match=("failed to load external entity|No such file or directory"), + match=("failed to load external entity|No such file or directory|没有那个文件或目录"), ): geom_df.to_xml(stylesheet=xsl) @td.skip_if_no("lxml") -def test_stylesheet_not_path_buffer(parser): +def test_stylesheet_not_path_buffer(): with pytest.raises( ValueError, match=("stylesheet is not a url, file, or xml string") ): diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 53926765f00d8..36eb4e2464209 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -21,7 +21,7 @@ [X] - ValueError("names does not match length of child elements in xpath.") [X] - TypeError("...is not a valid type for names") [X] - ValueError("io is not a url, file, or xml string") -[] - URLError (GENERAL ERROR USUALLY DUE TO NETWORKING) +[] - URLError (USUALLY DUE TO NETWORKING) [X] - HTTPError("HTTP Error 404: Not Found") [X] - OSError("No such file") [] - ParseError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) @@ -39,11 +39,11 @@ [X] - TypeError("...is not a valid type for names") [X] - ValueError("io is not a url, file, or xml string") [X] - LookupError(unknown encoding) -[] - URLError (GENERAL ERROR USUALLY DUE TO NETWORKING) +[] - URLError 
(USUALLY DUE TO NETWORKING) [X - HTTPError("HTTP Error 404: Not Found") [X] - OSError("failed to load external entity") [X] - XMLSyntaxError("Start tag expected, '<' not found") -[] - ParserError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) +[] - ParserError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) [X] - ValueError("Values for parser can only be lxml or etree.") """ @@ -228,7 +228,7 @@ def test_not_io_object(parser): def test_wrong_file_lxml(datapath): with pytest.raises( (OSError, FileNotFoundError), - match=("failed to load external entity|No such file or directory"), + match=("failed to load external entity|No such file or directory|没有那个文件或目录"), ): filename = os.path.join("data", "html", "books.xml") read_xml(filename, parser="lxml") @@ -840,7 +840,7 @@ def test_wrong_stylesheet(): with pytest.raises( (OSError, FileNotFoundError), - match=("failed to load external entity|No such file or directory"), + match=("failed to load external entity|No such file or directory|没有那个文件或目录"), ): read_xml(kml, stylesheet=xsl) From a92c21e0765f8a247ba5665039cf00639bfaa7b6 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 3 Feb 2021 12:30:20 -0600 Subject: [PATCH 06/35] Add appropriate etree skips in tests --- pandas/tests/io/formats/test_to_xml.py | 28 +++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 4aadbdb4b32e4..3480c8891d594 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -33,7 +33,7 @@ [X] - KeyError("...is not included in namespaces") [X] - KeyError("no valid column") [X] - ValueError("stylesheet is not a url, file, or xml string.") -[] - LookupError +[] - LookupError (NEED WRONG ENCODING FOR FILE OUTPUT) [] - URLError (USUALLY DUE TO NETWORKING) [] - HTTPError (NEED AN ONLINE STYLESHEET) [X] - OSError("failed to load external entity") @@ -380,7 +380,7 @@ def 
test_na_empty_elem_option(datapath, parser): @pytest.mark.skipif( - sys.version_info <= (3, 7), + sys.version_info < (3, 8), reason=("etree alpha ordered attributes <= py3.7"), ) def test_attrs_cols_nan_output(datapath, parser): @@ -404,7 +404,7 @@ def test_attrs_cols_nan_output(datapath, parser): @pytest.mark.skipif( - sys.version_info <= (3, 7), + sys.version_info < (3, 8), reason=("etree alpha ordered attributes <= py3.7"), ) def test_attrs_cols_prefix(datapath, parser): @@ -582,7 +582,7 @@ def test_hierarchical_columns(datapath, parser): @pytest.mark.skipif( - sys.version_info <= (3, 7), + sys.version_info < (3, 8), reason=("etree alpha ordered attributes <= py3.7"), ) def test_hierarchical_attrs_columns(datapath, parser): @@ -665,7 +665,7 @@ def test_multi_index(datapath, parser): @pytest.mark.skipif( - sys.version_info <= (3, 7), + sys.version_info < (3, 8), reason=("etree alpha ordered attributes <= py3.7"), ) def test_multi_index_attrs_cols(datapath, parser): @@ -906,7 +906,8 @@ def test_misspelled_encoding(parser): # PRETTY PRINT -def test_xml_declaration_pretty_print(parser): +@td.skip_if_no("lxml") +def test_xml_declaration_pretty_print(): expected = """\ @@ -929,12 +930,13 @@ def test_xml_declaration_pretty_print(parser): """ - output = geom_df.to_xml(xml_declaration=False, parser=parser) + output = geom_df.to_xml(xml_declaration=False) assert output == expected -def test_no_pretty_print_with_decl(parser): +@td.skip_if_no("lxml") +def test_no_pretty_print_with_decl(): expected = ( "\n" "0square" @@ -945,7 +947,7 @@ def test_no_pretty_print_with_decl(parser): "" ) - output = geom_df.to_xml(pretty_print=False, parser=parser) + output = geom_df.to_xml(pretty_print=False) output = output.replace( '0square" "3604.0" @@ -966,7 +969,7 @@ def test_no_pretty_print_no_decl(parser): "" ) - output = geom_df.to_xml(xml_declaration=False, pretty_print=False, parser=parser) + output = geom_df.to_xml(xml_declaration=False, pretty_print=False) assert output == expected 
@@ -1031,7 +1034,7 @@ def test_stylesheet_buffered_reader(datapath, mode): assert output == xsl_expected -def test_stylesheet_with_etree(datapath): +def test_stylesheet_with_etree_parser(datapath): xsl = datapath("io", "data", "xml", "row_field_output.xsl") with pytest.warns( @@ -1127,6 +1130,7 @@ def test_incorrect_xsl_eval(): geom_df.to_xml(stylesheet=xsl) +@td.skip_if_no("lxml") def test_incorrect_xsl_apply(parser): from lxml.etree import XSLTApplyError From 51f10f207747dd83f796f4e3f33ecb9cc3113c8a Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 3 Feb 2021 16:58:54 -0600 Subject: [PATCH 07/35] Remove check for warnings in tests --- pandas/tests/io/formats/test_to_xml.py | 30 -------------------------- pandas/tests/io/test_xml.py | 12 ----------- 2 files changed, 42 deletions(-) diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 3480c8891d594..791f5cdd48970 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -21,8 +21,6 @@ [X] - LookupError("unknown encoding") [X] - KeyError("...is not included in namespaces") [X] - KeyError("no valid column") -[X] - UserWarning("To use stylesheet, you need lxml installed.") -[X] - ImportWarning("You do not have lxml installed.") lxml [X] - TypeError("...is not a valid type for attr_cols") @@ -135,15 +133,6 @@ def parser(request): return request.param -# FAIL SAFE WARNING - - -@td.skip_if_installed("lxml") -def test_failsafe_parser(datapath): - with pytest.warns(ImportWarning, match=("You do not have lxml installed.")): - geom_df.to_xml() - - # FILE OUTPUT @@ -1034,25 +1023,6 @@ def test_stylesheet_buffered_reader(datapath, mode): assert output == xsl_expected -def test_stylesheet_with_etree_parser(datapath): - xsl = datapath("io", "data", "xml", "row_field_output.xsl") - - with pytest.warns( - UserWarning, match=("To use stylesheet, you need lxml installed.") - ): - geom_df.to_xml(parser="etree", stylesheet=xsl) - - 
-@td.skip_if_installed("lxml") -def test_stylesheet_without_lxml(datapath): - xsl = datapath("io", "data", "xml", "row_field_output.xsl") - - with pytest.warns( - UserWarning, match=("To use stylesheet, you need lxml installed.") - ): - geom_df.to_xml(stylesheet=xsl) - - @td.skip_if_no("lxml") def test_stylesheet_wrong_path(datapath): xsl = os.path.join("data", "xml", "row_field_output.xslt") diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 36eb4e2464209..51c14361a7cad 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -25,7 +25,6 @@ [X] - HTTPError("HTTP Error 404: Not Found") [X] - OSError("No such file") [] - ParseError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) -[X] - ImportWarning("You do not have lxml installed.") lxml [X] - ValueError("Either element or attributes can be parsed not both.") @@ -99,17 +98,6 @@ def parser(request): return request.param -# FAIL SAFE WARNING - - -@td.skip_if_installed("lxml") -def test_failsafe_parser(datapath): - filename = datapath("io", "data", "xml", "books.xml") - - with pytest.warns(ImportWarning, match=("You do not have lxml installed.")): - read_xml(filename) - - # FILE / URL From 3520d58f1cbaccc8eeb483c4e46117a3a9b26556 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Thu, 4 Feb 2021 10:06:07 -0600 Subject: [PATCH 08/35] Adjust code to conform to mypy and docstring validation --- pandas/core/frame.py | 23 ++-- pandas/io/formats/format.py | 10 +- pandas/io/formats/xml.py | 203 +++++++++++++++++------------------- pandas/io/xml.py | 71 +++++++------ 4 files changed, 158 insertions(+), 149 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5c256776540a9..cb738eff0bc1a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2607,15 +2607,15 @@ def to_html( def to_xml( self, path_or_buffer: Optional[FilePathOrBuffer[str]] = None, - index: Optional[bool] = True, + index: bool = True, root_name: Optional[str] = "data", row_name: 
Optional[str] = "row", na_rep: Optional[str] = None, attr_cols: Optional[Union[str, List[str]]] = None, elem_cols: Optional[Union[str, List[str]]] = None, - namespaces: Optional[Union[dict, List[dict]]] = None, + namespaces: Optional[Dict[Optional[str], str]] = None, prefix: Optional[str] = None, - encoding: Optional[str] = "utf-8", + encoding: str = "utf-8", xml_declaration: Optional[bool] = True, pretty_print: Optional[bool] = True, parser: Optional[str] = "lxml", @@ -2635,7 +2635,7 @@ def to_xml( Whether to include index in XML document. root_name : str, default 'data' The name of root element in XML document. - root_name : str, default 'row' + row_name : str, default 'row' The name of row element in XML document. na_rep : str, optional Missing data representation. @@ -2654,13 +2654,13 @@ def to_xml( Default namespaces should be given empty string key. For example, :: - namespaces = {'': 'https://example.com'} + namespaces = {"": "https://example.com"} prefix : str, optional Namespace prefix to be used for every element and/or attribute in document. This should be one of the keys in ``namespaces`` dict. - encoding : str, optional, default 'utf-8' + encoding : str, default 'utf-8' Encoding of the resulting document. xml_declaration : str, optional Whether to include the XML declaration at start of document. @@ -2697,7 +2697,7 @@ def to_xml( ... 'degrees': [360, 360, 180], ... 'sides': [4, np.nan, 3]}) - >>> df.to_xml() + >>> df.to_xml() # doctest: +SKIP @@ -2720,7 +2720,9 @@ def to_xml( - >>> df.to_xml(attr_cols=['index', 'shape', 'degrees', 'sides']) + >>> df.to_xml(attr_cols=[ + ... 'index', 'shape', 'degrees', 'sides' + ... ]) # doctest: +SKIP @@ -2728,8 +2730,8 @@ def to_xml( - >>> df.to_xml(namespaces = {"doc": "https://example.com"}, - ... prefix = "doc") + >>> df.to_xml(namespaces={"doc": "https://example.com"}, + ... 
prefix="doc") # doctest: +SKIP @@ -2756,7 +2758,6 @@ def to_xml( formatter = fmt.DataFrameFormatter( self, index=index, - na_rep=na_rep, ) return fmt.DataFrameRenderer(formatter).to_xml( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 870e2a5976319..7788faf52b01e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1015,9 +1015,9 @@ def to_xml( na_rep: Optional[str] = None, attr_cols: Optional[Union[str, List[str]]] = None, elem_cols: Optional[Union[str, List[str]]] = None, - namespaces: Optional[Union[dict, List[dict]]] = None, + namespaces: Optional[Dict[Optional[str], str]] = None, prefix: Optional[str] = None, - encoding: Optional[str] = "utf-8", + encoding: str = "utf-8", xml_declaration: Optional[bool] = True, pretty_print: Optional[bool] = True, parser: Optional[str] = "lxml", @@ -1037,7 +1037,7 @@ def to_xml( Whether to include index in XML document. root_name : str, default 'data' The name of root element in XML document. - root_name : str, default 'row' + row_name : str, default 'row' The name of row element in XML document. na_rep : str, optional Missing data representation. @@ -1062,7 +1062,7 @@ def to_xml( Namespace prefix to be used for every element and/or attribute in document. This should be one of the keys in ``namespaces`` dict. - encoding : str, optional, default 'utf-8' + encoding : str, default 'utf-8' Encoding of the resulting document. xml_declaration : str, optional Whether to include the XML declaration at start of document. 
@@ -1087,6 +1087,8 @@ def to_xml( lxml = import_optional_dependency("lxml.etree", errors="ignore") + TreeBuilder: Union[Type[EtreeXMLFormatter], Type[LxmlXMLFormatter]] + if parser == "lxml": if lxml is not None: TreeBuilder = LxmlXMLFormatter diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 90ee289ad3414..45cda69efbb05 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -4,7 +4,7 @@ import codecs import io -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union from urllib.error import HTTPError, URLError from warnings import warn @@ -79,11 +79,11 @@ def __init__( root_name: Optional[str] = "data", row_name: Optional[str] = "row", na_rep: Optional[str] = None, - attr_cols: Optional[Union[str, List[str]]] = None, - elem_cols: Optional[Union[str, List[str]]] = None, - namespaces: Optional[Dict[str, str]] = None, + attr_cols: Optional[List[str]] = None, + elem_cols: Optional[List[str]] = None, + namespaces: Optional[Dict[Optional[str], str]] = None, prefix: Optional[str] = None, - encoding: Optional[str] = "utf-8", + encoding: str = "utf-8", xml_declaration: Optional[bool] = True, pretty_print: Optional[bool] = True, stylesheet: Optional[FilePathOrBuffer[str]] = None, @@ -104,6 +104,9 @@ def __init__( self.stylesheet = stylesheet self.frame = self.fmt.frame + self.orig_cols = self.fmt.frame.columns.tolist() + self.frame_dicts = self.process_dataframe() + def build_tree(self) -> bytes: """ Build tree from data. @@ -151,7 +154,7 @@ def validate_encoding(self) -> None: except LookupError as e: raise e - def process_dataframe(self) -> None: + def process_dataframe(self) -> Dict[Union[int, str], Dict[str, Any]]: """ Adjust Data Frame to fit xml output. @@ -176,13 +179,15 @@ def handle_indexes(self) -> None: This method will add indexes into attr_cols or elem_cols. 
""" - indexes = [x for x in self.frame_dicts[0].keys() if x not in self.orig_cols] + indexes: List[str] = [ + x for x in self.frame_dicts[0].keys() if x not in self.orig_cols + ] if self.attr_cols and self.index: - self.attr_cols = list(indexes) + self.attr_cols + self.attr_cols = indexes + self.attr_cols if self.elem_cols and self.index: - self.elem_cols = list(indexes) + self.elem_cols + self.elem_cols = indexes + self.elem_cols def get_prefix_uri(self) -> str: """ @@ -207,7 +212,7 @@ def other_namespaces(self) -> dict: prefix. """ - nmsp_dict = {} + nmsp_dict: Dict[str, str] = {} if self.namespaces and self.prefix is None: nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p != ""} @@ -238,18 +243,19 @@ def build_elems(self) -> None: def write_output(self) -> Optional[str]: xml_doc = self.build_tree() + out_str: Optional[str] = xml_doc.decode(self.encoding).rstrip() try: - if self.path_or_buffer: + if self.path_or_buffer and isinstance(self.path_or_buffer, str): with open(self.path_or_buffer, "wb") as f: f.write(xml_doc) - xml_doc = None - else: - xml_doc = xml_doc.decode(self.encoding).rstrip() + + out_str = None + except (UnicodeDecodeError, OSError) as e: raise e - return xml_doc + return out_str class EtreeXMLFormatter(BaseXMLFormatter): @@ -268,8 +274,6 @@ def __init__(self, *args, **kwargs): self.validate_columns() self.validate_encoding() - self.orig_cols = self.fmt.frame.columns.tolist() - self.frame_dicts = self.process_dataframe() self.handle_indexes() self.prefix_uri = self.get_prefix_uri() @@ -284,13 +288,12 @@ def build_tree(self) -> bytes: self.d = d self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") - if self.attr_cols: - self.build_attribs() - if self.elem_cols: - self.build_elems() if not self.attr_cols and not self.elem_cols: self.elem_cols = list(self.frame_dicts[0].keys()) self.build_elems() + else: + self.build_attribs() + self.build_elems() self.out_xml = tostring(self.root, method="xml", 
encoding=self.encoding) @@ -315,7 +318,8 @@ def get_prefix_uri(self) -> str: uri = "" if self.namespaces: for p, n in self.namespaces.items(): - register_namespace(p, n) + if isinstance(p, str) and isinstance(n, str): + register_namespace(p, n) if self.prefix: try: uri = f"{{{self.namespaces[self.prefix]}}}" @@ -327,40 +331,42 @@ def get_prefix_uri(self) -> str: return uri def build_attribs(self) -> None: - for col in self.attr_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - "".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - attr_name = f"{self.prefix_uri}{flat_col}" - try: - if self.d[col] is not None: - self.elem_row.attrib[attr_name] = str(self.d[col]) - except KeyError: - raise KeyError(f"no valid column, {col}") + if self.attr_cols: + for col in self.attr_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + attr_name = f"{self.prefix_uri}{flat_col}" + try: + if self.d[col] is not None: + self.elem_row.attrib[attr_name] = str(self.d[col]) + except KeyError: + raise KeyError(f"no valid column, {col}") def build_elems(self) -> None: from xml.etree.ElementTree import SubElement - for col in self.elem_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - "".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - elem_name = f"{self.prefix_uri}{flat_col}" - try: - val = None if self.d[col] in [None, ""] else str(self.d[col]) - SubElement(self.elem_row, elem_name).text = val - except KeyError: - raise KeyError(f"no valid column, {col}") + if self.elem_cols: + for col in self.elem_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + elem_name = f"{self.prefix_uri}{flat_col}" + try: + val = None if 
self.d[col] in [None, ""] else str(self.d[col]) + SubElement(self.elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") def prettify_tree(self) -> bytes: """ @@ -375,7 +381,7 @@ def prettify_tree(self) -> bytes: return dom.toprettyxml(indent=" ", encoding=self.encoding) - def remove_declaration(self) -> None: + def remove_declaration(self) -> bytes: """ Remove xml declaration. @@ -402,8 +408,6 @@ def __init__(self, *args, **kwargs): self.validate_columns() self.validate_encoding() - self.orig_cols = self.fmt.frame.columns.tolist() - self.frame_dicts = self.process_dataframe() self.prefix_uri = self.get_prefix_uri() self.convert_empty_str_key() @@ -424,15 +428,12 @@ def build_tree(self) -> bytes: self.d = d self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") - if self.attr_cols: - self.build_attribs() - - if self.elem_cols: - self.build_elems() - if not self.attr_cols and not self.elem_cols: self.elem_cols = list(self.frame_dicts[0].keys()) self.build_elems() + else: + self.build_attribs() + self.build_elems() self.out_xml = tostring( self.root, @@ -472,54 +473,44 @@ def get_prefix_uri(self) -> str: return uri def build_attribs(self) -> None: - """ - Create attributes of row. - - This method adds attributes using attr_cols to row element and - works with tuples for multindex or hierarchical columns. 
- """ - for col in self.attr_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - "".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - attr_name = f"{self.prefix_uri}{flat_col}" - try: - if self.d[col] is not None: - self.elem_row.attrib[attr_name] = self.d[col] - except KeyError: - raise KeyError(f"no valid column, {col}") + if self.attr_cols: + for col in self.attr_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + attr_name = f"{self.prefix_uri}{flat_col}" + try: + if self.d[col] is not None: + self.elem_row.attrib[attr_name] = self.d[col] + except KeyError: + raise KeyError(f"no valid column, {col}") def build_elems(self) -> None: - """ - Create child elements of row. - - This method adds child elements using elem_cols to row element and - works with tuples for multindex or hierarchical columns. 
- """ from lxml.etree import SubElement - for col in self.elem_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - "".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - elem_name = f"{self.prefix_uri}{flat_col}" - try: - val = None if self.d[col] in [None, ""] else str(self.d[col]) - SubElement(self.elem_row, elem_name).text = val - except KeyError: - raise KeyError(f"no valid column, {col}") + if self.elem_cols: + for col in self.elem_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + elem_name = f"{self.prefix_uri}{flat_col}" + try: + val = None if self.d[col] in [None, ""] else str(self.d[col]) + SubElement(self.elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") - def convert_io(self) -> Union[None, str]: + def convert_io(self) -> Union[bytes, str, None]: """ Convert stylesheet object to string. @@ -527,6 +518,8 @@ def convert_io(self) -> Union[None, str]: as string, depending on object type. """ + obj: Union[bytes, str, None] = None + if isinstance(self.stylesheet, str): obj = self.stylesheet @@ -577,7 +570,7 @@ def parse_doc(self): from lxml.etree import XML, XMLParser, XMLSyntaxError, parse current_doc = self.convert_io() - if current_doc: + if current_doc and isinstance(current_doc, str): is_xml = current_doc.startswith((" List[Dict[str, List[str]]]: + def parse_data(self) -> List[Dict[str, Optional[str]]]: """ Parse xml data. @@ -104,7 +104,7 @@ def parse_data(self) -> List[Dict[str, List[str]]]: raise AbstractMethodError(self) - def _parse_nodes(self) -> List[Dict[str, List[str]]]: + def _parse_nodes(self) -> List[Dict[str, Optional[str]]]: """ Parse xml nodes. 
@@ -157,7 +157,7 @@ def _validate_names(self) -> None: """ raise AbstractMethodError(self) - def _convert_io(self, xml_data) -> Union[None, str]: + def _convert_io(self, xml_data) -> Union[str, bytes, None]: """ Convert io object to string. @@ -165,6 +165,8 @@ def _convert_io(self, xml_data) -> Union[None, str]: as string, depending on object type. """ + obj: Union[bytes, str, None] = None + if isinstance(xml_data, str): obj = xml_data @@ -231,7 +233,7 @@ class _EtreeFrameParser(_XMLFrameParser): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def parse_data(self) -> List[Dict[str, List[str]]]: + def parse_data(self) -> List[Dict[str, Optional[str]]]: if self.stylesheet: warn( @@ -247,9 +249,9 @@ def parse_data(self) -> List[Dict[str, List[str]]]: return self._parse_nodes() - def _parse_nodes(self) -> List[Dict[str, List[str]]]: - + def _parse_nodes(self) -> List[Dict[str, Optional[str]]]: elems = self.xml_doc.findall(self.xpath, namespaces=self.namespaces) + dicts: List[Dict[str, Optional[str]]] if self.elems_only and self.attrs_only: raise ValueError("Either element or attributes can be parsed not both.") @@ -279,7 +281,10 @@ def _parse_nodes(self) -> List[Dict[str, List[str]]]: ] elif self.attrs_only: - dicts = [el.attrib for el in elems] + dicts = [ + {k: v.strip() if v else None for k, v in el.attrib.items()} + for el in elems + ] else: if self.names: @@ -363,9 +368,9 @@ def _validate_path(self) -> None: def _validate_names(self) -> None: if self.names: - children = self.xml_doc.find( - self.xpath, namespaces=self.namespaces - ).findall("*") + parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces) + if parent: + children = parent.findall("*") if is_list_like(self.names): if len(self.names) < len(children): @@ -378,20 +383,24 @@ def _validate_names(self) -> None: ) def _parse_doc(self) -> Union[Element, ElementTree]: - from xml.etree.ElementTree import ParseError, fromstring, parse + from xml.etree.ElementTree import ( + 
Element, + ElementTree, + ParseError, + fromstring, + parse, + ) current_doc = self._convert_io(self.io) if current_doc: - is_xml = current_doc.startswith((" List[Dict[str, List[str]]]: + def parse_data(self) -> List[Dict[str, Optional[str]]]: """ Parse xml data. @@ -444,8 +453,9 @@ def parse_data(self) -> List[Dict[str, List[str]]]: return self._parse_nodes() - def _parse_nodes(self) -> List[Dict[str, List[str]]]: + def _parse_nodes(self) -> List[Dict[str, Optional[str]]]: elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + dicts: List[Dict[str, Optional[str]]] if self.elems_only and self.attrs_only: raise ValueError("Either element or attributes can be parsed not both.") @@ -601,7 +611,10 @@ def _parse_doc(self): current_doc = self._convert_io(self.raw_doc) if current_doc: - is_xml = current_doc.startswith(("''' >>> df = pd.read_xml(xml) - >>> df shape degrees sides 0 square 360 4.0 @@ -871,10 +886,9 @@ def read_xml( ... ... ... - ... "''' + ... ''' >>> df = pd.read_xml(xml, xpath=".//row") - >>> df shape degrees sides 0 square 360 4.0 @@ -900,10 +914,9 @@ def read_xml( ... ... ''' - >>> df = pd.read(xml, - xpath="//doc:row", - namespaces = {'doc': 'https://example.com'}) - + >>> df = pd.read_xml(xml, + ... xpath="//doc:row", + ... 
namespaces={"doc": "https://example.com"}) >>> df shape degrees sides 0 square 360 4.0 From 483256250fdc057c4360afcb8f6a6fcff2edd069 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Thu, 4 Feb 2021 11:57:23 -0600 Subject: [PATCH 09/35] Add read_xml to TestPDApi test and fix for etree test --- pandas/tests/api/test_api.py | 1 + pandas/tests/io/test_xml.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 541c2988a0636..fd1c19219c4bf 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -159,6 +159,7 @@ class TestPDApi(Base): "read_gbq", "read_hdf", "read_html", + "read_xml", "read_json", "read_pickle", "read_sas", diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 51c14361a7cad..18e953db92c8d 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -216,7 +216,7 @@ def test_not_io_object(parser): def test_wrong_file_lxml(datapath): with pytest.raises( (OSError, FileNotFoundError), - match=("failed to load external entity|No such file or directory|没有那个文件或目录"), + match=(r"failed to load external entity|No such file or directory|没有那个文件或目录"), ): filename = os.path.join("data", "html", "books.xml") read_xml(filename, parser="lxml") From 2914c32b4d6713fbed964dbae1bbd780008e361e Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Thu, 4 Feb 2021 11:57:54 -0600 Subject: [PATCH 10/35] Add read_xml to TestPDApi test and fix for etree test --- pandas/tests/io/test_xml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 18e953db92c8d..95ac1c5ff8db3 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -216,14 +216,14 @@ def test_not_io_object(parser): def test_wrong_file_lxml(datapath): with pytest.raises( (OSError, FileNotFoundError), - match=(r"failed to load external entity|No such file or 
directory|没有那个文件或目录"), + match=("failed to load external entity|No such file or directory|没有那个文件或目录"), ): filename = os.path.join("data", "html", "books.xml") read_xml(filename, parser="lxml") def test_wrong_file_etree(datapath): - with pytest.raises(OSError, match=("No such file")): + with pytest.raises(OSError, match=("No such file|没有那个文件或目录")): filename = os.path.join("data", "html", "books.xml") read_xml(filename, parser="etree") From 72d0e93aec1a024cf473a8cf01bcb9a936ad6251 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Thu, 4 Feb 2021 15:15:05 -0600 Subject: [PATCH 11/35] Replace lxml ImportWarning for ImportError with added tests --- pandas/core/frame.py | 12 ++++----- pandas/io/formats/format.py | 18 +++++--------- pandas/io/formats/xml.py | 10 -------- pandas/io/xml.py | 34 ++++++-------------------- pandas/tests/io/formats/test_to_xml.py | 21 ++++++++++++++++ pandas/tests/io/test_xml.py | 13 ++++++++++ 6 files changed, 53 insertions(+), 55 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cb738eff0bc1a..78ad2c089dc3e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2631,7 +2631,7 @@ def to_xml( path_or_buffer : str, path object or file-like object, optional File to write output to. If None, the output is returned as a string. - index : bool, optional + index : bool, default True Whether to include index in XML document. root_name : str, default 'data' The name of root element in XML document. @@ -2662,17 +2662,15 @@ def to_xml( dict. encoding : str, default 'utf-8' Encoding of the resulting document. - xml_declaration : str, optional + xml_declaration : bool, default True Whether to include the XML declaration at start of document. - pretty_print : bool, optional + pretty_print : bool, default True Whether output should be pretty printed with indentation and line breaks. - parser : {'lxml','etree'}, default "lxml" + parser : {'lxml','etree'}, default 'lxml' Parser module to use for building of tree. 
Only 'lxml' and 'etree' are supported. With 'lxml', the ability to use XSLT - stylesheet is supported. Default parser uses 'lxml'. If - module is not installed a warning will raise and process - will continue with 'etree'. + stylesheet is supported. stylesheet : str, path object or file-like object, optional A URL, file-like object, or a raw string containing an XSLT script used to transform the raw XML output. Script should use diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 7788faf52b01e..0f7ed424a5f1e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1,5 +1,5 @@ """ -Internal module for formatting output data in csv, html, +Internal module for formatting output data in csv, html, xml, and latex files. This module also applies to display formatting. """ from __future__ import annotations @@ -30,7 +30,6 @@ cast, ) from unicodedata import east_asian_width -from warnings import warn import numpy as np @@ -1033,7 +1032,7 @@ def to_xml( path_or_buffer : str, path object or file-like object, optional File to write output to. If None, the output is returned as a string. - index : bool, optional + index : bool, default True Whether to include index in XML document. root_name : str, default 'data' The name of root element in XML document. @@ -1066,15 +1065,13 @@ def to_xml( Encoding of the resulting document. xml_declaration : str, optional Whether to include the XML declaration at start of document. - pretty_print : bool, optional + pretty_print : bool, default True Whether output should be pretty printed with indentation and line breaks. parser : {'lxml','etree'}, default "lxml" Parser module to use for building of tree. Only 'lxml' and 'etree' are supported. With 'lxml', the ability to use XSLT - stylesheet is supported. Default parser uses 'lxml'. If - module is not installed a warning will raise and process - will continue with 'etree'. + stylesheet is supported. 
stylesheet : str, path object or file-like object, optional A URL, file-like object, or a raw string containing an XSLT script used to transform the raw XML output. Script should use @@ -1093,12 +1090,9 @@ def to_xml( if lxml is not None: TreeBuilder = LxmlXMLFormatter else: - warn( - "You do not have lxml installed (default parser). " - "Instead, etree will be used.", - ImportWarning, + raise ImportError( + "lxml not found, please install or use the etree parser." ) - TreeBuilder = EtreeXMLFormatter elif parser == "etree": TreeBuilder = EtreeXMLFormatter diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 45cda69efbb05..39ff7f7e24222 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -262,11 +262,6 @@ class EtreeXMLFormatter(BaseXMLFormatter): """ Class for formatting data in xml using Python standard library modules: `xml.etree.ElementTree` and `xml.dom.minidom`. - - Notes - ----- - This class serves as fall back option if user does not have - ``lxml`` installed or user specifically requests ``etree`` parser. """ def __init__(self, *args, **kwargs): @@ -396,11 +391,6 @@ class LxmlXMLFormatter(BaseXMLFormatter): """ Class for formatting data in xml using Python standard library modules: `xml.etree.ElementTree` and `xml.dom.minidom`. - - Notes - ----- - This class serves as default option. If user does not have `lxml` - installed, `to_xml` will fall back with EtreeXMLFormatter. """ def __init__(self, *args, **kwargs): diff --git a/pandas/io/xml.py b/pandas/io/xml.py index eab5e270ff835..b1c5978877cc6 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -220,12 +220,7 @@ def _parse_doc(self): class _EtreeFrameParser(_XMLFrameParser): """ Internal class to parse XML into DataFrames with the Python - standard library XML modules: `xml.etree.ElementTree`. - - Notes - ----- - This class serves as fall back option if user does not have - ``lxml`` installed or user specifically requests ``etree`` parser. 
+ standard library XML module: `xml.etree.ElementTree`. """ from xml.etree.ElementTree import Element, ElementTree @@ -420,13 +415,6 @@ class _LxmlFrameParser(_XMLFrameParser): Internal class to parse XML into DataFrames with third-party full-featured XML library, `lxml`, that supports XPath 1.0 and XSLT 1.0. - - Notes - ----- - This is the default class called with `_EtreeFrameParser` serving - as fall back option if user does not have ``lxml`` installed. - With `lxml`, the user enjoys the full scope of funcationality and - efficiency. """ def __init__(self, *args, **kwargs): @@ -705,11 +693,7 @@ def _parse( stylesheet, ) else: - warn( - "You do not have lxml installed (default parser). " - "Instead, etree will be used.", - ImportWarning, - ) + raise ImportError("lxml not found, please install or use the etree parser.") p = _EtreeFrameParser( io, @@ -762,7 +746,7 @@ def read_xml( io : str, path object or file-like object A URL, file-like object, or raw string containing XML. - xpath : str, optional + xpath : str, optional, default './*' The XPath to parse required set of nodes for migration to DataFrame. XPath should return a collection of elements and not a single element. Note: The ``etree`` parser supports limited XPath @@ -780,11 +764,11 @@ def read_xml( namespaces = {"doc": "https://example.com"} - elems_only : bool, optional, default = False + elems_only : bool, optional, default False Parse only the child elements at the specified ``xpath``. By default, all child elements and non-empty text nodes are returned. - attrs_only : bool, optional, default = False + attrs_only : bool, optional, default False Parse only the attributes at the specified ``xpath``. By default, all attributes are returned. @@ -792,15 +776,13 @@ def read_xml( Column names for DataFrame of parsed XML data. Use this parameter to rename original element names and distinguish same named elements. 
- encoding : str, optional, default = 'utf-8' + encoding : str, optional, default 'utf-8' Encoding of XML document. - parser : {'lxml','etree'}, default='lxml' + parser : {'lxml','etree'}, default 'lxml' Parser module to use for retrieval of data. Only 'lxml' and 'etree' are supported. With 'lxml' more complex XPath searches - and ability to use XSLT stylesheet are supported. Default parser - uses 'lxml'. If module is not installed a warning will raise and - process will continue with 'etree'. + and ability to use XSLT stylesheet are supported. stylesheet : str, path object or file-like object A URL, file-like object, or a raw string containing an XSLT script. diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 791f5cdd48970..dd90f8292142f 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -15,7 +15,10 @@ """ CHECKLIST +[x] - ValueError("Values for parser can only be lxml or etree.") + etree +[x] - ImportError("lxml not found, please install or use the etree parser.") [X] - TypeError("...is not a valid type for attr_cols") [X] - TypeError("...is not a valid type for elem_cols") [X] - LookupError("unknown encoding") @@ -963,6 +966,24 @@ def test_no_pretty_print_no_decl(): assert output == expected +# PARSER + + +@td.skip_if_installed("lxml") +def test_default_parser_no_lxml(): + with pytest.raises( + ImportError, match=("lxml not found, please install or use the etree parser.") + ): + geom_df.to_xml() + + +def test_unknown_parser(): + with pytest.raises( + ValueError, match=("Values for parser can only be lxml or etree.") + ): + geom_df.to_xml(parser="bs4") + + # STYLESHEET xsl_expected = """\ diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 95ac1c5ff8db3..8bc195377a340 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -14,7 +14,10 @@ """ CHECK LIST +[x] - ValueError("Values for parser can only be lxml or etree.") + 
etree +[x] - ImportError("lxml not found, please install or use the etree parser.") [X] - ValueError("Either element or attributes can be parsed not both.") [X] - ValueError("xpath does not return any nodes...") [X] - SyntaxError("You have used an incorrect or unsupported XPath") @@ -560,6 +563,16 @@ def test_parser_consistency_with_encoding(datapath): # PARSER +@td.skip_if_installed("lxml") +def test_default_parser_no_lxml(datapath): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises( + ImportError, match=("lxml not found, please install or use the etree parser.") + ): + read_xml(filename) + + def test_wrong_parser(datapath): filename = datapath("io", "data", "xml", "books.xml") From b80b8ce32f40a34e370bbc5c8d3476d4cc08ad44 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Fri, 5 Feb 2021 11:18:14 -0600 Subject: [PATCH 12/35] Adjust fixture for lxml skip and add error validation in tests --- pandas/io/formats/xml.py | 2 +- pandas/io/xml.py | 11 +++++++++-- pandas/tests/io/formats/test_to_xml.py | 14 ++++++++++++-- pandas/tests/io/test_xml.py | 23 ++++++++--------------- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 39ff7f7e24222..b361748dca819 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -252,7 +252,7 @@ def write_output(self) -> Optional[str]: out_str = None - except (UnicodeDecodeError, OSError) as e: + except (UnicodeDecodeError, OSError, FileNotFoundError) as e: raise e return out_str diff --git a/pandas/io/xml.py b/pandas/io/xml.py index b1c5978877cc6..a4e010ee35f23 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -404,7 +404,7 @@ def _parse_doc(self) -> Union[Element, ElementTree]: r = fromstring(current_doc) else: r = parse(current_doc) - except (URLError, HTTPError, OSError, ParseError) as e: + except (URLError, HTTPError, OSError, FileNotFoundError, ParseError) as e: raise e return r @@ -618,7 +618,14 @@ def 
_parse_doc(self): r = XML(current_doc) else: r = parse(current_doc, parser=curr_parser) - except (LookupError, URLError, HTTPError, OSError, XMLSyntaxError) as e: + except ( + LookupError, + URLError, + HTTPError, + OSError, + FileNotFoundError, + XMLSyntaxError, + ) as e: raise e return r diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index dd90f8292142f..bd3e0728fc42e 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -24,13 +24,16 @@ [X] - LookupError("unknown encoding") [X] - KeyError("...is not included in namespaces") [X] - KeyError("no valid column") +[] - OSError (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) +[X] - FileNotFoundError("No such file or directory") lxml [X] - TypeError("...is not a valid type for attr_cols") [X] - TypeError("...is not a valid type for elem_cols") [X] - LookupError("unknown encoding") [] - UnicodeDecodeError (NEED NON-UTF-8 STYLESHEET) -[] - OSError (NEED UNREACHABLE LOCAL FILE PATH) +[] - OSError (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) +[X] - FileNotFoundError("No such file or directory") [X] - KeyError("...is not included in namespaces") [X] - KeyError("no valid column") [X] - ValueError("stylesheet is not a url, file, or xml string.") @@ -131,7 +134,7 @@ def mode(request): return request.param -@pytest.fixture(params=["lxml", "etree"]) +@pytest.fixture(params=[pytest.param("lxml", marks=td.skip_if_no("lxml")), "etree"]) def parser(request): return request.param @@ -190,6 +193,13 @@ def test_str_output(datapath, parser): assert output == from_file_expected +def test_wrong_file_path(parser): + with pytest.raises( + FileNotFoundError, match=("No such file or directory|没有那个文件或目录") + ): + geom_df.to_xml("/my/fake/path/output.xml") + + # INDEX diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 8bc195377a340..e792d75f1b070 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py 
@@ -24,9 +24,10 @@ [X] - ValueError("names does not match length of child elements in xpath.") [X] - TypeError("...is not a valid type for names") [X] - ValueError("io is not a url, file, or xml string") -[] - URLError (USUALLY DUE TO NETWORKING) +[] - URLError (GENERAL ERROR WITH HTTPError AS SUBCLASS) [X] - HTTPError("HTTP Error 404: Not Found") -[X] - OSError("No such file") +[] - OSError (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) +[X] - FileNotFoundError("No such file or directory") [] - ParseError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) lxml @@ -96,7 +97,7 @@ def mode(request): return request.param -@pytest.fixture(params=["lxml", "etree"]) +@pytest.fixture(params=[pytest.param("lxml", marks=td.skip_if_no("lxml")), "etree"]) def parser(request): return request.param @@ -209,26 +210,18 @@ def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) -@td.skip_if_no("lxml") -def test_not_io_object(parser): +def test_wrong_io_object(parser): with pytest.raises(ValueError, match=("io is not a url, file, or xml string")): - read_xml(DataFrame, parser="lxml") + read_xml(DataFrame, parser=parser) -@td.skip_if_no("lxml") -def test_wrong_file_lxml(datapath): +def test_wrong_file_path(datapath, parser): with pytest.raises( (OSError, FileNotFoundError), match=("failed to load external entity|No such file or directory|没有那个文件或目录"), ): filename = os.path.join("data", "html", "books.xml") - read_xml(filename, parser="lxml") - - -def test_wrong_file_etree(datapath): - with pytest.raises(OSError, match=("No such file|没有那个文件或目录")): - filename = os.path.join("data", "html", "books.xml") - read_xml(filename, parser="etree") + read_xml(filename, parser=parser) @tm.network From a6cfc90eccff7754e87d4f7093d29ff7c8f72812 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Fri, 5 Feb 2021 12:21:04 -0600 Subject: [PATCH 13/35] Add conditional skips for envs without lxml --- pandas/tests/io/formats/test_to_xml.py | 15 
++++++++------- pandas/tests/io/test_xml.py | 7 ++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index bd3e0728fc42e..079cc6bf0fa39 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -182,7 +182,7 @@ def test_str_output(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, parser=parser) - output = df_file.to_xml() + output = df_file.to_xml(parser=parser) # etree and lxml differs on quotes and case in xml declaration output = output.replace( @@ -197,7 +197,7 @@ def test_wrong_file_path(parser): with pytest.raises( FileNotFoundError, match=("No such file or directory|没有那个文件或目录") ): - geom_df.to_xml("/my/fake/path/output.xml") + geom_df.to_xml("/my/fake/path/output.xml", parser=parser) # INDEX @@ -871,7 +871,7 @@ def test_encoding_option_str(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") df_file = read_xml(filename, parser=parser, encoding="ISO-8859-1").head(5) - output = df_file.to_xml(encoding="ISO-8859-1") + output = df_file.to_xml(encoding="ISO-8859-1", parser=parser) # etree and lxml differs on quotes and case in xml declaration output = output.replace( @@ -885,16 +885,17 @@ def test_encoding_option_str(datapath, parser): @td.skip_if_no("lxml") def test_correct_encoding_file(datapath): filename = datapath("io", "data", "xml", "baby_names.xml") - df_file = read_xml(filename, encoding="ISO-8859-1") + df_file = read_xml(filename, encoding="ISO-8859-1", parser="lxml") with tm.ensure_clean("test.xml") as path: - df_file.to_xml(path, index=False, encoding="ISO-8859-1") + df_file.to_xml(path, index=False, encoding="ISO-8859-1", parser="lxml") +@td.skip_if_no("lxml") @pytest.mark.parametrize("encoding", ["UTF-8", "UTF-16", "ISO-8859-1"]) def test_wrong_encoding_option_lxml(datapath, parser, encoding): filename = datapath("io", "data", "xml", 
"baby_names.xml") - df_file = read_xml(filename, encoding="ISO-8859-1") + df_file = read_xml(filename, encoding="ISO-8859-1", parser="lxml") with tm.ensure_clean("test.xml") as path: df_file.to_xml(path, index=False, encoding=encoding, parser=parser) @@ -902,7 +903,7 @@ def test_wrong_encoding_option_lxml(datapath, parser, encoding): def test_misspelled_encoding(parser): with pytest.raises(LookupError, match=("unknown encoding")): - geom_df.to_xml(parser=parser, encoding="uft-8") + geom_df.to_xml(encoding="uft-8", parser=parser) # PRETTY PRINT diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index e792d75f1b070..cd5738d259eb9 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -253,7 +253,8 @@ def test_wrong_url(parser): # XPATH -def test_empty_xpath_lxml(datapath, parser): +@td.skip_if_no("lxml") +def test_empty_xpath_lxml(datapath): filename = datapath("io", "data", "xml", "books.xml") with pytest.raises(ValueError, match=("xpath does not return any nodes")): read_xml(filename, xpath=".//python", parser="lxml") @@ -317,6 +318,7 @@ def test_prefix_namespace(parser): tm.assert_frame_equal(df_nmsp, df_expected) +@td.skip_if_no("lxml") def test_consistency_default_namespace(): df_lxml = read_xml( xml_default_nmsp, @@ -335,6 +337,7 @@ def test_consistency_default_namespace(): tm.assert_frame_equal(df_lxml, df_etree) +@td.skip_if_no("lxml") def test_consistency_prefix_namespace(): df_lxml = read_xml( xml_prefix_nmsp, @@ -442,6 +445,7 @@ def test_elem_and_attrs_only(datapath, parser): read_xml(filename, elems_only=True, attrs_only=True, parser=parser) +@td.skip_if_no("lxml") def test_attribute_centric_xml(): xml = """\ @@ -545,6 +549,7 @@ def test_ascii_encoding(datapath, parser): read_xml(filename, encoding="ascii", parser=parser) +@td.skip_if_no("lxml") def test_parser_consistency_with_encoding(datapath): filename = datapath("io", "data", "xml", "baby_names.xml") df_lxml = read_xml(filename, parser="lxml", 
encoding="ISO-8859-1") From 6c4e0b4c2d2d9524c93b525eeeddd24181eeffb2 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Fri, 5 Feb 2021 15:22:24 -0600 Subject: [PATCH 14/35] Clean up whatsnew rst of rebase issue --- doc/source/whatsnew/v1.3.0.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 8b32bdeb0deea..3187c4dfdb7b9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -41,10 +41,6 @@ See ref:`window.overview` for performance and functional benefits. (:issue:`1509 .. _whatsnew_130.read_to_xml: -We added I/O support to read and render shallow versions of XML documents with -:func:`pandas.read_xml` and :meth:`DataFrame.to_xml`. Using lxml as parser, -full XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) -======= We added I/O support to read and render shallow versions of `XML`_ documents with :func:`pandas.read_xml` and :meth:`DataFrame.to_xml`. Using `lxml`_ as parser, full XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) From a57fd35e75d8150601e2feca351010f809ede674 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Fri, 5 Feb 2021 16:45:02 -0600 Subject: [PATCH 15/35] Fix unescaped emphasis and wording in read_xml docstring --- pandas/io/xml.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index a4e010ee35f23..cb66bd79f4c48 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -674,14 +674,11 @@ def _parse( Raises ------ - ValueError - * If parser is not lxml or etree.e. + ImportError + * If lxml is not installed if selected as parser. - Notes - ----- - This method will raise a warning instead of module not found or - import error if user does not have 1xml and then reverts to - fallback option with etree parser. + ValueError + * If parser is not lxml or etree. 
""" lxml = import_optional_dependency("lxml.etree", errors="ignore") @@ -753,7 +750,7 @@ def read_xml( io : str, path object or file-like object A URL, file-like object, or raw string containing XML. - xpath : str, optional, default './*' + xpath : str, optional, default './\*' The XPath to parse required set of nodes for migration to DataFrame. XPath should return a collection of elements and not a single element. Note: The ``etree`` parser supports limited XPath @@ -766,8 +763,8 @@ def read_xml( namespaces in XML, only the ones used in ``xpath`` expression. Note: if XML document uses default namespace denoted as `xmlns=''` without a prefix, you must assign any temporary - namespace, like 'doc', to URI in order to parse any underlying - nodes. For example, :: + namespace prefix such as 'doc' to the URI in order to parse + underlying nodes and/or attributes. For example, :: namespaces = {"doc": "https://example.com"} @@ -793,12 +790,12 @@ def read_xml( stylesheet : str, path object or file-like object A URL, file-like object, or a raw string containing an XSLT script. - This stylesheet should flatten complex, deeply nested XML documents. - To use this feature you must have ``lxml`` module installed and use - 'lxml' as ``parser``. The ``xpath`` must reference nodes of - transformed XML document generated after XSLT transformation and not - the original XML document. Only XSLT 1.0 scripts and not later - versions is currently supported. + This stylesheet should flatten complex, deeply nested XML documents + for easier parsing. To use this feature you must have ``lxml`` module + installed and specify 'lxml' as ``parser``. The ``xpath`` must + reference nodes of transformed XML document generated after XSLT + transformation and not the original XML document. Only XSLT 1.0 + scripts and not later versions is currently supported. 
Returns ------- From 23439b49beafbbab843396ba4358263d4d8f5150 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Sun, 7 Feb 2021 19:44:14 -0600 Subject: [PATCH 16/35] Add XML section in io.rst and lxml dependency for read_xml in install.rst --- doc/source/getting_started/install.rst | 2 +- doc/source/user_guide/io.rst | 445 +++++++++++++++++++++++++ 2 files changed, 446 insertions(+), 1 deletion(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 49039f05b889a..d49c2698a1ace 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -271,7 +271,7 @@ fsspec 0.7.4 Handling files aside from local and fastparquet 0.4.0 Parquet reading / writing gcsfs 0.6.0 Google Cloud Storage access html5lib 1.0.1 HTML parser for read_html (see :ref:`note `) -lxml 4.3.0 HTML parser for read_html (see :ref:`note `) +lxml 4.3.0 HTML parser for read_html (see :ref:`note `); XML parser for read_xml matplotlib 2.2.3 Visualization numba 0.46.0 Alternative execution engine for rolling operations openpyxl 2.6.0 Reading / writing for xlsx files diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index d6934a3ca2a6c..185432d3c09d4 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -22,6 +22,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like text;Fixed-Width Text File;:ref:`read_fwf` text;`JSON `__;:ref:`read_json`;:ref:`to_json` text;`HTML `__;:ref:`read_html`;:ref:`to_html` + text;`XML `__;:ref:`read_xml`;:ref:`to_xml` text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` binary;`OpenDocument `__;:ref:`read_excel`; @@ -2831,6 +2832,450 @@ parse HTML tables in the top-level pandas io function ``read_html``. +XML +--- + +.. _io.read_xml: + +Reading XML +''''''''''' + +.. 
versionadded:: 1.3.0 + +The top-level :func:`~pandas.io.xml.read_xml` function can accept an XML +string/file/URL and will parse nodes and attributes into a pandas ``DataFrame``. + +.. note:: + + Since there is no standard XML structure where design types can vary in + many ways, ``read_xml`` works best with flatter, shallow versions. If + an XML document is deeply nested, use the ``stylesheet`` feature to + transform XML into a flatter version. + +Let's look at a few examples. + +Read an XML string: + +.. ipython:: python + + xml = """ + + + Everyday Italian + Giada De Laurentiis + 2005 + 30.00 + + + Harry Potter + J K. Rowling + 2005 + 29.99 + + + Learning XML + Erik T. Ray + 2003 + 39.95 + + """ + + df = pd.read_xml(xml) + df + +Read a URL with no options: + +.. ipython:: python + + df = pd.read_xml("https://www.w3schools.com/xml/books.xml") + df + +Read in the content of the "books.xml" file and pass it to ``read_xml`` +as a string: + +.. ipython:: python + :suppress: + + rel_path = os.path.join("..", "pandas", "tests", "io", "data", "xml", + "books.xml") + file_path = os.path.abspath(rel_path) + +.. ipython:: python + + with open(file_path, "r") as f: + df = pd.read_xml(f.read()) + df + +Read in the content of the "books.xml" as instance of ``StringIO`` or +``BytesIO`` and pass it to ``read_xml``: + +.. ipython:: python + + with open(file_path, "r") as f: + sio = StringIO(f.read()) + + df = pd.read_xml(sio) + df + +.. ipython:: python + + with open(file_path, "rb") as f: + bio = BytesIO(f.read()) + + df = pd.read_xml(bio) + df + +With `lxml`_ as default ``parser``, you access the full-featured XML library +that extends Python's ElementTree API. One powerful tool is ability to query +nodes selectively or conditionally with more expressive XPath: + +.. _lxml: https://lxml.de + +.. ipython:: python + + df = pd.read_xml(file_path, xpath="//book[year=2005]") + df + +Specify only elements or only attributes to parse: + +.. 
ipython:: python + + df = pd.read_xml(file_path, elems_only=True) + df + +.. ipython:: python + + df = pd.read_xml(file_path, attrs_only=True) + df + +XML documents can have namespaces with prefixes and default namespaces without +prefixes both of which are denoted with a special attribute ``xmlns``. In order +to parse by node under a namespace context, ``xpath`` must reference a prefix. + +For example, below XML contains a namespace with prefix, ``doc``, and URI at +``https://example.com``. In order to parse ``doc:row`` nodes, +``namespaces`` must be used. + +.. ipython:: python + + xml = """ + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + + """ + + df = pd.read_xml(xml, + xpath="//doc:row", + namespaces={"doc": "https://example.com"}) + df + +Similarly, an XML document can have a default namespace without prefix. Failing +to assign a temporary prefix will return no nodes and raise a ``ValueError``. +But assiging *any* temporary name to correct URI allows parsing by nodes. + +.. ipython:: python + + xml = """ + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + + """ + + df = pd.read_xml(xml, + xpath="//pandas:row", + namespaces={"pandas": "https://example.com"}) + df + +However, if XPath does not reference node names such as default, ``/\*``, then +``namespaces`` is not required. + +With `lxml`_ as parser, you can flatten nested XML documents with an XSLT +script which also can be string/file/URL types. As background, `XSLT`_ is +a special-purpose language written in a special XML file that can transform +original XML documents into other XML, HTML, even text (CSV, JSON, etc.) +using an XSLT processor. + +.. _lxml: https://lxml.de +.. _XSLT: https://www.w3.org/TR/xslt/ + +For example, consider this somewhat nested structure of Chicago "L" Rides +where station and rides elements encapsulate data in their own sections. 
+With below XSLT, ``lxml`` can transform original nested document into a flatter +output (as shown below for demonstration) for easier parse into ``DataFrame``: + +.. ipython:: python + + xml = """ + + + + 2020-09-01T00:00:00 + + 864.2 + 534 + 417.2 + + + + + 2020-09-01T00:00:00 + + 2707.4 + 1909.8 + 1438.6 + + + + + 2020-09-01T00:00:00 + + 2949.6 + 1657 + 1453.8 + + + """ + + xsl = """ + + + + + + + + + + + + + + + """ + + output = """ + + + 40850 + Library + 2020-09-01T00:00:00 + 864.2 + 534 + 417.2 + + + 41700 + Washington/Wabash + 2020-09-01T00:00:00 + 2707.4 + 1909.8 + 1438.6 + + + 40380 + Clark/Lake + 2020-09-01T00:00:00 + 2949.6 + 1657 + 1453.8 + + """ + + df = pd.read_xml(xml, stylesheet=xsl) + df + + +.. _io.xml: + +Writing XML +''''''''''' + +.. versionadded:: 1.3.0 + +``DataFrame`` objects have an instance method ``to_xml`` which renders the +contents of the ``DataFrame`` as an XML document. + +.. note:: + + This method does not support special properties of XML including DTD, + CData, XSD schemas, processing instructions, comments, and others. + Only namespaces at the root level is supported. However, ``stylesheet`` + allows design changes after initial output. + +Let's look at a few examples. + +Write an XML without options: + +.. ipython:: python + + geom_df = pd.DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } + ) + + print(geom_df.to_xml()) + + +Write an XML with new root and row name: + +.. ipython:: python + + print(geom_df.to_xml(root_name="geometry", row_name="objects")) + +Write an attribute-centric XML: + +.. ipython:: python + + print(geom_df.to_xml(attr_cols=geom_df.columns.tolist())) + +Write a mix of elements and attributes: + +.. 
ipython:: python + + print( + geom_df.to_xml( + index=False, + attr_cols=['shape'], + elem_cols=['degrees', 'sides']) + ) + +Any ``DataFrames`` with hierarchical columns will be flattened for XML element names +with levels delimited by underscores: + +.. ipython:: python + + ext_geom_df = pd.DataFrame( + { + "type": ["polygon", "other", "polygon"], + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } + ) + + pvt_df = ext_geom_df.pivot_table(index='shape', + columns='type', + values=['degrees', 'sides'], + aggfunc='sum') + pvt_df + + print(pvt_df.to_xml()) + +Write an XML with default namespace: + +.. ipython:: python + + print(geom_df.to_xml(namespaces={"": "https://example.com"})) + +Write an XML with namespace prefix: + +.. ipython:: python + + print( + geom_df.to_xml(namespaces={"doc": "https://example.com"}, + prefix="doc") + ) + +Write an XML without declaration or pretty print: + +.. ipython:: python + + print( + geom_df.to_xml(xml_declaration=False, + pretty_print=False) + ) + +Write an XML and transform with stylesheet: + +.. ipython:: python + + xsl = """ + + + + + + + + + + + polygon + + + + + + + + """ + + print(geom_df.to_xml(stylesheet=xsl)) + + +XML Final Notes +''''''''''''''' + +* All XML documents adhere to `W3C specifications`_. Both ``etree`` and ``lxml`` + parsers will fail to parse any markup document that is not well-formed or + follows XML syntax rules. Do be aware HTML is not an XML document unless it + follows XHTML specs. However, other popular markup types including KML, XAML, + RSS, MusicML, MathML are compliant `XML schemas`_. + +* For above reason, if your application builds XML prior to pandas operations, + use appropriate DOM libraries like ``etree`` and ``lxml`` to build the necessary + document and not by string concatenation or regex adjustments. Always remember + XML is a *special* and not any text file. 
+ +* With very large XML files (several hundred MBs to GBs), XPath and XSLT + can become memory-intensive operations. Be sure to have enough available + RAM for reading and writing to large XML files (roughly about 5 times the + size of text). + +* Because XSLT is a programming language, use it with caution since such scripts + can pose a security risk in your environment and can run large or infinite + recursive operations. Always test scripts on small fragments before full run. + +* The `etree`_ parser supports all functionality of both ``read_xml`` and + ``to_xml`` except for complex XPath and any XSLT. Though limited in features, + ``etree`` is still a reliable and capable parser and tree builder. Its + performance may trail ``lxml`` to a certain degree for larger files but + relatively unnoticeable on small to medium size files. + +.. _`W3C specifications`: https://www.w3.org/TR/xml/ +.. _`XML schemas`: https://en.wikipedia.org/wiki/List_of_types_of_XML_schemas +.. _`etree`: https://docs.python.org/3/library/xml.etree.elementtree.html + + .. 
_io.excel: From 2effae0a9cfba39fba48b92b152dcd5a8799867d Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 10 Feb 2021 13:12:37 -0600 Subject: [PATCH 17/35] Add section title in whatsnew and tree builder for lxml dependency in install.rst --- doc/source/getting_started/install.rst | 3 ++- doc/source/whatsnew/v1.3.0.rst | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index d49c2698a1ace..aac713f29f16c 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -271,7 +271,8 @@ fsspec 0.7.4 Handling files aside from local and fastparquet 0.4.0 Parquet reading / writing gcsfs 0.6.0 Google Cloud Storage access html5lib 1.0.1 HTML parser for read_html (see :ref:`note `) -lxml 4.3.0 HTML parser for read_html (see :ref:`note `); XML parser for read_xml +lxml 4.3.0 | HTML parser for read_html (see :ref:`note `) + | XML parser for read_xml and tree builder for to_xml matplotlib 2.2.3 Visualization numba 0.46.0 Alternative execution engine for rolling operations openpyxl 2.6.0 Reading / writing for xlsx files diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 77867b331f596..07bcd4b024693 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -41,9 +41,12 @@ See ref:`window.overview` for performance and functional benefits. (:issue:`1509 .. _whatsnew_130.read_to_xml: +Read and write XML documents +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + We added I/O support to read and render shallow versions of `XML`_ documents with :func:`pandas.read_xml` and :meth:`DataFrame.to_xml`. Using `lxml`_ as parser, -full XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) +both XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) .. _XML: https://www.w3.org/standards/xml/core .. _lxml: https://lxml.de @@ -101,7 +104,7 @@ full XPath 1.0 and XSLT 1.0 is available. 
(:issue:`27554`) -For more, see :ref:`io` in the user guide on IO tools. +For more, see :ref:`io.xml` in the user guide on IO tools. .. _whatsnew_130.enhancements.other: From 35fa6a6b4e0e99217d05a1e5786559a8544dd890 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 10 Feb 2021 22:52:11 -0600 Subject: [PATCH 18/35] Clean up merge issue in whatsnew, remove escape in io.rst, adjust exceptions with added tests --- doc/source/user_guide/io.rst | 2 +- doc/source/whatsnew/v1.3.0.rst | 6 ------ pandas/io/formats/xml.py | 9 +++------ pandas/io/xml.py | 7 ++----- pandas/tests/io/formats/test_to_xml.py | 24 +++++++++++++++++++++--- pandas/tests/io/test_xml.py | 11 +++++++++++ 6 files changed, 38 insertions(+), 21 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 185432d3c09d4..28f7b30974e1a 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3012,7 +3012,7 @@ But assiging *any* temporary name to correct URI allows parsing by nodes. namespaces={"pandas": "https://example.com"}) df -However, if XPath does not reference node names such as default, ``/\*``, then +However, if XPath does not reference node names such as default, ``/*``, then ``namespaces`` is not required. With `lxml`_ as parser, you can flatten nested XML documents with an XSLT diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ecf5ca481ab13..edc42ee1552ec 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -33,12 +33,6 @@ For example: storage_options=headers ) -.. _whatsnew_130.window_method_table: - -:class:`Rolling` and :class:`Expanding` now support a ``method`` argument with a -``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. -See ref:`window.overview` for performance and functional benefits. (:issue:`15095`) - .. 
_whatsnew_130.read_to_xml: Read and write XML documents diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index b361748dca819..cd3fa80b66e0f 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -6,7 +6,6 @@ import io from typing import Any, Dict, List, Optional, Union from urllib.error import HTTPError, URLError -from warnings import warn from pandas._typing import FilePathOrBuffer from pandas.errors import AbstractMethodError @@ -252,7 +251,7 @@ def write_output(self) -> Optional[str]: out_str = None - except (UnicodeDecodeError, OSError, FileNotFoundError) as e: + except (OSError, FileNotFoundError) as e: raise e return out_str @@ -299,10 +298,8 @@ def build_tree(self) -> bytes: self.out_xml = self.remove_declaration() if self.stylesheet: - warn( - "To use stylesheet, you need lxml installed. " - "Instead, the non-transformed, original XML is returned.", - UserWarning, + raise ValueError( + "To use stylesheet, you need lxml installed and selected as parser." ) return self.out_xml diff --git a/pandas/io/xml.py b/pandas/io/xml.py index cb66bd79f4c48..cd62b02f7f095 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -6,7 +6,6 @@ import io from typing import Dict, List, Optional, Union from urllib.error import HTTPError, URLError -from warnings import warn from pandas._typing import FilePathOrBuffer from pandas.compat._optional import import_optional_dependency @@ -231,10 +230,8 @@ def __init__(self, *args, **kwargs): def parse_data(self) -> List[Dict[str, Optional[str]]]: if self.stylesheet: - warn( - "To use stylesheet, you need lxml installed. " - "Nodes will be parsed on original XML at the xpath.", - UserWarning, + raise ValueError( + "To use stylesheet, you need lxml installed and selected as parser." 
) self.xml_doc = self._parse_doc() diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 079cc6bf0fa39..5144d13401e73 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -24,15 +24,15 @@ [X] - LookupError("unknown encoding") [X] - KeyError("...is not included in namespaces") [X] - KeyError("no valid column") -[] - OSError (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) +[X] - ValueError("To use stylesheet, you need lxml installed...") +[] - OSError (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) [X] - FileNotFoundError("No such file or directory") lxml [X] - TypeError("...is not a valid type for attr_cols") [X] - TypeError("...is not a valid type for elem_cols") [X] - LookupError("unknown encoding") -[] - UnicodeDecodeError (NEED NON-UTF-8 STYLESHEET) -[] - OSError (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) +[] - OSError (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) [X] - FileNotFoundError("No such file or directory") [X] - KeyError("...is not included in namespaces") [X] - KeyError("no valid column") @@ -1153,6 +1153,24 @@ def test_incorrect_xsl_apply(parser): geom_df.to_xml(path, stylesheet=xsl) +def test_stylesheet_with_etree(datapath): + xsl = """\ + + + + + + + + + """ + + with pytest.raises( + ValueError, match=("To use stylesheet, you need lxml installed") + ): + geom_df.to_xml(parser="etree", stylesheet=xsl) + + @td.skip_if_no("lxml") def test_style_to_csv(): xsl = """\ diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index cd5738d259eb9..ef695cb12fc6a 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -24,6 +24,7 @@ [X] - ValueError("names does not match length of child elements in xpath.") [X] - TypeError("...is not a valid type for names") [X] - ValueError("io is not a url, file, or xml string") +[X] - ValueError("To use stylesheet, you need lxml installed...") [] - URLError (GENERAL ERROR WITH HTTPError AS 
SUBCLASS) [X] - HTTPError("HTTP Error 404: Not Found") [] - OSError (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) @@ -844,6 +845,16 @@ def test_wrong_stylesheet(): read_xml(kml, stylesheet=xsl) +def test_stylesheet_with_etree(datapath): + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + xsl = os.path.join("data", "xml", "flatten_doc.xsl") + + with pytest.raises( + ValueError, match=("To use stylesheet, you need lxml installed") + ): + read_xml(kml, parser="etree", stylesheet=xsl) + + @tm.network @td.skip_if_no("lxml") def test_online_stylesheet(): From 947840a32b154706c532c7a148ec91bd930b6b22 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Mon, 15 Feb 2021 22:32:03 -0600 Subject: [PATCH 19/35] Remove redundant try/except and fix default namespace condition --- pandas/io/formats/xml.py | 53 +++++++------------ pandas/io/xml.py | 110 ++++++++++++++------------------------- 2 files changed, 59 insertions(+), 104 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index cd3fa80b66e0f..ea18ca851e8c7 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -5,7 +5,6 @@ import codecs import io from typing import Any, Dict, List, Optional, Union -from urllib.error import HTTPError, URLError from pandas._typing import FilePathOrBuffer from pandas.errors import AbstractMethodError @@ -148,10 +147,7 @@ def validate_encoding(self) -> None: * If encoding is not available in codecs. 
""" - try: - codecs.lookup(self.encoding) - except LookupError as e: - raise e + codecs.lookup(self.encoding) def process_dataframe(self) -> Dict[Union[int, str], Dict[str, Any]]: """ @@ -244,15 +240,11 @@ def write_output(self) -> Optional[str]: xml_doc = self.build_tree() out_str: Optional[str] = xml_doc.decode(self.encoding).rstrip() - try: - if self.path_or_buffer and isinstance(self.path_or_buffer, str): - with open(self.path_or_buffer, "wb") as f: - f.write(xml_doc) + if self.path_or_buffer and isinstance(self.path_or_buffer, str): + with open(self.path_or_buffer, "wb") as f: + f.write(xml_doc) - out_str = None - - except (OSError, FileNotFoundError) as e: - raise e + out_str = None return out_str @@ -315,7 +307,7 @@ def get_prefix_uri(self) -> str: if self.prefix: try: uri = f"{{{self.namespaces[self.prefix]}}}" - except (KeyError): + except KeyError: raise KeyError(f"{self.prefix} is not included in namespaces") else: uri = f'{{{self.namespaces[""]}}}' @@ -452,7 +444,7 @@ def get_prefix_uri(self) -> str: if self.prefix: try: uri = f"{{{self.namespaces[self.prefix]}}}" - except (KeyError): + except KeyError: raise KeyError(f"{self.prefix} is not included in namespaces") else: uri = f'{{{self.namespaces[""]}}}' @@ -554,7 +546,7 @@ def parse_doc(self): * If io object is not readable as string or file-like object. 
""" - from lxml.etree import XML, XMLParser, XMLSyntaxError, parse + from lxml.etree import XML, XMLParser, parse current_doc = self.convert_io() if current_doc and isinstance(current_doc, str): @@ -562,18 +554,15 @@ def parse_doc(self): else: raise ValueError("stylesheet is not a url, file, or xml string") - try: - curr_parser = XMLParser(encoding=self.encoding) + curr_parser = XMLParser(encoding=self.encoding) - if is_url(current_doc): - with urlopen(current_doc) as f: - r = parse(f, parser=curr_parser) - elif is_xml: - r = XML(bytes(current_doc, encoding=self.encoding)) - else: - r = parse(current_doc, parser=curr_parser) - except (LookupError, URLError, HTTPError, OSError, XMLSyntaxError) as e: - raise e + if is_url(current_doc): + with urlopen(current_doc) as f: + r = parse(f, parser=curr_parser) + elif is_xml: + r = XML(bytes(current_doc, encoding=self.encoding)) + else: + r = parse(current_doc, parser=curr_parser) return r @@ -584,15 +573,11 @@ def transform_doc(self) -> bytes: This method will transform built tree with XSLT script. """ - from lxml.etree import XSLT, XSLTApplyError, XSLTParseError + from lxml.etree import XSLT xsl_doc = self.parse_doc() - try: - transformer = XSLT(xsl_doc) - new_doc = transformer(self.root) - - except (XSLTApplyError, XSLTParseError) as e: - raise e + transformer = XSLT(xsl_doc) + new_doc = transformer(self.root) return bytes(new_doc) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index cd62b02f7f095..9048de3fdb401 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1,11 +1,9 @@ """ :mod:`pandas.io.xml` is a module for reading XML. 
- """ import io from typing import Dict, List, Optional, Union -from urllib.error import HTTPError, URLError from pandas._typing import FilePathOrBuffer from pandas.compat._optional import import_optional_dependency @@ -313,11 +311,9 @@ def _parse_nodes(self) -> List[Dict[str, Optional[str]]]: for el in elems ] - if self.namespaces: - dicts = [ - {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} - for d in dicts - ] + dicts = [ + {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} for d in dicts + ] keys = list(dict.fromkeys([k for d in dicts for k in d.keys()])) dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts] @@ -375,13 +371,7 @@ def _validate_names(self) -> None: ) def _parse_doc(self) -> Union[Element, ElementTree]: - from xml.etree.ElementTree import ( - Element, - ElementTree, - ParseError, - fromstring, - parse, - ) + from xml.etree.ElementTree import Element, ElementTree, fromstring, parse current_doc = self._convert_io(self.io) if current_doc: @@ -393,16 +383,13 @@ def _parse_doc(self) -> Union[Element, ElementTree]: raise ValueError("io is not a url, file, or xml string.") r: Union[Element, ElementTree] - try: - if is_url(current_doc): - with urlopen(current_doc) as f: - r = parse(f) - elif is_xml: - r = fromstring(current_doc) - else: - r = parse(current_doc) - except (URLError, HTTPError, OSError, FileNotFoundError, ParseError) as e: - raise e + if is_url(current_doc): + with urlopen(current_doc) as f: + r = parse(f) + elif is_xml: + r = fromstring(current_doc) + else: + r = parse(current_doc) return r @@ -531,36 +518,29 @@ def _transform_doc(self): am ideally flatter xml document for easier parsing and migration to Data Frame. 
""" - from lxml.etree import XSLT, XSLTApplyError, XSLTParseError + from lxml.etree import XSLT - try: - transformer = XSLT(self.xsl_doc) - new_doc = transformer(self.xml_doc) - except (XSLTApplyError, XSLTParseError) as e: - raise e + transformer = XSLT(self.xsl_doc) + new_doc = transformer(self.xml_doc) return new_doc def _validate_path(self) -> None: - from lxml.etree import XPathEvalError, XPathSyntaxError - try: - elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) - children = self.xml_doc.xpath(self.xpath + "/*", namespaces=self.namespaces) - attrs = self.xml_doc.xpath(self.xpath + "/@*", namespaces=self.namespaces) - - if (elems == [] and attrs == [] and children == []) or ( - elems != [] and attrs == [] and children == [] - ): - raise ValueError( - "xpath does not return any nodes. " - "Be sure row level nodes are in xpath. " - "If document uses namespaces denoted with " - "xmlns, be sure to define namespaces and " - "use them in xpath." - ) - except (XPathEvalError, XPathSyntaxError, TypeError) as e: - raise e + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + children = self.xml_doc.xpath(self.xpath + "/*", namespaces=self.namespaces) + attrs = self.xml_doc.xpath(self.xpath + "/@*", namespaces=self.namespaces) + + if (elems == [] and attrs == [] and children == []) or ( + elems != [] and attrs == [] and children == [] + ): + raise ValueError( + "xpath does not return any nodes. " + "Be sure row level nodes are in xpath. " + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." 
+ ) def _validate_names(self) -> None: """ @@ -590,7 +570,7 @@ def _validate_names(self) -> None: ) def _parse_doc(self): - from lxml.etree import XML, XMLParser, XMLSyntaxError, parse + from lxml.etree import XML, XMLParser, parse self.raw_doc = self.stylesheet if self.is_style else self.io @@ -603,27 +583,17 @@ def _parse_doc(self): else: raise ValueError("io is not a url, file, or xml string.") - try: - curr_parser = XMLParser(encoding=self.encoding) - - if is_url(current_doc): - with urlopen(current_doc) as f: - r = parse(f, parser=curr_parser) - elif is_xml and isinstance(current_doc, str): - r = XML(bytes(current_doc, encoding=self.encoding)) - elif is_xml and isinstance(current_doc, bytes): - r = XML(current_doc) - else: - r = parse(current_doc, parser=curr_parser) - except ( - LookupError, - URLError, - HTTPError, - OSError, - FileNotFoundError, - XMLSyntaxError, - ) as e: - raise e + curr_parser = XMLParser(encoding=self.encoding) + + if is_url(current_doc): + with urlopen(current_doc) as f: + r = parse(f, parser=curr_parser) + elif is_xml and isinstance(current_doc, str): + r = XML(bytes(current_doc, encoding=self.encoding)) + elif is_xml and isinstance(current_doc, bytes): + r = XML(current_doc) + else: + r = parse(current_doc, parser=curr_parser) return r From cb34dde4903639fdb56415e59bfa19f24cc4c1a3 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Sat, 20 Feb 2021 15:44:46 -0600 Subject: [PATCH 20/35] Replace path or buffer handling with get_handle and add compression and storage_options --- doc/source/user_guide/io.rst | 13 +- pandas/core/frame.py | 17 ++ pandas/io/formats/format.py | 17 ++ pandas/io/formats/xml.py | 176 +++++++++++--------- pandas/io/xml.py | 219 ++++++++++++++++--------- pandas/tests/io/data/xml/geom_xml.bz2 | Bin 0 -> 182 bytes pandas/tests/io/data/xml/geom_xml.gz | Bin 0 -> 166 bytes pandas/tests/io/data/xml/geom_xml.xz | Bin 0 -> 200 bytes pandas/tests/io/formats/test_to_xml.py | 120 ++++++++++++++ pandas/tests/io/test_xml.py 
| 156 ++++++++++++++---- 10 files changed, 526 insertions(+), 192 deletions(-) create mode 100644 pandas/tests/io/data/xml/geom_xml.bz2 create mode 100644 pandas/tests/io/data/xml/geom_xml.gz create mode 100644 pandas/tests/io/data/xml/geom_xml.xz diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 28f7b30974e1a..7fad2adeb6c14 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2925,6 +2925,17 @@ Read in the content of the "books.xml" as instance of ``StringIO`` or df = pd.read_xml(bio) df +Even read XML from AWS S3 buckets such as Python Software Foundation's IRS 990 Form: + +.. ipython:: python + + df = pd.read_xml( + "s3://irs-form-990/201923199349319487_public.xml", + xpath=".//irs:Form990PartVIISectionAGrp", + namespaces={"irs": "http://www.irs.gov/efile"} + ) + df + With `lxml`_ as default ``parser``, you access the full-featured XML library that extends Python's ElementTree API. One powerful tool is ability to query nodes selectively or conditionally with more expressive XPath: @@ -3254,7 +3265,7 @@ XML Final Notes * For above reason, if your application builds XML prior to pandas operations, use appropriate DOM libraries like ``etree`` and ``lxml`` to build the necessary document and not by string concatenation or regex adjustments. Always remember - XML is a *special* and not any text file. + XML is a *special* text file with markup rules. * With very large XML files (several hundred MBs to GBs), XPath and XSLT can become memory-intensive operations. 
Be sure to have enough available diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1e257ac199d5a..fd6e1c2c24fae 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2611,6 +2611,8 @@ def to_xml( pretty_print: Optional[bool] = True, parser: Optional[str] = "lxml", stylesheet: Optional[FilePathOrBuffer[str]] = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, ) -> Optional[str]: """ Render a DataFrame to an XML document. @@ -2668,6 +2670,19 @@ def to_xml( layout of elements and attributes from original output. This argument requires ``lxml`` to be installed. Only XSLT 1.0 scripts and not later versions is currently supported. + compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, zip or xz if path_or_buffer is a string ending in + '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression + otherwise. If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. host, port, username, password, etc., if using a URL that will be + parsed by fsspec, e.g., starting “s3://”, “gcs://”. An error will be + raised if providing this argument with a non-fsspec URL. See the fsspec + and backend storage implementation docs for the set of allowed keys and + values. 
Returns ------- @@ -2764,6 +2779,8 @@ def to_xml( pretty_print=pretty_print, parser=parser, stylesheet=stylesheet, + compression=compression, + storage_options=storage_options, ) # ---------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6a2852123dcba..1e2e1be8a40d2 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1021,6 +1021,8 @@ def to_xml( pretty_print: Optional[bool] = True, parser: Optional[str] = "lxml", stylesheet: Optional[FilePathOrBuffer[str]] = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, ) -> Optional[str]: """ Render a DataFrame to an XML document. @@ -1078,6 +1080,19 @@ def to_xml( layout of elements and attributes from original output. This argument requires ``lxml`` to be installed. Only XSLT 1.0 scripts and not later versions is currently supported. + compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, zip or xz if path_or_buffer is a string ending in + '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression + otherwise. If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. host, port, username, password, etc., if using a URL that will be + parsed by fsspec, e.g., starting “s3://”, “gcs://”. An error will be + raised if providing this argument with a non-fsspec URL. See the fsspec + and backend storage implementation docs for the set of allowed keys and + values. 
""" from pandas.io.formats.xml import EtreeXMLFormatter, LxmlXMLFormatter @@ -1115,6 +1130,8 @@ def to_xml( xml_declaration=xml_declaration, pretty_print=pretty_print, stylesheet=stylesheet, + compression=compression, + storage_options=storage_options, ) return xml_formatter.write_output() diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index ea18ca851e8c7..8defe95ac6d8b 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -6,12 +6,12 @@ import io from typing import Any, Dict, List, Optional, Union -from pandas._typing import FilePathOrBuffer +from pandas._typing import CompressionOptions, FilePathOrBuffer, StorageOptions from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_list_like -from pandas.io.common import is_url, urlopen +from pandas.io.common import get_handle, is_url, urlopen from pandas.io.formats.format import DataFrameFormatter @@ -62,6 +62,14 @@ class BaseXMLFormatter: stylesheet : str or file-like A URL, file, file-like object, or a raw string containing XSLT. + compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + Compression type for on-the-fly decompression of on-disk data. + If 'infer', then use extension for gzip, bz2, zip or xz. + + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. 
host, port, username, password, etc., + See also -------- pandas.io.formats.xml.EtreeXMLFormatter @@ -85,6 +93,8 @@ def __init__( xml_declaration: Optional[bool] = True, pretty_print: Optional[bool] = True, stylesheet: Optional[FilePathOrBuffer[str]] = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, ) -> None: self.fmt = formatter self.path_or_buffer = path_or_buffer @@ -100,8 +110,10 @@ def __init__( self.xml_declaration = xml_declaration self.pretty_print = pretty_print self.stylesheet = stylesheet - self.frame = self.fmt.frame + self.compression = compression + self.storage_options = storage_options + self.frame = self.fmt.frame self.orig_cols = self.fmt.frame.columns.tolist() self.frame_dicts = self.process_dataframe() @@ -238,15 +250,23 @@ def build_elems(self) -> None: def write_output(self) -> Optional[str]: xml_doc = self.build_tree() - out_str: Optional[str] = xml_doc.decode(self.encoding).rstrip() - if self.path_or_buffer and isinstance(self.path_or_buffer, str): - with open(self.path_or_buffer, "wb") as f: - f.write(xml_doc) + out_str: Optional[str] - out_str = None + if self.path_or_buffer is not None: + # apply compression and byte/text conversion + with get_handle( + self.path_or_buffer, + "wb", + compression=self.compression, + storage_options=self.storage_options, + is_text=False, + ) as handles: + handles.handle.write(xml_doc) # type: ignore[arg-type] + return None - return out_str + else: + return xml_doc.decode(self.encoding).rstrip() class EtreeXMLFormatter(BaseXMLFormatter): @@ -277,6 +297,7 @@ def build_tree(self) -> bytes: if not self.attr_cols and not self.elem_cols: self.elem_cols = list(self.frame_dicts[0].keys()) self.build_elems() + else: self.build_attribs() self.build_elems() @@ -315,42 +336,46 @@ def get_prefix_uri(self) -> str: return uri def build_attribs(self) -> None: - if self.attr_cols: - for col in self.attr_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - 
"".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - attr_name = f"{self.prefix_uri}{flat_col}" - try: - if self.d[col] is not None: - self.elem_row.attrib[attr_name] = str(self.d[col]) - except KeyError: - raise KeyError(f"no valid column, {col}") + if not self.attr_cols: + return + + for col in self.attr_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + attr_name = f"{self.prefix_uri}{flat_col}" + try: + if self.d[col] is not None: + self.elem_row.attrib[attr_name] = str(self.d[col]) + except KeyError: + raise KeyError(f"no valid column, {col}") def build_elems(self) -> None: from xml.etree.ElementTree import SubElement - if self.elem_cols: - for col in self.elem_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - "".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - elem_name = f"{self.prefix_uri}{flat_col}" - try: - val = None if self.d[col] in [None, ""] else str(self.d[col]) - SubElement(self.elem_row, elem_name).text = val - except KeyError: - raise KeyError(f"no valid column, {col}") + if not self.elem_cols: + return + + for col in self.elem_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + elem_name = f"{self.prefix_uri}{flat_col}" + try: + val = None if self.d[col] in [None, ""] else str(self.d[col]) + SubElement(self.elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") def prettify_tree(self) -> bytes: """ @@ -410,6 +435,7 @@ def build_tree(self) -> bytes: if not self.attr_cols and not self.elem_cols: self.elem_cols = list(self.frame_dicts[0].keys()) self.build_elems() + else: self.build_attribs() self.build_elems() @@ -452,42 +478,46 @@ def 
get_prefix_uri(self) -> str: return uri def build_attribs(self) -> None: - if self.attr_cols: - for col in self.attr_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - "".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - attr_name = f"{self.prefix_uri}{flat_col}" - try: - if self.d[col] is not None: - self.elem_row.attrib[attr_name] = self.d[col] - except KeyError: - raise KeyError(f"no valid column, {col}") + if not self.attr_cols: + return + + for col in self.attr_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + attr_name = f"{self.prefix_uri}{flat_col}" + try: + if self.d[col] is not None: + self.elem_row.attrib[attr_name] = self.d[col] + except KeyError: + raise KeyError(f"no valid column, {col}") def build_elems(self) -> None: from lxml.etree import SubElement - if self.elem_cols: - for col in self.elem_cols: - flat_col = col - if isinstance(col, tuple): - flat_col = ( - "".join(str(c) for c in col).strip() - if "" in col - else "_".join(str(c) for c in col).strip() - ) - - elem_name = f"{self.prefix_uri}{flat_col}" - try: - val = None if self.d[col] in [None, ""] else str(self.d[col]) - SubElement(self.elem_row, elem_name).text = val - except KeyError: - raise KeyError(f"no valid column, {col}") + if not self.elem_cols: + return + + for col in self.elem_cols: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join(str(c) for c in col).strip() + if "" in col + else "_".join(str(c) for c in col).strip() + ) + + elem_name = f"{self.prefix_uri}{flat_col}" + try: + val = None if self.d[col] in [None, ""] else str(self.d[col]) + SubElement(self.elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") def convert_io(self) -> Union[bytes, str, None]: """ diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 
9048de3fdb401..c1ee926775e9c 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -5,7 +5,7 @@ import io from typing import Dict, List, Optional, Union -from pandas._typing import FilePathOrBuffer +from pandas._typing import CompressionOptions, FilePathOrBuffer, StorageOptions from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError, ParserError @@ -13,7 +13,13 @@ from pandas.core.frame import DataFrame -from pandas.io.common import is_url, stringify_path, urlopen +from pandas.io.common import ( + file_exists, + get_handle, + is_fsspec_url, + is_url, + stringify_path, +) from pandas.io.parsers import TextParser @@ -23,9 +29,9 @@ class _XMLFrameParser: Parameters ---------- - io : str or file-like - This can be either a string of raw XML, a valid URL, - file or file-like object. + path_or_buffer : a valid JSON str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. xpath : str or regex The XPath expression to parse required set of nodes for @@ -51,6 +57,14 @@ class _XMLFrameParser: URL, file, file-like object, or a raw string containing XSLT, `etree` does not support XSLT but retained for consistency. + compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + Compression type for on-the-fly decompression of on-disk data. + If 'infer', then use extension for gzip, bz2, zip or xz. + + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. 
host, port, username, password, etc., + See also -------- pandas.io.xml._EtreeFrameParser @@ -72,7 +86,7 @@ class _XMLFrameParser: def __init__( self, - io, + path_or_buffer, xpath, namespaces, elems_only, @@ -80,8 +94,10 @@ def __init__( names, encoding, stylesheet, + compression, + storage_options, ): - self.io = io + self.path_or_buffer = path_or_buffer self.xpath = xpath self.namespaces = namespaces self.elems_only = elems_only @@ -90,6 +106,8 @@ def __init__( self.encoding = encoding self.stylesheet = stylesheet self.is_style = None + self.compression = compression + self.storage_options = storage_options def parse_data(self) -> List[Dict[str, Optional[str]]]: """ @@ -154,37 +172,54 @@ def _validate_names(self) -> None: """ raise AbstractMethodError(self) - def _convert_io(self, xml_data) -> Union[str, bytes, None]: + def _preprocess_data(self, data): """ - Convert io object to string. + Convert extracted raw data. - This method will convert io object into a string or keep - as string, depending on object type. + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. """ + if hasattr(data, "read"): + data = data.read() - obj: Union[bytes, str, None] = None + if not hasattr(data, "read") and isinstance(data, str): + data = io.StringIO(data) - if isinstance(xml_data, str): - obj = xml_data + if not hasattr(data, "read") and isinstance(data, bytes): + data = io.BytesIO(data) - elif isinstance(xml_data, bytes): - obj = xml_data.decode(self.encoding) + return data - elif isinstance(xml_data, io.StringIO): - obj = xml_data.getvalue() - - elif isinstance(xml_data, io.BytesIO): - obj = xml_data.getvalue().decode(self.encoding) + def _get_data_from_filepath(self, filepath_or_buffer): + """ + Extract raw XML data. 
- elif isinstance(xml_data, io.TextIOWrapper): - obj = xml_data.read() + The method accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. XML bytes - elif isinstance(xml_data, io.BufferedReader): - obj = xml_data.read().decode(self.encoding) - else: - obj = None + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. + """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + if ( + not isinstance(filepath_or_buffer, str) + or is_url(filepath_or_buffer) + or is_fsspec_url(filepath_or_buffer) + or file_exists(filepath_or_buffer) + ): + self.handles = get_handle( + filepath_or_buffer, + "r", + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) + filepath_or_buffer = self.handles.handle - return obj + return filepath_or_buffer def _parse_doc(self): """ @@ -371,25 +406,33 @@ def _validate_names(self) -> None: ) def _parse_doc(self) -> Union[Element, ElementTree]: - from xml.etree.ElementTree import Element, ElementTree, fromstring, parse - - current_doc = self._convert_io(self.io) - if current_doc: - if isinstance(current_doc, str): - is_xml = current_doc.startswith((" None: def _parse_doc(self): from lxml.etree import XML, XMLParser, parse - self.raw_doc = self.stylesheet if self.is_style else self.io + self.raw_doc = self.stylesheet if self.is_style else self.path_or_buffer - current_doc = self._convert_io(self.raw_doc) - if current_doc: - if isinstance(current_doc, str): - is_xml = current_doc.startswith((" DataFrame: """ @@ -654,7 +696,7 @@ def _parse( if parser == "lxml": if lxml is not None: p = _LxmlFrameParser( - io, + path_or_buffer, xpath, namespaces, elems_only, @@ -662,24 +704,15 @@ def _parse( names, encoding, stylesheet, + compression, + storage_options, ) else: raise ImportError("lxml not found, please install or use the etree parser.") - p = _EtreeFrameParser( - io, - xpath, 
- namespaces, - elems_only, - attrs_only, - names, - encoding, - stylesheet, - ) - elif parser == "etree": p = _EtreeFrameParser( - io, + path_or_buffer, xpath, namespaces, elems_only, @@ -687,6 +720,8 @@ def _parse( names, encoding, stylesheet, + compression, + storage_options, ) else: raise ValueError("Values for parser can only be lxml or etree.") @@ -697,7 +732,7 @@ def _parse( def read_xml( - io: FilePathOrBuffer, + path_or_buffer: FilePathOrBuffer, xpath: Optional[str] = "./*", namespaces: Optional[Union[dict, List[dict]]] = None, elems_only: Optional[bool] = False, @@ -706,6 +741,8 @@ def read_xml( encoding: Optional[str] = "utf-8", parser: Optional[str] = "lxml", stylesheet: Optional[FilePathOrBuffer[str]] = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, ) -> DataFrame: r""" Read XML document into a ``DataFrame`` object. @@ -714,8 +751,9 @@ def read_xml( Parameters ---------- - io : str, path object or file-like object - A URL, file-like object, or raw string containing XML. + path_or_buffer : str, path object, or file-like object + Any valid XML string or path is acceptable. The string could be a URL. + Valid URL schemes include http, ftp, s3, and file. xpath : str, optional, default './\*' The XPath to parse required set of nodes for migration to DataFrame. @@ -764,6 +802,21 @@ def read_xml( transformation and not the original XML document. Only XSLT 1.0 scripts and not later versions is currently supported. + compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, zip or xz if path_or_buffer is a string ending in + '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression + otherwise. If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. 
+ + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. host, port, username, password, etc., if using a URL that will be + parsed by fsspec, e.g., starting “s3://”, “gcs://”. An error will be + raised if providing this argument with a non-fsspec URL. See the fsspec + and backend storage implementation docs for the set of allowed keys and + values. + Returns ------- df @@ -877,10 +930,10 @@ def read_xml( 2 triangle 180 3.0 """ - io = stringify_path(io) + path_or_buffer = stringify_path(path_or_buffer) return _parse( - io=io, + path_or_buffer=path_or_buffer, xpath=xpath, namespaces=namespaces, elems_only=elems_only, @@ -889,4 +942,6 @@ def read_xml( encoding=encoding, parser=parser, stylesheet=stylesheet, + compression=compression, + storage_options=storage_options, ) diff --git a/pandas/tests/io/data/xml/geom_xml.bz2 b/pandas/tests/io/data/xml/geom_xml.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..eef4b3597e6eb38b6c0b2c1ba23bcdd27336f60b GIT binary patch literal 182 zcmV;n07?HsT4*^jL0KkKSxE8zK>z?hS%5$gK!fc^ujl{3FaWs27)(tD2*fflm^38O zkTe=J01X6+B*HX#5Yr)`#6bjv4IxD&#YR?6PYyDd%fU|j91CQMi?`2`n(UxNy-=9E z=HGx&{|Fj kK$@grDs3x?pb?S)V8bt%JtGIW7GI0GBAh5lBzXTIfOhvx$^ZZW literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/xml/geom_xml.gz b/pandas/tests/io/data/xml/geom_xml.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cd899f91024fdebe12aee22d6b7574c998833c0 GIT binary patch literal 166 zcmV;X09pSZiwFn}q%dFt|7T@yZC`k8Yyg#z!4AS85Jd0&3T~V%ZE8#plzz(wRwSvQ z3spZ~D)po_@w79U-IvMA@o}Em4Jo?d2j12VXXte>&f9^{>A36ps48h7fvOmj6z04%^yb^rhX literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/xml/geom_xml.xz b/pandas/tests/io/data/xml/geom_xml.xz new file mode 100644 index 0000000000000000000000000000000000000000..7ef61d2925b05c77b390ca432160a542e8e65bec GIT binary patch literal 200 zcmV;(05|{rH+ooF000E$*0e?f03iVu0001VFXf})0Ym_ZT>u^r%ZCxz&SsGhgC5HL 
zdo9`@l$!Nks(G>G~_q!H$g@O_5n)ug+ru2jawQ+!V5g~{~QxIWgJo-WG)vtUM ziz`pe{lmqHf$N|3oaLKM0JX-1QM|LUl8}?}eQ){EXNG??L8FjQ!mN&^unnSyvi1gb z2vzZ0XY>v!&=~lufy!lc@BjekzSlrkJ^XF}0i*%N0ssKdWUoH4#Ao{g000001X)^s Cj$QTu literal 0 HcmV?d00001 diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 5144d13401e73..f5392442c7dad 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -1274,3 +1274,123 @@ def test_style_to_json(): out_xml = geom_df.to_xml(stylesheet=xsl) assert out_json == out_xml + + +# COMPRESSION + +geom_xml = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +def test_bz2_output(parser): + import bz2 + + with tm.ensure_clean() as path: + geom_df.to_xml(path, compression="bz2") + + with bz2.BZ2File(path, "rb") as fp: + output = fp.read() + + # etree and lxml differs on quotes and case in xml declaration + output = output.decode("utf-8").replace( + ' @@ -211,18 +217,27 @@ def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) -def test_wrong_io_object(parser): - with pytest.raises(ValueError, match=("io is not a url, file, or xml string")): - read_xml(DataFrame, parser=parser) +@td.skip_if_no("lxml") +def test_wrong_file_path_lxml(datapath): + from lxml.etree import XMLSyntaxError + + with pytest.raises( + XMLSyntaxError, + match=("Start tag expected, '<' not found"), + ): + filename = os.path.join("data", "html", "books.xml") + read_xml(filename, parser="lxml") + +def test_wrong_file_path_etree(datapath): + from xml.etree.ElementTree import ParseError -def test_wrong_file_path(datapath, parser): with pytest.raises( - (OSError, FileNotFoundError), - match=("failed to load external entity|No such file or directory|没有那个文件或目录"), + ParseError, + match=("not well-formed"), ): filename = os.path.join("data", "html", "books.xml") - read_xml(filename, parser=parser) + 
read_xml(filename, parser="etree") @tm.network @@ -513,48 +528,35 @@ def test_names_option_wrong_type(datapath, parser): # ENCODING -@td.skip_if_no("lxml") -def test_wrong_encoding_lxml(datapath): - from lxml.etree import XMLSyntaxError - - filename = datapath("io", "data", "xml", "baby_names.xml") - with pytest.raises(XMLSyntaxError, match=("Input is not proper UTF-8")): - read_xml(filename) - - -@td.skip_if_no("lxml") -def test_utf16_encoding_lxml(datapath): - from lxml.etree import XMLSyntaxError - +def test_wrong_encoding(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") - with pytest.raises(XMLSyntaxError, match=("Start tag expected, '<' not found")): - read_xml(filename, encoding="UTF-16") + with pytest.raises(UnicodeDecodeError, match=("'utf-8' codec can't decode")): + read_xml(filename, parser=parser) -@td.skip_if_no("lxml") -def test_unknown_encoding_lxml(datapath): +def test_utf16_encoding(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") - with pytest.raises(LookupError, match=("unknown encoding")): - read_xml(filename, encoding="UFT-8") + with pytest.raises(UnicodeError, match=("UTF-16 stream does not start with BOM")): + read_xml(filename, encoding="UTF-16", parser=parser) -# etree raises no error on wrong, utf-16, or unknown encoding -@pytest.mark.parametrize("encoding", [None, "UTF-16", "UFT-8"]) -def test_wrong_encoding_etree(datapath, encoding): +def test_unknown_encoding(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") - read_xml(filename, parser="etree", encoding=encoding) + with pytest.raises(LookupError, match=("unknown encoding: uft-8")): + read_xml(filename, encoding="UFT-8", parser=parser) def test_ascii_encoding(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") - read_xml(filename, encoding="ascii", parser=parser) + with pytest.raises(UnicodeDecodeError, match=("'ascii' codec can't decode byte")): + read_xml(filename, 
encoding="ascii", parser=parser) @td.skip_if_no("lxml") def test_parser_consistency_with_encoding(datapath): filename = datapath("io", "data", "xml", "baby_names.xml") df_lxml = read_xml(filename, parser="lxml", encoding="ISO-8859-1") - df_etree = read_xml(filename, parser="etree") + df_etree = read_xml(filename, parser="etree", encoding="iso-8859-1") tm.assert_frame_equal(df_lxml, df_etree) @@ -835,12 +837,14 @@ def test_incorrect_xsl_apply(datapath): @td.skip_if_no("lxml") def test_wrong_stylesheet(): + from lxml.etree import XMLSyntaxError + kml = os.path.join("data", "xml", "cta_rail_lines.kml") xsl = os.path.join("data", "xml", "flatten.xsl") with pytest.raises( - (OSError, FileNotFoundError), - match=("failed to load external entity|No such file or directory|没有那个文件或目录"), + XMLSyntaxError, + match=("Start tag expected, '<' not found"), ): read_xml(kml, stylesheet=xsl) @@ -888,3 +892,83 @@ def test_online_stylesheet(): ) tm.assert_frame_equal(df_expected, df_xsl) + + +# COMPRESSION + + +@pytest.mark.parametrize( + "compfile", ["geom_xml.bz2", "geom_xml.gzz", "geom_xml.xz", "geom_xml.zip"] +) +def test_compression_read(datapath, parser, compfile): + filename = datapath("io", "data", "xml", "geom_xml.bz2") + xml_df = read_xml(filename, parser=parser) + + tm.assert_frame_equal(xml_df, geom_df) + + +def test_wrong_compression_bz2(datapath, parser): + filename = datapath("io", "data", "xml", "geom_xml.zip") + + with pytest.raises(OSError, match="Invalid data stream"): + read_xml(filename, parser=parser, compression="bz2") + + +def test_wrong_compression_gz(datapath, parser): + filename = datapath("io", "data", "xml", "geom_xml.zip") + + with pytest.raises(OSError, match="Not a gzipped file"): + read_xml(filename, parser=parser, compression="gzip") + + +def test_wrong_compression_xz(datapath, parser): + from lzma import LZMAError + + filename = datapath("io", "data", "xml", "geom_xml.bz2") + + with pytest.raises(LZMAError, match="Input format not supported by 
decoder"): + read_xml(filename, parser=parser, compression="xz") + + +def test_wrong_compression_zip(datapath, parser): + from zipfile import BadZipFile + + filename = datapath("io", "data", "xml", "geom_xml.gz") + + with pytest.raises(BadZipFile, match="File is not a zip file"): + read_xml(filename, parser=parser, compression="zip") + + +def test_unsuported_compression(datapath, parser): + with pytest.raises(ValueError, match="Unrecognized compression type"): + with tm.ensure_clean() as path: + read_xml(path, compression="7z") + + +# STORAGE OPTIONS + + +@tm.network +@td.skip_if_no("s3fs") +@td.skip_if_no("lxml") +def test_s3_parser_consistency(): + # Python Software Foundation (2019 IRS-990 FORM) + s3 = "s3://irs-form-990/201923199349319487_public.xml" + + df_lxml = read_xml( + s3, + xpath=".//irs:Form990PartVIISectionAGrp", + namespaces={"irs": "http://www.irs.gov/efile"}, + parser="lxml", + storage_options={"anon": True}, + ) + + df_etree = read_xml( + s3, + xpath=".//irs:Form990PartVIISectionAGrp", + namespaces={"irs": "http://www.irs.gov/efile"}, + parser="etree", + storage_options={"anon": True}, + ) + + tm.assert_frame_equal(df_lxml, df_etree) From a7716b8b91d670a0b1c533d20d1795db906474d0 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Sat, 20 Feb 2021 20:43:14 -0600 Subject: [PATCH 21/35] Fix issues in tests from other Python envs --- pandas/io/formats/xml.py | 40 ++++++++-- pandas/io/xml.py | 29 +++++-- pandas/tests/io/formats/test_to_xml.py | 69 ++++++++-------- pandas/tests/io/test_xml.py | 105 +++++++++++++++---------- 4 files changed, 158 insertions(+), 85 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 8defe95ac6d8b..a15c0c6ce32ff 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -4,14 +4,28 @@ import codecs import io -from typing import Any, Dict, List, Optional, Union - -from pandas._typing import CompressionOptions, FilePathOrBuffer, StorageOptions +from typing import ( + Any, + Dict, + 
List, + Optional, + Union, +) + +from pandas._typing import ( + CompressionOptions, + FilePathOrBuffer, + StorageOptions, +) from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_list_like -from pandas.io.common import get_handle, is_url, urlopen +from pandas.io.common import ( + get_handle, + is_url, + urlopen, +) from pandas.io.formats.format import DataFrameFormatter @@ -284,7 +298,11 @@ def __init__(self, *args, **kwargs): self.prefix_uri = self.get_prefix_uri() def build_tree(self) -> bytes: - from xml.etree.ElementTree import Element, SubElement, tostring + from xml.etree.ElementTree import ( + Element, + SubElement, + tostring, + ) self.root = Element( f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() @@ -424,7 +442,11 @@ def build_tree(self) -> bytes: This method initializes the root and builds attributes and elements with optional namespaces. """ - from lxml.etree import Element, SubElement, tostring + from lxml.etree import ( + Element, + SubElement, + tostring, + ) self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces) @@ -576,7 +598,11 @@ def parse_doc(self): * If io object is not readable as string or file-like object. 
""" - from lxml.etree import XML, XMLParser, parse + from lxml.etree import ( + XML, + XMLParser, + parse, + ) current_doc = self.convert_io() if current_doc and isinstance(current_doc, str): diff --git a/pandas/io/xml.py b/pandas/io/xml.py index c1ee926775e9c..3355783a8aa1d 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -3,11 +3,23 @@ """ import io -from typing import Dict, List, Optional, Union +from typing import ( + Dict, + List, + Optional, + Union, +) -from pandas._typing import CompressionOptions, FilePathOrBuffer, StorageOptions +from pandas._typing import ( + CompressionOptions, + FilePathOrBuffer, + StorageOptions, +) from pandas.compat._optional import import_optional_dependency -from pandas.errors import AbstractMethodError, ParserError +from pandas.errors import ( + AbstractMethodError, + ParserError, +) from pandas.core.dtypes.common import is_list_like @@ -255,7 +267,10 @@ class _EtreeFrameParser(_XMLFrameParser): standard library XML module: `xml.etree.ElementTree`. """ - from xml.etree.ElementTree import Element, ElementTree + from xml.etree.ElementTree import ( + Element, + ElementTree, + ) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -613,7 +628,11 @@ def _validate_names(self) -> None: ) def _parse_doc(self): - from lxml.etree import XML, XMLParser, parse + from lxml.etree import ( + XML, + XMLParser, + parse, + ) self.raw_doc = self.stylesheet if self.is_style else self.path_or_buffer diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index f5392442c7dad..d978d26429542 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -1,4 +1,7 @@ -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import os import sys @@ -15,35 +18,37 @@ """ CHECKLIST -[x] - ValueError("Values for parser can only be lxml or etree.") +[x] - ValueError: "Values for parser can only be lxml or etree." 
etree -[x] - ImportError("lxml not found, please install or use the etree parser.") -[X] - TypeError("...is not a valid type for attr_cols") -[X] - TypeError("...is not a valid type for elem_cols") -[X] - LookupError("unknown encoding") -[X] - KeyError("...is not included in namespaces") -[X] - KeyError("no valid column") -[X] - ValueError("To use stylesheet, you need lxml installed...") -[] - OSError (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) -[X] - FileNotFoundError("No such file or directory") +[x] - ImportError: "lxml not found, please install or use the etree parser." +[X] - TypeError: "...is not a valid type for attr_cols" +[X] - TypeError: "...is not a valid type for elem_cols" +[X] - LookupError: "unknown encoding" +[X] - KeyError: "...is not included in namespaces" +[X] - KeyError: "no valid column" +[X] - ValueError: "To use stylesheet, you need lxml installed..." +[] - OSError: (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) +[X] - FileNotFoundError: "No such file or directory" +[X] - PermissionError: "Forbidden" lxml -[X] - TypeError("...is not a valid type for attr_cols") -[X] - TypeError("...is not a valid type for elem_cols") -[X] - LookupError("unknown encoding") -[] - OSError (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) -[X] - FileNotFoundError("No such file or directory") -[X] - KeyError("...is not included in namespaces") -[X] - KeyError("no valid column") -[X] - ValueError("stylesheet is not a url, file, or xml string.") -[] - LookupError (NEED WRONG ENCODING FOR FILE OUTPUT) -[] - URLError (USUALLY DUE TO NETWORKING) -[] - HTTPError (NEED AN ONLINE STYLESHEET) -[X] - OSError("failed to load external entity") -[X] - XMLSyntaxError("Opening and ending tag mismatch") -[X] - XSLTApplyError("Cannot resolve URI") -[X] - XSLTParseError("failed to compile") +[X] - TypeError: "...is not a valid type for attr_cols" +[X] - TypeError: "...is not a valid type for elem_cols" +[X] - LookupError: "unknown encoding" +[] - OSError: (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) 
+[X] - FileNotFoundError: "No such file or directory" +[X] - KeyError: "...is not included in namespaces" +[X] - KeyError: "no valid column" +[X] - ValueError: "stylesheet is not a url, file, or xml string." +[] - LookupError: (NEED WRONG ENCODING FOR FILE OUTPUT) +[] - URLError: (USUALLY DUE TO NETWORKING) +[] - HTTPError: (NEED AN ONLINE STYLESHEET) +[X] - OSError: "failed to load external entity" +[X] - XMLSyntaxError: "Opening and ending tag mismatch" +[X] - XSLTApplyError: "Cannot resolve URI" +[X] - XSLTParseError: "failed to compile" +[X] - PermissionError: "Forbidden" """ geom_df = DataFrame( @@ -1306,7 +1311,7 @@ def test_bz2_output(parser): import bz2 with tm.ensure_clean() as path: - geom_df.to_xml(path, compression="bz2") + geom_df.to_xml(path, parser=parser, compression="bz2") with bz2.BZ2File(path, "rb") as fp: output = fp.read() @@ -1324,7 +1329,7 @@ def test_gz_output(parser): import gzip with tm.ensure_clean() as path: - geom_df.to_xml(path, compression="gzip") + geom_df.to_xml(path, parser=parser, compression="gzip") with gzip.open(path, "rb") as fp: output = fp.read() @@ -1342,7 +1347,7 @@ def test_xz_output(parser): import lzma with tm.ensure_clean() as path: - geom_df.to_xml(path, compression="xz") + geom_df.to_xml(path, parser=parser, compression="xz") with lzma.open(path, "rb") as fp: output = fp.read() @@ -1360,7 +1365,7 @@ def test_zip_output(parser): import zipfile with tm.ensure_clean() as path: - geom_df.to_xml(path, compression="zip") + geom_df.to_xml(path, parser=parser, compression="zip") with zipfile.ZipFile(path, "r") as fp: output = fp.read(fp.infolist()[0]) @@ -1377,7 +1382,7 @@ def test_zip_output(parser): def test_unsuported_compression(datapath, parser): with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean() as path: - geom_df.to_xml(path, compression="7z") + geom_df.to_xml(path, parser=parser, compression="7z") # STORAGE OPTIONS @@ -1393,4 +1398,4 @@ def test_s3_permission_output(parser): 
fs = s3fs.S3FileSystem(anon=True) fs.ls("pandas-test") - geom_df.to_xml("s3://pandas-test/geom.xml", compression="zip") + geom_df.to_xml("s3://pandas-test/geom.xml", compression="zip", parser=parser) diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 6f1f7579616e0..927999efbe6ae 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -1,4 +1,7 @@ -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import os from urllib.error import HTTPError @@ -15,39 +18,53 @@ """ CHECK LIST -[x] - ValueError("Values for parser can only be lxml or etree.") +[x] - ValueError: "Values for parser can only be lxml or etree." etree -[x] - ImportError("lxml not found, please install or use the etree parser.") -[X] - ValueError("Either element or attributes can be parsed not both.") -[X] - ValueError("xpath does not return any nodes...") -[X] - SyntaxError("You have used an incorrect or unsupported XPath") -[X] - ValueError("names does not match length of child elements in xpath.") -[X] - TypeError("...is not a valid type for names") -[X] - ValueError("To use stylesheet, you need lxml installed...") -[] - URLError (GENERAL ERROR WITH HTTPError AS SUBCLASS) -[X] - HTTPError("HTTP Error 404: Not Found") -[] - OSError (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) -[X] - FileNotFoundError("No such file or directory") +[x] - ImportError: "lxml not found, please install or use the etree parser." +[X] - ValueError: "Either element or attributes can be parsed not both." +[X] - ValueError: "xpath does not return any nodes..." +[X] - SyntaxError: "You have used an incorrect or unsupported XPath" +[X] - ValueError: "names does not match length of child elements in xpath." +[X] - TypeError: "...is not a valid type for names" +[X] - ValueError: "To use stylesheet, you need lxml installed..." 
+[] - URLError: (GENERAL ERROR WITH HTTPError AS SUBCLASS) +[X] - HTTPError: "HTTP Error 404: Not Found" +[] - OSError: (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) +[X] - FileNotFoundError: "No such file or directory" [] - ParseError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) +[X] - UnicodeDecodeError: "'utf-8' codec can't decode byte 0xe9..." +[X] - UnicodeError: "UTF-16 stream does not start with BOM" +[X] - BadZipFile: "File is not a zip file" +[X] - OSError: "Invalid data stream" +[X] - LZMAError: "Input format not supported by decoder" +[X] - ValueError: "Unrecognized compression type" +[X] - PermissionError: "Forbidden" lxml -[X] - ValueError("Either element or attributes can be parsed not both.") -[X] - XSLTApplyError("Cannot resolve URI") -[X] - XSLTParseError("document is not a stylesheet") -[X] - ValueError("xpath does not return any nodes.") -[X] - XPathEvalError("Invalid expression") -[] - XPathSyntaxError (OLD VERSION IN lxml FOR XPATH ERRORS) -[X] - TypeError("empty namespace prefix is not supported in XPath") -[X] - ValueError("names does not match length of child elements in xpath.") -[X] - TypeError("...is not a valid type for names") -[X] - LookupError(unknown encoding) -[] - URLError (USUALLY DUE TO NETWORKING) -[X - HTTPError("HTTP Error 404: Not Found") -[X] - OSError("failed to load external entity") -[X] - XMLSyntaxError("Start tag expected, '<' not found") -[] - ParserError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) -[X] - ValueError("Values for parser can only be lxml or etree.") +[X] - ValueError: "Either element or attributes can be parsed not both." +[X] - XSLTApplyError: "Cannot resolve URI" +[X] - XSLTParseError: "document is not a stylesheet" +[X] - ValueError: "xpath does not return any nodes." 
+[X] - XPathEvalError: "Invalid expression" +[] - XPathSyntaxError: (OLD VERSION IN lxml FOR XPATH ERRORS) +[X] - TypeError: "empty namespace prefix is not supported in XPath" +[X] - ValueError: "names does not match length of child elements in xpath." +[X] - TypeError: "...is not a valid type for names" +[X] - LookupError: "unknown encoding" +[] - URLError: (USUALLY DUE TO NETWORKING) +[X - HTTPError: "HTTP Error 404: Not Found" +[X] - OSError: "failed to load external entity" +[X] - XMLSyntaxError: "Start tag expected, '<' not found" +[] - ParserError: (FAILSAFE CATCH ALL FOR VERY COMPLEX XML +[X] - ValueError: "Values for parser can only be lxml or etree." +[X] - UnicodeDecodeError: "'utf-8' codec can't decode byte 0xe9..." +[X] - UnicodeError: "UTF-16 stream does not start with BOM" +[X] - BadZipFile: "File is not a zip file" +[X] - OSError: "Invalid data stream" +[X] - LZMAError: "Input format not supported by decoder" +[X] - ValueError: "Unrecognized compression type" +[X] - PermissionError: "Forbidden" """ geom_df = DataFrame( @@ -536,7 +553,13 @@ def test_wrong_encoding(datapath, parser): def test_utf16_encoding(datapath, parser): filename = datapath("io", "data", "xml", "baby_names.xml") - with pytest.raises(UnicodeError, match=("UTF-16 stream does not start with BOM")): + with pytest.raises( + UnicodeError, + match=( + "UTF-16 stream does not start with BOM|" + "'utf-16-le' codec can't decode byte" + ), + ): read_xml(filename, encoding="UTF-16", parser=parser) @@ -897,9 +920,7 @@ def test_online_stylesheet(): # COMPRESSION -@pytest.mark.parametrize( - "compfile", ["geom_xml.bz2", "geom_xml.gzz", "geom_xml.xz", "geom_xml.zip"] -) +@pytest.mark.parametrize("compfile", ["geom_xml.bz2", "geom_xml.gz", "geom_xml.xz"]) def test_compression_read(datapath, parser, compfile): filename = datapath("io", "data", "xml", "geom_xml.bz2") xml_df = read_xml(filename, parser=parser) @@ -908,17 +929,19 @@ def test_compression_read(datapath, parser, compfile): def 
test_wrong_compression_bz2(datapath, parser): - filename = datapath("io", "data", "xml", "geom_xml.zip") + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression="zip") - with pytest.raises(OSError, match="Invalid data stream"): - read_xml(filename, parser=parser, compression="bz2") + with pytest.raises(OSError, match="Invalid data stream"): + read_xml(path, parser=parser, compression="bz2") def test_wrong_compression_gz(datapath, parser): - filename = datapath("io", "data", "xml", "geom_xml.zip") + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression="zip") - with pytest.raises(OSError, match="Not a gzipped file"): - read_xml(filename, parser=parser, compression="gzip") + with pytest.raises(OSError, match="Not a gzipped file"): + read_xml(path, parser=parser, compression="gzip") def test_wrong_compression_xz(datapath, parser): @@ -942,7 +965,7 @@ def test_wrong_compression_zip(datapath, parser): def test_unsuported_compression(datapath, parser): with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean() as path: - read_xml(path, compression="7z") + read_xml(path, parser=parser, compression="7z") # STORAGE OPTIONS @@ -952,7 +975,7 @@ def test_unsuported_compression(datapath, parser): @td.skip_if_no("s3fs") @td.skip_if_no("lxml") def test_s3_parser_consistency(): - # Python Software Foundation (2019 IRS-990 FORM) + # Python Software Foundation (2019 IRS-990 RETURN) s3 = "s3://irs-form-990/201923199349319487_public.xml" df_lxml = read_xml( From 5b93c1629e98560acf3edd598896596dce862f9d Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Sat, 20 Feb 2021 23:42:30 -0600 Subject: [PATCH 22/35] Fix precommit issue with import line --- pandas/io/formats/format.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index cf4da7757f75c..b08b67ab5b288 100644 --- a/pandas/io/formats/format.py +++ 
b/pandas/io/formats/format.py @@ -1125,7 +1125,10 @@ def to_xml( values. """ - from pandas.io.formats.xml import EtreeXMLFormatter, LxmlXMLFormatter + from pandas.io.formats.xml import ( + EtreeXMLFormatter, + LxmlXMLFormatter, + ) lxml = import_optional_dependency("lxml.etree", errors="ignore") From 9a0dfb4ff89a762b455a6b4dcfe140ba32e7354e Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Sun, 21 Feb 2021 14:41:57 -0600 Subject: [PATCH 23/35] Adjust code and tests per twoertwein comments --- pandas/core/frame.py | 23 ++--- pandas/io/formats/format.py | 19 ++-- pandas/io/formats/xml.py | 136 +++++++++++++------------ pandas/io/xml.py | 89 ++++++---------- pandas/tests/io/data/xml/geom_xml.bz2 | Bin 182 -> 0 bytes pandas/tests/io/data/xml/geom_xml.gz | Bin 166 -> 0 bytes pandas/tests/io/data/xml/geom_xml.xz | Bin 200 -> 0 bytes pandas/tests/io/formats/test_to_xml.py | 83 ++++++--------- pandas/tests/io/test_xml.py | 40 +++++--- 9 files changed, 170 insertions(+), 220 deletions(-) delete mode 100644 pandas/tests/io/data/xml/geom_xml.bz2 delete mode 100644 pandas/tests/io/data/xml/geom_xml.gz delete mode 100644 pandas/tests/io/data/xml/geom_xml.xz diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 99b6cba132f8f..a33e7c5c49879 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2639,9 +2639,10 @@ def to_html( render_links=render_links, ) + @doc(storage_options=generic._shared_docs["storage_options"]) def to_xml( self, - path_or_buffer: Optional[FilePathOrBuffer[str]] = None, + path_or_buffer: Optional[FilePathOrBuffer] = None, index: bool = True, root_name: Optional[str] = "data", row_name: Optional[str] = "row", @@ -2654,7 +2655,7 @@ def to_xml( xml_declaration: Optional[bool] = True, pretty_print: Optional[bool] = True, parser: Optional[str] = "lxml", - stylesheet: Optional[FilePathOrBuffer[str]] = None, + stylesheet: Optional[FilePathOrBuffer] = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ) -> 
Optional[str]: @@ -2691,7 +2692,7 @@ def to_xml( Default namespaces should be given empty string key. For example, :: - namespaces = {"": "https://example.com"} + namespaces = {{"": "https://example.com"}} prefix : str, optional Namespace prefix to be used for every element and/or attribute @@ -2704,7 +2705,7 @@ def to_xml( pretty_print : bool, default True Whether output should be pretty printed with indentation and line breaks. - parser : {'lxml','etree'}, default 'lxml' + parser : {{'lxml','etree'}}, default 'lxml' Parser module to use for building of tree. Only 'lxml' and 'etree' are supported. With 'lxml', the ability to use XSLT stylesheet is supported. @@ -2720,13 +2721,7 @@ def to_xml( '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression otherwise. If using 'zip', the ZIP file must contain only one data file to be read in. Set to None for no decompression. - storage_options : dict, optional - Extra options that make sense for a particular storage connection, - e.g. host, port, username, password, etc., if using a URL that will be - parsed by fsspec, e.g., starting “s3://”, “gcs://”. An error will be - raised if providing this argument with a non-fsspec URL. See the fsspec - and backend storage implementation docs for the set of allowed keys and - values. + {storage_options} Returns ------- @@ -2741,9 +2736,9 @@ def to_xml( Examples -------- - >>> df = pd.DataFrame({'shape': ['square', 'circle', 'triangle'], + >>> df = pd.DataFrame({{'shape': ['square', 'circle', 'triangle'], ... 'degrees': [360, 360, 180], - ... 'sides': [4, np.nan, 3]}) + ... 'sides': [4, np.nan, 3]}}) >>> df.to_xml() # doctest: +SKIP @@ -2778,7 +2773,7 @@ def to_xml( - >>> df.to_xml(namespaces={"doc": "https://example.com"}, + >>> df.to_xml(namespaces={{"doc": "https://example.com"}}, ... 
prefix="doc") # doctest: +SKIP diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b08b67ab5b288..44428abdcd8a5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -62,6 +62,7 @@ StorageOptions, ) from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -97,6 +98,7 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.reshape.concat import concat +from pandas.core.shared_docs import _shared_docs from pandas.io.common import stringify_path from pandas.io.formats.printing import ( @@ -1035,9 +1037,10 @@ def to_html( string = html_formatter.to_string() return save_to_buffer(string, buf=buf, encoding=encoding) + @doc(storage_options=_shared_docs["storage_options"]) def to_xml( self, - path_or_buffer: Optional[FilePathOrBuffer[str]] = None, + path_or_buffer: Optional[FilePathOrBuffer] = None, index: Optional[bool] = True, root_name: Optional[str] = "data", row_name: Optional[str] = "row", @@ -1050,7 +1053,7 @@ def to_xml( xml_declaration: Optional[bool] = True, pretty_print: Optional[bool] = True, parser: Optional[str] = "lxml", - stylesheet: Optional[FilePathOrBuffer[str]] = None, + stylesheet: Optional[FilePathOrBuffer] = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ) -> Optional[str]: @@ -1087,7 +1090,7 @@ def to_xml( Default namespaces should be given empty string key. For example, :: - namespaces = {'': 'https://example.com'} + namespaces = {{'': 'https://example.com'}} prefix : str, optional Namespace prefix to be used for every element and/or attribute @@ -1100,7 +1103,7 @@ def to_xml( pretty_print : bool, default True Whether output should be pretty printed with indentation and line breaks. 
- parser : {'lxml','etree'}, default "lxml" + parser : {{'lxml','etree'}}, default "lxml" Parser module to use for building of tree. Only 'lxml' and 'etree' are supported. With 'lxml', the ability to use XSLT stylesheet is supported. @@ -1116,13 +1119,7 @@ def to_xml( '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression otherwise. If using 'zip', the ZIP file must contain only one data file to be read in. Set to None for no decompression. - storage_options : dict, optional - Extra options that make sense for a particular storage connection, - e.g. host, port, username, password, etc., if using a URL that will be - parsed by fsspec, e.g., starting “s3://”, “gcs://”. An error will be - raised if providing this argument with a non-fsspec URL. See the fsspec - and backend storage implementation docs for the set of allowed keys and - values. + {storage_options} """ from pandas.io.formats.xml import ( diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index a15c0c6ce32ff..49c18b344e1e9 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -22,9 +22,11 @@ from pandas.core.dtypes.common import is_list_like from pandas.io.common import ( + file_exists, get_handle, + is_fsspec_url, is_url, - urlopen, + stringify_path, ) from pandas.io.formats.format import DataFrameFormatter @@ -76,7 +78,7 @@ class BaseXMLFormatter: stylesheet : str or file-like A URL, file, file-like object, or a raw string containing XSLT. - compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' Compression type for on-the-fly decompression of on-disk data. If 'infer', then use extension for gzip, bz2, zip or xz. 
@@ -94,7 +96,7 @@ class BaseXMLFormatter: def __init__( self, formatter: DataFrameFormatter, - path_or_buffer: Optional[FilePathOrBuffer[str]] = None, + path_or_buffer: Optional[FilePathOrBuffer] = None, index: Optional[bool] = True, root_name: Optional[str] = "data", row_name: Optional[str] = "row", @@ -106,7 +108,7 @@ def __init__( encoding: str = "utf-8", xml_declaration: Optional[bool] = True, pretty_print: Optional[bool] = True, - stylesheet: Optional[FilePathOrBuffer[str]] = None, + stylesheet: Optional[FilePathOrBuffer] = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ) -> None: @@ -262,6 +264,56 @@ def build_elems(self) -> None: raise AbstractMethodError(self) + def _preprocess_data(self, data): + """ + Convert extracted raw data. + + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. + """ + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data + + def _get_data_from_filepath(self, filepath_or_buffer): + """ + Extract raw XML data. + + The method accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. XML bytes + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. 
+ """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + if ( + not isinstance(filepath_or_buffer, str) + or is_url(filepath_or_buffer) + or is_fsspec_url(filepath_or_buffer) + or file_exists(filepath_or_buffer) + ): + with get_handle( + filepath_or_buffer, + "r", + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) as handle_obj: + filepath_or_buffer = ( + handle_obj.handle.read() + if hasattr(handle_obj.handle, "read") + else handle_obj.handle + ) + + return filepath_or_buffer + def write_output(self) -> Optional[str]: xml_doc = self.build_tree() @@ -541,61 +593,12 @@ def build_elems(self) -> None: except KeyError: raise KeyError(f"no valid column, {col}") - def convert_io(self) -> Union[bytes, str, None]: - """ - Convert stylesheet object to string. - - This method will convert stylesheet object into a string or keep - as string, depending on object type. - """ - - obj: Union[bytes, str, None] = None - - if isinstance(self.stylesheet, str): - obj = self.stylesheet - - elif isinstance(self.stylesheet, bytes): - obj = self.stylesheet.decode(self.encoding) - - elif isinstance(self.stylesheet, io.StringIO): - obj = self.stylesheet.getvalue() - - elif isinstance(self.stylesheet, io.BytesIO): - obj = self.stylesheet.getvalue().decode(self.encoding) - - elif isinstance(self.stylesheet, io.TextIOWrapper): - obj = self.stylesheet.read() - - elif isinstance(self.stylesheet, io.BufferedReader): - obj = self.stylesheet.read().decode(self.encoding) - else: - obj = None - - return obj - def parse_doc(self): """ Build tree from stylesheet. This method will parse stylesheet object into tree for parsing conditionally by its specific object type. - - Raises - ------ - HttpError - * If URL cannot be reached. - - LookupError - * If xml document has incorrect or unknown encoding. - - OSError - * If file cannot be found. - - XMLSyntaxError - * If xml document conntains syntax issues. 
- - ValueError - * If io object is not readable as string or file-like object. """ from lxml.etree import ( @@ -604,21 +607,24 @@ def parse_doc(self): parse, ) - current_doc = self.convert_io() - if current_doc and isinstance(current_doc, str): - is_xml = current_doc.startswith((" None: def _parse_doc(self) -> Union[Element, ElementTree]: from xml.etree.ElementTree import ( - Element, - ElementTree, XMLParser, - fromstring, parse, ) - if isinstance(self.path_or_buffer, str): - if self.path_or_buffer.startswith((" DataFrame: class that build Data Frame and infers specific dtypes. """ - tags = [list(d.keys()) for d in data] + tags = next(iter(data)) nodes = [list(d.values()) for d in data] try: - with TextParser(nodes, names=tags[0], **kwargs) as tp: + with TextParser(nodes, names=tags, **kwargs) as tp: return tp.read() except ParserError: raise ParserError( @@ -750,6 +724,7 @@ def _parse( return _data_to_frame(data=data_dicts, **kwargs) +@doc(storage_options=_shared_docs["storage_options"]) def read_xml( path_or_buffer: FilePathOrBuffer, xpath: Optional[str] = "./*", @@ -759,7 +734,7 @@ def read_xml( names: Optional[List[str]] = None, encoding: Optional[str] = "utf-8", parser: Optional[str] = "lxml", - stylesheet: Optional[FilePathOrBuffer[str]] = None, + stylesheet: Optional[FilePathOrBuffer] = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ) -> DataFrame: @@ -790,7 +765,7 @@ def read_xml( namespace prefix such as 'doc' to the URI in order to parse underlying nodes and/or attributes. For example, :: - namespaces = {"doc": "https://example.com"} + namespaces = {{"doc": "https://example.com"}} elems_only : bool, optional, default False Parse only the child elements at the specified ``xpath``. By default, @@ -807,7 +782,7 @@ def read_xml( encoding : str, optional, default 'utf-8' Encoding of XML document. 
- parser : {'lxml','etree'}, default 'lxml' + parser : {{'lxml','etree'}}, default 'lxml' Parser module to use for retrieval of data. Only 'lxml' and 'etree' are supported. With 'lxml' more complex XPath searches and ability to use XSLT stylesheet are supported. @@ -828,13 +803,7 @@ def read_xml( otherwise. If using 'zip', the ZIP file must contain only one data file to be read in. Set to None for no decompression. - storage_options : dict, optional - Extra options that make sense for a particular storage connection, - e.g. host, port, username, password, etc., if using a URL that will be - parsed by fsspec, e.g., starting “s3://”, “gcs://”. An error will be - raised if providing this argument with a non-fsspec URL. See the fsspec - and backend storage implementation docs for the set of allowed keys and - values. + {storage_options} Returns ------- @@ -941,7 +910,7 @@ def read_xml( >>> df = pd.read_xml(xml, ... xpath="//doc:row", - ... namespaces={"doc": "https://example.com"}) + ... namespaces={{"doc": "https://example.com"}}) >>> df shape degrees sides 0 square 360 4.0 diff --git a/pandas/tests/io/data/xml/geom_xml.bz2 b/pandas/tests/io/data/xml/geom_xml.bz2 deleted file mode 100644 index eef4b3597e6eb38b6c0b2c1ba23bcdd27336f60b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 182 zcmV;n07?HsT4*^jL0KkKSxE8zK>z?hS%5$gK!fc^ujl{3FaWs27)(tD2*fflm^38O zkTe=J01X6+B*HX#5Yr)`#6bjv4IxD&#YR?6PYyDd%fU|j91CQMi?`2`n(UxNy-=9E z=HGx&{|Fj kK$@grDs3x?pb?S)V8bt%JtGIW7GI0GBAh5lBzXTIfOhvx$^ZZW diff --git a/pandas/tests/io/data/xml/geom_xml.gz b/pandas/tests/io/data/xml/geom_xml.gz deleted file mode 100644 index 8cd899f91024fdebe12aee22d6b7574c998833c0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 166 zcmV;X09pSZiwFn}q%dFt|7T@yZC`k8Yyg#z!4AS85Jd0&3T~V%ZE8#plzz(wRwSvQ z3spZ~D)po_@w79U-IvMA@o}Em4Jo?d2j12VXXte>&f9^{>A36ps48h7fvOmj6z04%^yb^rhX diff --git a/pandas/tests/io/data/xml/geom_xml.xz 
b/pandas/tests/io/data/xml/geom_xml.xz deleted file mode 100644 index 7ef61d2925b05c77b390ca432160a542e8e65bec..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 200 zcmV;(05|{rH+ooF000E$*0e?f03iVu0001VFXf})0Ym_ZT>u^r%ZCxz&SsGhgC5HL zdo9`@l$!Nks(G>G~_q!H$g@O_5n)ug+ru2jawQ+!V5g~{~QxIWgJo-WG)vtUM ziz`pe{lmqHf$N|3oaLKM0JX-1QM|LUl8}?}eQ){EXNG??L8FjQ!mN&^unnSyvi1gb z2vzZ0XY>v!&=~lufy!lc@BjekzSlrkJ^XF}0i*%N0ssKdWUoH4#Ao{g000001X)^s Cj$QTu diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index d978d26429542..4fb1d0c2cf638 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -13,6 +13,7 @@ from pandas import DataFrame import pandas._testing as tm +from pandas.io.common import get_handle from pandas.io.xml import read_xml """ @@ -1062,11 +1063,13 @@ def test_stylesheet_buffered_reader(datapath, mode): @td.skip_if_no("lxml") def test_stylesheet_wrong_path(datapath): + from lxml.etree import XMLSyntaxError + xsl = os.path.join("data", "xml", "row_field_output.xslt") with pytest.raises( - (OSError, FileNotFoundError), - match=("failed to load external entity|No such file or directory|没有那个文件或目录"), + (XMLSyntaxError), + match=("Start tag expected, '<' not found"), ): geom_df.to_xml(stylesheet=xsl) @@ -1074,7 +1077,7 @@ def test_stylesheet_wrong_path(datapath): @td.skip_if_no("lxml") def test_stylesheet_not_path_buffer(): with pytest.raises( - ValueError, match=("stylesheet is not a url, file, or xml string") + TypeError, match=("argument of type 'function' is not iterable") ): geom_df.to_xml(stylesheet=DataFrame) @@ -1283,6 +1286,7 @@ def test_style_to_json(): # COMPRESSION + geom_xml = """\ @@ -1307,53 +1311,20 @@ def test_style_to_json(): """ -def test_bz2_output(parser): - import bz2 - - with tm.ensure_clean() as path: - geom_df.to_xml(path, parser=parser, compression="bz2") - - with bz2.BZ2File(path, "rb") as fp: - output = fp.read() - - # etree 
and lxml differs on quotes and case in xml declaration - output = output.decode("utf-8").replace( - ' Date: Mon, 22 Feb 2021 20:12:24 -0600 Subject: [PATCH 24/35] Remove redundancy and object names in XML parse and rename tests for clarity --- pandas/io/formats/xml.py | 52 +++++++++++---------- pandas/io/xml.py | 62 ++++++++++++-------------- pandas/tests/io/formats/test_to_xml.py | 2 +- 3 files changed, 55 insertions(+), 61 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 49c18b344e1e9..bcb6b41feb31a 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -264,22 +264,6 @@ def build_elems(self) -> None: raise AbstractMethodError(self) - def _preprocess_data(self, data): - """ - Convert extracted raw data. - - This method will return underlying data of extracted XML content. - The data either has a `read` attribute (e.g. a file object or a - StringIO/BytesIO) or is a string or bytes that is an XML document. - """ - if isinstance(data, str): - data = io.StringIO(data) - - elif isinstance(data, bytes): - data = io.BytesIO(data) - - return data - def _get_data_from_filepath(self, filepath_or_buffer): """ Extract raw XML data. @@ -314,6 +298,22 @@ def _get_data_from_filepath(self, filepath_or_buffer): return filepath_or_buffer + def _preprocess_data(self, data): + """ + Convert extracted raw data. + + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. 
+ """ + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data + def write_output(self) -> Optional[str]: xml_doc = self.build_tree() @@ -602,8 +602,8 @@ def parse_doc(self): """ from lxml.etree import ( - XML, XMLParser, + fromstring, parse, ) @@ -612,19 +612,17 @@ def parse_doc(self): if isinstance(style_doc, str) and style_doc.startswith((" None: """ raise AbstractMethodError(self) - def _preprocess_data(self, data): - """ - Convert extracted raw data. - - This method will return underlying data of extracted XML content. - The data either has a `read` attribute (e.g. a file object or a - StringIO/BytesIO) or is a string or bytes that is an XML document. - """ - - if isinstance(data, str): - data = io.StringIO(data) - - elif isinstance(data, bytes): - data = io.BytesIO(data) - - return data - def _get_data_from_filepath(self, filepath_or_buffer): """ Extract raw XML data. @@ -237,6 +220,23 @@ def _get_data_from_filepath(self, filepath_or_buffer): return filepath_or_buffer + def _preprocess_data(self, data): + """ + Convert extracted raw data. + + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. + """ + + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data + def _parse_doc(self): """ Build tree from io. 
@@ -418,11 +418,11 @@ def _parse_doc(self) -> Union[Element, ElementTree]: ): self.path_or_buffer = self.path_or_buffer.encode(self.encoding) - data = self._get_data_from_filepath(self.path_or_buffer) - self.data = self._preprocess_data(data) + handle_data = self._get_data_from_filepath(self.path_or_buffer) + self.xml_data = self._preprocess_data(handle_data) curr_parser = XMLParser(encoding=self.encoding) - r = parse(self.data, parser=curr_parser) + r = parse(self.xml_data, parser=curr_parser) return r @@ -604,8 +604,8 @@ def _validate_names(self) -> None: def _parse_doc(self): from lxml.etree import ( - XML, XMLParser, + fromstring, parse, ) @@ -614,19 +614,17 @@ def _parse_doc(self): if isinstance(raw_doc, str) and raw_doc.startswith((" Date: Mon, 22 Feb 2021 20:27:13 -0600 Subject: [PATCH 25/35] Add XML table in install.rst --- doc/source/getting_started/install.rst | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 943847f6552ef..a9c3d637a41e3 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -273,16 +273,6 @@ Computation Dependency Minimum Version Notes ========================= ================== ============================================================= SciPy 1.12.0 Miscellaneous statistical functions -<<<<<<< HEAD -xlsxwriter 1.0.2 Excel writing -blosc 1.17.0 Compression for HDF5 -fsspec 0.7.4 Handling files aside from local and HTTP -fastparquet 0.4.0 Parquet reading / writing -gcsfs 0.6.0 Google Cloud Storage access -html5lib 1.0.1 HTML parser for read_html (see :ref:`note `) -lxml 4.3.0 | HTML parser for read_html (see :ref:`note `) - | XML parser for read_xml and tree builder for to_xml -matplotlib 2.2.3 Visualization numba 0.46.0 Alternative execution engine for rolling operations (see :ref:`Enhancing Performance `) xarray 0.12.3 pandas-like API for N-dimensional data @@ -336,6 
+326,15 @@ top-level :func:`~pandas.read_html` function: .. _lxml: https://lxml.de .. _tabulate: https://github.com/astanin/python-tabulate +XML +^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +lxml 4.3.0 XML parser for read_xml and tree builder for to_xml +========================= ================== ============================================================= + SQL databases ^^^^^^^^^^^^^ From 9b2163669397c2b06314bfd33f9935936bef17d4 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Tue, 23 Feb 2021 12:52:34 -0600 Subject: [PATCH 26/35] Streamline filepath_or_buffer handling and add TypeError tests --- pandas/io/formats/xml.py | 9 ++--- pandas/io/xml.py | 17 ++++----- pandas/tests/io/formats/test_to_xml.py | 4 +-- pandas/tests/io/test_xml.py | 50 +++++++++++++++++++++++--- 4 files changed, 57 insertions(+), 23 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index bcb6b41feb31a..a0f0c1f1a8141 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -277,7 +277,11 @@ def _get_data_from_filepath(self, filepath_or_buffer): It returns input types (2) and (3) unchanged. 
""" filepath_or_buffer = stringify_path(filepath_or_buffer) + if ( + isinstance(filepath_or_buffer, str) + and not filepath_or_buffer.startswith((" None: def _validate_names(self) -> None: if self.names: parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces) - if parent: - children = parent.findall("*") + children = parent.findall("*") if parent else [] if is_list_like(self.names): if len(self.names) < len(children): @@ -413,11 +416,6 @@ def _parse_doc(self) -> Union[Element, ElementTree]: parse, ) - if isinstance(self.path_or_buffer, str) and self.path_or_buffer.startswith( - (" Date: Tue, 23 Feb 2021 14:16:26 -0600 Subject: [PATCH 27/35] Fix lxml test on few Python envs --- pandas/io/formats/xml.py | 2 +- pandas/io/xml.py | 2 +- pandas/tests/io/formats/test_to_xml.py | 7 ++++++- pandas/tests/io/test_xml.py | 7 ++++++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index a0f0c1f1a8141..1cd9af30763cc 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -271,7 +271,7 @@ def _get_data_from_filepath(self, filepath_or_buffer): The method accepts three input types: 1. filepath (string-like) 2. file-like object (e.g. open file object, StringIO) - 3. XML bytes + 3. XML string or bytes This method turns (1) into (2) to simplify the rest of the processing. It returns input types (2) and (3) unchanged. diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 52d01d0213e74..b45b80d715d89 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -193,7 +193,7 @@ def _get_data_from_filepath(self, filepath_or_buffer): The method accepts three input types: 1. filepath (string-like) 2. file-like object (e.g. open file object, StringIO) - 3. XML bytes + 3. XML string or bytes This method turns (1) into (2) to simplify the rest of the processing. It returns input types (2) and (3) unchanged. 
diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 6d90b24137155..0428def340b3d 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -1076,7 +1076,12 @@ def test_stylesheet_wrong_path(datapath): @td.skip_if_no("lxml") def test_stylesheet_not_path_buffer(): - with pytest.raises(TypeError, match=("cannot parse from 'type'")): + from lxml.etree import XMLSyntaxError + + with pytest.raises( + (TypeError, XMLSyntaxError), + match=("cannot parse from 'type'|Start tag expected, '<' not found"), + ): geom_df.to_xml(stylesheet=DataFrame) diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index d8b21d3f909a7..d04c3a6664943 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -906,9 +906,14 @@ def test_wrong_stylesheet(): @td.skip_if_no("lxml") def test_stylesheet_not_path_buffer(): + from lxml.etree import XMLSyntaxError + kml = os.path.join("data", "xml", "cta_rail_lines.kml") - with pytest.raises(TypeError, match=("cannot parse from 'type'")): + with pytest.raises( + (TypeError, XMLSyntaxError), + match=("cannot parse from 'type'|Start tag expected, '<' not found"), + ): read_xml(kml, stylesheet=DataFrame) From ce986bcb636ceccae7eb936006be610fee67f9e6 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Tue, 23 Feb 2021 21:22:50 -0600 Subject: [PATCH 28/35] Adjust io handling in context maanger --- pandas/io/formats/xml.py | 16 +++++------ pandas/io/xml.py | 16 +++++------ pandas/tests/io/formats/test_to_xml.py | 15 +++++----- pandas/tests/io/test_xml.py | 40 ++++++++++++++++---------- 4 files changed, 49 insertions(+), 38 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 1cd9af30763cc..b5108ebc5d176 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -614,16 +614,16 @@ def parse_doc(self): style_doc = self.stylesheet handle_data = self._get_data_from_filepath(style_doc) - xml_data = 
self._preprocess_data(handle_data) - curr_parser = XMLParser(encoding=self.encoding) + with self._preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) - if isinstance(xml_data, io.StringIO): - r = fromstring( - xml_data.getvalue().encode(self.encoding), parser=curr_parser - ) - else: - r = parse(xml_data, parser=curr_parser) + if isinstance(xml_data, io.StringIO): + r = fromstring( + xml_data.getvalue().encode(self.encoding), parser=curr_parser + ) + else: + r = parse(xml_data, parser=curr_parser) return r diff --git a/pandas/io/xml.py b/pandas/io/xml.py index b45b80d715d89..1a62c9411942e 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -610,16 +610,16 @@ def _parse_doc(self): raw_doc = self.stylesheet if self.is_style else self.path_or_buffer handle_data = self._get_data_from_filepath(raw_doc) - xml_data = self._preprocess_data(handle_data) - curr_parser = XMLParser(encoding=self.encoding) + with self._preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) - if isinstance(xml_data, io.StringIO): - r = fromstring( - xml_data.getvalue().encode(self.encoding), parser=curr_parser - ) - else: - r = parse(xml_data, parser=curr_parser) + if isinstance(xml_data, io.StringIO): + r = fromstring( + xml_data.getvalue().encode(self.encoding), parser=curr_parser + ) + else: + r = parse(xml_data, parser=curr_parser) return r diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 0428def340b3d..e94dbb0a436a1 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -1068,7 +1068,7 @@ def test_stylesheet_wrong_path(datapath): xsl = os.path.join("data", "xml", "row_field_output.xslt") with pytest.raises( - (XMLSyntaxError), + XMLSyntaxError, match=("Start tag expected, '<' not found"), ): geom_df.to_xml(stylesheet=xsl) @@ -1076,15 +1076,16 @@ def test_stylesheet_wrong_path(datapath): @td.skip_if_no("lxml") def 
test_stylesheet_not_path_buffer(): - from lxml.etree import XMLSyntaxError - - with pytest.raises( - (TypeError, XMLSyntaxError), - match=("cannot parse from 'type'|Start tag expected, '<' not found"), - ): + with pytest.raises(AttributeError, match=("__enter__")): geom_df.to_xml(stylesheet=DataFrame) +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_stylesheet(val): + geom_df.to_xml(stylesheet=val) + + @td.skip_if_no("lxml") def test_incorrect_xsl_syntax(): from lxml.etree import XMLSyntaxError diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index d04c3a6664943..b2426826ccc2e 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -45,8 +45,7 @@ lxml [X] - ValueError: "Either element or attributes can be parsed not both." -[X] - TypeError: "cannot parse from 'NoneType'" -[X] - TypeError: "cannot parse from 'type'" +[X] - AttributeError: "__enter__" [X] - XSLTApplyError: "Cannot resolve URI" [X] - XSLTParseError: "document is not a stylesheet" [X] - ValueError: "xpath does not return any nodes." 
@@ -238,6 +237,22 @@ def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_lxml(val): + from lxml.etree import XMLSyntaxError + + with pytest.raises(XMLSyntaxError, match="Document is empty"): + read_xml(val, parser="lxml") + + +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_etree(val): + from xml.etree.ElementTree import ParseError + + with pytest.raises(ParseError, match="no element found"): + read_xml(val, parser="etree") + + @td.skip_if_no("lxml") def test_wrong_file_path_lxml(): from lxml.etree import XMLSyntaxError @@ -264,12 +279,12 @@ def test_wrong_file_path_etree(): @td.skip_if_no("lxml") -def test_none_path_buffer_lxml(parser): - with pytest.raises(TypeError, match=("cannot parse from 'NoneType'")): +def test_none_path_buffer_lxml(): + with pytest.raises(AttributeError, match=("__enter__")): read_xml(None, parser="lxml") -def test_none_path_buffer_etree(parser): +def test_none_path_buffer_etree(): with pytest.raises( TypeError, match=("expected str, bytes or os.PathLike object, not NoneType") ): @@ -277,12 +292,12 @@ def test_none_path_buffer_etree(parser): @td.skip_if_no("lxml") -def test_not_path_buffer_lxml(parser): - with pytest.raises(TypeError, match=("cannot parse from 'type'")): +def test_not_path_buffer_lxml(): + with pytest.raises(AttributeError, match=("__enter__")): read_xml(DataFrame, parser="lxml") -def test_not_path_buffer_etree(parser): +def test_not_path_buffer_etree(): with pytest.raises( TypeError, match=("expected str, bytes or os.PathLike object, not type") ): @@ -906,15 +921,10 @@ def test_wrong_stylesheet(): @td.skip_if_no("lxml") def test_stylesheet_not_path_buffer(): - from lxml.etree import XMLSyntaxError - kml = os.path.join("data", "xml", "cta_rail_lines.kml") - with pytest.raises( - (TypeError, XMLSyntaxError), - match=("cannot parse from 'type'|Start tag expected, '<' not 
found"), - ): - read_xml(kml, stylesheet=DataFrame) + with pytest.raises(AttributeError, match=("__enter__")): + read_xml(kml, stylesheet={"a": 1}) def test_stylesheet_with_etree(datapath): From e2f80db6f15fea9e91b41c4681a8c9ba449c8d21 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Tue, 23 Feb 2021 23:14:27 -0600 Subject: [PATCH 29/35] Add and fix tests for special filepath_or_buffer values --- pandas/tests/io/formats/test_to_xml.py | 6 --- pandas/tests/io/test_xml.py | 65 +++++++++++++++++--------- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index e94dbb0a436a1..78640a35e578c 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -1074,12 +1074,6 @@ def test_stylesheet_wrong_path(datapath): geom_df.to_xml(stylesheet=xsl) -@td.skip_if_no("lxml") -def test_stylesheet_not_path_buffer(): - with pytest.raises(AttributeError, match=("__enter__")): - geom_df.to_xml(stylesheet=DataFrame) - - @td.skip_if_no("lxml") @pytest.mark.parametrize("val", ["", b""]) def test_empty_string_stylesheet(val): diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index b2426826ccc2e..55879ffe703e7 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -23,7 +23,6 @@ etree [X] - ImportError: "lxml not found, please install or use the etree parser." [X] - TypeError: "expected str, bytes or os.PathLike object, not NoneType" -[X] - TypeError: "expected str, bytes or os.PathLike object, not type" [X] - ValueError: "Either element or attributes can be parsed not both." [X] - ValueError: "xpath does not return any nodes..." 
[X] - SyntaxError: "You have used an incorrect or unsupported XPath" @@ -237,6 +236,28 @@ def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) +@td.skip_if_no("lxml") +def test_closed_file_lxml(datapath): + xml = datapath("io", "data", "xml", "baby_names.xml") + + with open(xml, "rb") as f: + f.read() + + with pytest.raises(ValueError, match="I/O operation on closed file"): + read_xml(f, parser="lxml") + + +def test_closed_file_etree(datapath): + xml = datapath("io", "data", "xml", "baby_names.xml") + + with open(xml, "rb") as f: + f.read() + + with pytest.raises(ValueError, match="read of closed file"): + read_xml(f, parser="etree") + + +@td.skip_if_no("lxml") @pytest.mark.parametrize("val", ["", b""]) def test_empty_string_lxml(val): from lxml.etree import XMLSyntaxError @@ -279,29 +300,18 @@ def test_wrong_file_path_etree(): @td.skip_if_no("lxml") -def test_none_path_buffer_lxml(): - with pytest.raises(AttributeError, match=("__enter__")): - read_xml(None, parser="lxml") +def test_none_file_path_lxml(): + xml_var = None + with pytest.raises(AttributeError, match="__enter__"): + read_xml(xml_var, parser="lxml") -def test_none_path_buffer_etree(): +def test_none_file_path_etree(): + xml_var = None with pytest.raises( - TypeError, match=("expected str, bytes or os.PathLike object, not NoneType") + TypeError, match="expected str, bytes or os.PathLike object, not NoneType" ): - read_xml(None, parser="etree") - - -@td.skip_if_no("lxml") -def test_not_path_buffer_lxml(): - with pytest.raises(AttributeError, match=("__enter__")): - read_xml(DataFrame, parser="lxml") - - -def test_not_path_buffer_etree(): - with pytest.raises( - TypeError, match=("expected str, bytes or os.PathLike object, not type") - ): - read_xml(DataFrame, parser="etree") + read_xml(xml_var, parser="etree") @tm.network @@ -921,9 +931,14 @@ def test_wrong_stylesheet(): @td.skip_if_no("lxml") def test_stylesheet_not_path_buffer(): + from 
lxml.etree import XMLSyntaxError + kml = os.path.join("data", "xml", "cta_rail_lines.kml") - with pytest.raises(AttributeError, match=("__enter__")): + with pytest.raises( + (AttributeError, XMLSyntaxError), + match=("__enter__|Start tag expected, '<' not found"), + ): read_xml(kml, stylesheet={"a": 1}) @@ -937,6 +952,14 @@ def test_stylesheet_with_etree(datapath): read_xml(kml, parser="etree", stylesheet=xsl) +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_stylesheet(val): + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + + read_xml(kml, parser="etree", stylesheet=val) + + @tm.network @td.skip_if_no("lxml") def test_online_stylesheet(): From c7e1e118a85090516336f2b0222e074881019e1c Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 24 Feb 2021 07:30:50 -0600 Subject: [PATCH 30/35] Fix tests for better example and wrong parser --- pandas/tests/io/test_xml.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 55879ffe703e7..6247223e99ad0 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -301,17 +301,21 @@ def test_wrong_file_path_etree(): @td.skip_if_no("lxml") def test_none_file_path_lxml(): - xml_var = None - with pytest.raises(AttributeError, match="__enter__"): - read_xml(xml_var, parser="lxml") + with tm.ensure_clean("test.xml") as path: + xml_var = geom_df.to_xml(path) + + with pytest.raises(AttributeError, match="__enter__"): + read_xml(xml_var, parser="lxml") def test_none_file_path_etree(): - xml_var = None - with pytest.raises( - TypeError, match="expected str, bytes or os.PathLike object, not NoneType" - ): - read_xml(xml_var, parser="etree") + with tm.ensure_clean("test.xml") as path: + xml_var = geom_df.to_xml(path) + + with pytest.raises( + TypeError, match="expected str, bytes or os.PathLike object, not NoneType" + ): + read_xml(xml_var, parser="etree") @tm.network @@ 
-957,7 +961,7 @@ def test_stylesheet_with_etree(datapath): def test_empty_stylesheet(val): kml = os.path.join("data", "xml", "cta_rail_lines.kml") - read_xml(kml, parser="etree", stylesheet=val) + read_xml(kml, stylesheet=val) @tm.network From df9ecf4a7d892c375021e11f708f1ca2ea2a2a51 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Wed, 24 Feb 2021 09:09:37 -0600 Subject: [PATCH 31/35] Adjust to handle empty string stylesheet with tests --- pandas/io/formats/xml.py | 4 ++-- pandas/io/xml.py | 4 ++-- pandas/tests/io/formats/test_to_xml.py | 7 ++++++- pandas/tests/io/test_xml.py | 11 ++++++++--- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index b5108ebc5d176..d0037dc6cd703 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -384,7 +384,7 @@ def build_tree(self) -> bytes: if not self.xml_declaration: self.out_xml = self.remove_declaration() - if self.stylesheet: + if self.stylesheet is not None: raise ValueError( "To use stylesheet, you need lxml installed and selected as parser." ) @@ -526,7 +526,7 @@ def build_tree(self) -> bytes: xml_declaration=self.xml_declaration, ) - if self.stylesheet: + if self.stylesheet is not None: self.out_xml = self.transform_doc() return self.out_xml diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 1a62c9411942e..311bf39e3ebad 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -268,7 +268,7 @@ def __init__(self, *args, **kwargs): def parse_data(self) -> List[Dict[str, Optional[str]]]: - if self.stylesheet: + if self.stylesheet is not None: raise ValueError( "To use stylesheet, you need lxml installed and selected as parser." 
) @@ -446,7 +446,7 @@ def parse_data(self) -> List[Dict[str, Optional[str]]]: self.xml_doc = self._parse_doc() - if self.stylesheet: + if self.stylesheet is not None: self.is_style = True self.xsl_doc = self._parse_doc() self.xml_doc = self._transform_doc() diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/formats/test_to_xml.py index 78640a35e578c..3b915a9664210 100644 --- a/pandas/tests/io/formats/test_to_xml.py +++ b/pandas/tests/io/formats/test_to_xml.py @@ -1077,7 +1077,12 @@ def test_stylesheet_wrong_path(datapath): @td.skip_if_no("lxml") @pytest.mark.parametrize("val", ["", b""]) def test_empty_string_stylesheet(val): - geom_df.to_xml(stylesheet=val) + from lxml.etree import XMLSyntaxError + + with pytest.raises( + XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found") + ): + geom_df.to_xml(stylesheet=val) @td.skip_if_no("lxml") diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/test_xml.py index 6247223e99ad0..33954d0951cde 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/test_xml.py @@ -302,7 +302,7 @@ def test_wrong_file_path_etree(): @td.skip_if_no("lxml") def test_none_file_path_lxml(): with tm.ensure_clean("test.xml") as path: - xml_var = geom_df.to_xml(path) + xml_var = geom_df.to_xml(path, parser="lxml") with pytest.raises(AttributeError, match="__enter__"): read_xml(xml_var, parser="lxml") @@ -310,7 +310,7 @@ def test_none_file_path_lxml(): def test_none_file_path_etree(): with tm.ensure_clean("test.xml") as path: - xml_var = geom_df.to_xml(path) + xml_var = geom_df.to_xml(path, parser="etree") with pytest.raises( TypeError, match="expected str, bytes or os.PathLike object, not NoneType" @@ -959,9 +959,14 @@ def test_stylesheet_with_etree(datapath): @td.skip_if_no("lxml") @pytest.mark.parametrize("val", ["", b""]) def test_empty_stylesheet(val): + from lxml.etree import XMLSyntaxError + kml = os.path.join("data", "xml", "cta_rail_lines.kml") - read_xml(kml, stylesheet=val) + with 
pytest.raises( + XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found") + ): + read_xml(kml, stylesheet=val) @tm.network From 5d75d51f01ff15389e78516819d7803482ca3df4 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Thu, 25 Feb 2021 00:47:02 -0600 Subject: [PATCH 32/35] Move methods out of class, adjust xpath check, and data frame formatting --- pandas/io/formats/xml.py | 170 ++++++----- pandas/io/xml.py | 163 +++++----- .../tests/io/{formats => xml}/test_to_xml.py | 0 pandas/tests/io/{ => xml}/test_xml.py | 283 ++++++++---------- 4 files changed, 322 insertions(+), 294 deletions(-) rename pandas/tests/io/{formats => xml}/test_to_xml.py (100%) rename pandas/tests/io/{ => xml}/test_xml.py (95%) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index d0037dc6cd703..fd03dcd342089 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -13,6 +13,7 @@ ) from pandas._typing import ( + Buffer, CompressionOptions, FilePathOrBuffer, StorageOptions, @@ -182,16 +183,16 @@ def process_dataframe(self) -> Dict[Union[int, str], Dict[str, Any]]: Adjust Data Frame to fit xml output. This method will adjust underlying data frame for xml output, - including replacing missing entities and including indexes. + including optionally replacing missing values and including indexes. """ - na_dict = {"None": self.na_rep, "NaN": self.na_rep, "nan": self.na_rep} + df = self.fmt.frame - df = ( - (self.fmt.frame.reset_index().applymap(str).replace(na_dict)) - if self.index - else self.fmt.frame.applymap(str).replace(na_dict) - ) + if self.index: + df = df.reset_index() + + if self.na_rep: + df = df.replace({None: self.na_rep, float("nan"): self.na_rep}) return df.to_dict(orient="index") @@ -264,67 +265,12 @@ def build_elems(self) -> None: raise AbstractMethodError(self) - def _get_data_from_filepath(self, filepath_or_buffer): - """ - Extract raw XML data. - - The method accepts three input types: - 1. filepath (string-like) - 2. 
file-like object (e.g. open file object, StringIO) - 3. XML string or bytes - - This method turns (1) into (2) to simplify the rest of the processing. - It returns input types (2) and (3) unchanged. - """ - filepath_or_buffer = stringify_path(filepath_or_buffer) - - if ( - isinstance(filepath_or_buffer, str) - and not filepath_or_buffer.startswith((" Optional[str]: xml_doc = self.build_tree() out_str: Optional[str] if self.path_or_buffer is not None: - # apply compression and byte/text conversion with get_handle( self.path_or_buffer, "wb", @@ -424,8 +370,13 @@ def build_attribs(self) -> None: attr_name = f"{self.prefix_uri}{flat_col}" try: - if self.d[col] is not None: - self.elem_row.attrib[attr_name] = str(self.d[col]) + val = ( + None + if self.d[col] is None or self.d[col] != self.d[col] + else str(self.d[col]) + ) + if val is not None: + self.elem_row.attrib[attr_name] = val except KeyError: raise KeyError(f"no valid column, {col}") @@ -446,7 +397,11 @@ def build_elems(self) -> None: elem_name = f"{self.prefix_uri}{flat_col}" try: - val = None if self.d[col] in [None, ""] else str(self.d[col]) + val = ( + None + if self.d[col] in [None, ""] or self.d[col] != self.d[col] + else str(self.d[col]) + ) SubElement(self.elem_row, elem_name).text = val except KeyError: raise KeyError(f"no valid column, {col}") @@ -570,8 +525,13 @@ def build_attribs(self) -> None: attr_name = f"{self.prefix_uri}{flat_col}" try: - if self.d[col] is not None: - self.elem_row.attrib[attr_name] = self.d[col] + val = ( + None + if self.d[col] is None or self.d[col] != self.d[col] + else str(self.d[col]) + ) + if val is not None: + self.elem_row.attrib[attr_name] = val except KeyError: raise KeyError(f"no valid column, {col}") @@ -592,7 +552,11 @@ def build_elems(self) -> None: elem_name = f"{self.prefix_uri}{flat_col}" try: - val = None if self.d[col] in [None, ""] else str(self.d[col]) + val = ( + None + if self.d[col] in [None, ""] or self.d[col] != self.d[col] + else str(self.d[col]) + ) 
SubElement(self.elem_row, elem_name).text = val except KeyError: raise KeyError(f"no valid column, {col}") @@ -613,9 +577,14 @@ def parse_doc(self): style_doc = self.stylesheet - handle_data = self._get_data_from_filepath(style_doc) + handle_data = _get_data_from_filepath( + filepath_or_buffer=style_doc, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) - with self._preprocess_data(handle_data) as xml_data: + with _preprocess_data(handle_data) as xml_data: curr_parser = XMLParser(encoding=self.encoding) if isinstance(xml_data, io.StringIO): @@ -642,3 +611,64 @@ def transform_doc(self) -> bytes: new_doc = transformer(self.root) return bytes(new_doc) + + +def _get_data_from_filepath( + filepath_or_buffer, + encoding, + compression, + storage_options, +) -> Union[str, bytes, Buffer]: + """ + Extract raw XML data. + + The method accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. XML string or bytes + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. + """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + + if ( + isinstance(filepath_or_buffer, str) + and not filepath_or_buffer.startswith((" Union[io.StringIO, io.BytesIO]: + """ + Convert extracted raw data. + + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. 
+ """ + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 311bf39e3ebad..122057c2c625d 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -11,6 +11,7 @@ ) from pandas._typing import ( + Buffer, CompressionOptions, FilePathOrBuffer, StorageOptions, @@ -186,61 +187,6 @@ def _validate_names(self) -> None: """ raise AbstractMethodError(self) - def _get_data_from_filepath(self, filepath_or_buffer): - """ - Extract raw XML data. - - The method accepts three input types: - 1. filepath (string-like) - 2. file-like object (e.g. open file object, StringIO) - 3. XML string or bytes - - This method turns (1) into (2) to simplify the rest of the processing. - It returns input types (2) and (3) unchanged. - """ - filepath_or_buffer = stringify_path(filepath_or_buffer) - - if ( - isinstance(filepath_or_buffer, str) - and not filepath_or_buffer.startswith((" Union[Element, ElementTree]: parse, ) - handle_data = self._get_data_from_filepath(self.path_or_buffer) - self.xml_data = self._preprocess_data(handle_data) + handle_data = _get_data_from_filepath( + filepath_or_buffer=self.path_or_buffer, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) - curr_parser = XMLParser(encoding=self.encoding) - r = parse(self.xml_data, parser=curr_parser) + with _preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) + r = parse(xml_data, parser=curr_parser) return r @@ -558,20 +509,23 @@ def _transform_doc(self): def _validate_path(self) -> None: + msg = ( + "xpath does not return any nodes. " + "Be sure row level nodes are in xpath. " + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." 
+ ) + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) children = self.xml_doc.xpath(self.xpath + "/*", namespaces=self.namespaces) attrs = self.xml_doc.xpath(self.xpath + "/@*", namespaces=self.namespaces) - if (elems == [] and attrs == [] and children == []) or ( - elems != [] and attrs == [] and children == [] - ): - raise ValueError( - "xpath does not return any nodes. " - "Be sure row level nodes are in xpath. " - "If document uses namespaces denoted with " - "xmlns, be sure to define namespaces and " - "use them in xpath." - ) + if elems == []: + raise ValueError(msg) + + if elems != [] and attrs == [] and children == []: + raise ValueError(msg) def _validate_names(self) -> None: """ @@ -609,9 +563,14 @@ def _parse_doc(self): raw_doc = self.stylesheet if self.is_style else self.path_or_buffer - handle_data = self._get_data_from_filepath(raw_doc) + handle_data = _get_data_from_filepath( + filepath_or_buffer=raw_doc, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) - with self._preprocess_data(handle_data) as xml_data: + with _preprocess_data(handle_data) as xml_data: curr_parser = XMLParser(encoding=self.encoding) if isinstance(xml_data, io.StringIO): @@ -624,6 +583,68 @@ def _parse_doc(self): return r +def _get_data_from_filepath( + filepath_or_buffer, + encoding, + compression, + storage_options, +) -> Union[str, bytes, Buffer]: + """ + Extract raw XML data. + + The method accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. XML string or bytes + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. + """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + + if ( + isinstance(filepath_or_buffer, str) + and not filepath_or_buffer.startswith((" Union[io.StringIO, io.BytesIO]: + """ + Convert extracted raw data. 
+ + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. + """ + + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data + + def _data_to_frame(data, **kwargs) -> DataFrame: """ Convert parsed data to Data Frame. diff --git a/pandas/tests/io/formats/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py similarity index 100% rename from pandas/tests/io/formats/test_to_xml.py rename to pandas/tests/io/xml/test_to_xml.py diff --git a/pandas/tests/io/test_xml.py b/pandas/tests/io/xml/test_xml.py similarity index 95% rename from pandas/tests/io/test_xml.py rename to pandas/tests/io/xml/test_xml.py index 33954d0951cde..4cf5618e852e5 100644 --- a/pandas/tests/io/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -118,6 +118,115 @@ """ +df_kml = DataFrame( + { + "id": { + 0: "ID_00001", + 1: "ID_00002", + 2: "ID_00003", + 3: "ID_00004", + 4: "ID_00005", + }, + "name": { + 0: "Blue Line (Forest Park)", + 1: "Red, Purple Line", + 2: "Red, Purple Line", + 3: "Red, Purple Line", + 4: "Red, Purple Line", + }, + "styleUrl": { + 0: "#LineStyle01", + 1: "#LineStyle01", + 2: "#LineStyle01", + 3: "#LineStyle01", + 4: "#LineStyle01", + }, + "extrude": {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}, + "altitudeMode": { + 0: "clampedToGround", + 1: "clampedToGround", + 2: "clampedToGround", + 3: "clampedToGround", + 4: "clampedToGround", + }, + "coordinates": { + 0: ( + "-87.77678526964958,41.8708863930319,0 " + "-87.77826234150609,41.87097820122218,0 " + "-87.78251583439344,41.87130129991005,0 " + "-87.78418294588424,41.87145055520308,0 " + "-87.7872369165933,41.8717239119163,0 " + "-87.79160214925886,41.87210797280065,0" + ), + 1: ( + "-87.65758750947528,41.96427269188822,0 " + "-87.65802133507393,41.96581929055245,0 " + "-87.65819033925305,41.96621846093642,0 " + 
"-87.6583189819129,41.96650362897086,0 " + "-87.65835858701473,41.96669002089185,0 " + "-87.65838428411853,41.96688150295095,0 " + "-87.65842208882658,41.96745896091846,0 " + "-87.65846556843937,41.9683761425439,0 " + "-87.65849296214573,41.96913893870342,0" + ), + 2: ( + "-87.65492939166126,41.95377494531437,0 " + "-87.65557043199591,41.95376544118533,0 " + "-87.65606302030132,41.95376391658746,0 " + "-87.65623502146268,41.95377379126367,0 " + "-87.65634748981634,41.95380103566435,0 " + "-87.65646537904269,41.95387703994676,0 " + "-87.65656532461145,41.95396622645799,0 " + "-87.65664760856414,41.95404201996044,0 " + "-87.65671750555913,41.95416647054043,0 " + "-87.65673983607117,41.95429949810849,0 " + "-87.65673866475777,41.95441024240925,0 " + "-87.6567690255541,41.95490657227902,0 " + "-87.65683672482363,41.95692259283837,0 " + "-87.6568900886376,41.95861070983142,0 " + "-87.65699865558875,41.96181418669004,0 " + "-87.65756347177603,41.96397045777844,0 " + "-87.65758750947528,41.96427269188822,0" + ), + 3: ( + "-87.65362593118043,41.94742799535678,0 " + "-87.65363554415794,41.94819886386848,0 " + "-87.6536456393239,41.95059994675451,0 " + "-87.65365831235026,41.95108288489359,0 " + "-87.6536604873874,41.9519954657554,0 " + "-87.65362592053201,41.95245597302328,0 " + "-87.65367158496069,41.95311153649393,0 " + "-87.65368468595476,41.9533202828916,0 " + "-87.65369271253692,41.95343095587119,0 " + "-87.65373335834569,41.95351536301472,0 " + "-87.65378605844126,41.95358212680591,0 " + "-87.65385067928185,41.95364452823767,0 " + "-87.6539390793817,41.95370263886964,0 " + "-87.6540786298351,41.95373403675265,0 " + "-87.65430648647626,41.9537535411832,0 " + "-87.65492939166126,41.95377494531437,0" + ), + 4: ( + "-87.65345391792157,41.94217681262115,0 " + "-87.65342448305786,41.94237224420864,0 " + "-87.65339745703922,41.94268217746244,0 " + "-87.65337753982941,41.94288140770284,0 " + "-87.65336256753105,41.94317369618263,0 " + "-87.65338799707138,41.94357253961736,0 " 
+ "-87.65340240886648,41.94389158188269,0 " + "-87.65341837392448,41.94406444407721,0 " + "-87.65342275247338,41.94421065714904,0 " + "-87.65347469646018,41.94434829382345,0 " + "-87.65351486483024,41.94447699917548,0 " + "-87.65353483605053,41.9453896864472,0 " + "-87.65361975532807,41.94689193720703,0 " + "-87.65362593118043,41.94742799535678,0" + ), + }, + } +) + + @pytest.fixture(params=["rb", "r"]) def mode(request): return request.param @@ -236,25 +345,13 @@ def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): tm.assert_frame_equal(df_str, df_expected) -@td.skip_if_no("lxml") -def test_closed_file_lxml(datapath): - xml = datapath("io", "data", "xml", "baby_names.xml") - - with open(xml, "rb") as f: - f.read() - - with pytest.raises(ValueError, match="I/O operation on closed file"): - read_xml(f, parser="lxml") - +def test_file_handle_close(datapath, parser): + xml_file = datapath("io", "data", "xml", "books.xml") -def test_closed_file_etree(datapath): - xml = datapath("io", "data", "xml", "baby_names.xml") + with open(xml_file, "rb") as f: + read_xml(f.read(), parser=parser) - with open(xml, "rb") as f: - f.read() - - with pytest.raises(ValueError, match="read of closed file"): - read_xml(f, parser="etree") + assert not f.closed @td.skip_if_no("lxml") @@ -299,25 +396,6 @@ def test_wrong_file_path_etree(): read_xml(filename, parser="etree") -@td.skip_if_no("lxml") -def test_none_file_path_lxml(): - with tm.ensure_clean("test.xml") as path: - xml_var = geom_df.to_xml(path, parser="lxml") - - with pytest.raises(AttributeError, match="__enter__"): - read_xml(xml_var, parser="lxml") - - -def test_none_file_path_etree(): - with tm.ensure_clean("test.xml") as path: - xml_var = geom_df.to_xml(path, parser="etree") - - with pytest.raises( - TypeError, match="expected str, bytes or os.PathLike object, not NoneType" - ): - read_xml(xml_var, parser="etree") - - @tm.network @td.skip_if_no("lxml") def test_url(): @@ -675,113 +753,6 @@ def 
test_stylesheet_file(datapath): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - data = { - "id": { - 0: "ID_00001", - 1: "ID_00002", - 2: "ID_00003", - 3: "ID_00004", - 4: "ID_00005", - }, - "name": { - 0: "Blue Line (Forest Park)", - 1: "Red, Purple Line", - 2: "Red, Purple Line", - 3: "Red, Purple Line", - 4: "Red, Purple Line", - }, - "styleUrl": { - 0: "#LineStyle01", - 1: "#LineStyle01", - 2: "#LineStyle01", - 3: "#LineStyle01", - 4: "#LineStyle01", - }, - "extrude": {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}, - "altitudeMode": { - 0: "clampedToGround", - 1: "clampedToGround", - 2: "clampedToGround", - 3: "clampedToGround", - 4: "clampedToGround", - }, - "coordinates": { - 0: ( - "-87.77678526964958,41.8708863930319,0 " - "-87.77826234150609,41.87097820122218,0 " - "-87.78251583439344,41.87130129991005,0 " - "-87.78418294588424,41.87145055520308,0 " - "-87.7872369165933,41.8717239119163,0 " - "-87.79160214925886,41.87210797280065,0" - ), - 1: ( - "-87.65758750947528,41.96427269188822,0 " - "-87.65802133507393,41.96581929055245,0 " - "-87.65819033925305,41.96621846093642,0 " - "-87.6583189819129,41.96650362897086,0 " - "-87.65835858701473,41.96669002089185,0 " - "-87.65838428411853,41.96688150295095,0 " - "-87.65842208882658,41.96745896091846,0 " - "-87.65846556843937,41.9683761425439,0 " - "-87.65849296214573,41.96913893870342,0" - ), - 2: ( - "-87.65492939166126,41.95377494531437,0 " - "-87.65557043199591,41.95376544118533,0 " - "-87.65606302030132,41.95376391658746,0 " - "-87.65623502146268,41.95377379126367,0 " - "-87.65634748981634,41.95380103566435,0 " - "-87.65646537904269,41.95387703994676,0 " - "-87.65656532461145,41.95396622645799,0 " - "-87.65664760856414,41.95404201996044,0 " - "-87.65671750555913,41.95416647054043,0 " - "-87.65673983607117,41.95429949810849,0 " - "-87.65673866475777,41.95441024240925,0 " - "-87.6567690255541,41.95490657227902,0 " - "-87.65683672482363,41.95692259283837,0 " - 
"-87.6568900886376,41.95861070983142,0 " - "-87.65699865558875,41.96181418669004,0 " - "-87.65756347177603,41.96397045777844,0 " - "-87.65758750947528,41.96427269188822,0" - ), - 3: ( - "-87.65362593118043,41.94742799535678,0 " - "-87.65363554415794,41.94819886386848,0 " - "-87.6536456393239,41.95059994675451,0 " - "-87.65365831235026,41.95108288489359,0 " - "-87.6536604873874,41.9519954657554,0 " - "-87.65362592053201,41.95245597302328,0 " - "-87.65367158496069,41.95311153649393,0 " - "-87.65368468595476,41.9533202828916,0 " - "-87.65369271253692,41.95343095587119,0 " - "-87.65373335834569,41.95351536301472,0 " - "-87.65378605844126,41.95358212680591,0 " - "-87.65385067928185,41.95364452823767,0 " - "-87.6539390793817,41.95370263886964,0 " - "-87.6540786298351,41.95373403675265,0 " - "-87.65430648647626,41.9537535411832,0 " - "-87.65492939166126,41.95377494531437,0" - ), - 4: ( - "-87.65345391792157,41.94217681262115,0 " - "-87.65342448305786,41.94237224420864,0 " - "-87.65339745703922,41.94268217746244,0 " - "-87.65337753982941,41.94288140770284,0 " - "-87.65336256753105,41.94317369618263,0 " - "-87.65338799707138,41.94357253961736,0 " - "-87.65340240886648,41.94389158188269,0 " - "-87.65341837392448,41.94406444407721,0 " - "-87.65342275247338,41.94421065714904,0 " - "-87.65347469646018,41.94434829382345,0 " - "-87.65351486483024,41.94447699917548,0 " - "-87.65353483605053,41.9453896864472,0 " - "-87.65361975532807,41.94689193720703,0 " - "-87.65362593118043,41.94742799535678,0" - ), - }, - } - - df_expected = DataFrame(data) df_style = read_xml( kml, xpath=".//k:Placemark", @@ -789,7 +760,7 @@ def test_stylesheet_file(datapath): stylesheet=xsl, ) - tm.assert_frame_equal(df_expected, df_style) + tm.assert_frame_equal(df_kml, df_style) @td.skip_if_no("lxml") @@ -821,7 +792,14 @@ def test_stylesheet_buffered_reader(datapath, mode): with open(xsl, mode) as f: xsl_obj = f.read() - read_xml(kml, stylesheet=xsl_obj) + df_style = read_xml( + kml, + 
xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl_obj, + ) + + tm.assert_frame_equal(df_kml, df_style) @td.skip_if_no("lxml") @@ -934,18 +912,17 @@ def test_wrong_stylesheet(): @td.skip_if_no("lxml") -def test_stylesheet_not_path_buffer(): - from lxml.etree import XMLSyntaxError +def test_stylesheet_file_close(datapath): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - kml = os.path.join("data", "xml", "cta_rail_lines.kml") + with open(xsl, "rb") as f: + read_xml(kml, stylesheet=f.read()) - with pytest.raises( - (AttributeError, XMLSyntaxError), - match=("__enter__|Start tag expected, '<' not found"), - ): - read_xml(kml, stylesheet={"a": 1}) + assert not f.closed +@td.skip_if_no("lxml") def test_stylesheet_with_etree(datapath): kml = os.path.join("data", "xml", "cta_rail_lines.kml") xsl = os.path.join("data", "xml", "flatten_doc.xsl") From 5c0af6ec1206e10393b096d05b01b1629fc04684 Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Thu, 25 Feb 2021 15:12:33 -0600 Subject: [PATCH 33/35] Update tests to conform to mypy --- pandas/tests/io/xml/test_to_xml.py | 204 +++++++++-------------------- pandas/tests/io/xml/test_xml.py | 46 +++++-- 2 files changed, 99 insertions(+), 151 deletions(-) diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 3b915a9664210..2026035a23370 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -4,6 +4,7 @@ ) import os import sys +from typing import Union import numpy as np import pytest @@ -135,6 +136,17 @@ """ +def equalize_decl(doc): + # etree and lxml differ on quotes and case in xml declaration + if doc is not None: + doc = doc.replace( + '""" output = geom_df.to_xml(na_rep="0.0", parser=parser) - - # etree and lxml differs on quotes and case in xml declaration - output = output.replace( - '""" output = geom_df.to_xml(attr_cols=["shape", "degrees", 
"sides"], parser=parser) - - # etree and lxml differs on quotes and case in xml declaration - output = output.replace( - '""" output = geom_df.to_xml(namespaces={"": "http://example.com"}, parser=parser) - - # etree and lxml differs on quotes and case in xml declaration - output = output.replace( - '" ) - output = geom_df.to_xml(pretty_print=False) + output = geom_df.to_xml(pretty_print=False, parser="lxml") + output = equalize_decl(output) - output = output.replace( - '", "/>") + # etree adds space for closed tags + if output is not None: + output = output.replace(" />", "/>") assert output == expected @@ -1037,14 +959,17 @@ def test_stylesheet_file_like(datapath, mode): @td.skip_if_no("lxml") def test_stylesheet_io(datapath, mode): - xsl = datapath("io", "data", "xml", "row_field_output.xsl") + xsl_path = datapath("io", "data", "xml", "row_field_output.xsl") - with open(xsl, mode) as f: - xsl_obj = f.read() + xsl_obj: Union[BytesIO, StringIO] - xsl_io = BytesIO(xsl_obj) if isinstance(xsl_obj, bytes) else StringIO(xsl_obj) + with open(xsl_path, mode) as f: + if mode == "rb": + xsl_obj = BytesIO(f.read()) + else: + xsl_obj = StringIO(f.read()) - output = geom_df.to_xml(stylesheet=xsl_io) + output = geom_df.to_xml(stylesheet=xsl_obj) assert output == xsl_expected @@ -1202,7 +1127,10 @@ def test_style_to_csv(): """ - out_csv = geom_df.to_csv(line_terminator="\n").strip() + out_csv = geom_df.to_csv(line_terminator="\n") + + if out_csv is not None: + out_csv = out_csv.strip() out_xml = geom_df.to_xml(stylesheet=xsl) assert out_csv == out_xml @@ -1326,11 +1254,7 @@ def test_compression_output(parser, comp): ) as handle_obj: output = handle_obj.handle.read() - # etree and lxml differs on quotes and case in xml declaration - output = output.replace( - ' Date: Sat, 27 Feb 2021 07:46:50 -0600 Subject: [PATCH 34/35] Import methods to avoid duplication and add typing to parse_doc --- pandas/io/formats/xml.py | 85 ++++++---------------------------------- pandas/io/xml.py | 19 
+++++---- 2 files changed, 25 insertions(+), 79 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index fd03dcd342089..11a9a2a54e717 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -13,7 +13,6 @@ ) from pandas._typing import ( - Buffer, CompressionOptions, FilePathOrBuffer, StorageOptions, @@ -22,14 +21,12 @@ from pandas.core.dtypes.common import is_list_like -from pandas.io.common import ( - file_exists, - get_handle, - is_fsspec_url, - is_url, - stringify_path, -) +from pandas.io.common import get_handle from pandas.io.formats.format import DataFrameFormatter +from pandas.io.xml import ( + get_data_from_filepath, + preprocess_data, +) class BaseXMLFormatter: @@ -436,6 +433,11 @@ class LxmlXMLFormatter(BaseXMLFormatter): modules: `xml.etree.ElementTree` and `xml.dom.minidom`. """ + from lxml.etree import ( + Element, + ElementTree, + ) + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -561,7 +563,7 @@ def build_elems(self) -> None: except KeyError: raise KeyError(f"no valid column, {col}") - def parse_doc(self): + def parse_doc(self) -> Union[Element, ElementTree]: """ Build tree from stylesheet. @@ -577,14 +579,14 @@ def parse_doc(self): style_doc = self.stylesheet - handle_data = _get_data_from_filepath( + handle_data = get_data_from_filepath( filepath_or_buffer=style_doc, encoding=self.encoding, compression=self.compression, storage_options=self.storage_options, ) - with _preprocess_data(handle_data) as xml_data: + with preprocess_data(handle_data) as xml_data: curr_parser = XMLParser(encoding=self.encoding) if isinstance(xml_data, io.StringIO): @@ -611,64 +613,3 @@ def transform_doc(self) -> bytes: new_doc = transformer(self.root) return bytes(new_doc) - - -def _get_data_from_filepath( - filepath_or_buffer, - encoding, - compression, - storage_options, -) -> Union[str, bytes, Buffer]: - """ - Extract raw XML data. - - The method accepts three input types: - 1. 
filepath (string-like) - 2. file-like object (e.g. open file object, StringIO) - 3. XML string or bytes - - This method turns (1) into (2) to simplify the rest of the processing. - It returns input types (2) and (3) unchanged. - """ - filepath_or_buffer = stringify_path(filepath_or_buffer) - - if ( - isinstance(filepath_or_buffer, str) - and not filepath_or_buffer.startswith((" Union[io.StringIO, io.BytesIO]: - """ - Convert extracted raw data. - - This method will return underlying data of extracted XML content. - The data either has a `read` attribute (e.g. a file object or a - StringIO/BytesIO) or is a string or bytes that is an XML document. - """ - if isinstance(data, str): - data = io.StringIO(data) - - elif isinstance(data, bytes): - data = io.BytesIO(data) - - return data diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 122057c2c625d..a797b30e17ba7 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -362,14 +362,14 @@ def _parse_doc(self) -> Union[Element, ElementTree]: parse, ) - handle_data = _get_data_from_filepath( + handle_data = get_data_from_filepath( filepath_or_buffer=self.path_or_buffer, encoding=self.encoding, compression=self.compression, storage_options=self.storage_options, ) - with _preprocess_data(handle_data) as xml_data: + with preprocess_data(handle_data) as xml_data: curr_parser = XMLParser(encoding=self.encoding) r = parse(xml_data, parser=curr_parser) @@ -383,6 +383,11 @@ class _LxmlFrameParser(_XMLFrameParser): XPath 1.0 and XSLT 1.0. 
""" + from lxml.etree import ( + Element, + ElementTree, + ) + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -554,7 +559,7 @@ def _validate_names(self) -> None: f"{type(self.names).__name__} is not a valid type for names" ) - def _parse_doc(self): + def _parse_doc(self) -> Union[Element, ElementTree]: from lxml.etree import ( XMLParser, fromstring, @@ -563,14 +568,14 @@ def _parse_doc(self): raw_doc = self.stylesheet if self.is_style else self.path_or_buffer - handle_data = _get_data_from_filepath( + handle_data = get_data_from_filepath( filepath_or_buffer=raw_doc, encoding=self.encoding, compression=self.compression, storage_options=self.storage_options, ) - with _preprocess_data(handle_data) as xml_data: + with preprocess_data(handle_data) as xml_data: curr_parser = XMLParser(encoding=self.encoding) if isinstance(xml_data, io.StringIO): @@ -583,7 +588,7 @@ def _parse_doc(self): return r -def _get_data_from_filepath( +def get_data_from_filepath( filepath_or_buffer, encoding, compression, @@ -627,7 +632,7 @@ def _get_data_from_filepath( return filepath_or_buffer -def _preprocess_data(data) -> Union[io.StringIO, io.BytesIO]: +def preprocess_data(data) -> Union[io.StringIO, io.BytesIO]: """ Convert extracted raw data. From 6194f83c151f3c3b27ac3f50f594c1dd60d23a0a Mon Sep 17 00:00:00 2001 From: Parfait Gasana Date: Sat, 27 Feb 2021 10:07:25 -0600 Subject: [PATCH 35/35] Refactor code and revert changes to avoid optional module type hints --- pandas/io/formats/xml.py | 30 +++++++----------------------- pandas/io/xml.py | 20 ++++++-------------- 2 files changed, 13 insertions(+), 37 deletions(-) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 11a9a2a54e717..044b03ba83714 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -433,11 +433,6 @@ class LxmlXMLFormatter(BaseXMLFormatter): modules: `xml.etree.ElementTree` and `xml.dom.minidom`. 
""" - from lxml.etree import ( - Element, - ElementTree, - ) - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -563,15 +558,17 @@ def build_elems(self) -> None: except KeyError: raise KeyError(f"no valid column, {col}") - def parse_doc(self) -> Union[Element, ElementTree]: + def transform_doc(self) -> bytes: """ - Build tree from stylesheet. + Parse stylesheet from file or buffer and run it. This method will parse stylesheet object into tree for parsing - conditionally by its specific object type. + conditionally by its specific object type, then transforms + original tree with XSLT script. """ from lxml.etree import ( + XSLT, XMLParser, fromstring, parse, @@ -590,24 +587,11 @@ def parse_doc(self) -> Union[Element, ElementTree]: curr_parser = XMLParser(encoding=self.encoding) if isinstance(xml_data, io.StringIO): - r = fromstring( + xsl_doc = fromstring( xml_data.getvalue().encode(self.encoding), parser=curr_parser ) else: - r = parse(xml_data, parser=curr_parser) - - return r - - def transform_doc(self) -> bytes: - """ - Transform original tree using stylesheet. - - This method will transform built tree with XSLT script. - """ - - from lxml.etree import XSLT - - xsl_doc = self.parse_doc() + xsl_doc = parse(xml_data, parser=curr_parser) transformer = XSLT(xsl_doc) new_doc = transformer(self.root) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index a797b30e17ba7..83eba5f17c7b3 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -383,11 +383,6 @@ class _LxmlFrameParser(_XMLFrameParser): XPath 1.0 and XSLT 1.0. """ - from lxml.etree import ( - Element, - ElementTree, - ) - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -400,11 +395,10 @@ def parse_data(self) -> List[Dict[str, Optional[str]]]: and parse original or transformed XML and return specific nodes. 
""" - self.xml_doc = self._parse_doc() + self.xml_doc = self._parse_doc(self.path_or_buffer) if self.stylesheet is not None: - self.is_style = True - self.xsl_doc = self._parse_doc() + self.xsl_doc = self._parse_doc(self.stylesheet) self.xml_doc = self._transform_doc() self._validate_path() @@ -559,15 +553,13 @@ def _validate_names(self) -> None: f"{type(self.names).__name__} is not a valid type for names" ) - def _parse_doc(self) -> Union[Element, ElementTree]: + def _parse_doc(self, raw_doc): from lxml.etree import ( XMLParser, fromstring, parse, ) - raw_doc = self.stylesheet if self.is_style else self.path_or_buffer - handle_data = get_data_from_filepath( filepath_or_buffer=raw_doc, encoding=self.encoding, @@ -579,13 +571,13 @@ def _parse_doc(self) -> Union[Element, ElementTree]: curr_parser = XMLParser(encoding=self.encoding) if isinstance(xml_data, io.StringIO): - r = fromstring( + doc = fromstring( xml_data.getvalue().encode(self.encoding), parser=curr_parser ) else: - r = parse(xml_data, parser=curr_parser) + doc = parse(xml_data, parser=curr_parser) - return r + return doc def get_data_from_filepath(