From e15ce87ddef0f412865b3ae96fba3387ad59995b Mon Sep 17 00:00:00 2001 From: Rogach Date: Thu, 10 Nov 2016 23:28:25 +0300 Subject: [PATCH 1/4] Change default encoding in XML.save to UTF-8 The default was ISO-8859-1, which resulted in encoding errors at runtime if the document contained non-latin1 characters. Also, the XML spec states that documents without an XML declaration are expected to contain UTF-8 or UTF-16, so writing in ISO-8859-1 without an XML declaration (which was the default) can easily break compliant parsers. --- shared/src/main/scala/scala/xml/XML.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shared/src/main/scala/scala/xml/XML.scala b/shared/src/main/scala/scala/xml/XML.scala index 649c8b029..0914beff1 100755 --- a/shared/src/main/scala/scala/xml/XML.scala +++ b/shared/src/main/scala/scala/xml/XML.scala @@ -63,7 +63,7 @@ object XML extends XMLLoader[Elem] { val preserve = "preserve" val space = "space" val lang = "lang" - val encoding = "ISO-8859-1" + val encoding = "UTF-8" /** Returns an XMLLoader whose load* methods will use the supplied SAXParser. */ def withSAXParser(p: SAXParser): XMLLoader[Elem] = @@ -82,7 +82,7 @@ object XML extends XMLLoader[Elem] { final def save( filename: String, node: Node, - enc: String = encoding, + enc: String = "UTF-8", xmlDecl: Boolean = false, doctype: dtd.DocType = null): Unit = { From 9b85c9ddab2423b4e3281ccc6fbf4dc201f19735 Mon Sep 17 00:00:00 2001 From: Rogach Date: Fri, 11 Nov 2016 00:02:01 +0300 Subject: [PATCH 2/4] Add note about default encoding change to XML.save method documentation. 
--- shared/src/main/scala/scala/xml/XML.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/shared/src/main/scala/scala/xml/XML.scala b/shared/src/main/scala/scala/xml/XML.scala index 0914beff1..eee10c604 100755 --- a/shared/src/main/scala/scala/xml/XML.scala +++ b/shared/src/main/scala/scala/xml/XML.scala @@ -73,6 +73,10 @@ object XML extends XMLLoader[Elem] { * Saves a node to a file with given filename using given encoding * optionally with xmldecl and doctype declaration. * + * Note: default encoding was ISO-8859-1 (latin1) in pre-1.0.7 scala-xml versions. + * If your code depends on charaters in non-ASCII latin1 range, specify + * ISO-8859-1 encoding explicitly. + * * @param filename the filename * @param node the xml node we want to write * @param enc encoding to use From bcec7428624c667be75b8ceb4645300b53bc62bf Mon Sep 17 00:00:00 2001 From: Rogach Date: Fri, 11 Nov 2016 00:16:33 +0300 Subject: [PATCH 3/4] fix typo in method documentation --- shared/src/main/scala/scala/xml/XML.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/src/main/scala/scala/xml/XML.scala b/shared/src/main/scala/scala/xml/XML.scala index eee10c604..be1de76a5 100755 --- a/shared/src/main/scala/scala/xml/XML.scala +++ b/shared/src/main/scala/scala/xml/XML.scala @@ -74,7 +74,7 @@ object XML extends XMLLoader[Elem] { * optionally with xmldecl and doctype declaration. * * Note: default encoding was ISO-8859-1 (latin1) in pre-1.0.7 scala-xml versions. - * If your code depends on charaters in non-ASCII latin1 range, specify + * If your code depends on characters in non-ASCII latin1 range, specify * ISO-8859-1 encoding explicitly. * * @param filename the filename From c416c4eeea8727114f97a467646776fd216a7739 Mon Sep 17 00:00:00 2001 From: "Aaron S. 
Hawley" Date: Fri, 11 Nov 2016 01:35:54 -0500 Subject: [PATCH 4/4] Add test for ISO-8859-1 defect found with XML.save in #121 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to closely mimic bug in XML.save and XML.loadFile, but write tests that don't use the file system. Will fail in 1.0.6 and earlier: expected:<...klmnopqrstuvwxyz{|}~[ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ]> but was:<...klmnopqrstuvwxyz{|}~[????????????????????????????????????????????????????????????????????????????????????????????????]> Will be fixed in #122. --- jvm/src/test/scala/scala/xml/XMLTest.scala | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/jvm/src/test/scala/scala/xml/XMLTest.scala b/jvm/src/test/scala/scala/xml/XMLTest.scala index 9ae6bc389..43dd182e2 100644 --- a/jvm/src/test/scala/scala/xml/XMLTest.scala +++ b/jvm/src/test/scala/scala/xml/XMLTest.scala @@ -300,6 +300,29 @@ class XMLTestJVM { """, wsdlTemplate4("service4", () => "target4") toString) } + // Issue found with ISO-8859-1 in #121 that was fixed with UTF-8 default + @UnitTest + def writeReadNoDeclarationDefaultEncoding: Unit = { + val chars = ((32 to 126) ++ (160 to 255)).map(_.toChar) + val xml = { chars.mkString } + + // com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: + // Invalid byte 1 of 1-byte UTF-8 sequence. 
+ // scala.xml.XML.save("foo.xml", xml) + // scala.xml.XML.loadFile("foo.xml").toString) + + val outputStream = new java.io.ByteArrayOutputStream + val streamWriter = new java.io.OutputStreamWriter(outputStream, XML.encoding) + + XML.write(streamWriter, xml, XML.encoding, false, null) + streamWriter.flush + + val inputStream = new java.io.ByteArrayInputStream(outputStream.toByteArray) + val streamReader = new java.io.InputStreamReader(inputStream) + + assertEquals(xml.toString, XML.load(streamReader).toString) + } + @UnitTest def t0663 = { val src = scala.io.Source.fromString("")