Fix nxml-get-inside (Bug#32003)

npostavs · npostavs · commit ca14dd1d4628 · 2019-05-09T06:42:40.000-04:00
The change from 2016-01-16 "lisp/nxml: Use syntax-tables for comments"
made nxml-get-inside return non-nil for any string or comment,
including attribute strings.  This caused incorrect parsing and
therefore incorrect indentation.
* lisp/nxml/nxml-rap.el: Update commentary to reflect changes to
nxml-mode parsing.
(nxml-get-inside): Only return non-nil when inside comments and
generic strings, not normal quote-delimited strings.
* test/lisp/nxml/nxml-mode-tests.el: New tests.
diff --git a/lisp/nxml/nxml-rap.el b/lisp/nxml/nxml-rap.el
@@ -35,35 +35,25 @@
 ;;
 ;; Our strategy is to keep track of just the problematic things.
 ;; Specifically, we keep track of all comments, CDATA sections and
-;; processing instructions in the instance.  We do this by marking all
-;; except the first character of these with a non-nil nxml-inside text
-;; property. The value of the nxml-inside property is comment,
-;; cdata-section or processing-instruction.  The first character does
-;; not have the nxml-inside property so we can find the beginning of
-;; the construct by looking for a change in a text property value
-;; (Emacs provides primitives for this).  We use text properties
-;; rather than overlays, since the implementation of overlays doesn't
-;; look like it scales to large numbers of overlays in a buffer.
-;;
-;; We don't in fact track all these constructs, but only track them in
-;; some initial part of the instance.
+;; processing instructions in the instance.  We do this by marking
+;; the first character of these with the generic string syntax by setting
+;; a 'syntax-table' text property in `sgml-syntax-propertize'.
 ;;
 ;; Thus to parse some random point in the file we first ensure that we
-;; have scanned up to that point.  Then we search backwards for a
-;; <. Then we check whether the < has an nxml-inside property. If it
-;; does we go backwards to first character that does not have an
-;; nxml-inside property (this character must be a <).  Then we start
-;; parsing forward from the < we have found.
+;; have scanned up to that point.  Then we search backwards for a <.
+;; Then we check whether the < has the generic string syntax.  If it
+;; does, we go back to the first character of the generic string (this
+;; character must be a <).  Then we start parsing forward from the <
+;; we have found.
 ;;
 ;; The prolog has to be parsed specially, so we also keep track of the
 ;; end of the prolog in `nxml-prolog-end'. The prolog is reparsed on
 ;; every change to the prolog.  This won't work well if people try to
 ;; edit huge internal subsets. Hopefully that will be rare.
 ;;
-;; We keep track of the changes by adding to the buffer's
-;; after-change-functions hook.  Scanning is also done as a
-;; prerequisite to fontification by adding to fontification-functions
-;; (in the same way as jit-lock).  This means that scanning for these
+;; We rely on the `syntax-propertize-function' machinery to keep track
+;; of the changes in the buffer.  Fontification also relies on correct
+;; `syntax-table' properties.  This means that scanning for these
 ;; constructs had better be quick.  Fortunately it is. Firstly, the
 ;; typical proportion of comments, CDATA sections and processing
 ;; instructions is small relative to other things.  Secondly, to scan
@@ -79,7 +69,15 @@
   "Integer giving position following end of the prolog.")
 
 (defsubst nxml-get-inside (pos)
-  (save-excursion (nth 8 (syntax-ppss pos))))
+  "Return non-nil if POS is inside a comment, CDATA section, or PI."
+  (let ((ppss (save-excursion (syntax-ppss pos))))
+    (or
+     ;; Inside comment.
+     (nth 4 ppss)
+     ;; Inside a "generic" string, which is used for CDATA and PI.
+     ;; "Normal" double- and single-quoted strings are used for
+     ;; attribute values, so they are deliberately not matched here.
+     (eq t (nth 3 ppss)))))
 
 (defun nxml-inside-end (pos)
   "Return the end of the inside region containing POS.
diff --git a/test/lisp/nxml/nxml-mode-tests.el b/test/lisp/nxml/nxml-mode-tests.el
@@ -0,0 +1,62 @@
+;;; nxml-mode-tests.el --- Test NXML Mode -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2019 Free Software Foundation, Inc.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Code:
+
+(require 'ert)
+(require 'nxml-mode)
+
+(defun nxml-mode-tests-correctly-indented-string (str)
+  (with-temp-buffer
+    (nxml-mode)
+    (insert str)
+    (indent-region (point-min) (point-max))
+    (equal (buffer-string) str)))       ; non-nil iff re-indenting left STR unchanged
+
+(ert-deftest nxml-indent-line-after-attribute () ; multi-line attribute values must not disturb indentation
+  (should (nxml-mode-tests-correctly-indented-string "
+<settings
+    xmlns=\"http://maven.apache.org/SETTINGS/1.0.0\"
+    xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"
+    xsi:schemaLocation=\"http://maven.apache.org/SETTINGS/1.0.0
+                        https://maven.apache.org/xsd/settings-1.0.0.xsd\">
+  <mirrors>
+    ...
+  </mirrors>
+</settings>
+"))
+  (should (nxml-mode-tests-correctly-indented-string "\
+<x>
+  <abc xx=\"x/x/x/x/x/x/x/
+           y/y/y/y/y/y/
+           \">
+    <zzz/>
+  </abc>
+  <nl>&#10;</nl>
+</x>
+")))
+
+(ert-deftest nxml-balanced-close-start-tag-inline () ; unterminated <b ...> before </a> gets "></b>" inserted
+  (with-temp-buffer
+    (nxml-mode)
+    (insert "<a><b c=\"\"</a>")
+    (search-backward "</a>")
+    (nxml-balanced-close-start-tag-inline)
+    (should (equal (buffer-string) "<a><b c=\"\"></b></a>"))))
+
+(provide 'nxml-mode-tests)
+;;; nxml-mode-tests.el ends here