|
35 | 35 | ;;
|
36 | 36 | ;; Our strategy is to keep track of just the problematic things.
|
37 | 37 | ;; Specifically, we keep track of all comments, CDATA sections and
|
38 |
| -;; processing instructions in the instance. We do this by marking all |
39 |
| -;; except the first character of these with a non-nil nxml-inside text |
40 |
| -;; property. The value of the nxml-inside property is comment, |
41 |
| -;; cdata-section or processing-instruction. The first character does |
42 |
| -;; not have the nxml-inside property so we can find the beginning of |
43 |
| -;; the construct by looking for a change in a text property value |
44 |
| -;; (Emacs provides primitives for this). We use text properties |
45 |
| -;; rather than overlays, since the implementation of overlays doesn't |
46 |
| -;; look like it scales to large numbers of overlays in a buffer. |
47 |
| -;; |
48 |
| -;; We don't in fact track all these constructs, but only track them in |
49 |
| -;; some initial part of the instance. |
| 38 | +;; processing instructions in the instance. We do this by marking |
| 39 | +;; the first character of these with the generic string syntax by setting |
| 40 | +;; a `syntax-table' text property in `sgml-syntax-propertize'. |
50 | 41 | ;;
|
51 | 42 | ;; Thus to parse some random point in the file we first ensure that we
|
52 |
| -;; have scanned up to that point. Then we search backwards for a |
53 |
| -;; <. Then we check whether the < has an nxml-inside property. If it |
54 |
| -;; does we go backwards to first character that does not have an |
55 |
| -;; nxml-inside property (this character must be a <). Then we start |
56 |
| -;; parsing forward from the < we have found. |
| 43 | +;; have scanned up to that point. Then we search backwards for a <. |
| 44 | +;; Then we check whether the < has the generic string syntax. If it |
| 45 | +;; does we go backwards to the first character of the generic string (this |
| 46 | +;; character must be a <). Then we start parsing forward from the < |
| 47 | +;; we have found. |
57 | 48 | ;;
|
58 | 49 | ;; The prolog has to be parsed specially, so we also keep track of the
|
59 | 50 | ;; end of the prolog in `nxml-prolog-end'. The prolog is reparsed on
|
60 | 51 | ;; every change to the prolog. This won't work well if people try to
|
61 | 52 | ;; edit huge internal subsets. Hopefully that will be rare.
|
62 | 53 | ;;
|
63 |
| -;; We keep track of the changes by adding to the buffer's |
64 |
| -;; after-change-functions hook. Scanning is also done as a |
65 |
| -;; prerequisite to fontification by adding to fontification-functions |
66 |
| -;; (in the same way as jit-lock). This means that scanning for these |
| 54 | +;; We rely on the `syntax-propertize-function' machinery to keep track |
| 55 | +;; of the changes in the buffer. Fontification also relies on correct |
| 56 | +;; `syntax-table' properties. This means that scanning for these |
67 | 57 | ;; constructs had better be quick. Fortunately it is. Firstly, the
|
68 | 58 | ;; typical proportion of comments, CDATA sections and processing
|
69 | 59 | ;; instructions is small relative to other things. Secondly, to scan
|
|
79 | 69 | "Integer giving position following end of the prolog.")
|
80 | 70 |
|
81 | 71 | (defsubst nxml-get-inside (pos)
|
82 |
| - (save-excursion (nth 8 (syntax-ppss pos)))) |
| 72 | + "Return non-nil if inside comment, CDATA, or PI." |
| 73 | + (let ((ppss (save-excursion (syntax-ppss pos)))) |
| 74 | + (or |
| 75 | + ;; Inside comment. |
| 76 | + (nth 4 ppss) |
| 77 | + ;; Inside "generic" string which is used for CDATA, and PI. |
| 78 | + ;; "Normal" double and single quoted strings are used for |
| 79 | + ;; attribute values. |
| 80 | + (eq t (nth 3 ppss))))) |
83 | 81 |
|
84 | 82 | (defun nxml-inside-end (pos)
|
85 | 83 | "Return the end of the inside region containing POS.
|
|
0 commit comments