Commit | Line | Data |
---|---|---|
e95a67dc | 1 | ;;; syntax.el --- helper functions to find syntactic context -*- lexical-binding: t -*- |
88a05faf | 2 | |
ba318903 | 3 | ;; Copyright (C) 2000-2014 Free Software Foundation, Inc. |
88a05faf | 4 | |
34dc21db | 5 | ;; Maintainer: emacs-devel@gnu.org |
30764597 PJ |
6 | ;; Keywords: internal |
7 | ||
88a05faf SM |
8 | ;; This file is part of GNU Emacs. |
9 | ||
d6cba7ae | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
88a05faf | 11 | ;; it under the terms of the GNU General Public License as published by |
d6cba7ae GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
88a05faf SM |
14 | |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
d6cba7ae | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
88a05faf SM |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; The main exported function is `syntax-ppss'. You might also need | |
9c5b2653 | 26 | ;; to call `syntax-ppss-flush-cache' or to add it to |
ad8a840d | 27 | ;; `before-change-functions' (although this is automatically done by |
88a05faf | 28 | ;; syntax-ppss when needed, but that might fail if syntax-ppss is |
ad8a840d | 29 | ;; called in a context where before-change-functions is temporarily |
88a05faf SM |
30 | ;; let-bound to nil). |
31 | ||
32 | ;;; Todo: | |
33 | ||
34 | ;; - do something about the case where the syntax-table is changed. | |
35 | ;; This typically happens with tex-mode and its `$' operator. | |
88a05faf SM |
36 | ;; - new functions `syntax-state', ... to replace uses of parse-partial-state |
37 | ;; with something higher-level (similar to syntax-ppss-context). | |
38 | ;; - interaction with mmm-mode. | |
88a05faf SM |
39 | |
40 | ;;; Code: | |
41 | ||
42 | ;; Note: PPSS stands for `parse-partial-sexp state' | |
43 | ||
f80efb86 | 44 | (eval-when-compile (require 'cl-lib)) |
88a05faf | 45 | |
746dca00 JB |
46 | (defvar font-lock-beginning-of-syntax-function) |
47 | ||
cf38dd42 SM |
48 | ;;; Applying syntax-table properties where needed. |
49 | ||
(defvar syntax-propertize-function nil
  ;; Rather than a -functions hook, this is a -function because it's easier
  ;; to do a single scan than several scans: with multiple scans, one cannot
  ;; assume that the text before point has been propertized, so syntax-ppss
  ;; gives unreliable results (and stores them in its cache to boot, so we'd
  ;; have to flush that cache between each function, and we couldn't use
  ;; syntax-ppss-flush-cache since that would not only flush the cache but also
  ;; reset syntax-propertize--done which should not be done in this case).
  "Mode-specific function to apply `syntax-table' text properties.
It is the workhorse of `syntax-propertize', which is called by things like
Font-Lock and indentation.  When this variable is nil, `syntax-propertize'
does nothing.

It is given two arguments, START and END: the start and end of the text to
which `syntax-table' might need to be applied. Major modes can use this to
override the buffer's syntax table for special syntactic constructs that
cannot be handled just by the buffer's syntax-table.

The specified function may call `syntax-ppss' on any position
before END, but it should not call `syntax-ppss-flush-cache',
which means that it should not call `syntax-ppss' on some
position and later modify the buffer on some earlier position.")
cf38dd42 SM |
71 | |
(defvar syntax-propertize-chunk-size 500
  "Minimum number of characters `syntax-propertize' tries to handle at once.
When asked to propertize up to some position, `syntax-propertize' extends
the end of the region to at least this many characters past its start,
without going beyond `point-max'.")
73 | ||
;; Consulted by `syntax-propertize' to adjust the region boundaries before
;; the region is handed to `syntax-propertize-function'.
(defvar syntax-propertize-extend-region-functions
  '(syntax-propertize-wholelines)
  "Special hook run just before proceeding to propertize a region.
This is used to allow major modes to help `syntax-propertize' find safe buffer
positions as beginning and end of the propertized region. Its most common use
is to solve the problem of /identification/ of multiline elements by providing
a function that tries to find such elements and move the boundaries such that
they do not fall in the middle of one.
Each function is called with two arguments (START and END) and it should return
either a cons (NEW-START . NEW-END) or nil if no adjustment should be made.
These functions are run in turn repeatedly until they all return nil.
Put first the functions more likely to cause a change and cheaper to compute.")
;; Mark it as a special hook which doesn't use any global setting
;; (i.e. doesn't obey the element t in the buffer-local value).
(make-variable-buffer-local 'syntax-propertize-extend-region-functions)
89 | ||
(defun syntax-propertize-wholelines (start end)
  "Return a cons (NEW-START . NEW-END) extending START..END to whole lines.
NEW-START is the beginning of the line containing START.  NEW-END is END
itself when END is already at the beginning of a line, otherwise it is
the beginning of the line following END.
This function moves point and leaves it at END."
  (let ((line-start (progn (goto-char start)
                           (line-beginning-position))))
    (goto-char end)
    (cons line-start
          (if (bolp)
              (point)
            (line-beginning-position 2)))))
95 | ||
(defun syntax-propertize-multiline (beg end)
  "Let `syntax-propertize' pay attention to the syntax-multiline property.
Return a cons (NEW-BEG . NEW-END).  When the character just before BEG
carries a non-nil `syntax-multiline' property, NEW-BEG is moved back to
where that property last changed (or to `point-min').  When the
character at END carries the property, NEW-END is moved forward to the
first position where the property is nil (or to `point-max')."
  (let ((new-beg
         (if (and (< (point-min) beg)
                  (get-text-property (1- beg) 'syntax-multiline))
             (or (previous-single-property-change beg 'syntax-multiline)
                 (point-min))
           beg))
        (new-end
         (if (get-text-property end 'syntax-multiline)
             (or (text-property-any end (point-max)
                                    'syntax-multiline nil)
                 (point-max))
           end)))
    (cons new-beg new-end)))
108 | ||
;; The initial value -1 is smaller than any buffer position, so the first
;; call to `syntax-propertize' always finds work to do; the starting
;; position is then clamped to `point-min' there.
(defvar syntax-propertize--done -1
  "Position up to which syntax-table properties have been set.")
(make-variable-buffer-local 'syntax-propertize--done)
112 | ||
(defun syntax-propertize--shift-groups (re n)
  "Return regexp RE with its explicitly numbered groups shifted by N.
Every occurrence of \"\\\\(?NUM:\" in RE is rewritten so that NUM becomes
NUM + N.  Groups without an explicit number are left untouched."
  (let ((renumber
         ;; Called with the text of one match, e.g. "\\(?2:"; the match
         ;; data then refers to that text, so subgroup 1 is the number.
         (lambda (group-open)
           (let* ((old-num (string-to-number (match-string 1 group-open)))
                  (new-num (number-to-string (+ n old-num))))
             (replace-match new-num t t group-open 1)))))
    (replace-regexp-in-string "\\\\(\\?\\([0-9]+\\):" renumber re t t)))
121 | ||
b879a6e2 SM |
(defmacro syntax-propertize-precompile-rules (&rest rules)
  "Return a precompiled form of RULES to pass to `syntax-propertize-rules'.
The arg RULES can be of the same form as in `syntax-propertize-rules'.
The return value is an object that can be passed as a rule to
`syntax-propertize-rules'.
I.e. this is useful only when you want to share rules among several
`syntax-propertize-function's."
  (declare (debug syntax-propertize-rules))
  ;; No actual precompilation happens: the expansion is just RULES, quoted.
  ;; This is nevertheless a macro rather than a function because:
  ;; - real pre-compilation could be added later, e.g. if/when
  ;;   syntax-propertize-rules switches to a DFA-based implementation;
  ;; - it lets Edebug properly annotate the expressions inside RULES.
  (list 'quote rules))
137 | ||
cf38dd42 SM |
(defmacro syntax-propertize-rules (&rest rules)
  "Make a function that applies RULES for use in `syntax-propertize-function'.
The function will scan the buffer, applying the rules where they match.
The buffer is scanned a single time, like \"lex\" would, rather than once
per rule.

Each RULE can be a symbol, in which case that symbol's value should be,
at macro-expansion time, a precompiled set of rules, as returned
by `syntax-propertize-precompile-rules'.

Otherwise, RULE should have the form (REGEXP HIGHLIGHT1 ... HIGHLIGHTn), where
REGEXP is an expression (evaluated at time of macro-expansion) that returns
a regexp, and where HIGHLIGHTs have the form (NUMBER SYNTAX) which means to
apply the property SYNTAX to the chars matched by the subgroup NUMBER
of the regular expression, if NUMBER did match.
SYNTAX is an expression that returns a value to apply as `syntax-table'
property. Some expressions are handled specially:
- if SYNTAX is a string, then it is converted with `string-to-syntax';
- if SYNTAX has the form (prog1 EXP . EXPS) then the value returned by EXP
  will be applied to the buffer before running EXPS and if EXP is a string it
  is also converted with `string-to-syntax';
- if SYNTAX has the form (ignore . EXPS) then EXPS are run and no property
  is applied.
The SYNTAX expression is responsible to save the `match-data' if needed
for subsequent HIGHLIGHTs.
Also SYNTAX is free to move point, in which case RULES may not be applied to
some parts of the text or may be applied several times to other parts.

Note: back-references in REGEXPs do not work."
  (declare (debug (&rest &or symbolp    ;FIXME: edebug this eval step.
                         (form &rest
                               (numberp
                                [&or stringp ;FIXME: Use &wrap
                                     ("prog1" [&or stringp def-form] def-body)
                                     def-form])))))
  ;; Expand symbol rules (precompiled rule sets) in place, so that RULES
  ;; ends up as a flat list of (REGEXP HIGHLIGHT...) forms in their
  ;; original order.
  (let ((newrules nil))
    (while rules
      (if (symbolp (car rules))
          (setq rules (append (symbol-value (pop rules)) rules))
        (push (pop rules) newrules)))
    (setq rules (nreverse newrules)))
  ;; OFFSET counts the regexp groups contributed by the rules processed so
  ;; far; BRANCHES accumulates one `cond' clause per rule (in reverse).
  (let* ((offset 0)
         (branches '())
         ;; We'd like to use a real DFA-based lexer, usually, but since Emacs
         ;; doesn't have one yet, we fallback on building one large regexp
         ;; and use groups to determine which branch of the regexp matched.
         (re
          (mapconcat
           (lambda (rule)
             (let* ((orig-re (eval (car rule)))
                    (re orig-re))
               (when (and (assq 0 rule) (cdr rules))
                 ;; If there's more than 1 rule, and the rule wants to apply
                 ;; highlight to match 0, create an extra group to be able to
                 ;; tell when *this* match 0 has succeeded.
                 (cl-incf offset)
                 (setq re (concat "\\(" re "\\)")))
               (setq re (syntax-propertize--shift-groups re offset))
               (let ((code '())
                     ;; CONDITION is the test of this rule's `cond' clause:
                     ;; it tells whether this rule's part of the combined
                     ;; regexp is the one that matched.
                     (condition
                      (cond
                       ((assq 0 rule) (if (zerop offset) t
                                        `(match-beginning ,offset)))
                       ((null (cddr rule))
                        `(match-beginning ,(+ offset (car (cadr rule)))))
                       (t
                        `(or ,@(mapcar
                                (lambda (case)
                                  `(match-beginning ,(+ offset (car case))))
                                (cdr rule))))))
                     (nocode t)
                     ;; Local copy: it may be reset to 0 below without
                     ;; affecting the running total kept in the outer OFFSET.
                     (offset offset))
                 ;; If some of the subgroup rules include Elisp code, then we
                 ;; need to set the match-data so it's consistent with what the
                 ;; code expects. If not, then we can simply use shifted
                 ;; offset in our own code.
                 (unless (zerop offset)
                   (dolist (case (cdr rule))
                     (unless (stringp (cadr case))
                       (setq nocode nil)))
                   (unless nocode
                     (push `(let ((md (match-data 'ints)))
                              ;; Keep match 0 as is, but shift everything else.
                              (setcdr (cdr md) (nthcdr ,(* (1+ offset) 2) md))
                              (set-match-data md))
                           code)
                     (setq offset 0)))
                 ;; Now construct the code for each subgroup rules.
                 (dolist (case (cdr rule))
                   (cl-assert (null (cddr case)))
                   (let* ((gn (+ offset (car case)))
                          (action (nth 1 case))
                          ;; THISCODE is the list of forms implementing
                          ;; this single (NUMBER SYNTAX) highlight.
                          (thiscode
                           (cond
                            ((stringp action)
                             `((put-text-property
                                (match-beginning ,gn) (match-end ,gn)
                                'syntax-table
                                ',(string-to-syntax action))))
                            ;; (ignore . EXPS): run EXPS, apply no property.
                            ((eq (car-safe action) 'ignore)
                             (cdr action))
                            ((eq (car-safe action) 'prog1)
                             (if (stringp (nth 1 action))
                                 `((put-text-property
                                    (match-beginning ,gn) (match-end ,gn)
                                    'syntax-table
                                    ',(string-to-syntax (nth 1 action)))
                                   ,@(nthcdr 2 action))
                               `((let ((mb (match-beginning ,gn))
                                       (me (match-end ,gn))
                                       (syntax ,(nth 1 action)))
                                   (if syntax
                                       (put-text-property
                                        mb me 'syntax-table syntax))
                                   ,@(nthcdr 2 action)))))
                            (t
                             `((let ((mb (match-beginning ,gn))
                                     (me (match-end ,gn))
                                     (syntax ,action))
                                 (if syntax
                                     (put-text-property
                                      mb me 'syntax-table syntax))))))))

                     ;; With a single highlight, or a highlight on group 0,
                     ;; the code is emitted unguarded; otherwise it is
                     ;; wrapped in a check that the group did match.
                     (if (or (not (cddr rule)) (zerop gn))
                         (setq code (nconc (nreverse thiscode) code))
                       (push `(if (match-beginning ,gn)
                                  ;; Try and generate clean code with no
                                  ;; extraneous progn.
                                  ,(if (null (cdr thiscode))
                                       (car thiscode)
                                     `(progn ,@thiscode)))
                             code))))
                 (push (cons condition (nreverse code))
                       branches))
               ;; Account for the groups of this rule's original regexp
               ;; before moving on to the next rule.
               (cl-incf offset (regexp-opt-depth orig-re))
               re))
           rules
           "\\|")))
    ;; The expansion: a function of (START END) that searches for the
    ;; combined regexp and dispatches on which rule matched.
    `(lambda (start end)
       (goto-char start)
       (while (and (< (point) end)
                   (re-search-forward ,re end t))
         (cond ,@(nreverse branches))))))
279 | ||
(defun syntax-propertize-via-font-lock (keywords)
  "Return a function propertizing START..END using font-lock syntax KEYWORDS.
KEYWORDS obeys the format used in `font-lock-syntactic-keywords'.
The return value is a function suitable for `syntax-propertize-function'."
  (lambda (start end)
    (with-no-warnings
      ;; Bind KEYWORDS as `font-lock-syntactic-keywords' just for the
      ;; duration of the call.
      (let ((font-lock-syntactic-keywords keywords))
        (font-lock-fontify-syntactic-keywords-region start end)
        ;; In case it was eval'd/compiled.
        ;; Saving the value back into KEYWORDS keeps it for the next call
        ;; of this closure.
        (setq keywords font-lock-syntactic-keywords)))))
cf38dd42 SM |
290 | |
(defun syntax-propertize (pos)
  "Ensure that syntax-table properties are set until POS.
Do nothing if `syntax-propertize-function' is nil or if the buffer has
already been propertized up to POS.  Otherwise, compute the region to
process, let `syntax-propertize-extend-region-functions' adjust it, remove
the `syntax-table' and `syntax-multiline' properties from it, and call
`syntax-propertize-function' on it."
  (when (and syntax-propertize-function
             (< syntax-propertize--done pos))
    ;; (message "Needs to syntax-propertize from %s to %s"
    ;;          syntax-propertize--done pos)
    (set (make-local-variable 'parse-sexp-lookup-properties) t)
    (save-excursion
      (with-silent-modifications
        ;; Start where the previous call stopped and cover at least
        ;; `syntax-propertize-chunk-size' chars (without exceeding
        ;; `point-max'), or up to POS if that is further.
        (let* ((start (max syntax-propertize--done (point-min)))
               (end (max pos
                         (min (point-max)
                              (+ start syntax-propertize-chunk-size))))
               (funs syntax-propertize-extend-region-functions))
          (while funs
            (let ((new (funcall (pop funs) start end)))
              ;; Ignore a result which is nil or which does not extend the
              ;; region in either direction.
              (if (or (null new)
                      (and (>= (car new) start) (<= (cdr new) end)))
                  nil
                (setq start (car new))
                (setq end (cdr new))
                ;; If there's been a change, we should go through the
                ;; list again since this new position may
                ;; warrant a different answer from one of the funs we've
                ;; already seen.
                ;; (No need to restart when the function that just ran
                ;; was the first one of the list.)
                (unless (eq funs
                            (cdr syntax-propertize-extend-region-functions))
                  (setq funs syntax-propertize-extend-region-functions)))))
          ;; Move the limit before calling the function, so the function
          ;; can use syntax-ppss.
          (setq syntax-propertize--done end)
          ;; (message "syntax-propertizing from %s to %s" start end)
          (remove-text-properties start end
                                  '(syntax-table nil syntax-multiline nil))
          (funcall syntax-propertize-function start end))))))
326 | ||
327 | ;;; Incrementally compute and memoize parser state. | |
328 | ||
88a05faf SM |
(defsubst syntax-ppss-depth (ppss)
  "Return the first element of the parser state PPSS.
In a state returned by `parse-partial-sexp' this is the depth in
parentheses."
  (car ppss))
331 | ||
(defun syntax-ppss-toplevel-pos (ppss)
  "Get the latest syntactically outermost position found in a syntactic scan.
PPSS is a scan state, as returned by `parse-partial-sexp' or `syntax-ppss'.
An \"outermost position\" means one that is outside of any syntactic entity:
outside of any parentheses, comments, or strings encountered in the scan.
If no such position is recorded in PPSS (because the end of the scan was
itself at the outermost level), return nil."
  ;; BEWARE!  This relies on the undocumented element 9 of PPSS, which
  ;; currently holds the list of positions of the enclosing open-parens.
  ;; Those positions are outside of any string/comment, and the first one
  ;; is also outside of any paren (i.e. it corresponds to a nil ppss).
  ;; When that list is empty but we are inside a string or comment,
  ;; element 8 holds a similar "toplevel" position.
  (let ((outermost-open-paren (car (nth 9 ppss))))
    (if outermost-open-paren
        outermost-open-paren
      (nth 8 ppss))))
0e6c966c | 347 | |
88a05faf SM |
(defsubst syntax-ppss-context (ppss)
  "Return the kind of syntactic context described by the parser state PPSS.
The value is `string' if element 3 of PPSS is non-nil, else `comment' if
element 4 is non-nil, else nil."
  (if (nth 3 ppss)
      'string
    (and (nth 4 ppss) 'comment)))
353 | ||
(defvar syntax-ppss-max-span 20000
  "Threshold below which cache info is deemed unnecessary.
We try to make sure that cache entries are at least this far apart
from each other, to avoid keeping too much useless info.")

(defvar syntax-begin-function nil
  "Function to move back outside of any comment/string/paren.
This function should move the cursor back to some syntactically safe
point (where the PPSS is equivalent to nil).")

;; Both caches below are buffer-local and are invalidated by
;; `syntax-ppss-flush-cache'.
(defvar syntax-ppss-cache nil
  "List of (POS . PPSS) pairs, in decreasing POS order.")
(make-variable-buffer-local 'syntax-ppss-cache)
(defvar syntax-ppss-last nil
  "Cache of (LAST-POS . LAST-PPSS).
LAST-POS can be nil, in which case LAST-PPSS is no longer valid at its
original position and only positions recorded inside it may be reused.")
(make-variable-buffer-local 'syntax-ppss-last)
370 | ||
9c5b2653 SM |
(defalias 'syntax-ppss-after-change-function 'syntax-ppss-flush-cache)
(defun syntax-ppss-flush-cache (beg &rest ignored)
  "Flush the cache of `syntax-ppss' starting at position BEG.
This also makes `syntax-propertize' redo its work from BEG on.
Any further arguments are IGNORED."
  ;; Set syntax-propertize to refontify anything past beg.
  (setq syntax-propertize--done (min beg syntax-propertize--done))
  ;; Flush invalid cache entries.
  ;; The cache is in decreasing position order, so the invalid entries
  ;; (those after BEG) are all at the head of the list.
  (while (and syntax-ppss-cache (> (caar syntax-ppss-cache) beg))
    (setq syntax-ppss-cache (cdr syntax-ppss-cache)))
  ;; Throw away `last' value if made invalid.
  (when (< beg (or (car syntax-ppss-last) 0))
    ;; If syntax-begin-function jumped to BEG, then the old state at BEG can
    ;; depend on the text after BEG (which is presumably changed). So if
    ;; BEG=(car (nth 10 syntax-ppss-last)) don't reuse that data because the
    ;; assumed nil state at BEG may not be valid any more.
    ;; Note: `syntax-ppss-last' is (POS . PPSS), so (nth 3 syntax-ppss-last)
    ;; is element 2 of the PPSS itself.
    (if (<= beg (or (syntax-ppss-toplevel-pos (cdr syntax-ppss-last))
                    (nth 3 syntax-ppss-last)
                    0))
        ;; Nothing in the old state can be trusted: drop it entirely.
        (setq syntax-ppss-last nil)
      ;; Keep the old PPSS (for the positions recorded in it) but mark its
      ;; own position as unusable.
      (setcar syntax-ppss-last nil)))
  ;; Unregister if there's no cache left. Sadly this doesn't work
  ;; because `before-change-functions' is temporarily bound to nil here.
  ;; (unless syntax-ppss-cache
  ;;   (remove-hook 'before-change-functions 'syntax-ppss-flush-cache t))
  )
395 | ||
(defvar syntax-ppss-stats
  ;; Build the vector and its cells at load time rather than writing them
  ;; as a literal: `syntax-ppss' updates these cells destructively (via
  ;; `cl-incf' on their car and cdr), and literal constants should not be
  ;; mutated.
  (vector (cons 0 0.0) (cons 0 0.0) (cons 0 0.0)
          (cons 0 0.0) (cons 0 0.0) (cons 1 2500.0))
  "Vector of (COUNT . TOTAL) pairs, one per strategy used by `syntax-ppss'.
COUNT is the number of times the strategy was used and TOTAL the sum of
the distances parsed with it.")

(defun syntax-ppss-stats ()
  "Return a summary of `syntax-ppss-stats' as a list.
Each element is (COUNT . AVERAGE) where AVERAGE is TOTAL divided by COUNT,
truncated to an integer, or nil when the average cannot be computed
\(e.g. because COUNT is zero)."
  (mapcar (lambda (x)
            (condition-case nil
                (cons (car x) (truncate (/ (cdr x) (car x))))
              (error nil)))
          syntax-ppss-stats))
404 | ||
(defun syntax-ppss (&optional pos)
  "Parse-Partial-Sexp State at POS, defaulting to point.
The returned value is the same as that of `parse-partial-sexp'
run from `point-min' to POS except that values at positions 2 and 6
in the returned list (counting from 0) cannot be relied upon.
Point is at POS when this function returns.

It is necessary to call `syntax-ppss-flush-cache' explicitly if
this function is called while `before-change-functions' is
temporarily let-bound, or if the buffer is modified without
running the hook."
  ;; Each strategy below records its use in one slot of `syntax-ppss-stats':
  ;;   0: parse from the position of the previous call (`syntax-ppss-last');
  ;;   1: parse from a position recorded in the previous state;
  ;;   2: parse from the best nearby position found;
  ;;   3: slow case, parse from a cache entry and populate the cache;
  ;;   4: the previous position was preferred over the cache entry;
  ;;   5: `syntax-begin-function' provided the starting position.
  ;; Default values.
  (unless pos (setq pos (point)))
  (syntax-propertize pos)
  ;;
  (let ((old-ppss (cdr syntax-ppss-last))
        (old-pos (car syntax-ppss-last))
        (ppss nil)
        (pt-min (point-min)))
    ;; A previous position located after POS is of no use.
    (if (and old-pos (> old-pos pos)) (setq old-pos nil))
    ;; Use the OLD-POS if usable and close. Don't update the `last' cache.
    (condition-case nil
        (if (and old-pos (< (- pos old-pos)
                            ;; The time to use syntax-begin-function and
                            ;; find PPSS is assumed to be about 2 * distance.
                            (* 2 (/ (cdr (aref syntax-ppss-stats 5))
                                    (1+ (car (aref syntax-ppss-stats 5)))))))
            (progn
              (cl-incf (car (aref syntax-ppss-stats 0)))
              (cl-incf (cdr (aref syntax-ppss-stats 0)) (- pos old-pos))
              (parse-partial-sexp old-pos pos nil nil old-ppss))

          (cond
           ;; Use OLD-PPSS if possible and close enough.
           ((and (not old-pos) old-ppss
                 ;; If `pt-min' is too far from `pos', we could try to use
                 ;; other positions in (nth 9 old-ppss), but that doesn't
                 ;; seem to happen in practice and it would complicate this
                 ;; code (and the before-change-function code even more).
                 ;; But maybe it would be useful in "degenerate" cases such
                 ;; as when the whole file is wrapped in a set
                 ;; of parentheses.
                 (setq pt-min (or (syntax-ppss-toplevel-pos old-ppss)
                                  (nth 2 old-ppss)))
                 (<= pt-min pos) (< (- pos pt-min) syntax-ppss-max-span))
            (cl-incf (car (aref syntax-ppss-stats 1)))
            (cl-incf (cdr (aref syntax-ppss-stats 1)) (- pos pt-min))
            (setq ppss (parse-partial-sexp pt-min pos)))
           ;; The OLD-* data can't be used. Consult the cache.
           (t
            (let ((cache-pred nil)
                  (cache syntax-ppss-cache)
                  (pt-min (point-min))
                  ;; I differentiate between PT-MIN and PT-BEST because
                  ;; I feel like it might be important to ensure that the
                  ;; cache is only filled with 100% sure data (whereas
                  ;; syntax-begin-function might return incorrect data).
                  ;; Maybe that's just stupid.
                  (pt-best (point-min))
                  (ppss-best nil))
              ;; look for a usable cache entry.
              ;; On exit, CACHE starts with the first entry at or before
              ;; POS (if any) and CACHE-PRED is the cell just before it.
              (while (and cache (< pos (caar cache)))
                (setq cache-pred cache)
                (setq cache (cdr cache)))
              (if cache (setq pt-min (caar cache) ppss (cdar cache)))

              ;; Setup the before-change function if necessary.
              (unless (or syntax-ppss-cache syntax-ppss-last)
                (add-hook 'before-change-functions
                          'syntax-ppss-flush-cache t t))

              ;; Use the best of OLD-POS and CACHE.
              (if (or (not old-pos) (< old-pos pt-min))
                  (setq pt-best pt-min ppss-best ppss)
                (cl-incf (car (aref syntax-ppss-stats 4)))
                (cl-incf (cdr (aref syntax-ppss-stats 4)) (- pos old-pos))
                (setq pt-best old-pos ppss-best old-ppss))

              ;; Use the `syntax-begin-function' if available.
              ;; We could try using that function earlier, but:
              ;; - The result might not be 100% reliable, so it's better to use
              ;;   the cache if available.
              ;; - The function might be slow.
              ;; - If this function almost always finds a safe nearby spot,
              ;;   the cache won't be populated, so consulting it is cheap.
              (when (and (not syntax-begin-function)
                         (boundp 'font-lock-beginning-of-syntax-function)
                         font-lock-beginning-of-syntax-function)
                (set (make-local-variable 'syntax-begin-function)
                     font-lock-beginning-of-syntax-function))
              (when (and syntax-begin-function
                         (progn (goto-char pos)
                                (funcall syntax-begin-function)
                                ;; Make sure it's better.
                                (> (point) pt-best))
                         ;; Simple sanity checks.
                         (< (point) pos) ; backward-paragraph can fail here.
                         (not (memq (get-text-property (point) 'face)
                                    '(font-lock-string-face font-lock-doc-face
                                      font-lock-comment-face))))
                (cl-incf (car (aref syntax-ppss-stats 5)))
                (cl-incf (cdr (aref syntax-ppss-stats 5)) (- pos (point)))
                ;; The position found is assumed to have a nil state.
                (setq pt-best (point) ppss-best nil))

              (cond
               ;; Quick case when we found a nearby pos.
               ((< (- pos pt-best) syntax-ppss-max-span)
                (cl-incf (car (aref syntax-ppss-stats 2)))
                (cl-incf (cdr (aref syntax-ppss-stats 2)) (- pos pt-best))
                (setq ppss (parse-partial-sexp pt-best pos nil nil ppss-best)))
               ;; Slow case: compute the state from some known position and
               ;; populate the cache so we won't need to do it again soon.
               (t
                (cl-incf (car (aref syntax-ppss-stats 3)))
                (cl-incf (cdr (aref syntax-ppss-stats 3)) (- pos pt-min))

                ;; If `pt-min' is too far, add a few intermediate entries.
                ;; Each iteration parses up to the midpoint between PT-MIN
                ;; and POS and records the state reached there.
                (while (> (- pos pt-min) (* 2 syntax-ppss-max-span))
                  (setq ppss (parse-partial-sexp
                              pt-min (setq pt-min (/ (+ pt-min pos) 2))
                              nil nil ppss))
                  (push (cons pt-min ppss)
                        (if cache-pred (cdr cache-pred) syntax-ppss-cache)))

                ;; Compute the actual return value.
                (setq ppss (parse-partial-sexp pt-min pos nil nil ppss))

                ;; Debugging check.
                ;; (let ((real-ppss (parse-partial-sexp (point-min) pos)))
                ;;   (setcar (last ppss 4) 0)
                ;;   (setcar (last real-ppss 4) 0)
                ;;   (setcar (last ppss 8) nil)
                ;;   (setcar (last real-ppss 8) nil)
                ;;   (unless (equal ppss real-ppss)
                ;;     (message "!!Syntax: %s != %s" ppss real-ppss)
                ;;     (setq ppss real-ppss)))

                ;; Store it in the cache.
                ;; Insert a new entry only if the entry following it is
                ;; more than `syntax-ppss-max-span' away; otherwise
                ;; overwrite that entry.
                (let ((pair (cons pos ppss)))
                  (if cache-pred
                      (if (> (- (caar cache-pred) pos) syntax-ppss-max-span)
                          (push pair (cdr cache-pred))
                        (setcar cache-pred pair))
                    (if (or (null syntax-ppss-cache)
                            (> (- (caar syntax-ppss-cache) pos)
                               syntax-ppss-max-span))
                        (push pair syntax-ppss-cache)
                      (setcar syntax-ppss-cache pair)))))))))

          ;; Remember this result for the next call.
          (setq syntax-ppss-last (cons pos ppss))
          ppss)
      (args-out-of-range
       ;; If the buffer is more narrowed than when we built the cache,
       ;; we may end up calling parse-partial-sexp with a position before
       ;; point-min. In that case, just parse from point-min assuming
       ;; a nil state.
       (parse-partial-sexp (point-min) pos)))))
88a05faf SM |
562 | |
563 | ;; Debugging functions | |
564 | ||
(defun syntax-ppss-debug ()
  "Return the list of distances between consecutive `syntax-ppss' cache entries.
The distance between the earliest entry of `syntax-ppss-cache' and
`point-min' is included and comes first in the returned list."
  (let ((positions (append (mapcar #'car syntax-ppss-cache)
                           (list (point-min))))
        (gaps nil))
    ;; POSITIONS is in decreasing order; walk it pairwise.
    (while (cdr positions)
      (push (- (car positions) (cadr positions)) gaps)
      (setq positions (cdr positions)))
    gaps))
572 | ||
573 | ;; XEmacs compatibility functions | |
574 | ||
575 | ;; (defun buffer-syntactic-context (&optional buffer) | |
576 | ;; "Syntactic context at point in BUFFER. | |
577 | ;; Either of `string', `comment' or `nil'. | |
578 | ;; This is an XEmacs compatibility function." | |
579 | ;; (with-current-buffer (or buffer (current-buffer)) | |
580 | ;; (syntax-ppss-context (syntax-ppss)))) | |
581 | ||
582 | ;; (defun buffer-syntactic-context-depth (&optional buffer) | |
583 | ;; "Syntactic parenthesis depth at point in BUFFER. | |
584 | ;; This is an XEmacs compatibility function." | |
585 | ;; (with-current-buffer (or buffer (current-buffer)) | |
586 | ;; (syntax-ppss-depth (syntax-ppss)))) | |
587 | ||
88a05faf | 588 | (provide 'syntax) |
ab5796a9 | 589 | |
88a05faf | 590 | ;;; syntax.el ends here |