| 1 | ;;; syntax.el --- helper functions to find syntactic context |
| 2 | |
| 3 | ;; Copyright (C) 2000-2011 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Maintainer: FSF |
| 6 | ;; Keywords: internal |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation, either version 3 of the License, or |
| 13 | ;; (at your option) any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 22 | |
| 23 | ;;; Commentary: |
| 24 | |
| 25 | ;; The main exported function is `syntax-ppss'. You might also need |
| 26 | ;; to call `syntax-ppss-flush-cache' or to add it to |
| 27 | ;; `before-change-functions' (although this is automatically done by |
| 28 | ;; syntax-ppss when needed, but that might fail if syntax-ppss is |
| 29 | ;; called in a context where before-change-functions is temporarily |
| 30 | ;; let-bound to nil). |
| 31 | |
| 32 | ;;; Todo: |
| 33 | |
| 34 | ;; - do something about the case where the syntax-table is changed. |
| 35 | ;; This typically happens with tex-mode and its `$' operator. |
| 36 | ;; - new functions `syntax-state', ... to replace uses of parse-partial-state |
| 37 | ;; with something higher-level (similar to syntax-ppss-context). |
| 38 | ;; - interaction with mmm-mode. |
| 39 | |
| 40 | ;;; Code: |
| 41 | |
| 42 | ;; Note: PPSS stands for `parse-partial-sexp state' |
| 43 | |
| 44 | (eval-when-compile (require 'cl)) |
| 45 | |
;; Value-less declaration: it only tells the byte-compiler that this is a
;; special variable.  `syntax-ppss' reads it as a fallback for
;; `syntax-begin-function'; this file never gives it a value.
(defvar font-lock-beginning-of-syntax-function)
| 47 | |
| 48 | ;;; Applying syntax-table properties where needed. |
| 49 | |
;; Called by `syntax-propertize' with the (possibly extended) region that
;; still needs syntax-table properties.
(defvar syntax-propertize-function nil
  ;; Design note: rather than a -functions hook, this is a -function because
  ;; it's easier to do a single scan than several scans: with multiple scans,
  ;; one cannot assume that the text before point has been propertized, so
  ;; syntax-ppss gives unreliable results (and stores them in its cache to
  ;; boot, so we'd have to flush that cache between each function, and we
  ;; couldn't use syntax-ppss-flush-cache since that would not only flush the
  ;; cache but also reset syntax-propertize--done which should not be done in
  ;; this case).
  "Mode-specific function to apply the syntax-table properties.
Called with 2 arguments: START and END.
This function can call `syntax-ppss' on any position before END, but it
should not call `syntax-ppss-flush-cache', which means that it should not
call `syntax-ppss' on some position and later modify the buffer on some
earlier position.")
| 64 | |
(defvar syntax-propertize-chunk-size 500
  "Minimum number of characters `syntax-propertize' tries to handle at once.
When asked to propertize up to some position, `syntax-propertize' extends
the request so that it covers at least this many characters past the text
that is already propertized, limited by the end of the accessible part of
the buffer.")
| 66 | |
;; With the default value below, `syntax-propertize' works on whole lines.
(defvar syntax-propertize-extend-region-functions
  '(syntax-propertize-wholelines)
  "Special hook run just before proceeding to propertize a region.
This is used to allow major modes to help `syntax-propertize' find safe buffer
positions as beginning and end of the propertized region. Its most common use
is to solve the problem of /identification/ of multiline elements by providing
a function that tries to find such elements and move the boundaries such that
they do not fall in the middle of one.
Each function is called with two arguments (START and END) and it should return
either a cons (NEW-START . NEW-END) or nil if no adjustment should be made.
These functions are run in turn repeatedly until they all return nil.
Put first the functions more likely to cause a change and cheaper to compute.")
;; Mark it as a special hook which doesn't use any global setting
;; (i.e. doesn't obey the element t in the buffer-local value):
;; `syntax-propertize' walks the list itself and `funcall's each element.
(make-variable-buffer-local 'syntax-propertize-extend-region-functions)
| 82 | |
(defun syntax-propertize-wholelines (start end)
  "Widen the region START..END so that it is made of whole lines.
Return a cons (NEW-START . NEW-END): NEW-START is the beginning of the line
containing START, and NEW-END is END itself when END is already at the
beginning of a line, otherwise the beginning of the following line.
This function moves point (it is left at END)."
  (let ((first-bol (progn (goto-char start)
                          (line-beginning-position))))
    (goto-char end)
    (cons first-bol
          (if (not (bolp))
              (line-beginning-position 2)
            (point)))))
| 88 | |
(defun syntax-propertize-multiline (beg end)
  "Let `syntax-propertize' pay attention to the syntax-multiline property.
Return a cons (NEW-BEG . NEW-END) in which BEG and END have been moved
outward, when needed, so that neither of them falls inside a stretch of
text carrying a non-nil `syntax-multiline' property.
The signature and return value are those expected of the members of
`syntax-propertize-extend-region-functions'."
  ;; If the char just before BEG belongs to a multiline element, back up to
  ;; the place where that element starts.
  (when (and (> beg (point-min))
             (get-text-property (1- beg) 'syntax-multiline))
    (setq beg (or (previous-single-property-change beg 'syntax-multiline)
                  (point-min))))
  ;; Likewise, if END sits inside a multiline element, move forward to the
  ;; first position that no longer carries the property.  The property
  ;; tested here has to be `syntax-multiline', the same one searched for
  ;; just below; testing `font-lock-multiline' meant END was never extended
  ;; for text marked only with `syntax-multiline'.
  (when (get-text-property end 'syntax-multiline)
    (setq end (or (text-property-any end (point-max)
                                     'syntax-multiline nil)
                  (point-max))))
  (cons beg end))
| 101 | |
;; Starts at -1 so that every buffer position counts as "not yet
;; propertized".  `syntax-propertize' advances it and
;; `syntax-ppss-flush-cache' lowers it again.
(defvar syntax-propertize--done -1
  "Position up to which syntax-table properties have been set.")
(make-variable-buffer-local 'syntax-propertize--done)
| 105 | |
(defun syntax-propertize--shift-groups (re n)
  "Return a copy of regexp RE whose explicitly numbered groups are shifted by N.
Every construct of the form \\(?NUM: in RE is rewritten with NUM+N in
place of NUM.  Unnumbered and shy groups are left untouched."
  (let ((renumber
         ;; Called with the text of one \(?NUM: opener; the match data then
         ;; describes that text, so subgroup 1 is NUM.
         (lambda (opener)
           (let* ((old-num (string-to-number (match-string 1 opener)))
                  (new-num (number-to-string (+ n old-num))))
             (replace-match new-num t t opener 1)))))
    (replace-regexp-in-string "\\\\(\\?\\([0-9]+\\):" renumber re t t)))
| 114 | |
(defmacro syntax-propertize-precompile-rules (&rest rules)
  "Return a precompiled form of RULES to pass to `syntax-propertize-rules'.
RULES takes the same form as the arguments of `syntax-propertize-rules'.
The value is an object that can itself be used as a rule in
`syntax-propertize-rules', which makes it possible to share one set of
rules among several syntax-propertize-functions."
  (declare (debug syntax-propertize-rules))
  ;; No actual precompilation happens: the expansion is just RULES, quoted.
  ;; This is nevertheless a macro, for 2 reasons:
  ;; - some real pre-compilation could be done here in the future,
  ;;   e.g. if/when `syntax-propertize-rules' switches to a DFA-based
  ;;   implementation.
  ;; - it lets Edebug properly annotate the expressions inside RULES.
  (list 'quote rules))
| 130 | |
(defmacro syntax-propertize-rules (&rest rules)
  "Make a function that applies RULES for use in `syntax-propertize-function'.
The function will scan the buffer, applying the rules where they match.
The buffer is scanned a single time, like \"lex\" would, rather than once
per rule.

Each RULE can be a symbol, in which case that symbol's value should be,
at macro-expansion time, a precompiled set of rules, as returned
by `syntax-propertize-precompile-rules'.

Otherwise, RULE should have the form (REGEXP HIGHLIGHT1 ... HIGHLIGHTn), where
REGEXP is an expression (evaluated at time of macro-expansion) that returns
a regexp, and where HIGHLIGHTs have the form (NUMBER SYNTAX) which means to
apply the property SYNTAX to the chars matched by the subgroup NUMBER
of the regular expression, if NUMBER did match.
A RULE with no HIGHLIGHT at all is accepted: text it matches is skipped
without any property being applied.
SYNTAX is an expression that returns a value to apply as `syntax-table'
property.  Some expressions are handled specially:
- if SYNTAX is a string, then it is converted with `string-to-syntax';
- if SYNTAX has the form (prog1 EXP . EXPS) then the value returned by EXP
  will be applied to the buffer before running EXPS and if EXP is a string it
  is also converted with `string-to-syntax'.
The SYNTAX expression is responsible to save the `match-data' if needed
for subsequent HIGHLIGHTs.
Also SYNTAX is free to move point, in which case RULES may not be applied to
some parts of the text or may be applied several times to other parts.

Note: back-references in REGEXPs do not work."
  (declare (debug (&rest &or symbolp    ;FIXME: edebug this eval step.
                         (form &rest
                               (numberp
                                [&or stringp ;FIXME: Use &wrap
                                     ("prog1" [&or stringp def-form] def-body)
                                     def-form])))))
  ;; First flatten RULES: a symbol stands for a precompiled list of rules,
  ;; which is spliced in place (and may itself start with further symbols).
  (let ((newrules nil))
    (while rules
      (if (symbolp (car rules))
          (setq rules (append (symbol-value (pop rules)) rules))
        (push (pop rules) newrules)))
    (setq rules (nreverse newrules)))
  ;; OFFSET counts the regexp groups contributed by the rules handled so far,
  ;; i.e. by how much the group numbers of the current rule must be shifted
  ;; inside the combined regexp.  BRANCHES accumulates, in reverse order, one
  ;; `cond' clause per rule.
  (let* ((offset 0)
         (branches '())
         ;; We'd like to use a real DFA-based lexer, usually, but since Emacs
         ;; doesn't have one yet, we fallback on building one large regexp
         ;; and use groups to determine which branch of the regexp matched.
         (re
          (mapconcat
           (lambda (rule)
             (let* ((orig-re (eval (car rule)))
                    (re orig-re))
               (when (and (assq 0 rule) (cdr rules))
                 ;; If there's more than 1 rule, and the rule want to apply
                 ;; highlight to match 0, create an extra group to be able to
                 ;; tell when *this* match 0 has succeeded.
                 (incf offset)
                 (setq re (concat "\\(" re "\\)")))
               (setq re (syntax-propertize--shift-groups re offset))
               (let ((code '())
                     ;; CONDITION is the test of this rule's `cond' clause: it
                     ;; tells whether this rule is the one that matched.
                     (condition
                      (cond
                       ((assq 0 rule) (if (zerop offset) t
                                        `(match-beginning ,offset)))
                       ;; Exactly one HIGHLIGHT: test its group directly.
                       ;; The `(cdr rule)' guard matters: for a rule with no
                       ;; HIGHLIGHT, `(car (cadr rule))' is nil and adding it
                       ;; to OFFSET would signal an error at expansion time.
                       ((and (cdr rule) (null (cddr rule)))
                        `(match-beginning ,(+ offset (car (cadr rule)))))
                       ;; Several HIGHLIGHTs, or none (giving `(or)', which
                       ;; is never true).
                       (t
                        `(or ,@(mapcar
                                (lambda (case)
                                  `(match-beginning ,(+ offset (car case))))
                                (cdr rule))))))
                     (nocode t)
                     ;; Rebound locally because it may be reset to 0 below
                     ;; without affecting the running total for later rules.
                     (offset offset))
                 ;; If some of the subgroup rules include Elisp code, then we
                 ;; need to set the match-data so it's consistent with what the
                 ;; code expects.  If not, then we can simply use shifted
                 ;; offset in our own code.
                 (unless (zerop offset)
                   (dolist (case (cdr rule))
                     (unless (stringp (cadr case))
                       (setq nocode nil)))
                   (unless nocode
                     (push `(let ((md (match-data 'ints)))
                              ;; Keep match 0 as is, but shift everything else.
                              (setcdr (cdr md) (nthcdr ,(* (1+ offset) 2) md))
                              (set-match-data md))
                           code)
                     (setq offset 0)))
                 ;; Now construct the code for each subgroup rules.
                 (dolist (case (cdr rule))
                   (assert (null (cddr case)))
                   (let* ((gn (+ offset (car case)))
                          (action (nth 1 case))
                          (thiscode
                           (cond
                            ;; Constant syntax: convert it once, right now.
                            ((stringp action)
                             `((put-text-property
                                (match-beginning ,gn) (match-end ,gn)
                                'syntax-table
                                ',(string-to-syntax action))))
                            ;; (ignore . FORMS): just run FORMS.
                            ((eq (car-safe action) 'ignore)
                             (cdr action))
                            ;; (prog1 EXP . EXPS): apply EXP first, then run
                            ;; EXPS.
                            ((eq (car-safe action) 'prog1)
                             (if (stringp (nth 1 action))
                                 `((put-text-property
                                    (match-beginning ,gn) (match-end ,gn)
                                    'syntax-table
                                    ',(string-to-syntax (nth 1 action)))
                                   ,@(nthcdr 2 action))
                               `((let ((mb (match-beginning ,gn))
                                       (me (match-end ,gn))
                                       (syntax ,(nth 1 action)))
                                   (if syntax
                                       (put-text-property
                                        mb me 'syntax-table syntax))
                                   ,@(nthcdr 2 action)))))
                            ;; Arbitrary expression: the bounds are read
                            ;; before evaluating it, and a nil value means
                            ;; that no property is applied.
                            (t
                             `((let ((mb (match-beginning ,gn))
                                     (me (match-end ,gn))
                                     (syntax ,action))
                                 (if syntax
                                     (put-text-property
                                      mb me 'syntax-table syntax))))))))

                     ;; With a single HIGHLIGHT, or for group 0, CONDITION
                     ;; already guarantees that the group matched; otherwise
                     ;; the group has to be tested individually.
                     (if (or (not (cddr rule)) (zerop gn))
                         (setq code (nconc (nreverse thiscode) code))
                       (push `(if (match-beginning ,gn)
                                  ;; Try and generate clean code with no
                                  ;; extraneous progn.
                                  ,(if (null (cdr thiscode))
                                       (car thiscode)
                                     `(progn ,@thiscode)))
                             code))))
                 (push (cons condition (nreverse code))
                       branches))
               ;; Account for the groups of this rule before moving on.
               (incf offset (regexp-opt-depth orig-re))
               re))
           rules
           "\\|")))
    `(lambda (start end)
       (goto-char start)
       (while (and (< (point) end)
                   (re-search-forward ,re end t))
         (cond ,@(nreverse branches))))))
| 272 | |
(defun syntax-propertize-via-font-lock (keywords)
  "Return a function that propertizes a region using font-lock KEYWORDS.
KEYWORDS obeys the format used in `font-lock-syntactic-keywords'.
The returned function takes two arguments, START and END, and is suitable
for use as `syntax-propertize-function': it hands START..END over to
`font-lock-fontify-syntactic-keywords-region' with KEYWORDS in effect."
  (lexical-let ((rules keywords))
    (lambda (start end)
      (with-no-warnings
        (let ((font-lock-syntactic-keywords rules))
          (font-lock-fontify-syntactic-keywords-region start end)
          ;; Keep whatever value the call left in the variable, in case it
          ;; was eval'd/compiled along the way.
          (setq rules font-lock-syntactic-keywords))))))
| 284 | |
(defun syntax-propertize (pos)
  "Ensure that syntax-table properties are set up to POS.
Does nothing unless `syntax-propertize-function' is non-nil and POS lies
beyond the text that has already been propertized.  Otherwise, the region
to handle is first adjusted by `syntax-propertize-extend-region-functions',
its `syntax-table' and `syntax-multiline' properties are removed, and
`syntax-propertize-function' is called on it."
  (when (and syntax-propertize-function
             (< syntax-propertize--done pos))
    ;; (message "Needs to syntax-propertize from %s to %s"
    ;;          syntax-propertize--done pos)
    (set (make-local-variable 'parse-sexp-lookup-properties) t)
    (save-excursion
      (with-silent-modifications
        (let* ((start (max syntax-propertize--done (point-min)))
               ;; Go at least a whole chunk further than what is already
               ;; done, buffer permitting, and never less far than POS.
               (end (max pos
                         (min (point-max)
                              (+ start syntax-propertize-chunk-size))))
               (pending syntax-propertize-extend-region-functions))
          (while pending
            (let ((bounds (funcall (pop pending) start end)))
              ;; Only an answer that actually widens the region counts as
              ;; a change.
              (when (and bounds
                         (or (< (car bounds) start)
                             (> (cdr bounds) end)))
                (setq start (car bounds)
                      end (cdr bounds))
                ;; The boundaries moved, so the functions already consulted
                ;; may now answer differently: start over from the top,
                ;; unless the function just called was the first one.
                (unless (eq pending
                            (cdr syntax-propertize-extend-region-functions))
                  (setq pending syntax-propertize-extend-region-functions)))))
          ;; Record the new limit before calling the function, so that the
          ;; function can use syntax-ppss.
          (setq syntax-propertize--done end)
          ;; (message "syntax-propertizing from %s to %s" start end)
          (remove-text-properties start end
                                  '(syntax-table nil syntax-multiline nil))
          (funcall syntax-propertize-function start end))))))
| 320 | |
| 321 | ;;; Incrementally compute and memoize parser state. |
| 322 | |
(defsubst syntax-ppss-depth (ppss)
  "Return the first element of the parser state PPSS.
In a state returned by `parse-partial-sexp' or `syntax-ppss' this is the
depth in parentheses."
  (car ppss))
| 325 | |
(defun syntax-ppss-toplevel-pos (ppss)
  "Return the last syntactically outermost position recorded in PPSS.
PPSS is a scan state, as returned by `parse-partial-sexp' or `syntax-ppss'.
An \"outermost position\" is one that is outside of any syntactic entity:
outside of any parentheses, comments, or strings encountered in the scan.
Return nil if PPSS records no such position (because the end of the scan
was itself at the outermost level)."
  ;; BEWARE!  This relies on the undocumented element at index 9, which
  ;; currently holds the list of positions of the enclosing open-parens.
  ;; Those positions are outside of any string/comment, and the first of
  ;; them is outside of any paren (i.e. corresponds to a nil ppss).
  (let ((outermost-paren (car (nth 9 ppss))))
    (if outermost-paren
        outermost-paren
      ;; No enclosing paren: if we are in a string or comment, the element
      ;; at index 8 holds a similar "toplevel" position.
      (nth 8 ppss))))
| 341 | |
(defsubst syntax-ppss-context (ppss)
  "Return the kind of syntactic context described by parser state PPSS.
The value is `string' if element 3 of PPSS is non-nil, else `comment' if
element 4 is non-nil, else nil."
  (if (nth 3 ppss)
      'string
    (and (nth 4 ppss) 'comment)))
| 347 | |
;; Consulted by `syntax-ppss' both to decide whether a known position is
;; close enough to parse from directly and to space out cache entries.
(defvar syntax-ppss-max-span 20000
  "Threshold below which cache info is deemed unnecessary.
We try to make sure that cache entries are at least this far apart
from each other, to avoid keeping too much useless info.")

;; When this is nil, `syntax-ppss' sets it buffer-locally from
;; `font-lock-beginning-of-syntax-function', if that one is non-nil.
(defvar syntax-begin-function nil
  "Function to move back outside of any comment/string/paren.
This function should move the cursor back to some syntactically safe
point (where the PPSS is equivalent to nil).")

;; Filled by `syntax-ppss'; entries located after a flushed position are
;; dropped by `syntax-ppss-flush-cache'.
(defvar syntax-ppss-cache nil
  "List of (POS . PPSS) pairs, in decreasing POS order.")
(make-variable-buffer-local 'syntax-ppss-cache)
;; `syntax-ppss-flush-cache' either resets this to nil or only sets LAST-POS
;; to nil, in which case `syntax-ppss' can still use LAST-PPSS.
(defvar syntax-ppss-last nil
  "Cache of (LAST-POS . LAST-PPSS).")
(make-variable-buffer-local 'syntax-ppss-last)
| 364 | |
(defalias 'syntax-ppss-after-change-function 'syntax-ppss-flush-cache)
(defun syntax-ppss-flush-cache (beg &rest ignored)
  "Discard the data cached by `syntax-ppss' for positions after BEG.
This also makes `syntax-propertize' consider the text after BEG as not
propertized yet.  Any argument after BEG is ignored."
  ;; Tell syntax-propertize to redo anything past BEG.
  (setq syntax-propertize--done (min beg syntax-propertize--done))
  ;; The cache is kept in decreasing position order, so the entries that
  ;; became invalid are all at its head.
  (while (and syntax-ppss-cache (> (caar syntax-ppss-cache) beg))
    (pop syntax-ppss-cache))
  ;; Deal with the `last' value if it was made invalid.
  (when (< beg (or (car syntax-ppss-last) 0))
    ;; If syntax-begin-function jumped to BEG, then the old state at BEG can
    ;; depend on the text after BEG (which is presumably changed).  So if
    ;; BEG=(car (nth 10 syntax-ppss-last)) don't reuse that data because the
    ;; assumed nil state at BEG may not be valid any more.
    (let ((restart-pos (or (syntax-ppss-toplevel-pos (cdr syntax-ppss-last))
                           (nth 3 syntax-ppss-last)
                           0)))
      (if (> beg restart-pos)
          ;; The position is stale, but the state can still tell
          ;; `syntax-ppss' where to restart from.
          (setcar syntax-ppss-last nil)
        (setq syntax-ppss-last nil))))
  ;; Unregister if there's no cache left.  Sadly this doesn't work
  ;; because `before-change-functions' is temporarily bound to nil here.
  ;; (unless syntax-ppss-cache
  ;;   (remove-hook 'before-change-functions 'syntax-ppss-flush-cache t))
  )
| 389 | |
(defvar syntax-ppss-stats
  [(0 . 0.0) (0 . 0.0) (0 . 0.0) (0 . 0.0) (0 . 0.0) (1 . 2500.0)]
  "Vector of (COUNT . TOTAL) pairs updated by `syntax-ppss'.
Each slot corresponds to one of the strategies `syntax-ppss' can use; COUNT
is incremented and a distance is added to TOTAL each time that strategy
is used.")
(defun syntax-ppss-stats ()
  "Return a summary of the variable `syntax-ppss-stats'.
The value is a list with one element per slot: (COUNT . AVERAGE), where
AVERAGE is TOTAL divided by COUNT and truncated to an integer, or nil when
that average cannot be computed."
  (let ((summary nil))
    (dolist (slot (append syntax-ppss-stats nil))
      (push (condition-case nil
                (cons (car slot) (truncate (/ (cdr slot) (car slot))))
              (error nil))
            summary))
    (nreverse summary)))
| 398 | |
(defun syntax-ppss (&optional pos)
  "Parse-Partial-Sexp State at POS, defaulting to point.
The returned value is the same as `parse-partial-sexp' except that
the 2nd and 6th values of the returned state cannot be relied upon.
Point is at POS when this function returns."
  ;; Overview: the state at POS is computed by `parse-partial-sexp' starting
  ;; from the nearest usable known position, chosen among: the position of
  ;; the previous call (`syntax-ppss-last'), a toplevel position recorded in
  ;; the previous state, an entry of `syntax-ppss-cache', the position found
  ;; by `syntax-begin-function', and `point-min'.  Each strategy updates its
  ;; own slot of `syntax-ppss-stats'.
  ;; Default values.
  (unless pos (setq pos (point)))
  ;; Make sure the syntax-table properties are in place up to POS before
  ;; parsing.
  (syntax-propertize pos)
  ;;
  (let ((old-ppss (cdr syntax-ppss-last))
        (old-pos (car syntax-ppss-last))
        (ppss nil)
        (pt-min (point-min)))
    ;; The previous position can only serve as a starting point if it is not
    ;; after POS.
    (if (and old-pos (> old-pos pos)) (setq old-pos nil))
    ;; Use the OLD-POS if usable and close.  Don't update the `last' cache.
    (condition-case nil
        (if (and old-pos (< (- pos old-pos)
                            ;; The time to use syntax-begin-function and
                            ;; find PPSS is assumed to be about 2 * distance.
                            (* 2 (/ (cdr (aref syntax-ppss-stats 5))
                                    (1+ (car (aref syntax-ppss-stats 5)))))))
            (progn
              ;; Stats slot 0: incremental parse from the previous position.
              (incf (car (aref syntax-ppss-stats 0)))
              (incf (cdr (aref syntax-ppss-stats 0)) (- pos old-pos))
              (parse-partial-sexp old-pos pos nil nil old-ppss))

          (cond
           ;; Use OLD-PPSS if possible and close enough.
           ((and (not old-pos) old-ppss
                 ;; If `pt-min' is too far from `pos', we could try to use
                 ;; other positions in (nth 9 old-ppss), but that doesn't
                 ;; seem to happen in practice and it would complicate this
                 ;; code (and the before-change-function code even more).
                 ;; But maybe it would be useful in "degenerate" cases such
                 ;; as when the whole file is wrapped in a set
                 ;; of parentheses.
                 (setq pt-min (or (syntax-ppss-toplevel-pos old-ppss)
                                  (nth 2 old-ppss)))
                 (<= pt-min pos) (< (- pos pt-min) syntax-ppss-max-span))
            ;; Stats slot 1: parse from a position taken from OLD-PPSS,
            ;; starting with a nil state.
            (incf (car (aref syntax-ppss-stats 1)))
            (incf (cdr (aref syntax-ppss-stats 1)) (- pos pt-min))
            (setq ppss (parse-partial-sexp pt-min pos)))
           ;; The OLD-* data can't be used.  Consult the cache.
           (t
            (let ((cache-pred nil)
                  (cache syntax-ppss-cache)
                  (pt-min (point-min))
                  ;; I differentiate between PT-MIN and PT-BEST because
                  ;; I feel like it might be important to ensure that the
                  ;; cache is only filled with 100% sure data (whereas
                  ;; syntax-begin-function might return incorrect data).
                  ;; Maybe that's just stupid.
                  (pt-best (point-min))
                  (ppss-best nil))
              ;; look for a usable cache entry.
              ;; The cache is in decreasing position order: skip the entries
              ;; located after POS, remembering in CACHE-PRED the cell just
              ;; before the first usable one.
              (while (and cache (< pos (caar cache)))
                (setq cache-pred cache)
                (setq cache (cdr cache)))
              (if cache (setq pt-min (caar cache) ppss (cdar cache)))

              ;; Setup the before-change function if necessary.
              (unless (or syntax-ppss-cache syntax-ppss-last)
                (add-hook 'before-change-functions
                          'syntax-ppss-flush-cache t t))

              ;; Use the best of OLD-POS and CACHE.
              (if (or (not old-pos) (< old-pos pt-min))
                  (setq pt-best pt-min ppss-best ppss)
                ;; Stats slot 4: the previous position beats the cache entry.
                (incf (car (aref syntax-ppss-stats 4)))
                (incf (cdr (aref syntax-ppss-stats 4)) (- pos old-pos))
                (setq pt-best old-pos ppss-best old-ppss))

              ;; Use the `syntax-begin-function' if available.
              ;; We could try using that function earlier, but:
              ;; - The result might not be 100% reliable, so it's better to use
              ;;   the cache if available.
              ;; - The function might be slow.
              ;; - If this function almost always finds a safe nearby spot,
              ;;   the cache won't be populated, so consulting it is cheap.
              (when (and (not syntax-begin-function)
                         (boundp 'font-lock-beginning-of-syntax-function)
                         font-lock-beginning-of-syntax-function)
                (set (make-local-variable 'syntax-begin-function)
                     font-lock-beginning-of-syntax-function))
              (when (and syntax-begin-function
                         (progn (goto-char pos)
                                (funcall syntax-begin-function)
                                ;; Make sure it's better.
                                (> (point) pt-best))
                         ;; Simple sanity checks.
                         (< (point) pos) ; backward-paragraph can fail here.
                         (not (memq (get-text-property (point) 'face)
                                    '(font-lock-string-face font-lock-doc-face
                                      font-lock-comment-face))))
                ;; Stats slot 5: position found by `syntax-begin-function',
                ;; where the state is assumed to be nil.
                (incf (car (aref syntax-ppss-stats 5)))
                (incf (cdr (aref syntax-ppss-stats 5)) (- pos (point)))
                (setq pt-best (point) ppss-best nil))

              (cond
               ;; Quick case when we found a nearby pos.
               ((< (- pos pt-best) syntax-ppss-max-span)
                ;; Stats slot 2.
                (incf (car (aref syntax-ppss-stats 2)))
                (incf (cdr (aref syntax-ppss-stats 2)) (- pos pt-best))
                (setq ppss (parse-partial-sexp pt-best pos nil nil ppss-best)))
               ;; Slow case: compute the state from some known position and
               ;; populate the cache so we won't need to do it again soon.
               (t
                ;; Stats slot 3.  Note that this parses from PT-MIN (cache or
                ;; `point-min'), not from PT-BEST.
                (incf (car (aref syntax-ppss-stats 3)))
                (incf (cdr (aref syntax-ppss-stats 3)) (- pos pt-min))

                ;; If `pt-min' is too far, add a few intermediate entries.
                ;; Each iteration parses up to the midpoint between PT-MIN
                ;; and POS and records the state reached there.
                (while (> (- pos pt-min) (* 2 syntax-ppss-max-span))
                  (setq ppss (parse-partial-sexp
                              pt-min (setq pt-min (/ (+ pt-min pos) 2))
                              nil nil ppss))
                  (let ((pair (cons pt-min ppss)))
                    (if cache-pred
                        (push pair (cdr cache-pred))
                      (push pair syntax-ppss-cache))))

                ;; Compute the actual return value.
                (setq ppss (parse-partial-sexp pt-min pos nil nil ppss))

                ;; Debugging check.
                ;; (let ((real-ppss (parse-partial-sexp (point-min) pos)))
                ;;   (setcar (last ppss 4) 0)
                ;;   (setcar (last real-ppss 4) 0)
                ;;   (setcar (last ppss 8) nil)
                ;;   (setcar (last real-ppss 8) nil)
                ;;   (unless (equal ppss real-ppss)
                ;;     (message "!!Syntax: %s != %s" ppss real-ppss)
                ;;     (setq ppss real-ppss)))

                ;; Store it in the cache.
                ;; The new entry is inserted if the next higher entry is more
                ;; than `syntax-ppss-max-span' away (or if there is none);
                ;; otherwise it replaces that entry.
                (let ((pair (cons pos ppss)))
                  (if cache-pred
                      (if (> (- (caar cache-pred) pos) syntax-ppss-max-span)
                          (push pair (cdr cache-pred))
                        (setcar cache-pred pair))
                    (if (or (null syntax-ppss-cache)
                            (> (- (caar syntax-ppss-cache) pos)
                               syntax-ppss-max-span))
                        (push pair syntax-ppss-cache)
                      (setcar syntax-ppss-cache pair)))))))))

          ;; Every path except the incremental one above records its result
          ;; for the next call.
          (setq syntax-ppss-last (cons pos ppss))
          ppss)
      (args-out-of-range
       ;; If the buffer is more narrowed than when we built the cache,
       ;; we may end up calling parse-partial-sexp with a position before
       ;; point-min.  In that case, just parse from point-min assuming
       ;; a nil state.
       (parse-partial-sexp (point-min) pos)))))
| 552 | |
| 553 | ;; Debugging functions |
| 554 | |
(defun syntax-ppss-debug ()
  "Return the distances between consecutive `syntax-ppss-cache' positions.
The cache positions are followed by `point-min' for this computation, and
the differences between neighbors are returned as a list, the one
involving `point-min' first."
  (let ((entries (append syntax-ppss-cache (list (cons (point-min) nil))))
        (previous nil)
        (gaps nil))
    (while entries
      (let ((here (car (pop entries))))
        (if previous (push (- previous here) gaps))
        (setq previous here)))
    gaps))
| 562 | |
| 563 | ;; XEmacs compatibility functions |
| 564 | |
| 565 | ;; (defun buffer-syntactic-context (&optional buffer) |
| 566 | ;; "Syntactic context at point in BUFFER. |
| 567 | ;; Either of `string', `comment' or `nil'. |
| 568 | ;; This is an XEmacs compatibility function." |
| 569 | ;; (with-current-buffer (or buffer (current-buffer)) |
| 570 | ;; (syntax-ppss-context (syntax-ppss)))) |
| 571 | |
| 572 | ;; (defun buffer-syntactic-context-depth (&optional buffer) |
| 573 | ;; "Syntactic parenthesis depth at point in BUFFER. |
| 574 | ;; This is an XEmacs compatibility function." |
| 575 | ;; (with-current-buffer (or buffer (current-buffer)) |
| 576 | ;; (syntax-ppss-depth (syntax-ppss)))) |
| 577 | |
| 578 | (provide 'syntax) |
| 579 | |
| 580 | ;;; syntax.el ends here |