| 1 | ;;; syntax.el --- helper functions to find syntactic context |
| 2 | |
| 3 | ;; Copyright (C) 2000, 2003 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Maintainer: FSF |
| 6 | ;; Keywords: internal |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 13 | ;; any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 23 | ;; Boston, MA 02111-1307, USA. |
| 24 | |
| 25 | ;;; Commentary: |
| 26 | |
| 27 | ;; The main exported function is `syntax-ppss'. You might also need |
| 28 | ;; to call `syntax-ppss-flush-cache' or to add it to |
| 29 | ;; `after-change-functions' (although this is automatically done by |
| 30 | ;; syntax-ppss when needed, but that might fail if syntax-ppss is |
| 31 | ;; called in a context where after-change-functions is temporarily |
| 32 | ;; let-bound to nil). |
| 33 | |
| 34 | ;;; Todo: |
| 35 | |
| 36 | ;; - do something about the case where the syntax-table is changed. |
| 37 | ;; This typically happens with tex-mode and its `$' operator. |
| 38 | ;; - move font-lock-syntactic-keywords in here. Then again, maybe not. |
| 39 | ;; - new functions `syntax-state', ... to replace uses of parse-partial-state |
| 40 | ;; with something higher-level (similar to syntax-ppss-context). |
| 41 | ;; - interaction with mmm-mode. |
| 42 | |
| 43 | ;;; Code: |
| 44 | |
| 45 | ;; Note: PPSS stands for `parse-partial-sexp state' |
| 46 | |
| 47 | (eval-when-compile (require 'cl)) |
| 48 | |
(defsubst syntax-ppss-depth (ppss)
  "Return the first element of the parse state PPSS.
In a state returned by `parse-partial-sexp' this is the paren depth."
  (car ppss))
| 51 | |
(defsubst syntax-ppss-context (ppss)
  "Return the kind of syntactic context described by PPSS.
The value is the symbol `string' when element 3 of PPSS is non-nil,
the symbol `comment' when element 4 is non-nil, and nil otherwise.
The string check takes precedence over the comment check."
  (if (nth 3 ppss)
      'string
    (and (nth 4 ppss) 'comment)))
| 57 | |
(defvar syntax-ppss-max-span 20000
  "Distance under which `syntax-ppss' parses without caching.
When a known state lies less than this many characters before the
requested position, the text in between is simply re-parsed.  Cache
entries are likewise kept at least this far apart from each other,
so that the cache does not fill up with entries of little use.")
| 62 | |
(defvar syntax-begin-function nil
  "Function to move point back outside of any comment/string/paren.
It is called with no arguments and should leave point at some
syntactically safe position, i.e. one where the PPSS is equivalent
to nil.")
| 67 | |
(defvar syntax-ppss-cache nil
  "Cached parse states, as a list of (POS . PPSS) pairs.
The list is kept sorted by decreasing POS.")
(defvar syntax-ppss-last nil
  "Most recently computed state, as a pair (LAST-POS . LAST-PPSS).
LAST-POS is set to nil by `syntax-ppss-flush-cache' when only
LAST-PPSS can still be trusted.")
;; Both caches describe the text of one buffer, so each buffer needs
;; its own copy.
(make-variable-buffer-local 'syntax-ppss-cache)
(make-variable-buffer-local 'syntax-ppss-last)
| 74 | |
(defalias 'syntax-ppss-after-change-function 'syntax-ppss-flush-cache)
(defun syntax-ppss-flush-cache (beg &rest ignored)
  "Flush the cache of `syntax-ppss' starting at position BEG.
Entries of `syntax-ppss-cache' located after BEG are dropped.  If
BEG precedes the position stored in `syntax-ppss-last', that value
is either discarded or has its position cleared, depending on
whether its state can still be used as a starting point.
The remaining arguments are IGNORED, so that this function can be
put directly on `after-change-functions'."
  ;; Flush invalid cache entries.  The cache is sorted by decreasing
  ;; position, so the invalid entries are all at the front.
  (while (and syntax-ppss-cache (> (caar syntax-ppss-cache) beg))
    (setq syntax-ppss-cache (cdr syntax-ppss-cache)))
  ;; Throw away `last' value if made invalid.
  (when (< beg (or (car syntax-ppss-last) 0))
    ;; `syntax-ppss-last' is (POS . PPSS), so element N of the state is
    ;; element N+1 of the pair.  The positions checked here must be the
    ;; ones `syntax-ppss' restarts from when only the old state is
    ;; left, namely elements 9 (first open paren), 8 and 2 of the
    ;; state, i.e. elements 10, 9 and 3 of the pair.
    (if (< beg (or (car (nth 10 syntax-ppss-last))
                   (nth 9 syntax-ppss-last)
                   (nth 3 syntax-ppss-last)
                   0))
        ;; The change is before that restart position: nothing in the
        ;; old state can be trusted any more.
        (setq syntax-ppss-last nil)
      ;; The restart position is still valid: only forget the position
      ;; at which the state was computed.
      (setcar syntax-ppss-last nil)))
  ;; Unregister if there's no cache left.  Sadly this doesn't work
  ;; because `after-change-functions' is temporarily bound to nil here.
  ;; (unless syntax-ppss-cache
  ;;   (remove-hook 'after-change-functions
  ;;                'syntax-ppss-after-change-function t))
  )
| 95 | |
(defvar syntax-ppss-stats
  ;; Build fresh cells rather than using a literal vector: `syntax-ppss'
  ;; updates these cells destructively, and literal constants should
  ;; not be modified.
  (vector (cons 0 0.0) (cons 0 0.0) (cons 0 0.0)
          (cons 0 0.0) (cons 0 0.0) (cons 1 2500.0))
  "Usage statistics of `syntax-ppss'.
A vector of (COUNT . TOTAL-DISTANCE) cells, one per strategy:
 0  parsed forward from the last position, which was close enough;
 1  parsed from a toplevel position taken from the last state;
 2  parsed from a nearby position (quick case);
 3  parsed from a cache entry, filling the cache (slow case);
 4  the last position was preferred over the cache entry;
 5  `syntax-begin-function' provided the starting point.")
(defun syntax-ppss-stats ()
  "Return a summary of `syntax-ppss-stats' as a list.
Each element is (COUNT . AVERAGE), where AVERAGE is the total
distance divided by COUNT and truncated to an integer.  An element
is nil when that average cannot be computed, e.g. when COUNT is
zero."
  (mapcar (lambda (x)
            (condition-case nil
                (cons (car x) (truncate (/ (cdr x) (car x))))
              (error nil)))
          syntax-ppss-stats))
| 104 | |
(defsubst syntax-ppss--note-stat (index distance)
  "Record in `syntax-ppss-stats' one use of strategy number INDEX.
The use count of that entry is incremented and DISTANCE is added
to its accumulated distance."
  (let ((cell (aref syntax-ppss-stats index)))
    (setcar cell (1+ (car cell)))
    (setcdr cell (+ (cdr cell) distance))))

;;;###autoload
(defun syntax-ppss (&optional pos)
  "Return the `parse-partial-sexp' state at POS.
POS defaults to point.  The value has the same form as the one
returned by `parse-partial-sexp', except that its 2nd and 6th
values cannot be relied upon.
Point is at POS when this function returns."
  (or pos (setq pos (point)))
  (let ((last-ppss (cdr syntax-ppss-last))
        (last-pos (car syntax-ppss-last))
        (state nil)
        (top-pos nil))
    ;; A remembered position located after POS is of no use as a
    ;; starting point.
    (when (and last-pos (> last-pos pos))
      (setq last-pos nil))
    (condition-case nil
        (if (and last-pos
                 ;; Using `syntax-begin-function' and then finding the
                 ;; state is assumed to cost about twice the distance
                 ;; it usually leaves to parse.
                 (< (- pos last-pos)
                    (* 2 (/ (cdr (aref syntax-ppss-stats 5))
                            (1+ (car (aref syntax-ppss-stats 5)))))))
            ;; The last position is usable and close: parse from there.
            ;; The `last' cache is deliberately left untouched.
            (progn
              (syntax-ppss--note-stat 0 (- pos last-pos))
              (parse-partial-sexp last-pos pos nil nil last-ppss))

          (if (and (not last-pos)
                   last-ppss
                   ;; BEWARE!  This relies on the undocumented 9th
                   ;; field, which currently holds the positions of
                   ;; the open-parens of the enclosing parens.  Those
                   ;; positions are outside of any string/comment and
                   ;; the first one is outside of any paren, i.e. it
                   ;; corresponds to a nil state.  If that list is
                   ;; empty but we are in a string or comment, the 8th
                   ;; field holds a similar "toplevel" position.  When
                   ;; that position is too far from POS, other
                   ;; elements of the 9th field could be tried, but
                   ;; this doesn't seem to happen in practice and
                   ;; would complicate this code (and the
                   ;; after-change-function code even more).  It might
                   ;; help in "degenerate" cases such as a file
                   ;; entirely wrapped in a set of parentheses.
                   (setq top-pos (or (car (nth 9 last-ppss))
                                     (nth 8 last-ppss)
                                     (nth 2 last-ppss)))
                   (<= top-pos pos)
                   (< (- pos top-pos) syntax-ppss-max-span))
              ;; The old state gives a close enough toplevel position.
              (progn
                (syntax-ppss--note-stat 1 (- pos top-pos))
                (setq state (parse-partial-sexp top-pos pos)))

            ;; The old data can't be used that way.  Consult the cache.
            (let ((prev-cell nil)
                  (cell syntax-ppss-cache)
                  ;; CACHE-POS and START-POS are kept apart so that
                  ;; the cache is only ever filled from data known to
                  ;; be right, whereas `syntax-begin-function' might
                  ;; return incorrect data.
                  (cache-pos (point-min))
                  (start-pos (point-min))
                  (start-ppss nil))
              ;; Skip the entries located after POS; PREV-CELL ends up
              ;; as the last skipped cell, if any.
              (while (and cell (< pos (caar cell)))
                (setq prev-cell cell
                      cell (cdr cell)))
              (when cell
                (setq cache-pos (caar cell)
                      state (cdar cell)))

              ;; Set up the after-change function if necessary.
              (unless (or syntax-ppss-cache syntax-ppss-last)
                (add-hook 'after-change-functions
                          'syntax-ppss-flush-cache nil t))

              ;; Start from whichever of the last position and the
              ;; cache entry is closer to POS.
              (if (and last-pos (>= last-pos cache-pos))
                  (progn
                    (syntax-ppss--note-stat 4 (- pos last-pos))
                    (setq start-pos last-pos
                          start-ppss last-ppss))
                (setq start-pos cache-pos
                      start-ppss state))

              ;; Try `syntax-begin-function' if available.  It is not
              ;; tried any earlier because:
              ;; - Its result might not be 100% reliable, so the cache
              ;;   is preferred when available.
              ;; - It might be slow.
              ;; - If it almost always finds a safe nearby spot, the
              ;;   cache won't be populated, so consulting it is cheap.
              (if (and (null syntax-begin-function)
                       (boundp 'font-lock-beginning-of-syntax-function)
                       font-lock-beginning-of-syntax-function)
                  (set (make-local-variable 'syntax-begin-function)
                       font-lock-beginning-of-syntax-function))
              (when syntax-begin-function
                (goto-char pos)
                (funcall syntax-begin-function)
                (when (and
                       ;; Only accept it if it is an improvement.
                       (> (point) start-pos)
                       ;; Simple sanity check.
                       (not (memq (get-text-property (point) 'face)
                                  '(font-lock-string-face
                                    font-lock-doc-face
                                    font-lock-comment-face))))
                  (syntax-ppss--note-stat 5 (- pos (point)))
                  (setq start-pos (point)
                        start-ppss nil)))

              (if (< (- pos start-pos) syntax-ppss-max-span)
                  ;; Quick case: a nearby starting point was found.
                  (progn
                    (syntax-ppss--note-stat 2 (- pos start-pos))
                    (setq state (parse-partial-sexp start-pos pos
                                                    nil nil start-ppss)))
                ;; Slow case: compute the state from the cache entry
                ;; and populate the cache so this won't be needed
                ;; again soon.
                (syntax-ppss--note-stat 3 (- pos cache-pos))

                ;; If the cache entry is too far, add a few
                ;; intermediate entries, halving the distance each time.
                (while (> (- pos cache-pos) (* 2 syntax-ppss-max-span))
                  (let ((mid (/ (+ cache-pos pos) 2)))
                    (setq state (parse-partial-sexp cache-pos mid
                                                    nil nil state)
                          cache-pos mid))
                  (let ((entry (cons cache-pos state)))
                    (if prev-cell
                        (setcdr prev-cell (cons entry (cdr prev-cell)))
                      (setq syntax-ppss-cache
                            (cons entry syntax-ppss-cache)))))

                ;; Compute the actual return value.
                (setq state (parse-partial-sexp cache-pos pos nil nil state))

                ;; Store it in the cache: as a new entry when the
                ;; following entry is far enough, else in its place.
                (let ((entry (cons pos state)))
                  (cond
                   ((null prev-cell)
                    (if (or (null syntax-ppss-cache)
                            (> (- (caar syntax-ppss-cache) pos)
                               syntax-ppss-max-span))
                        (setq syntax-ppss-cache
                              (cons entry syntax-ppss-cache))
                      (setcar syntax-ppss-cache entry)))
                   ((> (- (caar prev-cell) pos) syntax-ppss-max-span)
                    (setcdr prev-cell (cons entry (cdr prev-cell))))
                   (t
                    (setcar prev-cell entry)))))))

          (setq syntax-ppss-last (cons pos state))
          state)
      (args-out-of-range
       ;; If the buffer is more narrowed than when the cache was
       ;; built, `parse-partial-sexp' may end up being called with a
       ;; position before point-min.  In that case, just parse from
       ;; point-min assuming a nil state.
       (parse-partial-sexp (point-min) pos)))))
| 265 | |
| 266 | ;; Debugging functions |
| 267 | |
(defun syntax-ppss-debug ()
  "Return the distances between successive `syntax-ppss' cache entries.
The positions considered are those of `syntax-ppss-cache' followed
by `point-min'.  The returned list holds one difference per pair of
adjacent positions; the difference for the last pair comes first."
  (let ((entries (append syntax-ppss-cache (list (cons (point-min) nil))))
        (gaps nil))
    (while (cdr entries)
      (setq gaps (cons (- (car (car entries))
                          (car (car (cdr entries))))
                       gaps)
            entries (cdr entries)))
    gaps))
| 275 | |
| 276 | ;; XEmacs compatibility functions |
| 277 | |
| 278 | ;; (defun buffer-syntactic-context (&optional buffer) |
| 279 | ;; "Syntactic context at point in BUFFER. |
| 280 | ;; Either of `string', `comment' or `nil'. |
| 281 | ;; This is an XEmacs compatibility function." |
| 282 | ;; (with-current-buffer (or buffer (current-buffer)) |
| 283 | ;; (syntax-ppss-context (syntax-ppss)))) |
| 284 | |
| 285 | ;; (defun buffer-syntactic-context-depth (&optional buffer) |
| 286 | ;; "Syntactic parenthesis depth at point in BUFFER. |
| 287 | ;; This is an XEmacs compatibility function." |
| 288 | ;; (with-current-buffer (or buffer (current-buffer)) |
| 289 | ;; (syntax-ppss-depth (syntax-ppss)))) |
| 290 | |
| 291 | (provide 'syntax) |
| 292 | |
| 293 | ;;; arch-tag: 302f1eeb-e77c-4680-a8c5-c543e01161a5 |
| 294 | ;;; syntax.el ends here |