| 1 | ;;; cc-awk.el --- AWK specific code within cc-mode. |
| 2 | |
| 3 | ;; Copyright (C) 1988, 1994, 1996, 2000-2014 Free Software Foundation, |
| 4 | ;; Inc. |
| 5 | |
| 6 | ;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el) |
| 7 | ;; Maintainer: emacs-devel@gnu.org |
| 8 | ;; Keywords: AWK, cc-mode, unix, languages |
| 9 | ;; Package: cc-mode |
| 10 | |
| 11 | ;; This file is part of GNU Emacs. |
| 12 | |
| 13 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 14 | ;; it under the terms of the GNU General Public License as published by |
| 15 | ;; the Free Software Foundation, either version 3 of the License, or |
| 16 | ;; (at your option) any later version. |
| 17 | |
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 21 | ;; GNU General Public License for more details. |
| 22 | |
| 23 | ;; You should have received a copy of the GNU General Public License |
| 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 25 | |
| 26 | ;;; Commentary: |
| 27 | |
| 28 | ;; This file contains (most of) the adaptations to cc-mode required for the |
| 29 | ;; integration of AWK Mode. |
| 30 | ;; It is organized thusly, the sections being separated by page breaks: |
| 31 | ;; 1. The AWK Mode syntax table. |
| 32 | ;; 2. Regular expressions for analyzing AWK code. |
| 33 | ;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property"). |
| 34 | ;; 4. Syntax-table property/font-locking stuff, including the |
| 35 | ;; font-lock-keywords setting. |
| 36 | ;; 5. The AWK Mode before/after-change-functions. |
| 37 | ;; 6. AWK Mode specific versions of commands like beginning-of-defun. |
| 38 | ;; The AWK Mode keymap, abbreviation table, and the mode function itself are |
| 39 | ;; in cc-mode.el. |
| 40 | |
| 41 | ;;; Code: |
| 42 | |
| 43 | (eval-when-compile |
| 44 | (let ((load-path |
| 45 | (if (and (boundp 'byte-compile-dest-file) |
| 46 | (stringp byte-compile-dest-file)) |
| 47 | (cons (file-name-directory byte-compile-dest-file) load-path) |
| 48 | load-path))) |
| 49 | (load "cc-bytecomp" nil t))) |
| 50 | |
| 51 | (cc-require 'cc-defs) |
| 52 | |
| 53 | ;; Silence the byte compiler. |
| 54 | (cc-bytecomp-defvar font-lock-mode) ; Checked with boundp before use. |
| 55 | (cc-bytecomp-defvar c-new-BEG) |
| 56 | (cc-bytecomp-defvar c-new-END) |
| 57 | |
| 58 | ;; Some functions in cc-engine that are used below. There's a cyclic |
| 59 | ;; dependency so it can't be required here. (Perhaps some functions |
| 60 | ;; could be moved to cc-engine to avoid it.) |
| 61 | (cc-bytecomp-defun c-backward-token-1) |
| 62 | (cc-bytecomp-defun c-beginning-of-statement-1) |
| 63 | (cc-bytecomp-defun c-backward-sws) |
| 64 | (cc-bytecomp-defun c-forward-sws) |
| 65 | |
(defvar awk-mode-syntax-table
  (let ((table (make-syntax-table)))
    ;; Backslash is the escape character.
    (modify-syntax-entry ?\\ "\\" table)
    ;; A comment is opened by # and closed by newline, carriage return or
    ;; form feed.
    (modify-syntax-entry ?\# "< " table)
    (dolist (comment-ender '(?\n ?\r ?\f))
      (modify-syntax-entry comment-ender "> " table))
    ;; Operator characters, and the single quote, are plain punctuation.
    ;; N.B. / can delimit regexes or be a division operator.  By default we
    ;; assume that it is a division sign, and fix the regexp operator cases
    ;; with `font-lock-syntactic-keywords'.  (ACM 2002/4/27.)
    (dolist (punctuation '(?/ ?* ?+ ?- ?= ?% ?< ?> ?& ?| ?\'))
      (modify-syntax-entry punctuation "." table))
    ;; Underscore has symbol syntax.
    (modify-syntax-entry ?_ "_" table)
    table)
  "Syntax table in use in AWK Mode buffers.")
| 90 | |
| 91 | \f |
| 92 | ;; This section defines regular expressions used in the analysis of AWK code. |
| 93 | |
| 94 | ;; N.B. In the following regexps, an EOL is either \n OR \r. This is because |
| 95 | ;; Emacs has in the past used \r to mark hidden lines in some fashion (and |
| 96 | ;; maybe still does). |
| 97 | |
(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
;; Matches a backslash together with the character it escapes.  The escaped
;; character may be an EOL; a backslash at the very end of the buffer matches
;; too.
(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
;; Like `c-awk-esc-pair-re', except that a backslash followed by a newline is
;; NOT matched.
(defconst c-awk-comment-without-nl "#.*")
;; Matches an AWK comment up to, but excluding, its terminating NL (if any).
;; It is up to the "enclosing" (elisp) regexp to ensure that the # is real.
(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
;; Matches an EOL (\n or \r), or the end of the buffer.

;; "Space" regular expressions.  These are needed at compile time as well as
;; at load time, hence the `eval-and-compile'.
(eval-and-compile
  (defconst c-awk-escaped-nl "\\\\[\n\r]")
  ;; Matches one escaped newline.

  (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))
  ;; Matches a possibly empty sequence of escaped newlines.  Used in
  ;; awk-font-lock-keywords.

  ;; (defconst c-awk-escaped-nls*-with-space*
  ;;   (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
  ;; The above RE was very slow.  Its runtime was doubling with each
  ;; additional space :-(  Reformulate it as below:
  (defconst c-awk-escaped-nls*-with-space*
    (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))
  ;; Matches a possibly empty sequence of escaped newlines with optional
  ;; interspersed spaces and tabs.  Used in awk-font-lock-keywords.

(defconst c-awk-blank-or-comment-line-re "[ \t]*\\(#\\|\\\\?$\\)")
;; Matches (the tail of) a line containing at most either a comment or an
;; escaped EOL.
| 129 | |
;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
(defconst c-awk-harmless-char-re "[^_#/\"{}();\\\\\n\r]")
;; Matches any single character which is not significant in the state machine
;; applying syntax-table properties to "s and /s, i.e. anything but
;; _ # / " { } ( ) ; \ or an EOL.
(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
;; Matches an underscore which is NOT followed by a ".  An underscore at the
;; end of the buffer also matches.
(defconst c-awk-harmless-string*-re
  (concat "\\("
          c-awk-harmless-char-re
          "\\|" c-awk-esc-pair-re
          "\\|" c-awk-harmless-_
          "\\)*"))
;; Matches a (possibly empty) run of harmless characters, escaped pairs and
;; harmless underscores, i.e. text which is insignificant in the state
;; machine applying syntax-table properties to "s and /s.
(defconst c-awk-harmless-string*-here-re
  (concat "\\=" c-awk-harmless-string*-re))
;; As above, but anchored at point.

(defconst c-awk-harmless-line-char-re "[^_#/\"\\\\\n\r]")
;; Matches any character except _, #, /, ", \, or an EOL.  N.B. _" starts a
;; localization string in gawk 3.1
(defconst c-awk-harmless-line-string*-re
  (concat "\\("
          c-awk-harmless-line-char-re
          "\\|" c-awk-esc-pair-re
          "\\|" c-awk-harmless-_
          "\\)*"))
;; Matches a (possibly empty) run of characters containing no unescaped /, ",
;; \, #, or EOL.
(defconst c-awk-harmless-line-re
  (concat c-awk-harmless-line-string*-re
          "\\(" c-awk-comment-without-nl "\\)?"
          c-awk-nl-or-eob))
;; Matches (the tail of) an AWK "logical" line which contains no unescaped "
;; or /.  "Logical" means "possibly containing escaped newlines".  A trailing
;; comment is matched as part of the line, even if it contains a " or a /.
;; The end of the buffer also counts as an end of line.
(defconst c-awk-harmless-lines+-here-re
  (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
;; Matches one or more consecutive "harmless lines", starting at point.
| 161 | |
| 162 | |
;; REGEXPS FOR AWK STRINGS.
(defconst c-awk-string-ch-re "[^\"\\\n\r]")
;; Matches any character which may appear unescaped inside a string, i.e.
;; anything but ", \ or an EOL.
(defconst c-awk-string-innards-re
  (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
;; Matches the contents of an AWK string (i.e. without the enclosing quotes).
(defconst c-awk-string-without-end-here-re
  (concat "\\=_?\"" c-awk-string-innards-re))
;; Matches an AWK string at point, from its opening " up to, but not
;; including, any terminator.  A gawk 3.1+ string may look like
;; _"localizable string", so an optional leading _ is accepted.
(defconst c-awk-possibly-open-string-re
  (concat "\"" c-awk-string-innards-re "\\(\"\\|$\\|\\'\\)"))
;; Matches a string starting with its opening ", then its innards, then
;; either the closing " or, should that be missing, the end of the line or
;; the end of the buffer.
| 176 | |
;; REGEXPS FOR AWK REGEXPS.
(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
;; Matches any character of an AWK regexp needing no special analysis, i.e.
;; anything but [, /, \ or an EOL.
(defconst c-awk-escaped-newlines*-re
  (concat "\\(" c-awk-escaped-nl "\\)*"))
;; Matches a (possibly empty) sequence of escaped newlines.

;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
;; list", and "[:alpha:]" inside a character list will be known as a
;; "character class".  The terms for these things vary between regexp
;; descriptions.
(defconst c-awk-regexp-char-class-re "\\[:[a-z]+:\\]")
;; Matches a character class spec (e.g. [:alpha:]).
(defconst c-awk-regexp-char-list-re
  (concat "\\[" c-awk-escaped-newlines*-re
          "^?" c-awk-escaped-newlines*-re
          "]?"
          "\\(" c-awk-esc-pair-re
          "\\|" c-awk-regexp-char-class-re
          "\\|" "[^]\n\r]"
          "\\)*"
          "\\(]\\|$\\)"))
;; Matches a regexp character list.  Should the closing ] be missing, the
;; match extends up to (but not including) the EOL.
(defconst c-awk-regexp-innards-re
  (concat "\\(" c-awk-esc-pair-re
          "\\|" c-awk-regexp-char-list-re
          "\\|" c-awk-regexp-normal-re
          "\\)*"))
;; Matches the contents of an AWK regexp (i.e. without the enclosing /s).
(defconst c-awk-regexp-without-end-re
  (concat "/" c-awk-regexp-innards-re))
;; Matches an AWK regexp from its opening / up to, but not including, any
;; terminating /.
| 203 | |
;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
;; REGEXP OPENER OR A DIVISION SIGN.  By "state" in the following is meant
;; whether a '/' at the current position would be a regexp opener or a
;; division sign.
(defconst c-awk-neutral-re
;  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
  "\\([}@` \t]\\|\\+\\+\\|--\\|\\\\\\(.\\|[\n\r]\\)\\)")
;; Matches one "neutral" char(pair), i.e. one which leaves the "state" of a
;; subsequent / unchanged: a space or tab, a close brace, an
;; auto-increment/decrement operator, an escaped character (including an
;; escaped EOL), or one of the (invalid) characters @ or `.  An unescaped end
;; of line is NOT neutral.
(defconst c-awk-neutrals*-re
  (concat "\\(" c-awk-neutral-re "\\)*"))
;; Matches a (possibly empty) string of neutral characters (or character
;; pairs).
(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
;; Matches a run of characters each of which is a constituent of a variable
;; or number, or a ket (i.e. closing bracKET), round or square.  All
;; characters \x80 to \xff are assumed to be "letters".
(defconst c-awk-div-sign-re
  (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
;; Matches a piece of AWK buffer ending in a / which is a division sign, in a
;; context where an immediate / would be a regexp bracket.  The / follows a
;; variable or number (with optional intervening "neutral" characters).  This
;; only works when there is no preceding " or / before the sought / to foul
;; things up.
(defconst c-awk-non-arith-op-bra-re
  "[[\({&=:!><,?;'~|]")
;; Matches an opening BRAcket (of any sort), or any operator character apart
;; from +,-,/,*,%.  For the purpose at hand (detecting a / which is a regexp
;; bracket) these arith ops are unnecessary and a pain, because of "++" and
;; "--".
(defconst c-awk-regexp-sign-re
  (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
;; Matches a piece of AWK buffer ending in a / which is an opening regexp
;; bracket, in a context where an immediate / would be a division sign.  This
;; only works when there is no preceding " or / before the sought / to foul
;; things up.
(defconst c-awk-pre-exp-alphanum-kwd-re
  (concat "\\(^\\|\\=\\|[^_\n\r]\\)\\<"
          (regexp-opt '("print" "return" "case") t)
          "\\>\\([^_\n\r]\\|$\\)"))
;; Matches the AWK keywords which can precede expressions (including
;; /regexp/), namely "print", "return" and "case".
(defconst c-awk-kwd-regexp-sign-re
  (concat c-awk-pre-exp-alphanum-kwd-re c-awk-escaped-nls*-with-space* "/"))
;; Matches a piece of AWK buffer ending in <kwd> /, where <kwd> is a keyword
;; which can precede an expression.
| 251 | |
;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
;; Matches any single character other than #, /, ", \, an EOL, a space or a
;; tab.  Unlike `c-awk-harmless-line-char-re', an underscore IS matched.
(defconst c-awk-non-/-syn-ws*-re
  (concat
   "\\("
   c-awk-escaped-nls*-with-space*
   "\\("
   c-awk-_-harmless-nonws-char-re
   "\\|" c-awk-non-eol-esc-pair-re
   "\\|" c-awk-possibly-open-string-re
   "\\)"
   "\\)*"))
;; Matches a (possibly empty) sequence of items, each consisting of optional
;; spaces, tabs and escaped newlines followed by one of: a character matched
;; by `c-awk-_-harmless-nonws-char-re', an escaped character (but not an
;; escaped newline), or a (possibly unterminated) string.
(defconst c-awk-space*-/-re
  (concat c-awk-escaped-nls*-with-space* "/"))
;; Matches optional whitespace followed by "/".
(defconst c-awk-space*-regexp-/-re
  (concat c-awk-escaped-nls*-with-space* "\\s\""))
;; Matches optional whitespace followed by a "/" with string syntax (a matched
;; regexp delimiter).
(defconst c-awk-space*-unclosed-regexp-/-re
  (concat c-awk-escaped-nls*-with-space* "\\s\|"))
;; Matches optional whitespace followed by a "/" with string fence syntax (an
;; unmatched regexp delimiter).
| 272 | |
| 273 | \f |
| 274 | ;; ACM, 2002/5/29: |
| 275 | ;; |
| 276 | ;; The next section of code is about determining whether or not an AWK |
| 277 | ;; statement is complete or not. We use this to indent the following line. |
| 278 | ;; The determination is pretty straightforward in C, where a statement ends |
| 279 | ;; with either a ; or a }. Only "while" really gives any trouble there, since |
| 280 | ;; it might be the end of a do-while. In AWK, on the other hand, semicolons |
| 281 | ;; are rarely used, and EOLs _usually_ act as "virtual semicolons". In |
| 282 | ;; addition, we have the complexity of escaped EOLs. The core of this |
| 283 | ;; analysis is in the middle of the function |
| 284 | ;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down. |
| 285 | ;; |
| 286 | ;; To avoid continually repeating this expensive analysis, we "cache" its |
| 287 | ;; result in a text-property, c-awk-NL-prop, whose value for a line is set on |
| 288 | ;; the EOL (if any) which terminates that line. Should the property be |
| 289 | ;; required for the very last line (which has no EOL), it is calculated as |
| 290 | ;; required but not cached. The c-awk-NL-prop property should be thought of |
| 291 | ;; as only really valid immediately after a buffer change, not a permanently |
| 292 | ;; set property. (By contrast, the syntax-table text properties (set by an |
| 293 | ;; after-change function) must be constantly updated for the mode to work |
| 294 | ;; properly). |
| 295 | ;; |
| 296 | ;; This text property is also used for "syntactic whitespace" movement, this |
| 297 | ;; being where the distinction between the values '$' and '}' is significant. |
| 298 | ;; |
| 299 | ;; The valid values for c-awk-NL-prop are: |
| 300 | ;; |
| 301 | ;; nil The property is not currently set for this line. |
| 302 | ;; '#' There is NO statement on this line (at most a comment), and no open |
| 303 | ;; statement from a previous line which could have been completed on this |
| 304 | ;; line. |
| 305 | ;; '{' There is an unfinished statement on this (or a previous) line which |
| 306 | ;; doesn't require a backslash to continue onto another line, e.g. the line |
| 307 | ;; ends with {, or the && operator, or "if (condition)". Note that even if the |
| 308 | ;; newline is redundantly escaped, it remains a '{' line. |
| 309 | ;; '\' There is an escaped newline at the end of this line and this '\' is |
| 310 | ;; essential to the syntax of the program. (i.e. if it had been a |
| 311 | ;; frivolous \, it would have been ignored and the line been given one of |
| 312 | ;; the other property values.) |
| 313 | ;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual |
| 314 | ;; semicolon"). This might be a content-free line terminating a statement |
| 315 | ;; from the preceding (continued) line (which has property \). |
| 316 | ;; '}' A statement, being the last thing (aside from ws/comments) is |
| 317 | ;; explicitly terminated on this line by a closing brace (or sometimes a |
| 318 | ;; semicolon). |
| 319 | ;; |
| 320 | ;; This set of values has been chosen so that the property's value on a line |
| 321 | ;; is completely determined by the contents of the line and the property on |
| 322 | ;; the previous line, EXCEPT for where a "while" might be the closing |
| 323 | ;; statement of a do-while. |
| 324 | |
(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
  ;; Return non-nil when point is directly after the ) closing the condition
  ;; of "if/for/while (<condition>)".
  ;;
  ;; The ) ending a "do .... while (<condition>)" deliberately doesn't
  ;; count: this routine essentially exists to decide whether the next line
  ;; should be indented.
  ;;
  ;; DO-LIM limits how far back the "do" of a possible do-while is sought.
  ;;
  ;; This function might do hidden buffer changes.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; back over "(...)"
          (c-backward-token-1)          ; BOB isn't a problem.
          (cond
           ((looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
            t)
           ((looking-at "while\\>\\([^_]\\|$\\)")
            ;; A `while' only counts when it isn't the tail of a do-while.
            (not (eq (c-beginning-of-statement-1 do-lim)
                     'beginning)))))))))
| 347 | |
(defun c-awk-after-function-decl-param-list ()
  ;; Return non-nil when point is directly after the ) of
  ;; "function foo (bar)".  The keyword may be spelled "func" or "function".
  ;;
  ;; This function might do hidden buffer changes.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; back over "(...)"
          (c-backward-token-1)          ; BOB isn't a problem
          ;; We must now be at an identifier (the function's name) ...
          (when (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
            ;; ... which itself follows the function keyword.
            (c-backward-token-1)
            (looking-at "func\\(tion\\)?\\>")))))))
| 361 | |
;; 2002/11/8:  FIXME!  Check c-backward-token-1/2 for success (0 return code).
(defun c-awk-after-continue-token ()
  ;; Return non-nil when point is directly after a token which lets the
  ;; statement continue onto the next line without a backslash, i.e. one of
  ;; , { ? : && || do else.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (c-backward-token-1)          ; FIXME 2002/10/27.  What if this fails?
    ;; c-backward-token-1 doesn't treat && and || as single tokens :-(, so
    ;; step back by hand onto the first character of a possible pair.
    (when (and (looking-at "[&|]") (not (bobp)))
      (backward-char))
    (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
| 373 | |
(defun c-awk-after-rbrace-or-statement-semicolon ()
  ;; Return non-nil when point is directly after a } or after a ; which
  ;; closes a statement.  The ;s inside the control part of a for loop don't
  ;; count!
  ;;
  ;; N.B. for a ; with no enclosing bracket at all (the upward `scan-lists'
  ;; fails), the result is nil.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((prev-char (char-before)))
    (cond
     ((eq prev-char ?\}) t)
     ((eq prev-char ?\;)
      (save-excursion
        (let ((container (c-safe (scan-lists (point) -1 1))))
          (when container
            (goto-char container)       ; go back to containing (
            ;; The ; is a statement terminator unless its container is the
            ;; parenthesis of a "for (...)".
            (not (and (looking-at "(")
                      (c-backward-token-1) ; BOB isn't a problem
                      (looking-at "for\\>"))))))))))
| 389 | |
(defun c-awk-back-to-contentful-text-or-NL-prop ()
  ;; Move backwards to just after whichever of these is found first: (i) an
  ;; EOL carrying the c-awk-NL-prop text-property; (ii) non-ws text; or
  ;; (iii) BOB.  Return the value of c-awk-NL-prop in case (i), otherwise
  ;; nil.  Callers can best tell cases (ii) and (iii) apart with (bolp).
  ;;
  ;; Note that an escaped eol counts as whitespace here.
  ;;
  ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
  ;; that the previous line contains an unterminated string (without \).  In
  ;; this case, assume that the previous line's c-awk-NL-prop is a $, set
  ;; it on that line's EOL, and return it.
  ;;
  ;; POINT MUST BE AT THE START OF A LINE when calling this function.  This
  ;; is to ensure that the various backward-comment functions will work
  ;; properly.
  ;;
  ;; This function might do hidden buffer changes.
  (let (nl-prop
        prev-bol                  ; BOL of the line before point.
        start-pos)                ; where the backward ws scan starts.
    ;; All the work is done in the loop condition.  We are at a BOL at the
    ;; start of each iteration, and go back one line per iteration.
    (while
        (and
         (not (bobp))
         (null (setq nl-prop
                     (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
         (progn
           (setq prev-bol (c-point 'bopl)
                 start-pos (point))
           ;; N.B. the following function will not go back past an EOL if
           ;; there is an open string (without \) on the previous line.
           ;; If we find such, set the c-awk-NL-prop on it, too
           ;; (2004/3/29).
           (c-backward-syntactic-ws prev-bol)
           (if (/= (point) start-pos)
               t
             (setq nl-prop ?\$)
             (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
             nil))
         ;; If we had a backslash at EOL, c-backward-syntactic-ws will
         ;; have gone backwards over it.  Check the backslash was "real".
         (progn
           (when (looking-at "[ \t]*\\\\+$")
             (end-of-line)
             (if (re-search-backward
                  "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
                  prev-bol t)
                 (progn (end-of-line)   ; escaped EOL.
                        (backward-char)
                        (c-backward-syntactic-ws prev-bol))
               (end-of-line)))          ; The \ at eol is a fake.
           ;; Carry on only if the line turned out to be content-free.
           (bolp))))
    nl-prop))
| 439 | |
(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
  ;; Work out the c-awk-NL-prop for the EOL immediately before the current
  ;; line and set it there.  Doing so may entail setting the property on
  ;; several preceding EOLs too.
  ;;
  ;; NOTE that if the property was already set, we return it without
  ;; recalculation.  (This is by accident rather than design.)
  ;;
  ;; The value returned is the property which got set (or was already set)
  ;; on the previous line, or nil if we hit BOB.
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (save-match-data
      (beginning-of-line)
      (let* ((start-bol (point))
             (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
        ;; We are either (1) at a BOL (with nl-prop containing the previous
        ;; line's c-awk-NL-prop) or (2) after contentful text on a line.  At
        ;; the BOB counts as case (1), so we test next for bolp rather than
        ;; non-nil nl-prop.
        (unless (bolp)
          (setq nl-prop
                (cond
                 ;; Incomplete statement which doesn't require escaped EOL?
                 ((or (c-awk-after-if-for-while-condition-p do-lim)
                      (c-awk-after-function-decl-param-list)
                      (c-awk-after-continue-token))
                  ?\{)
                 ;; Escaped EOL (where there's also something to continue)?
                 ((and (looking-at "[ \t]*\\\\$")
                       (not (c-awk-after-rbrace-or-statement-semicolon)))
                  ?\\)
                 ;; A statement was completed on this line.  How?
                 ((memq (char-before) '(?\; ?\})) ; Real ; or }
                  ?\})
                 (t ?\$)))              ; A virtual semicolon.
          (end-of-line)
          (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
          (forward-line))

        ;; We are now at a (possibly empty) sequence of content-free lines.
        ;; Set c-awk-NL-prop on the EOL of each of these lines, deriving the
        ;; value from that of the line before.
        (while (< (point) start-bol)    ; one content-free line each iteration.
          (cond
           ((memq nl-prop '(?\} ?\$ nil))
            (setq nl-prop ?\#))
           ((and (eq nl-prop ?\\)
                 (not (looking-at "[ \t]*\\\\$")))
            (setq nl-prop ?\$))
           ;; ?\# (empty line) and ?\{ (open stmt) don't change.
           )
          (forward-line)
          (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
        nl-prop))))
| 494 | |
(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
  ;; Return the c-awk-NL-prop text-property of the previous line's EOL,
  ;; calculating (and thereby setting) it first if it isn't there yet.
  ;; Return nil if we're already at BOB.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (unless (bobp)
    (or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)
        (c-awk-calculate-NL-prop-prev-line do-lim))))
| 505 | |
(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
  ;; if necessary.  (As a special case, the property doesn't get set on an
  ;; empty line at EOB (there's no position to set the property on), but the
  ;; function returns the property value an EOL would have got.)
  ;;
  ;; When the current line is the last one in the buffer, a newline is
  ;; inserted temporarily at EOB for the duration of the calculation.  It is
  ;; removed again in an `unwind-protect' cleanup, so that it doesn't get left
  ;; behind in the buffer should the calculation exit non-locally (error or
  ;; quit).
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (let ((extra-nl-pos nil))     ; position of the artificial EOL, if any.
      (end-of-line)               ; Necessary for the following test to work.
      (when (= (forward-line) 1)  ; if we were on the last line....
        (setq extra-nl-pos (point))
        (insert-char ?\n 1)) ; ...artificial eol is needed for comment detection.
      (unwind-protect
          ;; Point is now at the BOL after the current line, so "previous
          ;; line" here means the line we started on.
          (c-awk-get-NL-prop-prev-line do-lim)
        (when extra-nl-pos
          (delete-region extra-nl-pos (1+ extra-nl-pos)))))))
| 523 | |
(defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
  ;; Return non-nil when the previous line ends in an incomplete statement,
  ;; i.e. when its c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((prev-nl-prop (c-awk-get-NL-prop-prev-line do-lim)))
    (memq prev-nl-prop '(?\\ ?\{))))
| 530 | |
(defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
  ;; Return non-nil when the current line ends in an incomplete statement,
  ;; i.e. when its c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((cur-nl-prop (c-awk-get-NL-prop-cur-line do-lim)))
    (memq cur-nl-prop '(?\\ ?\{))))
| 537 | |
| 538 | ;; NOTES ON "VIRTUAL SEMICOLONS" |
| 539 | ;; |
| 540 | ;; A "virtual semicolon" is what terminates a statement when there is no ; |
| 541 | ;; or } to do the job. Like point, it is considered to lie _between_ two |
| 542 | ;; characters. As from mid-March 2004, it is considered to lie just after |
| 543 | ;; the last non-syntactic-whitespace character on the line; (previously, it |
| 544 | ;; was considered an attribute of the EOL on the line). A real semicolon |
| 545 | ;; never counts as a virtual one. |
| 546 | |
(defun c-awk-at-vsemi-p (&optional pos)
  ;; Return non-nil when there is a virtual semicolon at POS (or at point,
  ;; when POS is nil).
  ;;
  ;; We scan forward from the start of the logical line over everything
  ;; that isn't trailing syntactic whitespace.  There is a virtual semicolon
  ;; at the position tested only if the scan ends exactly there and the
  ;; c-awk-NL-prop of the line is $.
  (save-excursion
    (when pos (goto-char pos))
    (let ((target (point))
          (line-start (c-point 'bol))
          (line-end (c-point 'eol))
          nl-prop)
      (c-awk-beginning-of-logical-line)
      ;; Next `while' goes round one logical line (ending in, e.g. "\\") per
      ;; iteration.  Such a line is rare, and can only be an open string
      ;; ending in an escaped \.
      (while
          (progn
            ;; Next `while' goes over a division sign or /regexp/ per
            ;; iteration.
            (while
                (and (< (point) line-end)
                     (progn
                       (re-search-forward c-awk-non-/-syn-ws*-re line-end)
                       (looking-at c-awk-space*-/-re)))
              (cond
               ;; /regexp/
               ((looking-at c-awk-space*-regexp-/-re)
                (forward-sexp))
               ;; Unclosed /regexp
               ((looking-at c-awk-space*-unclosed-regexp-/-re)
                (condition-case nil
                    (progn
                      (forward-sexp)
                      (backward-char))  ; Move to end of (logical) line.
                  (error (end-of-line)))) ; Happens at EOB.
               ;; division sign
               (t
                (c-forward-syntactic-ws)
                (forward-char))))
            (< (point) line-start))
        (forward-line))
      (when (eq (point) target)
        ;; Skip over continuation lines (NL-prop \) for as long as the line
        ;; which follows is blank or holds only a comment.
        (while (and (eq (setq nl-prop (c-awk-get-NL-prop-cur-line)) ?\\)
                    (eq (forward-line) 0)
                    (looking-at c-awk-blank-or-comment-line-re)))
        (eq nl-prop ?\$)))))
| 586 | |
(defun c-awk-vsemi-status-unknown-p ()
  ;; Return non-nil when we don't (yet) know whether the current line has a
  ;; virtual semicolon, i.e. when no c-awk-NL-prop is set on its EOL.
  ;;
  ;; DO NOT under any circumstances attempt to calculate the property here;
  ;; that would defeat the (admittedly kludgy) purpose of this function,
  ;; which is to prevent an infinite recursion in
  ;; c-beginning-of-statement-1 when point starts at a `while' token.
  (null (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
| 594 | |
(defun c-awk-clear-NL-props (beg end)
  ;; This function is run from before-change-hooks.  It removes the
  ;; c-awk-NL-prop text property from everything between BEG and the end of
  ;; the (widened) buffer.  The END parameter is ignored.  This ensures that
  ;; the indentation engine will never use stale values for this property.
  ;;
  ;; This function might do hidden buffer changes.
  (save-restriction
    (widen)
    (let ((buffer-end (point-max)))
      (c-clear-char-properties beg buffer-end 'c-awk-NL-prop))))
| 605 | |
(defun c-awk-unstick-NL-prop ()
  ;; Make sure the text property c-awk-NL-prop is "non-sticky", by adding it
  ;; to `text-property-default-nonsticky' unless it is already in there.
  ;; Without this, a new newline inserted after an old newline (e.g. by C-j)
  ;; would inherit any c-awk-NL-prop from the old newline.  This would be a
  ;; Bad Thing.  This function's action is required by c-put-char-property.
  (when (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
    (unless (assoc 'c-awk-NL-prop text-property-default-nonsticky)
      (push '(c-awk-NL-prop . t) text-property-default-nonsticky))))
| 615 | |
| 616 | ;; The following is purely a diagnostic command, to be commented out of the |
| 617 | ;; final release. ACM, 2002/6/1 |
| 618 | ;; (defun NL-props () |
| 619 | ;; (interactive) |
| 620 | ;; (let (pl-prop cl-prop) |
| 621 | ;; (message "Prev-line: %s Cur-line: %s" |
| 622 | ;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)) |
| 623 | ;; (char-to-string pl-prop) |
| 624 | ;; "nil") |
| 625 | ;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)) |
| 626 | ;; (char-to-string cl-prop) |
| 627 | ;; "nil")))) |
| 628 | ;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31 |
| 629 | ;for now. In the byte compiled version, this causes things to crash because |
| 630 | ;awk-mode-map isn't yet defined. :-( |
| 631 | |
| 632 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 633 | \f |
| 634 | ;; The following section of the code is to do with font-locking. The biggest |
| 635 | ;; problem for font-locking is deciding whether a / is a regular expression |
| 636 | ;; delimiter or a division sign - determining precisely where strings and |
| 637 | ;; regular expressions start and stop is also troublesome. This is the |
| 638 | ;; purpose of the function c-awk-set-syntax-table-properties and the myriad |
| 639 | ;; elisp regular expressions it uses. |
| 640 | ;; |
| 641 | ;; Because AWK is a line oriented language, I felt the normal cc-mode strategy |
| 642 | ;; for font-locking unterminated strings (i.e. font-locking the buffer up to |
| 643 | ;; the next string delimiter as a string) was inappropriate. Instead, |
| 644 | ;; unbalanced string/regexp delimiters are given the warning font, being |
| 645 | ;; refonted with the string font as soon as the matching delimiter is entered. |
| 646 | ;; |
| 647 | ;; This requires the region processed by the current font-lock after-change |
| 648 | ;; function to have access to the start of the string/regexp, which may be |
| 649 | ;; several lines back. The elisp "advice" feature is used on these functions |
| 650 | ;; to allow this. |
| 651 | |
(defun c-awk-beginning-of-logical-line (&optional pos)
  ;; Move point to the start of the (apparent) logical line containing point,
  ;; or containing POS when POS is given, and return that buffer position.
  ;; The logical line starts at the nearest preceding beginning-of-line which
  ;; is not itself preceded by an escaped EOL (a backslash directly before
  ;; the newline).
  ;;
  ;; The resulting position is guaranteed to be "safe" for syntactic
  ;; analysis, i.e. outwith any comment, string or regexp.  IT MAY WELL BE
  ;; that this function should not be executed on a narrowed buffer.
  ;;
  ;; This function might do hidden buffer changes.
  (when pos
    (goto-char pos))
  (forward-line 0)
  ;; At a BOL, the character two positions back is the one just before the
  ;; previous line's newline; a backslash there means we are on a
  ;; continuation line and must keep going back.
  (while (and (not (bobp))
              (eq (char-after (- (point) 2)) ?\\))
    (forward-line -1))
  (point))
| 668 | |
(defun c-awk-beyond-logical-line (&optional pos)
  ;; Return the position just beyond the (apparent) logical line containing
  ;; point, or containing POS when POS is given.  Usually this is the
  ;; beginning of the next line which doesn't follow an escaped EOL.  When
  ;; the logical line runs up to EOB, the result is EOB.
  ;;
  ;; Point is unchanged.
  ;;
  ;; The resulting position is guaranteed to be "safe" for syntactic
  ;; analysis, i.e. outwith any comment, string or regexp.  IT MAY WELL BE
  ;; that this function should not be executed on a narrowed buffer.
  (save-excursion
    (when pos
      (goto-char pos))
    (end-of-line)
    ;; Hop to the end of the next line for as long as the current EOL is
    ;; escaped by a backslash.
    (while (and (not (eobp))
                (eq (char-before) ?\\))
      (end-of-line 2))
    ;; Step over the newline, unless we are already at EOB.
    (min (1+ (point)) (point-max))))
| 688 | |
| 689 | ;; ACM, 2002/02/15: The idea of the next function is to put the "Error font" |
| 690 | ;; on strings/regexps which are missing their closing delimiter. |
| 691 | ;; 2002/4/28. The default syntax for / has been changed from "string" to |
| 692 | ;; "punctuation", to reduce hassle when this character appears within a string |
| 693 | ;; or comment. |
| 694 | |
(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
  ;; BEG and END bracket a (possibly unterminated) string or regexp: the
  ;; opening delimiter is the character after BEG, and the closing delimiter,
  ;; IF ANY, is the character AFTER END.  Put the appropriate syntax-table
  ;; properties on the delimiters and on the contents of the string/regexp.
  ;;
  ;; "String" here can also mean a gawk 3.1 "localizable" string which starts
  ;; with _".  In that case we step over the _ and ignore it; it will get its
  ;; font from an entry in awk-font-lock-keywords.
  ;;
  ;; If the closing delimiter is missing (i.e., there is an EOL there), the
  ;; STRING-FENCE property is set on the opening " or / and on the closing
  ;; EOL.
  ;;
  ;; This function does hidden buffer changes.
  (when (eq (char-after beg) ?_)
    (setq beg (1+ beg)))

  ;; Step 1: the delimiters.
  (if (eq end (point-max))
      ;; String/regexp terminated by EOB: only the opener can be marked.
      (c-put-char-property beg 'syntax-table '(15)) ; (15) = "string fence"
    (let ((opener (char-after beg)))
      (cond ((/= opener (char-after end)) ; Missing end delimiter.
             (c-put-char-property beg 'syntax-table '(15))
             (c-put-char-property end 'syntax-table '(15)))
            ((eq opener ?/)             ; Properly bracketed regexp.
             (c-put-char-property beg 'syntax-table '(7)) ; (7) = "string"
             (c-put-char-property end 'syntax-table '(7))))))
  ;; (A properly bracketed string needs nothing doing to its delimiters.)

  ;; Step 2: give any "s inside the string/regexp (they are all escaped)
  ;; punctuation syntax.
  (save-excursion
    (goto-char (1+ beg))
    (or (eobp)
        (while (search-forward "\"" end t)
          (c-put-char-property (1- (point)) 'syntax-table '(1))))))
| 727 | |
(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " or _" of a string.  Set the syntax-table
  ;; properties on this string, and leave point just after the string (after
  ;; the closing ", or after the EOL which cuts an unterminated string
  ;; short).
  ;;
  ;; Return t if a / immediately after the string would be a division sign,
  ;; nil if it would be a regexp opener.
  ;;
  ;; This function does hidden buffer changes.
  ;; Move over a (possibly unterminated) string, up to its closing delimiter.
  (re-search-forward c-awk-string-without-end-here-re nil t)
  (c-awk-set-string-regexp-syntax-table-properties
   (match-beginning 0) (match-end 0))
  (let ((terminated (looking-at "\"")))
    ;; Step over the closing ", or over the EOL of an unterminated string.
    ;; At EOB there is nothing to step over.
    (when (or terminated
              (looking-at "[\n\r]"))
      (forward-char))
    ;; After a complete string a / divides: in AWK, ("15" / 5) gives 3 ;-)
    ;; After an unterminated one, a / (on the next line) starts a regexp.
    terminated))
| 746 | |
(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Decide whether it is a division sign or a regexp
  ;; opener; in the latter case apply syntax-table properties to the entire
  ;; regexp.  Point is left immediately after the division sign or after the
  ;; regexp, as the case may be.
  ;;
  ;; ANCHOR-STATE-/DIV says whether a / at ANCHOR would have been a division
  ;; sign (value t) or a regexp opener (value nil).  The text from ANCHOR up
  ;; to point is analyzed to determine what the / at point is.
  ;;
  ;; The return value is the corresponding state at the place where point is
  ;; left: t if a / there would be a division sign, otherwise nil.
  ;;
  ;; This function does hidden buffer changes.
  (let* ((slash-pos (point))
         (after-slash (1+ slash-pos)))
    (goto-char anchor)
    (cond
     ;; Analyze the line to find out what the / is.
     ((if anchor-state-/div
          (not (re-search-forward c-awk-regexp-sign-re after-slash t))
        (and (not (re-search-forward c-awk-kwd-regexp-sign-re after-slash t))
             (re-search-forward c-awk-div-sign-re after-slash t)))
      ;; A division sign.
      (goto-char after-slash)
      nil)
     (t
      ;; A regexp opener.  Jump over the regexp innards, setting the match
      ;; data.
      (goto-char slash-pos)
      (re-search-forward c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      (let ((terminated (looking-at "/")))
        ;; Step over the terminating /, or over the EOL which cuts an
        ;; incomplete regexp short.  An unterminated regexp at EOB leaves
        ;; nothing to step over.
        (when (or terminated
                  (looking-at "[\n\r]"))
          (forward-char))
        ;; After an incomplete regexp, a / on the next line would start
        ;; another regexp.
        terminated)))))
| 783 | |
(defun c-awk-set-syntax-table-properties (lim)
  ;; Scan the buffer text between point and LIM, setting (and clearing) the
  ;; syntax-table property where necessary.
  ;;
  ;; This function is designed to be called as the FUNCTION in a MATCHER in
  ;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
  ;; repeated calls from font-lock: See elisp info page "Search-based
  ;; Fontification").  It also gets called, with a bit of glue, from
  ;; after-change-functions when font-lock isn't active.  Point is left
  ;; "undefined" after this function exits.  THE BUFFER SHOULD HAVE BEEN
  ;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
  ;;
  ;; We need to set/clear the syntax-table property on:
  ;; (i) / - It is set to "string" on a / which is the opening or closing
  ;;     delimiter of a properly terminated regexp (and left unset on a
  ;;     division sign).
  ;; (ii) The opener of an unterminated string/regexp: the property "generic
  ;;     string delimiter" is set on both the opening " or / and the end of
  ;;     the line where the closing delimiter is missing.
  ;; (iii) "s inside strings/regexps (these will all be escaped "s).  They are
  ;;     given the property "punctuation".  This will later allow other
  ;;     routines to use the regexp "\\S\"*" to skip over the string innards.
  ;; (iv) Inside a comment, all syntax-table properties are cleared.
  ;;
  ;; The scan starts at the beginning of the logical line containing point,
  ;; and first clears every syntax-table property from there up to LIM.
  ;;
  ;; This function does hidden buffer changes.
  (let (anchor                          ; Start of the stretch being analyzed.
        (anchor-state-/div nil)) ; t means a following / would be a div sign.
    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
    (c-clear-char-properties (point) lim 'syntax-table)
    ;; Once round the next loop for each string, regexp, or div sign
    (while (progn
             ;; Skip any "harmless" lines before the next tricky one.  After
             ;; such lines, a / would again start a regexp.
             (if (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
                 (setq anchor-state-/div nil))
             (< (point) lim))
      (setq anchor (point))
      (search-forward-regexp c-awk-harmless-string*-here-re nil t)
      ;; We are now looking at either a " or a / or a brace/paren/semicolon.
      ;; Do our thing on the string, regexp or division sign or update
      ;; our state.  Each branch moves point past what it handled and yields
      ;; the new value of anchor-state-/div.
      (setq anchor-state-/div
            (cond
             ((looking-at "_?\"")
              (c-awk-syntax-tablify-string))
             ((eq (char-after) ?/)
              (c-awk-syntax-tablify-/ anchor anchor-state-/div))
             ((memq (char-after) '(?{ ?} ?\( ?\;))
              (forward-char)
              nil)
             (t                         ; ?\)
              (forward-char)
              t))))
    nil))
| 837 | |
| 838 | ;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set |
| 839 | ;; the syntax-table properties even when font-lock isn't enabled, for the |
| 840 | ;; subsequent use of movement functions, etc. However, it seems that if font |
| 841 | ;; lock _is_ enabled, we can always leave it to do the job. |
(defvar c-awk-old-ByLL 0)
;; N.B. The symbol must be spelled exactly as in the `defvar' above; a
;; differently capitalized name would make some other, unused, variable
;; buffer-local and leave this one global.
(make-variable-buffer-local 'c-awk-old-ByLL)
;; Just beyond logical line following the region which is about to be changed.
;; Set in c-awk-record-region-clear-NL and used in c-awk-end-of-change-region.
| 846 | |
(defun c-awk-record-region-clear-NL (beg end)
  ;; This function is called exclusively from the before-change-functions
  ;; hook.  It does two things:
  ;; (i) It records, in `c-awk-old-ByLL', the position just beyond the
  ;;   (logical) line on which END lies.
  ;; (ii) It clears the c-awk-NL-prop text properties from END to the end of
  ;;   the buffer.
  ;; BEG is ignored.
  ;;
  ;; On entry, the buffer will have been widened and match-data will have been
  ;; saved; point is undefined on both entry and exit; the return value is
  ;; ignored.
  ;;
  ;; This function does hidden buffer changes.
  ;;
  ;; A single `c-save-buffer-state' covers both actions; nesting a second one
  ;; around the property clearing would add nothing.
  (c-save-buffer-state ()
    (setq c-awk-old-ByLL (c-awk-beyond-logical-line end))
    (c-awk-clear-NL-props end (point-max))))
| 862 | |
(defun c-awk-end-of-change-region (beg end old-len)
  ;; Return the end of the region which needs to be font-locked after a
  ;; change.  This is the end of the logical line on which the change
  ;; happened, either as it was before the change or as it is now, whichever
  ;; is later.  BEG, END and OLD-LEN are the standard after-change
  ;; parameters.
  ;; N.B. point is left undefined.
  (let* ((inserted-len (- end beg))
         ;; The limit recorded before the change, shifted by the net change
         ;; in text length so that it refers to the buffer as it is now.
         (old-limit (+ (- c-awk-old-ByLL old-len) inserted-len))
         (new-limit (c-awk-beyond-logical-line end)))
    (max old-limit new-limit)))
| 870 | |
| 871 | ;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region |
| 872 | ;; specified by the font-lock after-change function must be expanded to |
| 873 | ;; include ALL of any string or regexp within the region. The simplest way to |
| 874 | ;; do this in practice is to use the beginning/end-of-logical-line functions. |
| 875 | ;; Don't overlook the possibility of the buffer change being the "recapturing" |
| 876 | ;; of a previously escaped newline. |
| 877 | |
| 878 | ;; ACM 2008-02-05: |
(defun c-awk-extend-and-syntax-tablify-region (beg end old-len)
  ;; Extend the changed region (BEG END) as needed, storing the result in
  ;; `c-new-BEG' and `c-new-END', then put `syntax-table' properties on the
  ;; extended region.
  ;;
  ;; This function is called from an after-change function; BEG, END and
  ;; OLD-LEN are the standard parameters of such a function.
  ;;
  ;; Point is undefined both before and after this function call, the buffer
  ;; has been widened, and match-data saved.  The return value is ignored.
  ;;
  ;; Since it prepares the buffer for font locking, it must get called before
  ;; `font-lock-after-change-function'.
  ;;
  ;; This function is the AWK value of `c-before-font-lock-function'.
  ;; It does hidden buffer changes.
  (c-save-buffer-state ()
    ;; The end is calculated first, then the beginning (which moves point).
    (setq c-new-END (c-awk-end-of-change-region beg end old-len)
          c-new-BEG (c-awk-beginning-of-logical-line beg))
    (goto-char c-new-BEG)
    (c-awk-set-syntax-table-properties c-new-END)))
| 899 | |
| 900 | ;; Awk regexps written with help from Peter Galbraith |
| 901 | ;; <galbraith@mixing.qc.dfo.ca>. |
| 902 | ;; Take GNU Emacs's 'words out of the following regexp-opts. They don't work |
| 903 | ;; in XEmacs 21.4.4. acm 2002/9/19. |
;; Each element of the list below has one of the forms accepted by
;; `font-lock-keywords' (see the elisp info page "Search-based
;; Fontification").  The regexps built with `regexp-opt' and `concat' are
;; constructed inside `eval-when-compile'.
(defconst awk-font-lock-keywords
  (eval-when-compile
    (list
     ;; Function names.  Subexpression 1 is the keyword "func"/"function",
     ;; subexpression 3 the (optional) name of the function being defined.
     '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?"
       (1 font-lock-keyword-face) (3 font-lock-function-name-face nil t))
     ;;
     ;; Variable names.  (The built-in variables, matched as whole words.)
     (cons
      (concat "\\<"
              (regexp-opt
               '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON"
                 "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FS" "IGNORECASE"
                 "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PROCINFO" "RLENGTH"
                 "RS" "RSTART" "RT" "SUBSEP" "TEXTDOMAIN") t) "\\>")
      'font-lock-variable-name-face)

     ;; Special file names.  (acm, 2002/7/22)
     ;; The following regexp was created by first evaluating this in GNU Emacs 21.1:
     ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n" "/dev/pid"
     ;;                 "/dev/ppid" "/dev/pgrpid" "/dev/user") 'words)
     ;; , removing the "?:" from each "\\(?:" (for backward compatibility with older Emacsen)
     ;; , replacing the "n" in "dev/fd/n" with "[0-9]+"
     ;; , removing the unwanted \\< at the beginning, and finally filling out the
     ;; regexp so that a " must come before, and either a " or heuristic stuff after.
     ;; The surrounding quotes are fontified along with the filename, since, semantically,
     ;; they are an indivisible unit.
     ;; Subexpression 1 is the opening " together with the file name;
     ;; subexpression 8 is the closing ", if there is one.
     '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\
std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
       (1 font-lock-variable-name-face t)
       (8 font-lock-variable-name-face t t))
     ;; Do the same (almost) with
     ;; (regexp-opt '("/inet/tcp/lport/rhost/rport" "/inet/udp/lport/rhost/rport"
     ;;                 "/inet/raw/lport/rhost/rport") 'words)
     ;; This cannot be combined with the above pattern, because the match number
     ;; for the (optional) closing \" would then exceed 9.
     ;; Here subexpression 6 is the closing ", if there is one.
     '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
       (1 font-lock-variable-name-face t)
       (6 font-lock-variable-name-face t t))

     ;; Keywords.  This element is a bare regexp with no face given, so
     ;; font-lock's default face for such elements applies.
     (concat "\\<"
             (regexp-opt
              '("BEGIN" "END" "break" "case" "continue" "default" "delete"
                "do" "else" "exit" "for" "getline" "if" "in" "next"
                "nextfile" "return" "switch" "while")
              t) "\\>")

     ;; Builtins.  The (eval . FORM) wrapper leaves the `list' form to be
     ;; evaluated by font-lock rather than here; presumably this is because
     ;; `c-preprocessor-face-name' must be looked up at run time - TODO
     ;; confirm.
     `(eval . (list
               ,(concat
                 "\\<"
                 (regexp-opt
                  '("adump" "and" "asort" "atan2" "bindtextdomain" "close"
                    "compl" "cos" "dcgettext" "exp" "extension" "fflush"
                    "gensub" "gsub" "index" "int" "length" "log" "lshift"
                    "match" "mktime" "or" "print" "printf" "rand" "rshift"
                    "sin" "split" "sprintf" "sqrt" "srand" "stopme"
                    "strftime" "strtonum" "sub" "substr" "system"
                    "systime" "tolower" "toupper" "xor") t)
                 "\\>")
               0 c-preprocessor-face-name))

     ;; gawk debugging keywords.  (acm, 2002/7/21)
     ;; (Removed, 2003/6/6.  These functions are now fontified as built-ins)
     ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>")
     ;;    0 'font-lock-warning-face)

     ;; User defined functions with an apparent spurious space before the
     ;; opening parenthesis.  acm, 2002/5/30.
     `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s "
                c-awk-escaped-nls*-with-space* "(")
       (0 'font-lock-warning-face))

     ;; Space after \ in what looks like an escaped newline.  2002/5/31
     '("\\\\\\s +$" 0 font-lock-warning-face t)

     ;; Unbalanced string (") or regexp (/) delimiters.  2002/02/16.
     ;; (These are the characters with "generic string delimiter" syntax.)
     '("\\s|" 0 font-lock-warning-face t nil)
     ;; gawk 3.1 localizable strings ( _"translate me!").  2002/5/21
     ;; The _ gets the same face as the string delimiter which follows it.
     '("\\(_\\)\\s|" 1 font-lock-warning-face)
     '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs. 2002/10/6
     ))
  "Default expressions to highlight in AWK mode.")
| 990 | \f |
| 991 | ;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e |
| 992 | |
| 993 | ;; The following three regexps differ from those earlier on in cc-awk.el in |
| 994 | ;; that they assume the syntax-table properties have been set. They are thus |
| 995 | ;; not useful for code which sets these properties. |
(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"")
;; Matches a terminated string/regexp starting at point: a character with
;; "string" syntax, any number of characters without it, then another
;; character with "string" syntax.

(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$")
;; Matches an unterminated string/regexp starting at point, NOT including the
;; eol at the end: a character with "generic string delimiter" syntax,
;; followed by characters without that syntax up to the end of the line.

(defconst c-awk-harmless-pattern-characters*
  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
;; Matches any sequence (possibly empty) of "harmless" characters in a
;; pattern and escaped character pairs.  "Harmless" means anything other than
;; {, ;, #, /, ", \, and the end-of-line characters.
| 1005 | |
(defun c-awk-at-statement-end-p ()
  ;; Point is not inside a comment or string.  Is it AT the end of a
  ;; statement, i.e. immediately after the last non-ws character of the
  ;; statement?  The caller is responsible for widening the buffer, if
  ;; appropriate.
  (unless (bobp)
    (save-excursion
      (backward-char)
      (cond
       ;; The statement is closed explicitly by a } or a ;.
       ((looking-at "[};]"))
       ;; Otherwise the NL property of the line must be one of $ or \, and
       ;; nothing but whitespace/escaped newlines may lie between the
       ;; character before the original point and the EOL or a comment.
       ((memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\))
        (looking-at
         (eval-when-compile
           (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space*
                   "[#\n\r]"))))))))
| 1020 | |
(defun c-awk-beginning-of-defun (&optional arg)
  "Move backward to the beginning of an AWK \"defun\".  With ARG, do it that
many times.  Negative arg -N means move forward to Nth following beginning of
defun.  Returns t unless search stops due to beginning or end of buffer.

By a \"defun\" is meant either a pattern-action pair or a function.  The start
of a defun is recognized as code starting at column zero which is neither a
closing brace nor a comment nor a continuation of the previous line.  Unlike
in some other modes, having an opening brace at column 0 is neither necessary
nor helpful.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (or arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state                ; ensures the buffer is writable.
        nil
      (let ((found t) ; Has the most recent regexp search found b-of-defun?
            bod)      ; Start of the last candidate matched (-ve ARG only).
        (if (>= arg 0)
            ;; Go back one defun each time round the following loop.  (For +ve arg)
            (while (and found (> arg 0) (not (eq (point) (point-min))))
              ;; Go back one "candidate" each time round the next loop until one
              ;; is genuinely a beginning-of-defun.
              (while (and (setq found (search-backward-regexp
                                       "^[^#} \t\n\r]" (point-min) 'stop-at-limit))
                          (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#)))))
              (setq arg (1- arg)))
          ;; The same for a -ve arg.  The forward search leaves point AFTER
          ;; the first character of the candidate, so we first step off any
          ;; defun start we are on, and finally go back to the start of the
          ;; candidate found.
          (if (not (eq (point) (point-max))) (forward-char 1))
          (while (and found (< arg 0) (not (eq (point) (point-max))))
            (while (and (setq found (search-forward-regexp
                                     "^[^#} \t\n\r]" (point-max) 'stop-at-limit))
                        ;; Record the candidate's start straight away: the
                        ;; match data need not survive the property lookup
                        ;; below.  (The position is always non-nil here.)
                        (setq bod (match-beginning 0))
                        (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#)))))
            (setq arg (1+ arg)))
          ;; BOD is nil when no search was made at all (point was already at
          ;; EOB).  In that case there is no match to go to, and the match
          ;; data still belong to some earlier, unrelated search.
          (if (and found bod) (goto-char bod)))
        (eq arg 0)))))
| 1058 | |
(defun c-awk-forward-awk-pattern ()
  ;; Point is at the start of an AWK pattern (which may be null) or function
  ;; declaration.  Move to the pattern's end, and past any trailing space or
  ;; comment.  Typically, we stop at the { which denotes the corresponding AWK
  ;; action/function body.  Otherwise we stop at the EOL (or ;) marking the
  ;; absence of an explicit action.
  ;;
  ;; The loop below relies on the syntax-table properties already having been
  ;; set on strings and regexps (see the regexps it uses).  Each iteration
  ;; first skips "harmless" characters and any comment, then either finishes
  ;; (the cond returns nil) or steps over one "tricky" item and goes round
  ;; again (the cond returns non-nil).
  ;;
  ;; This function might do hidden buffer changes.
  (while
      (progn
        (search-forward-regexp c-awk-harmless-pattern-characters*)
        (if (looking-at "#") (end-of-line)) ; Skip a comment.
        (cond
         ((eobp) nil)
         ((looking-at "[{;]") nil)      ; We've finished!
         ((eolp)
          ;; Carry on onto the next line only if this one is incomplete.
          (if (c-awk-cur-line-incomplete-p)
              (forward-line)            ; returns non-nil
            nil))
         ;; Step over a complete string/regexp, or over an unterminated one
         ;; (up to, but not including, its EOL).
         ((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t))
         ((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t))
         ((looking-at "/") (forward-char) t))))) ; division sign.
| 1081 | |
(defun c-awk-end-of-defun1 ()
  ;; Point is at the start of a "defun".  Move to its end, and return the
  ;; position of that end.
  ;;
  ;; This function might do hidden buffer changes.
  (c-awk-forward-awk-pattern)
  (if (looking-at "{")
      ;; An explicit action or a function body: jump over the brace block.
      (goto-char (scan-sexps (point) 1))
    (if (looking-at ";")
        (forward-char)
      ;; A pattern without an action ends at the EOL, where we already are.
      ;; Anything else means the pattern scan went wrong.
      (or (eolp)
          (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern"))))
  (point))
| 1093 | |
(defun c-awk-beginning-of-defun-p ()
  ;; Are we already at the beginning of a defun?  That is, is point at code
  ;; in column 0 which is neither a } nor a comment, on a line which isn't a
  ;; continuation line of any sort?
  ;;
  ;; This function might do hidden buffer changes.
  (when (looking-at "^[^#} \t\n\r]")
    (not (c-awk-prev-line-incomplete-p))))
| 1101 | |
(defun c-awk-end-of-defun (&optional arg)
  "Move forward to next end of defun.  With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.

An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (or arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state
        nil
      (let ((origin (point))
            defun-end)
        (cond
         ;; Strategy for +ve ARG: If we're not already at a
         ;; beginning-of-defun, move backwards to one.  Then repeat
         ;;   [(i) move forward to the end of the current defun;
         ;;    (ii) if this isn't it, move forward to the next
         ;;         beginning-of-defun].
         ;; ARG is counted down only when step (i) has passed the original
         ;; point.
         ((> arg 0)
          ;; Try to move back to a beginning-of-defun, if not already at one.
          ;; When there is no bo-defun before point, go forward to one from
          ;; the original point instead; if that fails too, we're at EOB,
          ;; tough!
          (unless (or (c-awk-beginning-of-defun-p)
                      (c-awk-beginning-of-defun 1))
            (goto-char origin)
            (c-awk-beginning-of-defun -1))
          ;; Now count forward, one defun at a time.
          (while (and (not (eobp))
                      (c-awk-end-of-defun1)
                      (or (<= (point) origin)
                          (setq arg (1- arg)))
                      (> arg 0)
                      (c-awk-beginning-of-defun -1))))

         ;; For -ve ARG: go back one defun at a time, counting ARG up for
         ;; each defun whose end lies before the original point.
         ((< arg 0)
          (setq defun-end origin)
          (while (and (not (bobp))
                      (c-awk-beginning-of-defun 1)
                      (progn
                        (setq defun-end
                              (if (bobp)
                                  (point)
                                (save-excursion (c-awk-end-of-defun1))))
                        (or (>= defun-end origin)
                            (setq arg (1+ arg))))
                      (< arg 0)))
          ;; Never finish up after the original point.
          (goto-char (min origin defun-end))))))))
| 1146 | |
| 1147 | \f |
| 1148 | (cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21 |
| 1149 | |
;;; cc-awk.el ends here