Commit | Line | Data |
---|---|---|
d9e94c22 MS |
1 | ;;; cc-awk.el --- AWK specific code within cc-mode. |
2 | ||
ab422c4d PE |
3 | ;; Copyright (C) 1988, 1994, 1996, 2000-2013 Free Software Foundation, |
4 | ;; Inc. | |
d9e94c22 | 5 | |
3efc2cd7 | 6 | ;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el) |
d9e94c22 MS |
7 | ;; Maintainer: FSF |
8 | ;; Keywords: AWK, cc-mode, unix, languages | |
bd78fa1d | 9 | ;; Package: cc-mode |
d9e94c22 MS |
10 | |
11 | ;; This file is part of GNU Emacs. | |
12 | ||
b1fc2b50 | 13 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
d9e94c22 | 14 | ;; it under the terms of the GNU General Public License as published by |
b1fc2b50 GM |
15 | ;; the Free Software Foundation, either version 3 of the License, or |
16 | ;; (at your option) any later version. | |
d9e94c22 MS |
17 | |
18 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | ;; GNU General Public License for more details. | |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
b1fc2b50 | 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
d9e94c22 MS |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;; This file contains (most of) the adaptations to cc-mode required for the | |
29 | ;; integration of AWK Mode. | |
6772c8e1 | 30 | ;; It is organized thusly, the sections being separated by page breaks: |
d9e94c22 | 31 | ;; 1. The AWK Mode syntax table. |
6772c8e1 | 32 | ;; 2. Regular expressions for analyzing AWK code. |
0386b551 | 33 | ;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property"). |
3c0ab532 | 34 | ;; 4. Syntax-table property/font-locking stuff, including the |
d9e94c22 | 35 | ;; font-lock-keywords setting. |
0386b551 AM |
36 | ;; 5. The AWK Mode before/after-change-functions. |
37 | ;; 6. AWK Mode specific versions of commands like beginning-of-defun. | |
d9e94c22 MS |
38 | ;; The AWK Mode keymap, abbreviation table, and the mode function itself are |
39 | ;; in cc-mode.el. | |
40 | ||
41 | ;;; Code: | |
42 | ||
43 | (eval-when-compile | |
44 | (let ((load-path | |
45 | (if (and (boundp 'byte-compile-dest-file) | |
46 | (stringp byte-compile-dest-file)) | |
47 | (cons (file-name-directory byte-compile-dest-file) load-path) | |
48 | load-path))) | |
49 | (load "cc-bytecomp" nil t))) | |
50 | ||
51 | (cc-require 'cc-defs) | |
52 | ||
53 | ;; Silence the byte compiler. | |
54 | (cc-bytecomp-defvar font-lock-mode) ; Checked with boundp before use. | |
88a46e21 AM |
55 | (cc-bytecomp-defvar c-new-BEG) |
56 | (cc-bytecomp-defvar c-new-END) | |
d9e94c22 MS |
57 | |
58 | ;; Some functions in cc-engine that are used below. There's a cyclic | |
59 | ;; dependency so it can't be required here. (Perhaps some functions | |
60 | ;; could be moved to cc-engine to avoid it.) | |
61 | (cc-bytecomp-defun c-backward-token-1) | |
62 | (cc-bytecomp-defun c-beginning-of-statement-1) | |
63 | (cc-bytecomp-defun c-backward-sws) | |
bd15c390 | 64 | (cc-bytecomp-defun c-forward-sws) |
d9e94c22 MS |
65 | |
(defvar awk-mode-syntax-table
  (let ((table (make-syntax-table)))
    ;; Backslash is the escape character.
    (modify-syntax-entry ?\\ "\\" table)
    ;; A comment starts at # and is ended by \n, \r or \f.
    (modify-syntax-entry ?\# "< " table)
    (mapc (lambda (comment-ender)
            (modify-syntax-entry comment-ender "> " table))
          '(?\n ?\r ?\f))
    ;; Operator characters (and the apostrophe) are plain punctuation.
    ;; N.B. / can delimit regexps or be a division operator.  By default
    ;; we assume that it is a division sign, and fix the regexp operator
    ;; cases with `font-lock-syntactic-keywords'.  (ACM 2002/4/27.)
    (mapc (lambda (punct)
            (modify-syntax-entry punct "." table))
          '(?/ ?* ?+ ?- ?= ?% ?< ?> ?& ?| ?\'))
    ;; Underscore is a symbol constituent, not a word constituent.
    (modify-syntax-entry ?_ "_" table)
    table)
  "Syntax table in use in AWK Mode buffers.")
90 | ||
0386b551 AM |
91 | \f |
92 | ;; This section defines regular expressions used in the analysis of AWK code. | |
93 | ||
94 | ;; N.B. In the following regexps, an EOL is either \n OR \r. This is because | |
95 | ;; Emacs has in the past used \r to mark hidden lines in some fashion (and | |
96 | ;; maybe still does). | |
97 | ||
(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
;; Matches any escaped (with \) character-pair, including an escaped newline
;; (\n or \r).  A lone \ at the very end of the buffer also matches.
(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
;; Matches any escaped (with \) character-pair, apart from an escaped newline.
;; A lone \ at the very end of the buffer also matches.
(defconst c-awk-comment-without-nl "#.*")
;; Matches an AWK comment, not including the terminating NL (if any).  Note
;; that the "enclosing" (elisp) regexp must ensure the # is real, i.e. that
;; it really does start a comment.
(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
;; Matches a newline (\n or \r), or the end of buffer.

;; "Space" regular expressions.
(eval-and-compile
  (defconst c-awk-escaped-nl "\\\\[\n\r]"))
;; Matches an escaped newline, i.e. a \ followed by \n or \r.
(eval-and-compile
  (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*")))
;; Matches a possibly empty sequence of escaped newlines.  Used in
;; awk-font-lock-keywords.
;; (defconst c-awk-escaped-nls*-with-space*
;;   (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
;; The above RE was very slow.  Its runtime was doubling with each additional
;; space :-(  It is reformulated below with no repetition operator nested
;; inside the outer one:
(eval-and-compile
  (defconst c-awk-escaped-nls*-with-space*
    (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))
;; Matches a possibly empty sequence of escaped newlines with optional
;; interspersed spaces and tabs.  Used in awk-font-lock-keywords.
(defconst c-awk-blank-or-comment-line-re
  (concat "[ \t]*\\(#\\|\\\\?$\\)"))
;; Matches (the tail of) a line containing at most either a comment or an
;; escaped EOL: optional spaces/tabs followed by a #, or by an optional \ at
;; the end of the line.
129 | ||
130 | ;; REGEXPS FOR "HARMLESS" STRINGS/LINES. | |
0386b551 AM |
(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
;; Matches an underline NOT followed by ".  The following character (if there
;; is one) is part of the match; an underline at the end of the buffer also
;; matches.
(defconst c-awk-harmless-char-re "[^_#/\"{}();\\\\\n\r]")
;; Matches any character not significant in the state machine applying
;; syntax-table properties to "s and /s, i.e. anything except _, #, /, ", {,
;; }, (, ), ;, \, \n and \r.
(defconst c-awk-harmless-string*-re
  (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
;; Matches a (possibly empty) sequence of characters insignificant in the
;; state machine applying syntax-table properties to "s and /s.  Escaped
;; pairs and underlines not followed by " count as insignificant.
(defconst c-awk-harmless-string*-here-re
  (concat "\\=" c-awk-harmless-string*-re))
;; Matches the (possibly empty) sequence of "insignificant" chars at point.

(defconst c-awk-harmless-line-char-re "[^_#/\"\\\\\n\r]")
;; Matches any character but a _, #, /, ", \, or newline.  N.B. _" starts a
;; localization string in gawk 3.1.
(defconst c-awk-harmless-line-string*-re
  (concat "\\(" c-awk-harmless-line-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
;; #, or newlines.
(defconst c-awk-harmless-line-re
  (concat c-awk-harmless-line-string*-re
          "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
;; Matches (the tail of) an AWK "logical" line not containing an unescaped
;; " or /.  "Logical" means "possibly containing escaped newlines".  A comment
;; is matched as part of the line even if it contains a " or a /.  The end of
;; buffer is also an end of line.
(defconst c-awk-harmless-lines+-here-re
  (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
;; Matches a sequence of (at least one) "harmless-line" at point.
161 | ||
162 | ||
163 | ;; REGEXPS FOR AWK STRINGS. | |
(defconst c-awk-string-ch-re "[^\"\\\n\r]")
;; Matches any character which can appear unescaped in a string, i.e.
;; anything but ", \, \n and \r.
(defconst c-awk-string-innards-re
  (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
;; Matches the inside of an AWK string (i.e. without the enclosing quotes).
(defconst c-awk-string-without-end-here-re
  (concat "\\=_?\"" c-awk-string-innards-re))
;; Matches an AWK string at point up to, but not including, any terminator.
;; A gawk 3.1+ string may look like _"localizable string".
(defconst c-awk-possibly-open-string-re
  (concat "\"\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"
          "\\(\"\\|$\\|\\'\\)"))
;; Matches an AWK string from its opening " up to and including its closing
;; ", or, when the string is unterminated, up to the end of the line or of
;; the buffer.
0386b551 AM |
176 | |
177 | ;; REGEXPS FOR AWK REGEXPS. | |
(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
;; Matches any AWK regexp character which doesn't require special analysis,
;; i.e. anything but [, /, \, \n and \r.
(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
;; Matches a (possibly empty) sequence of escaped newlines.

;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
;; list", and "[:alpha:]" inside a character list will be known as a
;; "character class".  The terms used for these things vary between regexp
;; descriptions.
(defconst c-awk-regexp-char-class-re
  "\\[:[a-z]+:\\]")
;; Matches a character class spec (e.g. [:alpha:]).
(defconst c-awk-regexp-char-list-re
  (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
          "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
          "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
;; Matches a regexp char list, up to (but not including) EOL if the ] is
;; missing.  After the opening [, an optional ^ and then an optional ] are
;; accepted as part of the list's contents; escaped newlines may be
;; interspersed before and after the ^.
(defconst c-awk-regexp-innards-re
  (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re
          "\\|" c-awk-regexp-normal-re "\\)*"))
;; Matches the inside of an AWK regexp (i.e. without the enclosing /s).
(defconst c-awk-regexp-without-end-re
  (concat "/" c-awk-regexp-innards-re))
;; Matches an AWK regexp up to, but not including, any terminating /.
0386b551 AM |
203 | |
;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
;; REGEXP OPENER OR A DIVISION SIGN.  By "state" in the following is meant
;; whether a '/' at the current position would be a regexp opener or a
;; division sign.
(defconst c-awk-neutral-re
; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
  "\\([}@` \t]\\|\\+\\+\\|--\\|\\\\\\(.\\|[\n\r]\\)\\)")
;; A "neutral" char(pair).  Doesn't change the "state" of a subsequent /.
;; This is space/tab, close brace, an auto-increment/decrement operator or an
;; escaped character (including an escaped newline).  Or one of the (invalid)
;; characters @ or `.  But NOT an end of line (unless escaped).
(defconst c-awk-neutrals*-re
  (concat "\\(" c-awk-neutral-re "\\)*"))
;; A (possibly empty) string of neutral characters (or character pairs).
(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
;; Matches one or more chars, each of which is a constituent of a variable or
;; number, or a ket (i.e. closing bracKET), round or square.  Assume that all
;; characters \x80 to \xff are "letters".
(defconst c-awk-div-sign-re
  (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
;; Will match a piece of AWK buffer ending in / which is a division sign, in
;; a context where an immediate / would be a regexp bracket.  It follows a
;; variable or number (with optional intervening "neutral" characters).  This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
(defconst c-awk-non-arith-op-bra-re
  "[[\({&=:!><,?;'~|]")
;; Matches an opening BRAcket (of any sort), or any operator character
;; apart from +,-,/,*,%.  For the purpose at hand (detecting a / which is a
;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
;; and "--".
(defconst c-awk-regexp-sign-re
  (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
;; Will match a piece of AWK buffer ending in / which is an opening regexp
;; bracket, in a context where an immediate / would be a division sign.  This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
(defconst c-awk-pre-exp-alphanum-kwd-re
  (concat "\\(^\\|\\=\\|[^_\n\r]\\)\\<"
          (regexp-opt '("print" "return" "case") t)
          "\\>\\([^_\n\r]\\|$\\)"))
;; Matches all AWK keywords which can precede expressions (including
;; /regexp/).  The [^_...] guards on either side prevent a match inside an
;; identifier which merely contains the keyword next to an underscore.
(defconst c-awk-kwd-regexp-sign-re
  (concat c-awk-pre-exp-alphanum-kwd-re c-awk-escaped-nls*-with-space* "/"))
;; Matches a piece of AWK buffer ending in <kwd> /, where <kwd> is a keyword
;; which can precede an expression.
0386b551 AM |
251 | |
252 | ;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon" | |
(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
;; Matches any character except #, /, ", \, \n, \r, space and tab.  Unlike
;; `c-awk-harmless-line-char-re', an underscore IS matched here.
(defconst c-awk-non-/-syn-ws*-re
  (concat
   "\\(" c-awk-escaped-nls*-with-space*
   "\\(" c-awk-_-harmless-nonws-char-re "\\|"
   c-awk-non-eol-esc-pair-re "\\|"
   c-awk-possibly-open-string-re
   "\\)"
   "\\)*"))
;; Matches a (possibly empty) sequence of items, each optionally preceded by
;; spaces, tabs and escaped newlines.  An item is a character matched by
;; `c-awk-_-harmless-nonws-char-re', an escaped character (other than an
;; escaped newline) or a (possibly unterminated) string.  The match thus
;; stops before a /, a #, an unescaped EOL or trailing whitespace.
(defconst c-awk-space*-/-re (concat c-awk-escaped-nls*-with-space* "/"))
;; Matches optional whitespace followed by "/".
(defconst c-awk-space*-regexp-/-re
  (concat c-awk-escaped-nls*-with-space* "\\s\""))
;; Matches optional whitespace followed by a "/" with string syntax (a matched
;; regexp delimiter).
(defconst c-awk-space*-unclosed-regexp-/-re
  (concat c-awk-escaped-nls*-with-space* "\\s\|"))
;; Matches optional whitespace followed by a "/" with string fence syntax (an
;; unmatched regexp delimiter).
0386b551 AM |
272 | |
273 | \f | |
d9e94c22 | 274 | ;; ACM, 2002/5/29: |
0d26e0b6 | 275 | ;; |
d9e94c22 MS |
276 | ;; The next section of code is about determining whether or not an AWK |
277 | ;; statement is complete or not. We use this to indent the following line. | |
278 | ;; The determination is pretty straightforward in C, where a statement ends | |
279 | ;; with either a ; or a }. Only "while" really gives any trouble there, since | |
280 | ;; it might be the end of a do-while. In AWK, on the other hand, semicolons | |
281 | ;; are rarely used, and EOLs _usually_ act as "virtual semicolons". In | |
282 | ;; addition, we have the complexity of escaped EOLs. The core of this | |
283 | ;; analysis is in the middle of the function | |
284 | ;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down. | |
285 | ;; | |
286 | ;; To avoid continually repeating this expensive analysis, we "cache" its | |
287 | ;; result in a text-property, c-awk-NL-prop, whose value for a line is set on | |
288 | ;; the EOL (if any) which terminates that line. Should the property be | |
289 | ;; required for the very last line (which has no EOL), it is calculated as | |
290 | ;; required but not cached. The c-awk-NL-prop property should be thought of | |
291 | ;; as only really valid immediately after a buffer change, not a permanently | |
292 | ;; set property. (By contrast, the syntax-table text properties (set by an | |
293 | ;; after-change function) must be constantly updated for the mode to work | |
294 | ;; properly). | |
295 | ;; | |
0386b551 AM |
296 | ;; This text property is also used for "syntactic whitespace" movement, this |
297 | ;; being where the distinction between the values '$' and '}' is significant. | |
298 | ;; | |
d9e94c22 MS |
299 | ;; The valid values for c-awk-NL-prop are: |
300 | ;; | |
301 | ;; nil The property is not currently set for this line. | |
302 | ;; '#' There is NO statement on this line (at most a comment), and no open | |
303 | ;; statement from a previous line which could have been completed on this | |
304 | ;; line. | |
305 | ;; '{' There is an unfinished statement on this (or a previous) line which | |
306 | ;; doesn't require \s to continue onto another line, e.g. the line ends | |
307 | ;; with {, or the && operator, or "if (condition)". Note that even if the | |
308 | ;; newline is redundantly escaped, it remains a '{' line. | |
309 | ;; '\' There is an escaped newline at the end of this line and this '\' is | |
310 | ;; essential to the syntax of the program. (i.e. if it had been a | |
311 | ;; frivolous \, it would have been ignored and the line been given one of | |
312 | ;; the other property values.) | |
0386b551 AM |
313 | ;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual |
314 | ;; semicolon"). This might be a content-free line terminating a statement | |
315 | ;; from the preceding (continued) line (which has property \). | |
316 | ;; '}' A statement, being the last thing (aside from ws/comments) is | |
317 | ;; explicitly terminated on this line by a closing brace (or sometimes a | |
318 | ;; semicolon). | |
d9e94c22 MS |
319 | ;; |
320 | ;; This set of values has been chosen so that the property's value on a line | |
321 | ;; is completely determined by the contents of the line and the property on | |
322 | ;; the previous line, EXCEPT for where a "while" might be the closing | |
323 | ;; statement of a do-while. | |
324 | ||
(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
  ;; Return non-nil when point is immediately after the ) which closes the
  ;; condition of an "if", "for" or "while".
  ;;
  ;; The ) closing "do .... while (<condition>)" deliberately doesn't count:
  ;; this predicate exists essentially to decide whether the next line needs
  ;; to be indented as a continuation.
  ;;
  ;; DO-LIM bounds how far back we look for the "do" of a possible do-while.
  ;;
  ;; This function might do hidden buffer changes.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (and open-paren
             (progn
               (goto-char open-paren)   ; back over "(...)"
               (c-backward-token-1)     ; BOB isn't a problem.
               (cond
                ((looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
                 t)
                ((looking-at "while\\>\\([^_]\\|$\\)")
                 ;; Only a `while' which doesn't close a do-while counts.
                 (not (eq (c-beginning-of-statement-1 do-lim)
                          'beginning)))
                (t nil))))))))
347 | ||
(defun c-awk-after-function-decl-param-list ()
  ;; Are we just after the ) in "function foo (bar)" ?
  ;;
  ;; Return non-nil when the parenthesized list ending at point is preceded
  ;; by an identifier which is itself preceded by the keyword "function" (or
  ;; its abbreviation "func").
  ;;
  ;; This function might do hidden buffer changes.
  (and (eq (char-before) ?\))
       (save-excursion
         (let ((par-pos (c-safe (scan-lists (point) -1 0))))
           (when par-pos
             (goto-char par-pos)        ; back over "(...)"
             (c-backward-token-1)       ; BOB isn't a problem
             (and (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
                  (progn (c-backward-token-1)
                         ;; `_' has symbol (not word) syntax in AWK Mode, so
                         ;; a bare \> would also match at the start of an
                         ;; identifier such as func_tab.  The trailing
                         ;; alternative rejects these, as is done for the
                         ;; keywords in `c-awk-after-if-for-while-condition-p'.
                         (looking-at
                          "func\\(tion\\)?\\>\\([^_]\\|$\\)"))))))))
361 | ||
362 | ;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code). | |
(defun c-awk-after-continue-token ()
  ;; Are we just after a token which can be continued onto the next line
  ;; without a backslash?  Such tokens are , { ? : && || and the keywords
  ;; "do" and "else".
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (c-backward-token-1)              ; FIXME 2002/10/27.  What if this fails?
    (if (and (looking-at "[&|]") (not (bobp)))
        (backward-char))  ; c-backward-token-1 doesn't do this :-(
    ;; `_' has symbol (not word) syntax in AWK Mode, so a bare \> would also
    ;; match at the start of an identifier such as do_it or else_val, making
    ;; a line ending in such a variable look like an unfinished statement.
    ;; The trailing alternative rejects these, as is done for the keywords in
    ;; `c-awk-after-if-for-while-condition-p'.
    (looking-at
     "[,{?:]\\|&&\\|||\\|\\(do\\|else\\)\\>\\([^_]\\|$\\)")))
373 | ||
(defun c-awk-after-rbrace-or-statement-semicolon ()
  ;; Are we just after a } or a ; which closes a statement?
  ;; Be careful about ;s in for loop control bits.  They don't count!
  ;;
  ;; Return t when the character before point is a }, or is a ; which is not
  ;; directly inside the parentheses of a "for (...)" header; otherwise
  ;; return nil.
  ;;
  ;; This function might do hidden buffer changes.
  (or (eq (char-before) ?\})
      (and
       (eq (char-before) ?\;)
       (save-excursion
         (let ((par-pos (c-safe (scan-lists (point) -1 1))))
           ;; A nil PAR-POS means no enclosing bracket could be found (e.g. a
           ;; ; outside any braces).  Such a ; cannot be part of a for loop
           ;; header, so it terminates a statement.
           (or (null par-pos)
               (progn
                 (goto-char par-pos)    ; go back to containing (
                 (not (and (looking-at "(")
                           (c-backward-token-1) ; BOB isn't a problem
                           (looking-at "for\\>"))))))))))
389 | ||
(defun c-awk-back-to-contentful-text-or-NL-prop ()
  ;; Move back to just after the first found of either (i) an EOL which has
  ;; the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
  ;; We return either the value of c-awk-NL-prop (in case (i)) or nil.
  ;; Calling functions can best distinguish cases (ii) and (iii) with (bolp).
  ;;
  ;; Note that an escaped eol counts as whitespace here.
  ;;
  ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
  ;; that the previous line contains an unterminated string (without \).  In
  ;; this case, assume that the previous line's c-awk-NL-prop is a $, and
  ;; set the property on that line's EOL accordingly.
  ;;
  ;; POINT MUST BE AT THE START OF A LINE when calling this function.  This
  ;; is to ensure that the various backward-comment functions will work
  ;; properly.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((nl-prop nil)
        bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
    ;; The whole of the work is done in the loop condition below; the loop
    ;; has no body.  Each clause of the `and' is one way of stopping.
    (while ;; We are at a BOL here.  Go back one line each iteration.
        (and
         ;; Stop at BOB (case (iii)).
         (not (bobp))
         ;; Stop if the preceding EOL already has its property (case (i)).
         (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
         (progn (setq bol-pos (c-point 'bopl))
                (setq bsws-pos (point))
                ;; N.B. the following function will not go back past an EOL if
                ;; there is an open string (without \) on the previous line.
                ;; If we find such, set the c-awk-NL-prop on it, too
                ;; (2004/3/29).
                (c-backward-syntactic-ws bol-pos)
                (or (/= (point) bsws-pos)
                    ;; Point didn't move: apply the kludge described above
                    ;; and stop, returning ?\$.
                    (progn (setq nl-prop ?\$)
                           (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
                           nil)))
         ;; If we had a backslash at EOL, c-backward-syntactic-ws will
         ;; have gone backwards over it.  Check the backslash was "real".
         (progn
           (if (looking-at "[ \t]*\\\\+$")
               (if (progn
                     (end-of-line)
                     (search-backward-regexp
                      "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
                      bol-pos t))
                   (progn (end-of-line) ; escaped EOL.
                          (backward-char)
                          (c-backward-syntactic-ws bol-pos))
                 (end-of-line))) ; The \ at eol is a fake.
           ;; Still at a BOL means that the line had no content: go round
           ;; again for the line before.  Otherwise stop (case (ii)).
           (bolp))))
    nl-prop))
439 | ||
(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
  ;; Calculate and set the value of the c-awk-NL-prop on the immediately
  ;; preceding EOL.  This may also involve doing the same for several
  ;; preceding EOLs.
  ;;
  ;; NOTE that if the property was already set, we return it without
  ;; recalculation.  (This is by accident rather than design.)
  ;;
  ;; Return the property which got set (or was already set) on the previous
  ;; line.  Return nil if we hit BOB.
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; Point and the match data are preserved.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (save-match-data
      (beginning-of-line)
      (let* ((pos (point))              ; BOL of the line we started on.
             (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
        ;; We are either (1) at a BOL (with nl-prop containing the previous
        ;; line's c-awk-NL-prop) or (2) after contentful text on a line.  At
        ;; the BOB counts as case (1), so we test next for bolp rather than
        ;; non-nil nl-prop.
        (when (not (bolp))
          ;; Case (2): classify the contentful line from the text just
          ;; before point, cache the result on its EOL, and step to the
          ;; following line.
          (setq nl-prop
                (cond
                 ;; Incomplete statement which doesn't require escaped EOL?
                 ((or (c-awk-after-if-for-while-condition-p do-lim)
                      (c-awk-after-function-decl-param-list)
                      (c-awk-after-continue-token))
                  ?\{)
                 ;; Escaped EOL (where there's also something to continue)?
                 ((and (looking-at "[ \t]*\\\\$")
                       (not (c-awk-after-rbrace-or-statement-semicolon)))
                  ?\\)
                 ;; A statement was completed on this line.  How?
                 ((memq (char-before) '(?\; ?\})) ?\}) ; Real ; or }
                 (t ?\$)))                ; A virtual semicolon.
          (end-of-line)
          (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
          (forward-line))

        ;; We are now at a (possibly empty) sequence of content-free lines.
        ;; Set c-awk-NL-prop on each of these lines' EOLs.
        (while (< (point) pos) ; one content-free line each iteration.
          (cond ; recalculate nl-prop from previous line's value.
           ;; After a finished statement (or at BOB) an empty line is just
           ;; an empty line.
           ((memq nl-prop '(?\} ?\$ nil)) (setq nl-prop ?\#))
           ;; After an essential \, an empty line without its own \ ends
           ;; the statement with a virtual semicolon.
           ((eq nl-prop ?\\)
            (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\$)))
           ;; ?\# (empty line) and ?\{ (open stmt) don't change.
           )
          (forward-line)
          (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
        nl-prop))))
494 | ||
(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
  ;; Return the c-awk-NL-prop text-property of the previous line, using the
  ;; value cached on that line's EOL when there is one and calculating it
  ;; otherwise.  Return nil if we're already at BOB.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (unless (bobp)
    (let ((cached (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)))
      (if cached
          cached
        (c-awk-calculate-NL-prop-prev-line do-lim)))))
505 | ||
(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
  ;; if necessary.  (As a special case, the property doesn't get set on an
  ;; empty line at EOB (there's no position to set the property on), but the
  ;; function returns the property value an EOL would have got.)
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; When the current line is the last one and has no EOL, a newline is
  ;; inserted temporarily.  It is removed again even if the calculation
  ;; exits non-locally (error or quit), so that no stray newline is ever left
  ;; behind in the buffer.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (let ((extra-nl-end nil)) ; Position just after the artificial EOL, or nil.
      (end-of-line)                ; Necessary for the following test to work.
      (when (= (forward-line) 1)   ; if we were on the last line....
        (insert-char ?\n 1) ; ...artificial eol is needed for comment detection.
        (setq extra-nl-end (point)))
      (unwind-protect
          (c-awk-get-NL-prop-prev-line do-lim)
        ;; Delete by position rather than relative to point, since point
        ;; need not be where we left it after a non-local exit.
        (if extra-nl-end
            (delete-region (1- extra-nl-end) extra-nl-end))))))
d9e94c22 | 523 | |
(defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
  ;; Return non-nil if the previous line ends in an incomplete statement,
  ;; i.e. its c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((prev-prop (c-awk-get-NL-prop-prev-line do-lim)))
    (memq prev-prop '(?\\ ?\{))))
530 | ||
(defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
  ;; Return non-nil if the current line ends in an incomplete statement,
  ;; i.e. its c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((cur-prop (c-awk-get-NL-prop-cur-line do-lim)))
    (memq cur-prop '(?\\ ?\{))))
537 | ||
d355a0b7 SM |
538 | ;; NOTES ON "VIRTUAL SEMICOLONS" |
539 | ;; | |
540 | ;; A "virtual semicolon" is what terminates a statement when there is no ; | |
541 | ;; or } to do the job. Like point, it is considered to lie _between_ two | |
542 | ;; characters. As from mid-March 2004, it is considered to lie just after | |
543 | ;; the last non-syntactic-whitespace character on the line; (previously, it | |
544 | ;; was considered an attribute of the EOL on the line). A real semicolon | |
545 | ;; never counts as a virtual one. | |
0386b551 AM |
546 | |
(defun c-awk-at-vsemi-p (&optional pos)
  ;; Is there a virtual semicolon at POS (or POINT)?
  ;;
  ;; We scan forward from the start of the logical line, skipping strings,
  ;; division signs and regexps, to the place where a virtual semicolon
  ;; could be.  The answer is non-nil only when that place is POS (or POINT)
  ;; itself and the c-awk-NL-prop found there is $.  Point is preserved.
  (save-excursion
    (let* (nl-prop
           (pos-or-point (progn (if pos (goto-char pos)) (point)))
           (bol (c-point 'bol)) (eol (c-point 'eol)))
      ;; NOTE(review): `c-awk-beginning-of-logical-line' is defined elsewhere
      ;; in this file; presumably it moves back over lines joined by escaped
      ;; EOLs -- confirm against its definition.
      (c-awk-beginning-of-logical-line)
      ;; Next `while' goes round one logical line (ending in, e.g. "\\") per
      ;; iteration.  Such a line is rare, and can only be an open string
      ;; ending in an escaped \.
      (while
          (progn
            ;; Next `while' goes over a division sign or /regexp/ per iteration.
            (while
                (and
                 (< (point) eol)
                 (progn
                   ;; Skip everything up to the next /, comment or EOL.  The
                   ;; regexp can match the empty string.
                   (search-forward-regexp c-awk-non-/-syn-ws*-re eol)
                   (looking-at c-awk-space*-/-re)))
              (cond
               ((looking-at c-awk-space*-regexp-/-re) ; /regexp/
                (forward-sexp))
               ((looking-at c-awk-space*-unclosed-regexp-/-re) ; Unclosed /regexp
                (condition-case nil
                    (progn
                      (forward-sexp)
                      (backward-char))   ; Move to end of (logical) line.
                  (error (end-of-line)))) ; Happens at EOB.
               (t                       ; division sign
                (c-forward-syntactic-ws)
                (forward-char))))
            ;; Go round again whilst we haven't yet reached the line
            ;; containing POS (or POINT).
            (< (point) bol))
        (forward-line))
      (and (eq (point) pos-or-point)
           (progn
             ;; Whilst the line ends in an essential \, follow the
             ;; continuation onto the next line, as long as that line is
             ;; blank or a comment.  This loop has no body.
             (while (and (eq (setq nl-prop (c-awk-get-NL-prop-cur-line)) ?\\)
                         (eq (forward-line) 0)
                         (looking-at c-awk-blank-or-comment-line-re)))
             (eq nl-prop ?\$))))))
586 | ||
(defun c-awk-vsemi-status-unknown-p ()
  ;; Return non-nil when we don't (yet) know whether there is a virtual
  ;; semicolon on the current line, i.e. when no c-awk-NL-prop is cached on
  ;; its EOL.
  ;;
  ;; DO NOT under any circumstances attempt to calculate the property here;
  ;; that would defeat the (admittedly kludgy) purpose of this function,
  ;; which is to prevent an infinite recursion in c-beginning-of-statement-1
  ;; when point starts at a `while' token.
  (null (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
594 | ||
(defun c-awk-clear-NL-props (beg end)
  ;; Remove every `c-awk-NL-prop' text property between BEG and the end of the
  ;; (widened) buffer, so that the indentation engine never sees a stale
  ;; value.  END is accepted only because this function is run from
  ;; before-change-hooks; it is otherwise ignored.
  ;;
  ;; This function might do hidden buffer changes.
  (save-restriction
    (widen)
    (let ((buffer-end (point-max)))
      (c-clear-char-properties beg buffer-end 'c-awk-NL-prop))))
605 | ||
(defun c-awk-unstick-NL-prop ()
  ;; Register `c-awk-NL-prop' as a non-sticky text property, unless that has
  ;; already been done.  Were the property sticky, a newline inserted right
  ;; after an existing newline (e.g. by C-j) would inherit the old newline's
  ;; c-awk-NL-prop, which would be wrong.  c-put-char-property relies on this
  ;; having been done.
  ;;
  ;; Nothing happens when `text-property-default-nonsticky' is unbound (the
  ;; variable doesn't exist in XEmacs).
  (when (and (boundp 'text-property-default-nonsticky)
             (null (assq 'c-awk-NL-prop text-property-default-nonsticky)))
    (setq text-property-default-nonsticky
          (cons '(c-awk-NL-prop . t) text-property-default-nonsticky))))
615 | ||
616 | ;; The following is purely a diagnostic command, to be commented out of the | |
617 | ;; final release. ACM, 2002/6/1 | |
618 | ;; (defun NL-props () | |
619 | ;; (interactive) | |
620 | ;; (let (pl-prop cl-prop) | |
621 | ;; (message "Prev-line: %s Cur-line: %s" | |
622 | ;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)) | |
623 | ;; (char-to-string pl-prop) | |
624 | ;; "nil") | |
625 | ;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)) | |
626 | ;; (char-to-string cl-prop) | |
627 | ;; "nil")))) | |
628 | ;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31 | |
629 | ;for now. In the byte compiled version, this causes things to crash because | |
630 | ;awk-mode-map isn't yet defined. :-( | |
631 | ||
632 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
0386b551 | 633 | \f |
d9e94c22 MS |
634 | ;; The following section of the code is to do with font-locking. The biggest |
635 | ;; problem for font-locking is deciding whether a / is a regular expression | |
636 | ;; delimiter or a division sign - determining precisely where strings and | |
637 | ;; regular expressions start and stop is also troublesome. This is the | |
638 | ;; purpose of the function c-awk-set-syntax-table-properties and the myriad | |
639 | ;; elisp regular expressions it uses. | |
640 | ;; | |
641 | ;; Because AWK is a line oriented language, I felt the normal cc-mode strategy | |
642 | ;; for font-locking unterminated strings (i.e. font-locking the buffer up to | |
643 | ;; the next string delimiter as a string) was inappropriate. Instead, | |
644 | ;; unbalanced string/regexp delimiters are given the warning font, being | |
645 | ;; refonted with the string font as soon as the matching delimiter is entered. | |
646 | ;; | |
647 | ;; This requires the region processed by the current font-lock after-change | |
648 | ;; function to have access to the start of the string/regexp, which may be | |
649 | ;; several lines back. The elisp "advice" feature is used on these functions | |
650 | ;; to allow this. | |
651 | ||
(defun c-awk-beginning-of-logical-line (&optional pos)
  ;; Move point to the start of the (apparent) logical line containing point,
  ;; or containing POS when POS is given, and return that buffer position.
  ;; The start of a logical line is the nearest line start, at or before the
  ;; current line, which is not preceded by an escaped EOL (a backslash
  ;; immediately before the newline).
  ;;
  ;; The resulting position is guaranteed to be "safe" for syntactic analysis,
  ;; i.e. outwith any comment, string or regexp.  IT MAY WELL BE that this
  ;; function should not be executed on a narrowed buffer.
  ;;
  ;; This function might do hidden buffer changes.
  (when pos
    (goto-char pos))
  (forward-line 0)
  ;; Point is at a line start.  The character two positions back is the one
  ;; just before the preceding newline; a backslash there means the previous
  ;; line is continued onto this one.
  (while (and (not (bobp))
              (eq (char-after (- (point) 2)) ?\\))
    (forward-line -1))
  (point))
668 | ||
88a46e21 AM |
(defun c-awk-beyond-logical-line (&optional pos)
  ;; Return the position just beyond the (apparent) logical line containing
  ;; point, or containing POS when POS is given.  Usually this is the start of
  ;; the next line which doesn't follow an escaped EOL.  When the logical line
  ;; runs up to EOB, EOB itself is returned.
  ;;
  ;; Point is unchanged.
  ;;
  ;; The returned position is guaranteed to be "safe" for syntactic analysis,
  ;; i.e. outwith any comment, string or regexp.  IT MAY WELL BE that this
  ;; function should not be executed on a narrowed buffer.
  (save-excursion
    (when pos
      (goto-char pos))
    (end-of-line)
    ;; While the line ends in a backslash, hop to the end of the next line.
    (while (and (not (eobp))
                (eq (char-before) ?\\))
      (end-of-line 2))
    ;; Step over the terminating newline, if there is one.
    (if (eobp)
        (point)
      (1+ (point)))))
d9e94c22 | 688 | |
d9e94c22 MS |
689 | ;; ACM, 2002/02/15: The idea of the next function is to put the "Error font" |
690 | ;; on strings/regexps which are missing their closing delimiter. | |
691 | ;; 2002/4/28. The default syntax for / has been changed from "string" to | |
692 | ;; "punctuation", to reduce hassle when this character appears within a string | |
693 | ;; or comment. | |
694 | ||
(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
  ;; BEG and END bracket a (possibly unterminated) string or regexp: the
  ;; opening delimiter is the character after BEG, and the closing delimiter,
  ;; IF ANY, is the character after END.  Put the appropriate syntax-table
  ;; properties on the delimiters and on the contents of this string/regexp.
  ;;
  ;; "String" here can also mean a gawk 3.1 "localizable" string, which starts
  ;; with _".  In that case the _ is stepped over and ignored; it will get its
  ;; font from an entry in awk-font-lock-keywords.
  ;;
  ;; When the closing delimiter is missing (i.e., there is an EOL there), the
  ;; STRING-FENCE property is set on the opening " or / and on the closing
  ;; EOL.
  ;;
  ;; This function does hidden buffer changes.
  (when (eq (char-after beg) ?_)
    (setq beg (1+ beg)))

  ;; First, the delimiters.
  (let ((opener (char-after beg)))
    (cond
     ;; String/regexp terminated by EOB: only the opener can be marked.
     ((eq end (point-max))
      (c-put-char-property beg 'syntax-table '(15))) ; (15) = "string fence"
     ;; The closing delimiter is missing.
     ((/= opener (char-after end))
      (c-put-char-property beg 'syntax-table '(15))
      (c-put-char-property end 'syntax-table '(15)))
     ;; A properly bracketed regexp.
     ((eq opener ?/)
      (c-put-char-property beg 'syntax-table '(7)) ; (7) = "string"
      (c-put-char-property end 'syntax-table '(7)))
     ;; Otherwise it is a properly bracketed string: nothing to do.
     ))

  ;; Second, the contents: give any escaped "s inside the string/regexp
  ;; punctuation syntax.
  (save-excursion
    (goto-char (1+ beg))
    (or (eobp)
        (while (search-forward "\"" end t)
          (c-put-char-property (1- (point)) 'syntax-table '(1))))))
d9e94c22 MS |
727 | |
(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " or _" of a string.  Set the syntax-table
  ;; properties on this string and leave point just after it.
  ;;
  ;; Return t if a / immediately after the string would be a division sign,
  ;; nil if it would be a regexp opener.
  ;;
  ;; This function does hidden buffer changes.

  ;; Move over the (possibly unterminated) string, stopping before its end;
  ;; the match data then brackets the string for the property setter.
  (search-forward-regexp c-awk-string-without-end-here-re nil t)
  (c-awk-set-string-regexp-syntax-table-properties
   (match-beginning 0) (match-end 0))
  (cond
   ;; Properly terminated string.  In AWK, ("15" / 5) gives 3 ;-)
   ((looking-at "\"")
    (forward-char)
    t)
   ;; Unterminated string with EOL: a / on the next line would start a
   ;; regexp.
   ((looking-at "[\n\r]")
    (forward-char)
    nil)
   ;; Otherwise: unterminated string at EOB.  The `cond' yields nil.
   ))
746 | ||
(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Decide whether this / is a division sign or a regexp
  ;; opener; in the latter case apply syntax-table properties to the entire
  ;; regexp.  Point is left immediately after the division sign or the
  ;; regexp, as the case may be.
  ;;
  ;; ANCHOR-STATE-/DIV says whether a / at ANCHOR would have been a division
  ;; sign (value t) or a regexp opener (value nil).  The text from ANCHOR up
  ;; to point is analyzed to find out what the / at point is.
  ;;
  ;; The return value is the corresponding state (see ANCHOR-STATE-/DIV) for
  ;; the position where point is left.
  ;;
  ;; This function does hidden buffer changes.
  (let ((/point (point))
        division-p)
    (goto-char anchor)
    ;; Analyze the line to find out what the / is.
    (setq division-p
          (if anchor-state-/div
              (not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t))
            (and (not (search-forward-regexp c-awk-kwd-regexp-sign-re
                                             (1+ /point) t))
                 (search-forward-regexp c-awk-div-sign-re (1+ /point) t))))
    (cond
     ;; A division sign: just step over it.
     (division-p
      (goto-char (1+ /point))
      nil)
     ;; A regexp opener: jump over the regexp innards, setting the match
     ;; data, then put the properties on.
     (t
      (goto-char /point)
      (search-forward-regexp c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      (cond
       ;; Terminating /.
       ((looking-at "/")
        (forward-char)
        t)
       ;; Incomplete regexp terminated by EOL: a / on the next line would
       ;; start another regexp.
       ((looking-at "[\n\r]")
        (forward-char)
        nil)
       ;; Otherwise: unterminated regexp at EOB.  The `cond' yields nil.
       )))))
783 | ||
(defun c-awk-set-syntax-table-properties (lim)
  ;; Scan the buffer text between point and LIM, setting (and clearing) the
  ;; syntax-table property where necessary.
  ;;
  ;; This function is designed to be called as the FUNCTION in a MATCHER in
  ;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
  ;; repeated calls from font-lock: See elisp info page "Search-based
  ;; Fontification").  It also gets called, with a bit of glue, from
  ;; after-change-functions when font-lock isn't active.  Point is left
  ;; "undefined" after this function exits.  THE BUFFER SHOULD HAVE BEEN
  ;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
  ;;
  ;; The syntax-table property is set/cleared as follows:
  ;; (i) A / which is the opening or closing delimiter of a properly
  ;;   terminated regexp is set to "string"; a division sign is left unset.
  ;; (ii) For an unterminated string/regexp, "generic string delimiter" is
  ;;   set on both the opening " or / and the end of the line where the
  ;;   closing delimiter is missing.
  ;; (iii) "s inside strings/regexps (these will all be escaped "s) are given
  ;;   the property "punctuation".  This will later allow other routines to
  ;;   use the regexp "\\S\"*" to skip over the string innards.
  ;; (iv) Inside a comment, all syntax-table properties are cleared.
  ;;
  ;; This function does hidden buffer changes.
  (let (anchor
        ;; Non-nil means a / following the current position would be a
        ;; division sign.
        (anchor-state-/div nil))
    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
    (c-clear-char-properties (point) lim 'syntax-table)
    ;; Each iteration handles one string, regexp, div sign, or piece of
    ;; state-changing punctuation.
    (while (progn
             ;; Skip any "harmless" lines before the next tricky one.  A new
             ;; line resets the state to "a / would open a regexp".
             (when (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
               (setq anchor-state-/div nil))
             (< (point) lim))
      (setq anchor (point))
      (search-forward-regexp c-awk-harmless-string*-here-re nil t)
      ;; We are now looking at either a " or a / or a brace/paren/semicolon.
      ;; Process the string, regexp or division sign, or merely update the
      ;; state.
      (setq anchor-state-/div
            (cond
             ((looking-at "_?\"")
              (c-awk-syntax-tablify-string))
             ((eq (char-after) ?/)
              (c-awk-syntax-tablify-/ anchor anchor-state-/div))
             ((memq (char-after) '(?{ ?} ?\( ?\;))
              (forward-char)
              nil)
             (t                         ; ?\)
              (forward-char)
              t))))
    nil))
837 | ||
d9e94c22 MS |
838 | ;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set |
839 | ;; the syntax-table properties even when font-lock isn't enabled, for the | |
840 | ;; subsequent use of movement functions, etc. However, it seems that if font | |
841 | ;; lock _is_ enabled, we can always leave it to do the job. | |
88a46e21 AM |
(defvar c-awk-old-ByLL 0)
;; The symbol below must be spelt exactly as in the `defvar' above; a
;; differently cased name would leave `c-awk-old-ByLL' global, hence shared
;; between all AWK buffers.
(make-variable-buffer-local 'c-awk-old-ByLL)
;; The position just beyond the logical line containing the end of the region
;; which is about to be changed.  Set in `c-awk-record-region-clear-NL' (run
;; before a change) and read by `c-awk-end-of-change-region' (run after it).
d9e94c22 | 846 | |
88a46e21 | 847 | (defun c-awk-record-region-clear-NL (beg end) |
d9e94c22 MS |
848 | ;; This function is called exclusively from the before-change-functions hook. |
849 | ;; It does two things: Finds the end of the (logical) line on which END lies, | |
88a46e21 AM |
850 | ;; and clears c-awk-NL-prop text properties from this point onwards. BEG is |
851 | ;; ignored. | |
0386b551 | 852 | ;; |
88a46e21 AM |
853 | ;; On entry, the buffer will have been widened and match-data will have been |
854 | ;; saved; point is undefined on both entry and exit; the return value is | |
855 | ;; ignored. | |
856 | ;; | |
857 | ;; This function does hidden buffer changes. | |
858 | (c-save-buffer-state () | |
859 | (setq c-awk-old-ByLL (c-awk-beyond-logical-line end)) | |
860 | (c-save-buffer-state nil | |
861 | (c-awk-clear-NL-props end (point-max))))) | |
d9e94c22 MS |
862 | |
(defun c-awk-end-of-change-region (beg end old-len)
  ;; Return the end of the region which needs to be font-locked after a
  ;; change.  BEG, END and OLD-LEN are the usual after-change arguments.  The
  ;; result is the end of the logical line on which the change happened,
  ;; either as it was before the change (`c-awk-old-ByLL', shifted by the
  ;; change in text length) or as it is now, whichever is later.
  ;; N.B. point is left undefined.
  (let* ((length-delta (- end beg))
         (old-limit (+ (- c-awk-old-ByLL old-len) length-delta))
         (new-limit (c-awk-beyond-logical-line end)))
    (max old-limit new-limit)))
d9e94c22 MS |
870 | |
871 | ;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region | |
872 | ;; specified by the font-lock after-change function must be expanded to | |
873 | ;; include ALL of any string or regexp within the region. The simplest way to | |
874 | ;; do this in practice is to use the beginning/end-of-logical-line functions. | |
875 | ;; Don't overlook the possibility of the buffer change being the "recapturing" | |
876 | ;; of a previously escaped newline. | |
88a46e21 | 877 | |
0d26e0b6 | 878 | ;; ACM 2008-02-05: |
88a46e21 AM |
(defun c-awk-extend-and-syntax-tablify-region (beg end old-len)
  ;; Widen the region (BEG END) as needed into (c-new-BEG c-new-END), then put
  ;; `syntax-table' properties on that region.
  ;;
  ;; This function is called from an after-change function; BEG, END and
  ;; OLD-LEN are the standard parameters of such a function.
  ;;
  ;; Point is undefined both before and after this function call, the buffer
  ;; has been widened, and match-data saved.  The return value is ignored.
  ;;
  ;; Since it prepares the buffer for font locking, this function must get
  ;; called before `font-lock-after-change-function'.
  ;;
  ;; This function is the AWK value of `c-before-font-lock-function'.
  ;; It does hidden buffer changes.
  (c-save-buffer-state ()
    ;; The end of the region is worked out first, then the beginning.
    (setq c-new-END (c-awk-end-of-change-region beg end old-len)
          c-new-BEG (c-awk-beginning-of-logical-line beg))
    (goto-char c-new-BEG)
    (c-awk-set-syntax-table-properties c-new-END)))
d9e94c22 | 899 | |
3c0ab532 AM |
900 | ;; Awk regexps written with help from Peter Galbraith |
901 | ;; <galbraith@mixing.qc.dfo.ca>. | |
c7015153 | 902 | ;; Take GNU Emacs's 'words out of the following regexp-opts. They don't work |
0d26e0b6 | 903 | ;; in XEmacs 21.4.4. acm 2002/9/19. |
3c0ab532 AM |
(defconst awk-font-lock-keywords
  (eval-when-compile
    (list
     ;; Function names.
     '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?"
       (1 font-lock-keyword-face) (3 font-lock-function-name-face nil t))
     ;;
     ;; Variable names.  Includes the variables specific to gawk (e.g. FPAT,
     ;; FUNCTAB, PREC, ROUNDMODE, SYMTAB).
     (cons
      (concat "\\<"
              (regexp-opt
               '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON"
                 "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FPAT" "FS" "FUNCTAB"
                 "IGNORECASE" "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PREC"
                 "PROCINFO" "RLENGTH" "ROUNDMODE" "RS" "RSTART" "RT" "SUBSEP"
                 "SYMTAB" "TEXTDOMAIN") t) "\\>")
      'font-lock-variable-name-face)

     ;; Special file names.  (acm, 2002/7/22)
     ;; The following regexp was created by first evaluating this in GNU Emacs 21.1:
     ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n" "/dev/pid"
     ;;                 "/dev/ppid" "/dev/pgrpid" "/dev/user") 'words)
     ;; , removing the "?:" from each "\\(?:" (for backward compatibility with older Emacsen)
     ;; , replacing the "n" in "dev/fd/n" with "[0-9]+"
     ;; , removing the unwanted \\< at the beginning, and finally filling out the
     ;; regexp so that a " must come before, and either a " or heuristic stuff after.
     ;; The surrounding quotes are fontified along with the filename, since, semantically,
     ;; they are an indivisible unit.
     ;; N.B. the continuation lines of the string below must start in column
     ;; 0: any leading whitespace would become part of the regexp.
     '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\
std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
       (1 font-lock-variable-name-face t)
       (8 font-lock-variable-name-face t t))
     ;; Do the same (almost) with
     ;; (regexp-opt '("/inet/tcp/lport/rhost/rport" "/inet/udp/lport/rhost/rport"
     ;;                 "/inet/raw/lport/rhost/rport") 'words)
     ;; This cannot be combined with the above pattern, because the match number
     ;; for the (optional) closing \" would then exceed 9.
     '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
       (1 font-lock-variable-name-face t)
       (6 font-lock-variable-name-face t t))

     ;; Keywords.  ("func" and "function" are handled by the first entry.)
     (concat "\\<"
             (regexp-opt
              '("BEGIN" "END" "break" "case" "continue" "default" "delete"
                "do" "else" "exit" "for" "getline" "if" "in" "next"
                "nextfile" "return" "switch" "while")
              t) "\\>")

     ;; Builtins.  Includes the gawk extensions (e.g. asorti, dcngettext,
     ;; isarray, patsplit).  The face is looked up through the variable
     ;; `c-preprocessor-face-name' when font-lock evaluates this entry.
     `(eval . (list
               ,(concat
                 "\\<"
                 (regexp-opt
                  '("adump" "and" "asort" "asorti" "atan2" "bindtextdomain"
                    "close" "compl" "cos" "dcgettext" "dcngettext" "exp"
                    "extension" "fflush" "gensub" "gsub" "index" "int"
                    "isarray" "length" "log" "lshift" "match" "mktime" "or"
                    "patsplit" "print" "printf" "rand" "rshift" "sin" "split"
                    "sprintf" "sqrt" "srand" "stopme" "strftime" "strtonum"
                    "sub" "substr" "system" "systime" "tolower" "toupper"
                    "xor") t)
                 "\\>")
               0 c-preprocessor-face-name))

     ;; gawk debugging keywords.  (acm, 2002/7/21)
     ;; (Removed, 2003/6/6.  These functions are now fontified as built-ins)
     ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>")
     ;;        0 'font-lock-warning-face)

     ;; User defined functions with an apparent spurious space before the
     ;; opening parenthesis.  acm, 2002/5/30.
     `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s "
                c-awk-escaped-nls*-with-space* "(")
       (0 'font-lock-warning-face))

     ;; Space after \ in what looks like an escaped newline.  2002/5/31
     '("\\\\\\s +$" 0 font-lock-warning-face t)

     ;; Unbalanced string (") or regexp (/) delimiters.  2002/02/16.
     '("\\s|" 0 font-lock-warning-face t nil)
     ;; gawk 3.1 localizable strings ( _"translate me!").  2002/5/21
     '("\\(_\\)\\s|" 1 font-lock-warning-face)
     '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs.  2002/10/6
     ))
  "Default expressions to highlight in AWK mode.")
0386b551 AM |
990 | \f |
991 | ;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e | |
d9e94c22 | 992 | |
0386b551 AM |
993 | ;; The following three regexps differ from those earlier on in cc-awk.el in |
994 | ;; that they assume the syntax-table properties have been set. They are thus | |
995 | ;; not useful for code which sets these properties. | |
;; Matches a terminated string/regexp starting at point: an opening delimiter
;; with "string" syntax, any number of characters without that syntax, then
;; the closing delimiter.
(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"")

;; Matches an unterminated string/regexp starting at point: a "string fence"
;; character followed by non-fence characters up to the end of the line, NOT
;; including the eol itself.
(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$")

;; Matches any run (possibly empty) of "harmless" characters in a pattern
;; and/or escaped character pairs.
(defconst c-awk-harmless-pattern-characters*
  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
1005 | ||
0386b551 AM |
(defun c-awk-at-statement-end-p ()
  ;; Point is not inside a comment or string.  Is it AT the end of a
  ;; statement, i.e. immediately after the last non-ws character of the
  ;; statement?  The caller is responsible for widening the buffer, if
  ;; appropriate.
  (unless (bobp)
    (save-excursion
      ;; Examine the character just before the original point.
      (backward-char)
      (or
       ;; An explicit statement terminator.
       (looking-at "[};]")
       ;; Otherwise the line's NL property must be ?\$ or ?\\, and only
       ;; whitespace/escaped newlines may separate this character from the
       ;; end of the line or a comment.
       (and (memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\))
            (looking-at
             (eval-when-compile
               (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space*
                       "[#\n\r]"))))))))
1020 | ||
d9e94c22 MS |
(defun c-awk-beginning-of-defun (&optional arg)
  "Move backward to the beginning of an AWK \"defun\". With ARG, do it that
many times. Negative arg -N means move forward to Nth following beginning of
defun. Returns t unless search stops due to beginning or end of buffer.

By a \"defun\" is meant either a pattern-action pair or a function. The start
of a defun is recognized as code starting at column zero which is neither a
closing brace nor a comment nor a continuation of the previous line. Unlike
in some other modes, having an opening brace at column 0 is neither necessary
nor helpful.

Note that this function might do hidden buffer changes. See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state                ; ensures the buffer is writable.
     nil
     ;; FOUND records whether the most recent regexp search found a
     ;; candidate beginning-of-defun.
     (let ((found t))
       (cond
        ;; Zero or positive ARG: search backwards, one defun per iteration of
        ;; the outer loop.
        ((>= arg 0)
         (while (and found (> arg 0) (not (bobp)))
           ;; Step back over "candidates" until one is genuinely a
           ;; beginning-of-defun, judged by the NL property of the line
           ;; before it.
           (while (and (setq found (search-backward-regexp
                                    "^[^#} \t\n\r]" (point-min) 'stop-at-limit))
                       (not (memq (c-awk-get-NL-prop-prev-line)
                                  '(?\$ ?\} ?\#)))))
           (setq arg (1- arg))))
        ;; Negative ARG: the same thing, searching forwards.
        (t
         ;; Step off the current position so that a defun starting right
         ;; here isn't found again.
         (unless (eobp) (forward-char 1))
         (while (and found (< arg 0) (not (eobp)))
           (while (and (setq found (search-forward-regexp
                                    "^[^#} \t\n\r]" (point-max) 'stop-at-limit))
                       (not (memq (c-awk-get-NL-prop-prev-line)
                                  '(?\$ ?\} ?\#)))))
           (setq arg (1+ arg)))
         ;; The forward search leaves point after the matched character; go
         ;; back to the start of the line.
         (when found (goto-char (match-beginning 0)))))
       (eq arg 0)))))
1058 | ||
(defun c-awk-forward-awk-pattern ()
  ;; Point is at the start of an AWK pattern (which may be null) or function
  ;; declaration.  Move to the pattern's end, and past any trailing space or
  ;; comment.  Typically, we stop at the { which denotes the corresponding AWK
  ;; action/function body.  Otherwise we stop at the EOL (or ;) marking the
  ;; absence of an explicit action.
  ;;
  ;; This function might do hidden buffer changes.
  (while
      ;; The value of this `progn' says whether to go round again.
      (progn
        (search-forward-regexp c-awk-harmless-pattern-characters*)
        ;; Skip a comment, if we've reached one.
        (when (looking-at "#")
          (end-of-line))
        (cond
         ((eobp) nil)
         ((looking-at "[{;]") nil)      ; We've finished!
         ;; At EOL, carry on only when the line is continued onto the next
         ;; one.  `forward-line' returns non-nil.
         ((eolp)
          (and (c-awk-cur-line-incomplete-p)
               (forward-line)))
         ;; Step over a string or regexp, terminated or not.
         ((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t))
         ((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t))
         ;; A / which isn't a regexp delimiter is a division sign.
         ((looking-at "/")
          (forward-char)
          t)))))
1081 | ||
(defun c-awk-end-of-defun1 ()
  ;; Point is at the start of a "defun".  Move to its end and return that
  ;; position.
  ;;
  ;; This function might do hidden buffer changes.
  (c-awk-forward-awk-pattern)
  (cond
   ;; An action or function body: jump over the whole brace block.
   ((looking-at "{")
    (goto-char (scan-sexps (point) 1)))
   ;; A pattern terminated by a semicolon: the defun ends just after it.
   ((looking-at ";")
    (forward-char))
   ;; A pattern with no explicit action: we are already at its end.
   ((eolp))
   (t
    (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern")))
  (point))
1093 | ||
(defun c-awk-beginning-of-defun-p ()
  ;; Are we already at the beginning of a defun?  That is, are we at code in
  ;; column 0 which isn't a } (or a comment), and which isn't a continuation
  ;; line of any sort?
  ;;
  ;; This function might do hidden buffer changes.
  (when (looking-at "^[^#} \t\n\r]")
    (not (c-awk-prev-line-incomplete-p))))
1101 | ||
(defun c-awk-end-of-defun (&optional arg)
  "Move forward to next end of defun. With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.

An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'.

Note that this function might do hidden buffer changes. See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state
     nil
     (let ((origin (point)) defun-end)
       ;; Strategy for +ve ARG: get to a beginning-of-defun, then repeat
       ;;   [(i) move forward to the end of the current defun;
       ;;    (ii) if that isn't the one wanted, move forward to the next
       ;;         beginning-of-defun].
       ;; ARG is counted down only once step (i) has passed the original
       ;; point.
       (when (> arg 0)
         ;; Try to move back to a beginning-of-defun, if not already at one.
         (unless (c-awk-beginning-of-defun-p)
           (unless (c-awk-beginning-of-defun 1) ; No bo-defun before point.
             (goto-char origin)
             (c-awk-beginning-of-defun -1))) ; if this fails, we're at EOB, tough!
         ;; Now count forward, one defun at a time.
         (while (and (not (eobp))
                     (c-awk-end-of-defun1)
                     (if (> (point) origin) (setq arg (1- arg)) t)
                     (> arg 0)
                     (c-awk-beginning-of-defun -1))))

       ;; -ve ARG: step back one beginning-of-defun at a time, noting where
       ;; each defun ends.  ARG is counted up only for defuns which end
       ;; before the original point.
       (when (< arg 0)
         (setq defun-end origin)
         (while (and (not (bobp))
                     (c-awk-beginning-of-defun 1)
                     (if (< (setq defun-end
                                  (if (bobp)
                                      (point)
                                    (save-excursion (c-awk-end-of-defun1))))
                            origin)
                         (setq arg (1+ arg))
                       t)
                     (< arg 0)))
         ;; Never end up further forward than where we started.
         (goto-char (min origin defun-end)))))))
1146 | ||
0386b551 | 1147 | \f |
d9e94c22 | 1148 | (cc-provide 'cc-awk) ; Feature name changed from 'awk-mode to 'cc-awk, ACM 2002/5/21
ab5796a9 | 1149 | |
d9e94c22 | 1150 | ;;; cc-awk.el ends here