Replace "Maintainer: FSF" with the emacs-devel mailing address
[bpt/emacs.git] / lisp / progmodes / cc-awk.el
CommitLineData
d9e94c22
MS
1;;; cc-awk.el --- AWK specific code within cc-mode.
2
ba318903 3;; Copyright (C) 1988, 1994, 1996, 2000-2014 Free Software Foundation,
ab422c4d 4;; Inc.
d9e94c22 5
3efc2cd7 6;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
34dc21db 7;; Maintainer: emacs-devel@gnu.org
d9e94c22 8;; Keywords: AWK, cc-mode, unix, languages
bd78fa1d 9;; Package: cc-mode
d9e94c22
MS
10
11;; This file is part of GNU Emacs.
12
b1fc2b50 13;; GNU Emacs is free software: you can redistribute it and/or modify
d9e94c22 14;; it under the terms of the GNU General Public License as published by
b1fc2b50
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
d9e94c22
MS
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
b1fc2b50 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
d9e94c22
MS
25
26;;; Commentary:
27
28;; This file contains (most of) the adaptations to cc-mode required for the
29;; integration of AWK Mode.
6772c8e1 30;; It is organized thusly, the sections being separated by page breaks:
d9e94c22 31;; 1. The AWK Mode syntax table.
6772c8e1 32;; 2. Regular expressions for analyzing AWK code.
0386b551 33;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property").
3c0ab532 34;; 4. Syntax-table property/font-locking stuff, including the
d9e94c22 35;; font-lock-keywords setting.
0386b551
AM
36;; 5. The AWK Mode before/after-change-functions.
37;; 6. AWK Mode specific versions of commands like beginning-of-defun.
d9e94c22
MS
38;; The AWK Mode keymap, abbreviation table, and the mode function itself are
39;; in cc-mode.el.
40
41;;; Code:
42
;; At compile time, load cc-bytecomp.  If the byte compiler's destination
;; file is known, its directory is put at the front of `load-path' first.
(eval-when-compile
  (let ((load-path
         (if (and (boundp 'byte-compile-dest-file)
                  (stringp byte-compile-dest-file))
             (cons (file-name-directory byte-compile-dest-file) load-path)
           load-path)))
    (load "cc-bytecomp" nil t)))

(cc-require 'cc-defs)

;; Silence the byte compiler.
(cc-bytecomp-defvar font-lock-mode)     ; Checked with boundp before use.
(cc-bytecomp-defvar c-new-BEG)
(cc-bytecomp-defvar c-new-END)

;; Some functions in cc-engine that are used below.  There's a cyclic
;; dependency so it can't be required here.  (Perhaps some functions
;; could be moved to cc-engine to avoid it.)
(cc-bytecomp-defun c-backward-token-1)
(cc-bytecomp-defun c-beginning-of-statement-1)
(cc-bytecomp-defun c-backward-sws)
(cc-bytecomp-defun c-forward-sws)
d9e94c22
MS
65
(defvar awk-mode-syntax-table
  (let ((st (make-syntax-table)))
    ;; Backslash is the escape character.
    (modify-syntax-entry ?\\ "\\" st)
    ;; Newline, carriage return and form feed each end a comment; # starts
    ;; one.
    (modify-syntax-entry ?\n "> " st)
    (modify-syntax-entry ?\r "> " st)
    (modify-syntax-entry ?\f "> " st)
    (modify-syntax-entry ?\# "< " st)
    ;; / can delimit regexes or be a division operator.  By default we assume
    ;; that it is a division sign, and fix the regexp operator cases with
    ;; `font-lock-syntactic-keywords'.
    (modify-syntax-entry ?/ "." st)     ; ACM 2002/4/27.
    ;; The remaining operator characters are plain punctuation.
    (modify-syntax-entry ?* "." st)
    (modify-syntax-entry ?+ "." st)
    (modify-syntax-entry ?- "." st)
    (modify-syntax-entry ?= "." st)
    (modify-syntax-entry ?% "." st)
    (modify-syntax-entry ?< "." st)
    (modify-syntax-entry ?> "." st)
    (modify-syntax-entry ?& "." st)
    (modify-syntax-entry ?| "." st)
    ;; Underscore is a symbol constituent; the single quote is punctuation.
    (modify-syntax-entry ?_ "_" st)
    (modify-syntax-entry ?\' "." st)
    st)
  "Syntax table in use in AWK Mode buffers.")
90
0386b551
AM
91\f
;; This section defines regular expressions used in the analysis of AWK code.

;; N.B. In the following regexps, an EOL is either \n OR \r.  This is because
;; Emacs has in the past used \r to mark hidden lines in some fashion (and
;; maybe still does).

(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
;; Matches any escaped (with \) character-pair, including an escaped newline.
;; A \ at the very end of the buffer/string also matches.
(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
;; Matches any escaped (with \) character-pair, apart from an escaped newline.
(defconst c-awk-comment-without-nl "#.*")
;; Matches an AWK comment, not including the terminating NL (if any).  Note
;; that the "enclosing" (elisp) regexp must ensure the # is real.
(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
;; Matches a newline, or the end of buffer.
107
;; "Space" regular expressions.
(eval-and-compile
  (defconst c-awk-escaped-nl "\\\\[\n\r]"))
;; Matches an escaped newline.
(eval-and-compile
  (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*")))
;; Matches a possibly empty sequence of escaped newlines.  Used in
;; awk-font-lock-keywords.
;; (defconst c-awk-escaped-nls*-with-space*
;;   (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
;; The above RE was very slow.  Its runtime was doubling with each additional
;; space :-(  Reformulate it as below:
(eval-and-compile
  (defconst c-awk-escaped-nls*-with-space*
    (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))
;; Matches a possibly empty sequence of escaped newlines with optional
;; interspersed spaces and tabs.  Used in awk-font-lock-keywords.
(defconst c-awk-blank-or-comment-line-re
  "[ \t]*\\(#\\|\\\\?$\\)")
;; Matches (the tail of) a line containing at most either a comment or an
;; escaped EOL.
129
;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
;; Matches an underline NOT followed by ".
(defconst c-awk-harmless-char-re "[^_#/\"{}();\\\\\n\r]")
;; Matches any character not significant in the state machine applying
;; syntax-table properties to "s and /s.
(defconst c-awk-harmless-string*-re
  (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
;; Matches a (possibly empty) sequence of characters insignificant in the
;; state machine applying syntax-table properties to "s and /s.
(defconst c-awk-harmless-string*-here-re
  (concat "\\=" c-awk-harmless-string*-re))
;; Matches the (possibly empty) sequence of "insignificant" chars at point.

(defconst c-awk-harmless-line-char-re "[^_#/\"\\\\\n\r]")
;; Matches any character but a _, #, /, ", \, or newline.  N.B. _" starts a
;; localization string in gawk 3.1
(defconst c-awk-harmless-line-string*-re
  (concat "\\(" c-awk-harmless-line-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
;; #, or newlines.
(defconst c-awk-harmless-line-re
  (concat c-awk-harmless-line-string*-re
          "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
;; Matches (the tail of) an AWK "logical" line not containing an unescaped
;; " or /.  "logical" means "possibly containing escaped newlines".  A comment
;; is matched as part of the line even if it contains a " or a /.  The End of
;; buffer is also an end of line.
(defconst c-awk-harmless-lines+-here-re
  (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
;; Matches a sequence of (at least one) "harmless-line" at point.
161
162
;; REGEXPS FOR AWK STRINGS.
(defconst c-awk-string-ch-re "[^\"\\\n\r]")
;; Matches any character which can appear unescaped in a string.
(defconst c-awk-string-innards-re
  (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
;; Matches the inside of an AWK string (i.e. without the enclosing quotes).
(defconst c-awk-string-without-end-here-re
  (concat "\\=_?\"" c-awk-string-innards-re))
;; Matches an AWK string at point up to, but not including, any terminator.
;; A gawk 3.1+ string may look like _"localizable string".
(defconst c-awk-possibly-open-string-re
  (concat "\"" c-awk-string-innards-re
          "\\(\"\\|$\\|\\'\\)"))
;; Matches an AWK string starting at its opening ", and extending either
;; through its closing " or, if there isn't one, up to the end of the line or
;; buffer.
0386b551
AM
176
;; REGEXPS FOR AWK REGEXPS.
(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
;; Matches any AWK regexp character which doesn't require special analysis.
(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
;; Matches a (possibly empty) sequence of escaped newlines.

;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
;; list", and "[:alpha:]" inside a character list will be known as a
;; "character class".  These terms for these things vary between regexp
;; descriptions.
(defconst c-awk-regexp-char-class-re
  "\\[:[a-z]+:\\]")
;; Matches a character class spec (e.g. [:alpha:]).
(defconst c-awk-regexp-char-list-re
  (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
          "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
          "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
;; Matches a regexp char list, up to (but not including) EOL if the ] is
;; missing.
(defconst c-awk-regexp-innards-re
  (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re
          "\\|" c-awk-regexp-normal-re "\\)*"))
;; Matches the inside of an AWK regexp (i.e. without the enclosing /s)
(defconst c-awk-regexp-without-end-re
  (concat "/" c-awk-regexp-innards-re))
;; Matches an AWK regexp up to, but not including, any terminating /.
0386b551
AM
203
;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
;; REGEXP OPENER OR A DIVISION SIGN.  By "state" in the following is meant
;; whether a '/' at the current position would be a regexp opener or a
;; division sign.
(defconst c-awk-neutral-re
;  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
  "\\([}@` \t]\\|\\+\\+\\|--\\|\\\\\\(.\\|[\n\r]\\)\\)")
;; A "neutral" char(pair).  Doesn't change the "state" of a subsequent /.
;; This is space/tab, close brace, an auto-increment/decrement operator or an
;; escaped character.  Or one of the (invalid) characters @ or `.  But NOT an
;; end of line (unless escaped).
(defconst c-awk-neutrals*-re
  (concat "\\(" c-awk-neutral-re "\\)*"))
;; A (possibly empty) string of neutral characters (or character pairs).
(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
;; Matches one or more chars, each of which is a constituent of a variable or
;; number, or a ket (i.e. closing bracKET), round or square.  Assume that all
;; characters \x80 to \xff are "letters".
(defconst c-awk-div-sign-re
  (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
;; Will match a piece of AWK buffer ending in / which is a division sign, in
;; a context where an immediate / would be a regexp bracket.  It follows a
;; variable or number (with optional intervening "neutral" characters).  This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
(defconst c-awk-non-arith-op-bra-re
  "[[\({&=:!><,?;'~|]")
;; Matches an opening BRAcket (of any sort), or any operator character
;; apart from +,-,/,*,%.  For the purpose at hand (detecting a / which is a
;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
;; and "--".
(defconst c-awk-regexp-sign-re
  (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
;; Will match a piece of AWK buffer ending in / which is an opening regexp
;; bracket, in a context where an immediate / would be a division sign.  This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
(defconst c-awk-pre-exp-alphanum-kwd-re
  (concat "\\(^\\|\\=\\|[^_\n\r]\\)\\<"
          (regexp-opt '("print" "return" "case") t)
          "\\>\\([^_\n\r]\\|$\\)"))
;; Matches all AWK keywords which can precede expressions (including
;; /regexp/).
(defconst c-awk-kwd-regexp-sign-re
  (concat c-awk-pre-exp-alphanum-kwd-re c-awk-escaped-nls*-with-space* "/"))
;; Matches a piece of AWK buffer ending in <kwd> /, where <kwd> is a keyword
;; which can precede an expression.
0386b551
AM
251
;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
;; Matches any single character other than #, /, ", \, an EOL, a space or a
;; tab.
(defconst c-awk-non-/-syn-ws*-re
  (concat
   "\\(" c-awk-escaped-nls*-with-space*
         "\\(" c-awk-_-harmless-nonws-char-re "\\|"
               c-awk-non-eol-esc-pair-re "\\|"
               c-awk-possibly-open-string-re
         "\\)"
   "\\)*"))
;; Matches a (possibly empty) sequence of items, each being optional
;; spaces/tabs/escaped newlines followed by one of: a single character
;; matched by c-awk-_-harmless-nonws-char-re, an escaped character pair
;; (other than an escaped newline), or a (possibly unterminated) string.
(defconst c-awk-space*-/-re (concat c-awk-escaped-nls*-with-space* "/"))
;; Matches optional whitespace followed by "/".
(defconst c-awk-space*-regexp-/-re
  (concat c-awk-escaped-nls*-with-space* "\\s\""))
;; Matches optional whitespace followed by a "/" with string syntax (a matched
;; regexp delimiter).
(defconst c-awk-space*-unclosed-regexp-/-re
  (concat c-awk-escaped-nls*-with-space* "\\s\|"))
;; Matches optional whitespace followed by a "/" with string fence syntax (an
;; unmatched regexp delimiter).
0386b551
AM
272
273\f
;; ACM, 2002/5/29:
;;
;; The next section of code is about determining whether or not an AWK
;; statement is complete.  We use this to indent the following line.
;; The determination is pretty straightforward in C, where a statement ends
;; with either a ; or a }.  Only "while" really gives any trouble there, since
;; it might be the end of a do-while.  In AWK, on the other hand, semicolons
;; are rarely used, and EOLs _usually_ act as "virtual semicolons".  In
;; addition, we have the complexity of escaped EOLs.  The core of this
;; analysis is in the middle of the function
;; c-awk-calculate-NL-prop-prev-line, further down in this section.
;;
;; To avoid continually repeating this expensive analysis, we "cache" its
;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
;; the EOL (if any) which terminates that line.  Should the property be
;; required for the very last line (which has no EOL), it is calculated as
;; required but not cached.  The c-awk-NL-prop property should be thought of
;; as only really valid immediately after a buffer change, not a permanently
;; set property.  (By contrast, the syntax-table text properties (set by an
;; after-change function) must be constantly updated for the mode to work
;; properly).
295;;
0386b551
AM
296;; This text property is also used for "syntactic whitespace" movement, this
297;; being where the distinction between the values '$' and '}' is significant.
298;;
d9e94c22
MS
299;; The valid values for c-awk-NL-prop are:
300;;
301;; nil The property is not currently set for this line.
302;; '#' There is NO statement on this line (at most a comment), and no open
303;; statement from a previous line which could have been completed on this
304;; line.
305;; '{' There is an unfinished statement on this (or a previous) line which
306;; doesn't require \s to continue onto another line, e.g. the line ends
307;; with {, or the && operator, or "if (condition)". Note that even if the
308;; newline is redundantly escaped, it remains a '{' line.
309;; '\' There is an escaped newline at the end of this line and this '\' is
310;; essential to the syntax of the program. (i.e. if it had been a
311;; frivolous \, it would have been ignored and the line been given one of
312;; the other property values.)
0386b551
AM
313;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual
314;; semicolon"). This might be a content-free line terminating a statement
315;; from the preceding (continued) line (which has property \).
316;; '}' A statement, being the last thing (aside from ws/comments) is
317;; explicitly terminated on this line by a closing brace (or sometimes a
318;; semicolon).
d9e94c22
MS
319;;
320;; This set of values has been chosen so that the property's value on a line
321;; is completely determined by the contents of the line and the property on
322;; the previous line, EXCEPT for where a "while" might be the closing
323;; statement of a do-while.
324
(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
  ;; Are we just after the ) in "if/for/while (<condition>)"?
  ;;
  ;; Note that the end of the ) in a do .... while (<condition>) doesn't
  ;; count, since the purpose of this routine is essentially to decide
  ;; whether to indent the next line.
  ;;
  ;; DO-LIM sets a limit on how far back we search for the "do" of a possible
  ;; do-while.
  ;;
  ;; Point is preserved.  The result is nil if the character before point
  ;; isn't a ) or if the matching ( can't be found.
  ;;
  ;; This function might do hidden buffer changes.
  (and
   (eq (char-before) ?\))
   (save-excursion
     (let ((par-pos (c-safe (scan-lists (point) -1 0))))
       (when par-pos
         (goto-char par-pos)            ; back over "(...)"
         (c-backward-token-1)           ; BOB isn't a problem.
         (or (looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
             (and (looking-at "while\\>\\([^_]\\|$\\)") ; Ensure this isn't a do-while.
                  (not (eq (c-beginning-of-statement-1 do-lim)
                           'beginning)))))))))
347
(defun c-awk-after-function-decl-param-list ()
  ;; Are we just after the ) in "function foo (bar)" ?
  ;;
  ;; The test is: the token before the matching ( must look like an
  ;; identifier, and the token before that must be "func" or "function".
  ;; Point is preserved.
  ;;
  ;; This function might do hidden buffer changes.
  (and (eq (char-before) ?\))
       (save-excursion
         (let ((par-pos (c-safe (scan-lists (point) -1 0))))
           (when par-pos
             (goto-char par-pos)        ; back over "(...)"
             (c-backward-token-1)       ; BOB isn't a problem
             (and (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
                  (progn (c-backward-token-1)
                         (looking-at "func\\(tion\\)?\\>"))))))))
361
;; 2002/11/8:  FIXME!  Check c-backward-token-1/2 for success (0 return code).
(defun c-awk-after-continue-token ()
;; Are we just after a token which can be continued onto the next line without
;; a backslash?
;;
;; The tokens recognized are: , { ? : && || do else.  Point is preserved.
;;
;; This function might do hidden buffer changes.
  (save-excursion
    (c-backward-token-1)              ; FIXME 2002/10/27.  What if this fails?
    ;; Step back one more char so that the second char of && or || doesn't
    ;; hide the operator.
    (if (and (looking-at "[&|]") (not (bobp)))
        (backward-char)) ; c-backward-token-1 doesn't do this :-(
    (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
373
(defun c-awk-after-rbrace-or-statement-semicolon ()
  ;; Are we just after a } or a ; which closes a statement?
  ;; Be careful about ;s in for loop control bits.  They don't count!
  ;;
  ;; A ; is only accepted when an enclosing bracket can be found and that
  ;; bracket is not the ( of a "for (".  Point is preserved.
  ;;
  ;; This function might do hidden buffer changes.
  (or (eq (char-before) ?\})
      (and
       (eq (char-before) ?\;)
       (save-excursion
         (let ((par-pos (c-safe (scan-lists (point) -1 1))))
           (when par-pos
             (goto-char par-pos)        ; go back to containing (
             (not (and (looking-at "(")
                       (c-backward-token-1) ; BOB isn't a problem
                       (looking-at "for\\>")))))))))
389
(defun c-awk-back-to-contentful-text-or-NL-prop ()
  ;;  Move back to just after the first found of either (i) an EOL which has
  ;;  the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
  ;;  We return either the value of c-awk-NL-prop (in case (i)) or nil.
  ;;  Calling functions can best distinguish cases (ii) and (iii) with (bolp).
  ;;
  ;;  Note that an escaped eol counts as whitespace here.
  ;;
  ;;  Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
  ;;  that the previous line contains an unterminated string (without \).  In
  ;;  this case, assume that the previous line's c-awk-NL-prop is a $.
  ;;
  ;;  POINT MUST BE AT THE START OF A LINE when calling this function.  This
  ;;  is to ensure that the various backward-comment functions will work
  ;;  properly.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((nl-prop nil)
        bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
    (while ;; We are at a BOL here.  Go back one line each iteration.
        (and
         (not (bobp))
         (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
         (progn (setq bol-pos (c-point 'bopl))
                (setq bsws-pos (point))
                ;; N.B. the following function will not go back past an EOL if
                ;; there is an open string (without \) on the previous line.
                ;; If we find such, set the c-awk-NL-prop on it, too
                ;; (2004/3/29).
                (c-backward-syntactic-ws bol-pos)
                (or (/= (point) bsws-pos)
                    (progn (setq nl-prop ?\$)
                           (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
                           nil)))
         ;; If we had a backslash at EOL, c-backward-syntactic-ws will
         ;; have gone backwards over it.  Check the backslash was "real".
         (progn
           (if (looking-at "[ \t]*\\\\+$")
               (if (progn
                     (end-of-line)
                     (search-backward-regexp
                      "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL  :-)
                      bol-pos t))
                   (progn (end-of-line)   ; escaped EOL.
                          (backward-char)
                          (c-backward-syntactic-ws bol-pos))
                 (end-of-line)))          ; The \ at eol is a fake.
           ;; Carry on looping only if the previous line turned out to have
           ;; no content, i.e. we've ended up at its BOL.
           (bolp))))
    nl-prop))
439
(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
  ;; Calculate and set the value of the c-awk-NL-prop on the immediately
  ;; preceding EOL.  This may also involve doing the same for several
  ;; preceding EOLs.
  ;;
  ;; NOTE that if the property was already set, we return it without
  ;; recalculation.  (This is by accident rather than design.)
  ;;
  ;; Return the property which got set (or was already set) on the previous
  ;; line.  Return nil if we hit BOB.
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; Point and the match data are preserved.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (save-match-data
      (beginning-of-line)
      (let* ((pos (point))
             (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
        ;; We are either (1) at a BOL (with nl-prop containing the previous
        ;; line's c-awk-NL-prop) or (2) after contentful text on a line.  At
        ;; the BOB counts as case (1), so we test next for bolp rather than
        ;; non-nil nl-prop.
        (when (not (bolp))
          (setq nl-prop
                (cond
                 ;; Incomplete statement which doesn't require escaped EOL?
                 ((or (c-awk-after-if-for-while-condition-p do-lim)
                      (c-awk-after-function-decl-param-list)
                      (c-awk-after-continue-token))
                  ?\{)
                 ;; Escaped EOL (where there's also something to continue)?
                 ((and (looking-at "[ \t]*\\\\$")
                       (not (c-awk-after-rbrace-or-statement-semicolon)))
                  ?\\)
                 ;; A statement was completed on this line.  How?
                 ((memq (char-before) '(?\; ?\})) ?\}) ; Real ; or }
                 (t ?\$)))                             ; A virtual semicolon.
          (end-of-line)
          (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
          (forward-line))

        ;; We are now at a (possibly empty) sequence of content-free lines.
        ;; Set c-awk-NL-prop on each of these lines' EOL.
        (while (< (point) pos)          ; one content-free line each iteration.
          (cond              ; recalculate nl-prop from previous line's value.
           ((memq nl-prop '(?\} ?\$ nil)) (setq nl-prop ?\#))
           ((eq nl-prop ?\\)
            (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\$)))
           ;; ?\# (empty line) and ?\{ (open stmt) don't change.
           )
          (forward-line)
          (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
        nl-prop))))
494
(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the previous line, calculating
  ;; it if necessary.  Return nil if we're already at BOB.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (if (bobp)
      nil
    ;; Use the cached value on the previous line's EOL when there is one.
    (or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)
        (c-awk-calculate-NL-prop-prev-line do-lim))))
505
(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
  ;; if necessary.  (As a special case, the property doesn't get set on an
  ;; empty line at EOB (there's no position to set the property on), but the
  ;; function returns the property value an EOL would have got.)
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; When the current line is the last one in the buffer, a newline is
  ;; inserted temporarily and deleted again before returning.  Point is
  ;; preserved.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (let ((extra-nl nil))
      (end-of-line)                ; Necessary for the following test to work.
      (when (= (forward-line) 1)        ; if we were on the last line....
        (insert-char ?\n 1) ; ...artificial eol is needed for comment detection.
        (setq extra-nl t))
      (prog1 (c-awk-get-NL-prop-prev-line do-lim)
        (if extra-nl (delete-char -1))))))
d9e94c22 523
(defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
  ;; Is there an incomplete statement at the end of the previous line?
  ;; That is, is the previous line's c-awk-NL-prop either ?\\ or ?\{ ?
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (memq (c-awk-get-NL-prop-prev-line do-lim) '(?\\ ?\{)))
530
(defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
  ;; Is there an incomplete statement at the end of the current line?
  ;; That is, is the current line's c-awk-NL-prop either ?\\ or ?\{ ?
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (memq (c-awk-get-NL-prop-cur-line do-lim) '(?\\ ?\{)))
537
d355a0b7
SM
538;; NOTES ON "VIRTUAL SEMICOLONS"
539;;
540;; A "virtual semicolon" is what terminates a statement when there is no ;
541;; or } to do the job. Like point, it is considered to lie _between_ two
542;; characters. As from mid-March 2004, it is considered to lie just after
543;; the last non-syntactic-whitespace character on the line; (previously, it
544;; was considered an attribute of the EOL on the line). A real semicolon
545;; never counts as a virtual one.
0386b551
AM
546
(defun c-awk-at-vsemi-p (&optional pos)
  ;; Is there a virtual semicolon at POS (or POINT)?
  ;;
  ;; We scan forward from the start of the logical line to find where its
  ;; last non-syntactic-whitespace text ends, then check that this is exactly
  ;; POS (or POINT) and that the line's c-awk-NL-prop is ?\$.  Point is
  ;; preserved.
  (save-excursion
    (let* (nl-prop
           (pos-or-point (progn (if pos (goto-char pos)) (point)))
           (bol (c-point 'bol)) (eol (c-point 'eol)))
      (c-awk-beginning-of-logical-line)
      ;; Next `while' goes round one logical line (ending in, e.g. "\\") per
      ;; iteration.  Such a line is rare, and can only be an open string
      ;; ending in an escaped \.
      (while
          (progn
            ;; Next `while' goes over a division sign or /regexp/ per iteration.
            (while
                (and
                 (< (point) eol)
                 (progn
                   (search-forward-regexp c-awk-non-/-syn-ws*-re eol)
                   (looking-at c-awk-space*-/-re)))
              (cond
               ((looking-at c-awk-space*-regexp-/-re) ; /regexp/
                (forward-sexp))
               ((looking-at c-awk-space*-unclosed-regexp-/-re) ; Unclosed /regexp
                (condition-case nil
                    (progn
                      (forward-sexp)
                      (backward-char))  ; Move to end of (logical) line.
                  (error (end-of-line)))) ; Happens at EOB.
               (t                       ; division sign
                (c-forward-syntactic-ws)
                (forward-char))))
            (< (point) bol))
        (forward-line))
      (and (eq (point) pos-or-point)
           (progn
             ;; Skip forward over continuation lines (property ?\\) which are
             ;; followed by blank or comment-only lines, to find the
             ;; property which really terminates the statement.
             (while (and (eq (setq nl-prop (c-awk-get-NL-prop-cur-line)) ?\\)
                         (eq (forward-line) 0)
                         (looking-at c-awk-blank-or-comment-line-re)))
             (eq nl-prop ?\$))))))
586
(defun c-awk-vsemi-status-unknown-p ()
  ;; Are we unsure whether there is a virtual semicolon on the current line?
  ;; DO NOT under any circumstances attempt to calculate this; that would
  ;; defeat the (admittedly kludgy) purpose of this function, which is to
  ;; prevent an infinite recursion in c-beginning-of-statement-1 when point
  ;; starts at a `while' token.
  ;;
  ;; The answer is simply whether c-awk-NL-prop is unset at the current
  ;; line's EOL.
  (not (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
594
(defun c-awk-clear-NL-props (beg end)
  ;; This function is run from before-change-hooks.  It clears the
  ;; c-awk-NL-prop text property from beg to the end of the buffer (The END
  ;; parameter is ignored).  This ensures that the indentation engine will
  ;; never use stale values for this property.
  ;;
  ;; The buffer is widened for the duration, so the clearing extends beyond
  ;; any current narrowing; the restriction is restored afterwards.
  ;;
  ;; This function might do hidden buffer changes.
  (save-restriction
    (widen)
    (c-clear-char-properties beg (point-max) 'c-awk-NL-prop)))
605
(defun c-awk-unstick-NL-prop ()
  ;; Ensure that the text property c-awk-NL-prop is "non-sticky".  Without
  ;; this, a new newline inserted after an old newline (e.g. by C-j) would
  ;; inherit any c-awk-NL-prop from the old newline.  This would be a Bad
  ;; Thing.  This function's action is required by c-put-char-property.
  ;;
  ;; Nothing is done if `text-property-default-nonsticky' is unbound or
  ;; already has an entry for c-awk-NL-prop.
  (if (and (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
           (not (assoc 'c-awk-NL-prop text-property-default-nonsticky)))
      (setq text-property-default-nonsticky
            (cons '(c-awk-NL-prop . t) text-property-default-nonsticky))))
615
616;; The following is purely a diagnostic command, to be commented out of the
617;; final release. ACM, 2002/6/1
618;; (defun NL-props ()
619;; (interactive)
620;; (let (pl-prop cl-prop)
621;; (message "Prev-line: %s Cur-line: %s"
622;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
623;; (char-to-string pl-prop)
624;; "nil")
625;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
626;; (char-to-string cl-prop)
627;; "nil"))))
628;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
629;for now. In the byte compiled version, this causes things to crash because
630;awk-mode-map isn't yet defined. :-(
631
632;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
0386b551 633\f
d9e94c22
MS
634;; The following section of the code is to do with font-locking. The biggest
635;; problem for font-locking is deciding whether a / is a regular expression
636;; delimiter or a division sign - determining precisely where strings and
637;; regular expressions start and stop is also troublesome. This is the
638;; purpose of the function c-awk-set-syntax-table-properties and the myriad
639;; elisp regular expressions it uses.
640;;
641;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
642;; for font-locking unterminated strings (i.e. font-locking the buffer up to
643;; the next string delimiter as a string) was inappropriate. Instead,
644;; unbalanced string/regexp delimiters are given the warning font, being
645;; refonted with the string font as soon as the matching delimiter is entered.
646;;
647;; This requires the region processed by the current font-lock after-change
648;; function to have access to the start of the string/regexp, which may be
649;; several lines back. The elisp "advice" feature is used on these functions
650;; to allow this.
651
(defun c-awk-beginning-of-logical-line (&optional pos)
;; Go back to the start of the (apparent) current line (or the start of the
;; line containing POS), returning the buffer position of that point.  I.e.,
;; go back to the last line which doesn't have an escaped EOL before it.
;;
;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
;; comment, string or regexp.  IT MAY WELL BE that this function should not be
;; executed on a narrowed buffer.
;;
;; This function might do hidden buffer changes.
  (if pos (goto-char pos))
  (forward-line 0)
  ;; Keep stepping back a line whilst the char just before the preceding EOL
  ;; is a backslash.
  (while (and (> (point) (point-min))
              (eq (char-before (1- (point))) ?\\))
    (forward-line -1))
  (point))
668
88a46e21
AM
(defun c-awk-beyond-logical-line (&optional pos)
  ;; Return the position just beyond the (apparent) logical line containing
  ;; POS (default: point).  This is usually the beginning of the next line
  ;; which doesn't follow an escaped EOL.  When the logical line runs up to
  ;; the end of the buffer, the result is EOB.
  ;;
  ;; Point is unchanged.
  ;;
  ;; The position returned is meant to be "safe" for syntactic analysis,
  ;; i.e. outside any comment, string or regexp.  It may well be that this
  ;; function should not be executed on a narrowed buffer.
  (save-excursion
    (when pos
      (goto-char pos))
    (end-of-line)
    ;; Keep hopping to the end of the following line for as long as the
    ;; current line ends in a backslash.
    (while (and (not (eobp))
                (eq (char-before) ?\\))
      (end-of-line 2))
    ;; Step over the terminating newline, unless we are already at EOB.
    (min (1+ (point)) (point-max))))
d9e94c22 688
d9e94c22
MS
689;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
690;; on strings/regexps which are missing their closing delimiter.
691;; 2002/4/28. The default syntax for / has been changed from "string" to
692;; "punctuation", to reduce hassle when this character appears within a string
693;; or comment.
694
(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
  ;; BEG and END bracket a (possibly unterminated) string or regexp.  The
  ;; opening delimiter is after BEG, and the closing delimiter, IF ANY, is
  ;; AFTER END.  Set the appropriate syntax-table properties on the delimiters
  ;; and contents of this string/regexp.
  ;;
  ;; "String" here can also mean a gawk 3.1 "localizable" string which starts
  ;; with _".  In this case, we step over the _ and ignore it; it will get its
  ;; font from an entry in awk-font-lock-keywords.
  ;;
  ;; If the closing delimiter is missing (i.e., there is an EOL there) set the
  ;; STRING-FENCE property on the opening " or / and closing EOL.
  ;;
  ;; This function does hidden buffer changes.
  (when (eq (char-after beg) ?_)
    (setq beg (1+ beg)))

  ;; First put the properties on the delimiters.
  (let ((opener (char-after beg)))
    (cond
     ;; String/regexp terminated by EOB: only the opener can be marked.
     ((eq end (point-max))
      (c-put-char-property beg 'syntax-table '(15))) ; (15) = "string fence"
     ;; Missing end delimiter: fence both the opener and what follows END.
     ((/= opener (char-after end))
      (c-put-char-property beg 'syntax-table '(15))
      (c-put-char-property end 'syntax-table '(15)))
     ;; Properly bracketed regexp.
     ((eq opener ?/)
      (c-put-char-property beg 'syntax-table '(7)) ; (7) = "string"
      (c-put-char-property end 'syntax-table '(7)))
     ;; Otherwise a properly bracketed string: nothing to do.
     ))

  ;; Now change the properties of any escaped "s in the string to punctuation.
  (save-excursion
    (goto-char (1+ beg))
    (if (eobp)
        t
      (while (search-forward "\"" end t)
        (c-put-char-property (1- (point)) 'syntax-table '(1)))))) ; (1) = "punctuation"
d9e94c22
MS
727
(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " or _" of a string.  Set the syntax-table
  ;; properties on this string, leaving point just after the string.
  ;;
  ;; The result is nil if a / immediately after the string would be a regexp
  ;; opener, t if it would be a division sign.
  ;;
  ;; This function does hidden buffer changes.

  ;; Match the (possibly unterminated) string, excluding any closing delimiter.
  (search-forward-regexp c-awk-string-without-end-here-re nil t)
  (c-awk-set-string-regexp-syntax-table-properties
   (match-beginning 0) (match-end 0))
  ;; What follows is one of:
  ;;   "    - the closing quote; step over it and return t, since a
  ;;          following / divides.  In AWK, ("15" / 5) gives 3 ;-)
  ;;   EOL  - unterminated string; step over the EOL and return nil, since
  ;;          a / on the next line would start a regexp.
  ;;   else - unterminated string at EOB; return nil without moving.
  (when (looking-at "[\"\n\r]")
    (prog1 (eq (char-after) ?\")
      (forward-char))))
746
(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Determine whether this is a division sign or a regexp
  ;; opener, and if the latter, apply syntax-table properties to the entire
  ;; regexp.  Point is left immediately after the division sign or regexp, as
  ;; the case may be.
  ;;
  ;; ANCHOR-STATE-/DIV identifies whether a / at ANCHOR would have been a
  ;; division sign (value t) or a regexp opener (value nil).  The idea is that
  ;; we analyze the line from ANCHOR up till point to determine what the / at
  ;; point is.
  ;;
  ;; The result is what ANCHOR-STATE-/DIV (see above) is where point is left.
  ;;
  ;; This function does hidden buffer changes.
  (let* ((slash-pos (point))
         (bound (1+ slash-pos))
         ;; Analyze the text from ANCHOR up to and including the / itself.
         (division-p
          (progn
            (goto-char anchor)
            (if anchor-state-/div
                (not (search-forward-regexp c-awk-regexp-sign-re bound t))
              (and (not (search-forward-regexp c-awk-kwd-regexp-sign-re
                                               bound t))
                   (search-forward-regexp c-awk-div-sign-re bound t))))))
    (cond
     ;; A division sign: just step over it.
     (division-p
      (goto-char bound)
      nil)
     ;; A regexp opener: jump over the regexp innards, setting the match
     ;; data, then mark it up.
     (t
      (goto-char slash-pos)
      (search-forward-regexp c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      ;; Terminating / => step over it, result t.
      ;; EOL (incomplete regexp) => step over it, result nil, since a / on
      ;;   the next line would start another regexp.
      ;; Anything else is an unterminated regexp at EOB => nil, no movement.
      (when (looking-at "[/\n\r]")
        (prog1 (eq (char-after) ?/)
          (forward-char)))))))
783
(defun c-awk-set-syntax-table-properties (lim)
  ;; Scan the buffer text between point and LIM, setting (and clearing) the
  ;; syntax-table property where necessary.
  ;;
  ;; This function is designed to be called as the FUNCTION in a MATCHER in
  ;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
  ;; repeated calls from font-lock: See elisp info page "Search-based
  ;; Fontification").  It also gets called, with a bit of glue, from
  ;; after-change-functions when font-lock isn't active.  Point is left
  ;; "undefined" after this function exits.  THE BUFFER SHOULD HAVE BEEN
  ;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
  ;;
  ;; We need to set/clear the syntax-table property on:
  ;; (i) / - It is set to "string" on a / which is the opening or closing
  ;;     delimiter of the properly terminated regexp (and left unset on a
  ;;     division sign).
  ;; (ii) the opener of an unterminated string/regexp, we set the property
  ;;    "generic string delimiter" on both the opening " or / and the end of
  ;;    the line where the closing delimiter is missing.
  ;; (iii) "s inside strings/regexps (these will all be escaped "s).  They are
  ;;   given the property "punctuation".  This will later allow other routines
  ;;   to use the regexp "\\S\"*" to skip over the string innards.
  ;; (iv) Inside a comment, all syntax-table properties are cleared.
  ;;
  ;; This function does hidden buffer changes.
  (let ((scan-start nil)
        (div-expected nil))    ; t means a following / would be a div sign.
    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
    (c-clear-char-properties (point) lim 'syntax-table)
    ;; Once round the next loop for each string, regexp, or div sign.
    (while (progn
             ;; Skip any "harmless" lines before the next tricky one.  Having
             ;; skipped some, a / could only open a regexp.
             (when (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
               (setq div-expected nil))
             (< (point) lim))
      (setq scan-start (point))
      (search-forward-regexp c-awk-harmless-string*-here-re nil t)
      ;; We are now looking at either a " or a / or a brace/paren/semicolon.
      ;; Do our thing on the string, regexp or division sign, or just update
      ;; our state.
      (setq div-expected
            (cond
             ((looking-at "_?\"")
              (c-awk-syntax-tablify-string))
             ((eq (char-after) ?/)
              (c-awk-syntax-tablify-/ scan-start div-expected))
             (t
              ;; After one of { } ( ; a / opens a regexp (state nil); after
              ;; anything else (i.e. a close paren) it divides (state t).
              ;; Either way, step over the character.
              (prog1 (not (memq (char-after) '(?{ ?} ?\( ?\;)))
                (forward-char))))))
    nil))
837
d9e94c22
MS
838;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
839;; the syntax-table properties even when font-lock isn't enabled, for the
840;; subsequent use of movement functions, etc. However, it seems that if font
841;; lock _is_ enabled, we can always leave it to do the job.
88a46e21
AM
(defvar c-awk-old-ByLL 0
  "Position just beyond the logical line following the region about to change.
Set in `c-awk-record-region-clear-NL' (a before-change function) and
read by `c-awk-end-of-change-region'.")
;; The symbol given here must be spelled exactly like the one in the
;; `defvar' above (Lisp symbols are case sensitive); otherwise a different,
;; unused symbol becomes buffer-local and this variable ends up shared
;; between all AWK buffers.
(make-variable-buffer-local 'c-awk-old-ByLL)
d9e94c22 846
(defun c-awk-record-region-clear-NL (beg end)
  ;; This function is called exclusively from the before-change-functions
  ;; hook.  It does two things: it records, in `c-awk-old-ByLL', the position
  ;; just beyond the (logical) line on which END lies, and it calls
  ;; `c-awk-clear-NL-props' on the stretch from END to the end of the buffer.
  ;; BEG is ignored.
  ;;
  ;; On entry, the buffer will have been widened and match-data will have been
  ;; saved; point is undefined on both entry and exit; the return value is
  ;; ignored.
  ;;
  ;; This function does hidden buffer changes.
  (c-save-buffer-state ()
    (setq c-awk-old-ByLL (c-awk-beyond-logical-line end))
    (c-awk-clear-NL-props end (point-max))))
d9e94c22
MS
862
(defun c-awk-end-of-change-region (beg end old-len)
  ;; Find the end of the region which needs to be font-locked after a change.
  ;; This is the end of the logical line on which the change happened, either
  ;; as it was before the change, or as it is now, whichever is later.
  ;; BEG, END and OLD-LEN are the standard after-change parameters.
  ;; N.B. point is left undefined.
  (let (;; The pre-change limit, shifted by the change in text length
        ;; (new length END - BEG, minus the old length OLD-LEN).
        (old-limit (+ c-awk-old-ByLL (- end beg old-len)))
        ;; The limit as the buffer stands now.
        (new-limit (c-awk-beyond-logical-line end)))
    (max old-limit new-limit)))
d9e94c22
MS
870
871;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region
872;; specified by the font-lock after-change function must be expanded to
873;; include ALL of any string or regexp within the region. The simplest way to
874;; do this in practice is to use the beginning/end-of-logical-line functions.
875;; Don't overlook the possibility of the buffer change being the "recapturing"
876;; of a previously escaped newline.
88a46e21 877
0d26e0b6 878;; ACM 2008-02-05:
88a46e21
AM
(defun c-awk-extend-and-syntax-tablify-region (beg end old-len)
  ;; Expand the region (BEG END) as needed to (c-new-BEG c-new-END) then put
  ;; `syntax-table' properties on this region.
  ;;
  ;; This function is called from an after-change function, BEG END and
  ;; OLD-LEN being the standard parameters.
  ;;
  ;; Point is undefined both before and after this function call, the buffer
  ;; has been widened, and match-data saved.  The return value is ignored.
  ;;
  ;; It prepares the buffer for font locking, hence must get called before
  ;; `font-lock-after-change-function'.
  ;;
  ;; This function is the AWK value of `c-before-font-lock-function'.
  ;; It does hidden buffer changes.
  (c-save-buffer-state ()
    ;; The end is computed first, the beginning second; the latter call
    ;; moves point.
    (setq c-new-END (c-awk-end-of-change-region beg end old-len)
          c-new-BEG (c-awk-beginning-of-logical-line beg))
    (goto-char c-new-BEG)
    (c-awk-set-syntax-table-properties c-new-END)))
d9e94c22 899
3c0ab532
AM
900;; Awk regexps written with help from Peter Galbraith
901;; <galbraith@mixing.qc.dfo.ca>.
c7015153 902;; Take GNU Emacs's 'words out of the following regexp-opts. They don't work
0d26e0b6 903;; in XEmacs 21.4.4. acm 2002/9/19.
3c0ab532
AM
(defconst awk-font-lock-keywords
  (eval-when-compile
    (let ((var-words
           '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON"
             "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FS" "IGNORECASE"
             "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PROCINFO" "RLENGTH"
             "RS" "RSTART" "RT" "SUBSEP" "TEXTDOMAIN"))
          (kwd-words
           '("BEGIN" "END" "break" "case" "continue" "default" "delete"
             "do" "else" "exit" "for" "getline" "if" "in" "next"
             "nextfile" "return" "switch" "while"))
          (builtin-words
           '("adump" "and" "asort" "atan2" "bindtextdomain" "close"
             "compl" "cos" "dcgettext" "exp" "extension" "fflush"
             "gensub" "gsub" "index" "int" "length" "log" "lshift"
             "match" "mktime" "or" "print" "printf" "rand" "rshift"
             "sin" "split" "sprintf" "sqrt" "srand" "stopme"
             "strftime" "strtonum" "sub" "substr" "system"
             "systime" "tolower" "toupper" "xor")))
      (list
       ;; Function names: the "func"/"function" keyword and the name after it.
       '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?"
         (1 font-lock-keyword-face) (3 font-lock-function-name-face nil t))

       ;; Variable names (the built-in variables).
       (cons (concat "\\<" (regexp-opt var-words t) "\\>")
             'font-lock-variable-name-face)

       ;; Special file names.  (acm, 2002/7/22)
       ;; The following regexp was created by first evaluating this in GNU
       ;; Emacs 21.1:
       ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n"
       ;;               "/dev/pid" "/dev/ppid" "/dev/pgrpid" "/dev/user")
       ;;             'words)
       ;; , removing the "?:" from each "\\(?:" (for backward compatibility
       ;;   with older Emacsen)
       ;; , replacing the "n" in "dev/fd/n" with "[0-9]+"
       ;; , removing the unwanted \\< at the beginning, and finally filling
       ;;   out the regexp so that a " must come before, and either a " or
       ;;   heuristic stuff after.
       ;; The surrounding quotes are fontified along with the filename, since,
       ;; semantically, they are an indivisible unit.
       '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\
std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
         (1 font-lock-variable-name-face t)
         (8 font-lock-variable-name-face t t))
       ;; Do the same (almost) with
       ;; (regexp-opt '("/inet/tcp/lport/rhost/rport"
       ;;               "/inet/udp/lport/rhost/rport"
       ;;               "/inet/raw/lport/rhost/rport") 'words)
       ;; This cannot be combined with the above pattern, because the match
       ;; number for the (optional) closing \" would then exceed 9.
       '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
         (1 font-lock-variable-name-face t)
         (6 font-lock-variable-name-face t t))

       ;; Keywords.  This entry is a bare regexp string, with no face given.
       (concat "\\<" (regexp-opt kwd-words t) "\\>")

       ;; Builtins.  This entry has the form (eval . FORM); the regexp is
       ;; spliced into FORM here, but `c-preprocessor-face-name' is left
       ;; unevaluated inside it.
       `(eval . (list
                 ,(concat "\\<" (regexp-opt builtin-words t) "\\>")
                 0 c-preprocessor-face-name))

       ;; gawk debugging keywords.  (acm, 2002/7/21)
       ;; (Removed, 2003/6/6.  These functions are now fontified as built-ins)
       ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>")
       ;;       0 'font-lock-warning-face)

       ;; User defined functions with an apparent spurious space before the
       ;; opening parenthesis.  acm, 2002/5/30.
       `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s "
                  c-awk-escaped-nls*-with-space* "(")
         (0 'font-lock-warning-face))

       ;; Space after \ in what looks like an escaped newline.  2002/5/31
       '("\\\\\\s +$" 0 font-lock-warning-face t)

       ;; Unbalanced string (") or regexp (/) delimiters.  2002/02/16.
       '("\\s|" 0 font-lock-warning-face t nil)
       ;; gawk 3.1 localizable strings ( _"translate me!").  2002/5/21
       '("\\(_\\)\\s|" 1 font-lock-warning-face)
       '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs. 2002/10/6
       )))
  "Default expressions to highlight in AWK mode.")
0386b551
AM
990\f
991;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e
d9e94c22 992
0386b551
AM
993;; The following three regexps differ from those earlier on in cc-awk.el in
994;; that they assume the syntax-table properties have been set. They are thus
995;; not useful for code which sets these properties.
(defconst c-awk-terminated-regexp-or-string-here-re
  "\\=\\s\"\\S\"*\\s\""
  "Regexp matching a terminated string/regexp starting at point.
It relies on the syntax-table text properties having been set.")

(defconst c-awk-unterminated-regexp-or-string-here-re
  "\\=\\s|\\S|*$"
  "Regexp matching an unterminated string/regexp starting at point.
The match does NOT include the EOL at the end.  It relies on the
syntax-table text properties having been set.")

(defconst c-awk-harmless-pattern-characters*
  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*")
  "Regexp matching a run of \"harmless\" pattern characters.
Each element of the run is either a single \"harmless\" character
in a pattern or an escaped character pair.")
1005
0386b551
AM
(defun c-awk-at-statement-end-p ()
  ;; Point is not inside a comment or string.  Is it AT the end of a
  ;; statement?  This means immediately after the last non-ws character of
  ;; the statement.  The caller is responsible for widening the buffer, if
  ;; appropriate.
  (unless (bobp)
    (save-excursion
      ;; Examine the character just before the original point.
      (backward-char)
      (cond
       ;; A closing brace or semicolon always ends a statement.
       ((looking-at "[};]"))
       ;; Otherwise the current line's NL property must be ?\$ or ?\\, and
       ;; a non-whitespace, non-backslash character must be followed (after
       ;; any escaped newlines and spaces) by a comment or end of line.
       ((memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\))
        (looking-at
         (eval-when-compile
           (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space*
                   "[#\n\r]"))))))))
1020
d9e94c22
MS
(defun c-awk-beginning-of-defun (&optional arg)
  "Move backward to the beginning of an AWK \"defun\".
With ARG, do it that many times.  Negative arg -N means move
forward to Nth following beginning of defun.  Returns t unless
search stops due to beginning or end of buffer.

By a \"defun\" is meant either a pattern-action pair or a function.
The start of a defun is recognized as code starting at column zero
which is neither a closing brace nor a comment nor a continuation
of the previous line.  Unlike in some other modes, having an
opening brace at column 0 is neither necessary nor helpful.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state                ; ensures the buffer is writable.
        nil
      (let ((candidate-re "^[^#} \t\n\r]") ; Code at column 0, bar } and #.
            (found t)) ; Has the most recent regexp search found b-of-defun?
        (cond
         ;; Non-negative ARG: go back one defun each time round the outer
         ;; loop.
         ((>= arg 0)
          (while (and found (> arg 0) (not (bobp)))
            ;; Go back one "candidate" each time round the inner loop until
            ;; one is genuinely a beginning-of-defun, i.e. the NL property
            ;; of the line before it is one of ?\$ ?\} ?\#.
            (while (and (setq found (search-backward-regexp
                                     candidate-re (point-min) 'stop-at-limit))
                        (not (memq (c-awk-get-NL-prop-prev-line)
                                   '(?\$ ?\} ?\#)))))
            (setq arg (1- arg))))
         ;; Negative ARG: the same, searching forwards.
         (t
          ;; Step off the current position so that a defun starting right
          ;; here isn't found again.
          (unless (eobp) (forward-char 1))
          (while (and found (< arg 0) (not (eobp)))
            (while (and (setq found (search-forward-regexp
                                     candidate-re (point-max) 'stop-at-limit))
                        (not (memq (c-awk-get-NL-prop-prev-line)
                                   '(?\$ ?\} ?\#)))))
            (setq arg (1+ arg)))
          ;; The forward search left point after the match; go to its start.
          (when found (goto-char (match-beginning 0)))))
        (eq arg 0)))))
1058
(defun c-awk-forward-awk-pattern ()
  ;; Point is at the start of an AWK pattern (which may be null) or function
  ;; declaration.  Move to the pattern's end, and past any trailing space or
  ;; comment.  Typically, we stop at the { which denotes the corresponding AWK
  ;; action/function body.  Otherwise we stop at the EOL (or ;) marking the
  ;; absence of an explicit action.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((more-pattern t))
    (while more-pattern
      (search-forward-regexp c-awk-harmless-pattern-characters*)
      ;; A comment runs to the end of the line.
      (when (looking-at "#")
        (end-of-line))
      (setq more-pattern
            (cond
             ((eobp) nil)
             ((looking-at "[{;]") nil)  ; We've finished!
             ;; At EOL, carry on only if the line is incomplete.
             ;; `forward-line' returns non-nil.
             ((eolp)
              (and (c-awk-cur-line-incomplete-p)
                   (forward-line)))
             ;; Step over a whole string or regexp, terminated or not.
             ((search-forward-regexp
               c-awk-terminated-regexp-or-string-here-re nil t))
             ((search-forward-regexp
               c-awk-unterminated-regexp-or-string-here-re nil t))
             ;; A / which starts neither of the above is a division sign.
             ((looking-at "/")
              (forward-char)
              t))))))
1081
(defun c-awk-end-of-defun1 ()
  ;; Point is at the start of a "defun".  Move to its end.  Return the end
  ;; position.
  ;;
  ;; This function might do hidden buffer changes.
  (c-awk-forward-awk-pattern)
  (cond
   ;; An action or function body: jump over the balanced braces.
   ((looking-at "{")
    (goto-char (scan-sexps (point) 1)))
   ;; A pattern explicitly terminated by a semicolon.
   ((looking-at ";")
    (forward-char))
   ;; A pattern without an action ends at EOL, where we stay put.  Anything
   ;; else means `c-awk-forward-awk-pattern' stopped somewhere unexpected.
   ((not (eolp))
    (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern")))
  (point))
1093
(defun c-awk-beginning-of-defun-p ()
  ;; Are we already at the beginning of a defun?  (I.e. at code in column 0
  ;; which isn't a }, and isn't a continuation line of any sort.)
  ;;
  ;; This function might do hidden buffer changes.
  (when (looking-at "^[^#} \t\n\r]")
    (not (c-awk-prev-line-incomplete-p))))
1101
(defun c-awk-end-of-defun (&optional arg)
  "Move forward to next end of defun.  With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.

An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state
        nil
      (let ((origin (point))
            defun-end)
        (cond
         ;; Strategy for +ve ARG: If we're not already at a
         ;; beginning-of-defun, move backwards to one.  Then repeat
         ;; [(i) move forward to end-of-current-defun;
         ;;  (ii) if this isn't it, move forward to beginning-of-defun].
         ;; We start counting ARG only when step (i) has passed the original
         ;; point.
         ((> arg 0)
          (unless (or (c-awk-beginning-of-defun-p)
                      (c-awk-beginning-of-defun 1))
            ;; No beginning-of-defun before point: look for one after it.
            (goto-char origin)
            (c-awk-beginning-of-defun -1)) ; if this fails, we're at EOB, tough!
          ;; Now count forward, one defun at a time.
          (while (and (not (eobp))
                      (c-awk-end-of-defun1)
                      (if (> (point) origin)
                          (setq arg (1- arg))
                        t)
                      (> arg 0)
                      (c-awk-beginning-of-defun -1))))

         ;; -ve ARG: step back over beginnings-of-defun, counting each one
         ;; whose end lies before the original point.
         ((< arg 0)
          (setq defun-end origin)
          (while (and (not (bobp))
                      (c-awk-beginning-of-defun 1)
                      (if (< (setq defun-end
                                   (if (bobp)
                                       (point)
                                     (save-excursion (c-awk-end-of-defun1))))
                             origin)
                          (setq arg (1+ arg))
                        t)
                      (< arg 0)))
          ;; Never finish beyond where we started.
          (goto-char (min origin defun-end))))))))
1146
0386b551 1147\f
(cc-provide 'cc-awk)			; Changed from 'awk-mode, ACM 2002/5/21

;;; cc-awk.el ends here