Add 2012 to FSF copyright years for Emacs files
[bpt/emacs.git] / lisp / progmodes / cc-awk.el
CommitLineData
d9e94c22
MS
1;;; cc-awk.el --- AWK specific code within cc-mode.
2
acaf905b 3;; Copyright (C) 1988, 1994, 1996, 2000-2012 Free Software Foundation, Inc.
d9e94c22 4
3efc2cd7 5;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
d9e94c22
MS
6;; Maintainer: FSF
7;; Keywords: AWK, cc-mode, unix, languages
bd78fa1d 8;; Package: cc-mode
d9e94c22
MS
9
10;; This file is part of GNU Emacs.
11
b1fc2b50 12;; GNU Emacs is free software: you can redistribute it and/or modify
d9e94c22 13;; it under the terms of the GNU General Public License as published by
b1fc2b50
GM
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
d9e94c22
MS
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
b1fc2b50 23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
d9e94c22
MS
24
25;;; Commentary:
26
27;; This file contains (most of) the adaptations to cc-mode required for the
28;; integration of AWK Mode.
6772c8e1 29;; It is organized thusly, the sections being separated by page breaks:
d9e94c22 30;; 1. The AWK Mode syntax table.
6772c8e1 31;; 2. Regular expressions for analyzing AWK code.
0386b551 32;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property").
3c0ab532 33;; 4. Syntax-table property/font-locking stuff, including the
d9e94c22 34;; font-lock-keywords setting.
0386b551
AM
35;; 5. The AWK Mode before/after-change-functions.
36;; 6. AWK Mode specific versions of commands like beginning-of-defun.
d9e94c22
MS
37;; The AWK Mode keymap, abbreviation table, and the mode function itself are
38;; in cc-mode.el.
39
40;;; Code:
41
42(eval-when-compile
43 (let ((load-path
44 (if (and (boundp 'byte-compile-dest-file)
45 (stringp byte-compile-dest-file))
46 (cons (file-name-directory byte-compile-dest-file) load-path)
47 load-path)))
48 (load "cc-bytecomp" nil t)))
49
50(cc-require 'cc-defs)
51
52;; Silence the byte compiler.
53(cc-bytecomp-defvar font-lock-mode) ; Checked with boundp before use.
88a46e21
AM
54(cc-bytecomp-defvar c-new-BEG)
55(cc-bytecomp-defvar c-new-END)
d9e94c22
MS
56
57;; Some functions in cc-engine that are used below. There's a cyclic
58;; dependency so it can't be required here. (Perhaps some functions
59;; could be moved to cc-engine to avoid it.)
60(cc-bytecomp-defun c-backward-token-1)
61(cc-bytecomp-defun c-beginning-of-statement-1)
62(cc-bytecomp-defun c-backward-sws)
63
(defvar awk-mode-syntax-table
  (let ((table (make-syntax-table)))
    ;; Backslash is the escape character.
    (modify-syntax-entry ?\\ "\\" table)
    ;; A comment starts at # and is ended by NL, CR or FF.
    (modify-syntax-entry ?\# "< " table)
    (mapc (lambda (eol) (modify-syntax-entry eol "> " table))
          '(?\n ?\r ?\f))
    ;; Operator characters are punctuation.  This includes /, which can
    ;; either delimit a regexp or be a division operator: by default it is
    ;; assumed to be a division sign, and the regexp cases are fixed up with
    ;; `font-lock-syntactic-keywords'.
    (mapc (lambda (op) (modify-syntax-entry op "." table))
          '(?/ ?* ?+ ?- ?= ?% ?< ?> ?& ?| ?\'))
    ;; Underscore is a symbol constituent.
    (modify-syntax-entry ?_ "_" table)
    table)
  "Syntax table in use in AWK Mode buffers.")
88
0386b551
AM
89\f
90;; This section defines regular expressions used in the analysis of AWK code.
91
92;; N.B. In the following regexps, an EOL is either \n OR \r. This is because
93;; Emacs has in the past used \r to mark hidden lines in some fashion (and
94;; maybe still does).
95
96(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
97;; Matches any escaped (with \) character-pair, including an escaped newline.
98(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
99;; Matches any escaped (with \) character-pair, apart from an escaped newline.
100(defconst c-awk-comment-without-nl "#.*")
101;; Matches an AWK comment, not including the terminating NL (if any). Note
102;; that the "enclosing" (elisp) regexp must ensure the # is real.
103(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
104;; Matches a newline, or the end of buffer.
105
106;; "Space" regular expressions.
107(eval-and-compile
108 (defconst c-awk-escaped-nl "\\\\[\n\r]"))
109;; Matches an escaped newline.
ca331935
CY
110(eval-and-compile
111 (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*")))
0386b551
AM
112;; Matches a possibly empty sequence of escaped newlines. Used in
113;; awk-font-lock-keywords.
114;; (defconst c-awk-escaped-nls*-with-space*
115;; (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
116;; The above RE was very slow. Its run time was doubling with each additional
117;; space :-( Reformulate it as below:
118(eval-and-compile
119 (defconst c-awk-escaped-nls*-with-space*
120 (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))
121;; Matches a possibly empty sequence of escaped newlines with optional
122;; interspersed spaces and tabs. Used in awk-font-lock-keywords.
;; Optional leading blanks followed by either the start of a comment or an
;; (optionally backslash-escaped) end of line.
(defconst c-awk-blank-or-comment-line-re "[ \t]*\\(#\\|\\\\?$\\)")
125;; Matches (the tail of) a line containing at most either a comment or an
126;; escaped EOL.
127
128;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
129(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
130;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a
53964682 131;; localization string in gawk 3.1
0386b551
AM
132(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
133;; Matches an underline NOT followed by ".
134(defconst c-awk-harmless-string*-re
135 (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
136;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
137;; #, or newlines.
138(defconst c-awk-harmless-string*-here-re
139 (concat "\\=" c-awk-harmless-string*-re))
140;; Matches, at point, a (possibly empty) sequence of chars without unescaped
141;; /, ", \, #, or newlines.
142(defconst c-awk-harmless-line-re
143 (concat c-awk-harmless-string*-re
144 "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
145;; Matches (the tail of) an AWK "logical" line not containing an unescaped
146;; " or /. "logical" means "possibly containing escaped newlines". A comment
147;; is matched as part of the line even if it contains a " or a /. The End of
148;; buffer is also an end of line.
149(defconst c-awk-harmless-lines+-here-re
150 (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
151;; Matches a sequence of (at least one) "harmless-line" at point.
152
153
154;; REGEXPS FOR AWK STRINGS.
155(defconst c-awk-string-ch-re "[^\"\\\n\r]")
156;; Matches any character which can appear unescaped in a string.
157(defconst c-awk-string-innards-re
158 (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
159;; Matches the inside of an AWK string (i.e. without the enclosing quotes).
160(defconst c-awk-string-without-end-here-re
161 (concat "\\=_?\"" c-awk-string-innards-re))
162;; Matches an AWK string at point up to, but not including, any terminator.
53964682 163;; A gawk 3.1+ string may look like _"localizable string".
0386b551
AM
164(defconst c-awk-one-line-possibly-open-string-re
165 (concat "\"\\(" c-awk-string-ch-re "\\|" c-awk-non-eol-esc-pair-re "\\)*"
166 "\\(\"\\|\\\\?$\\|\\'\\)"))
167
168;; REGEXPS FOR AWK REGEXPS.
169(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
170;; Matches any AWK regexp character which doesn't require special analysis.
171(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
172;; Matches a (possibly empty) sequence of escaped newlines.
173
174;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
175;; list", and "[:alpha:]" inside a character list will be known as a
176;; "character class". These terms for these things vary between regexp
177;; descriptions.
178(defconst c-awk-regexp-char-class-re
179 "\\[:[a-z]+:\\]")
180 ;; Matches a character class spec (e.g. [:alpha:]).
181(defconst c-awk-regexp-char-list-re
182 (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
183 "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
184 "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
185;; Matches a regexp char list, up to (but not including) EOL if the ] is
186;; missing.
187(defconst c-awk-regexp-one-line-possibly-open-char-list-re
188 (concat "\\[\\]?\\(" c-awk-non-eol-esc-pair-re "\\|" "[^]\n\r]" "\\)*"
189 "\\(]\\|\\\\?$\\|\\'\\)"))
190;; Matches the head (or all) of a regexp char list, up to (but not
191;; including) the first EOL.
192(defconst c-awk-regexp-innards-re
193 (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re
194 "\\|" c-awk-regexp-normal-re "\\)*"))
195;; Matches the inside of an AWK regexp (i.e. without the enclosing /s)
196(defconst c-awk-regexp-without-end-re
197 (concat "/" c-awk-regexp-innards-re))
0d26e0b6 198;; Matches an AWK regexp up to, but not including, any terminating /.
0386b551
AM
199(defconst c-awk-one-line-possibly-open-regexp-re
200 (concat "/\\(" c-awk-non-eol-esc-pair-re
201 "\\|" c-awk-regexp-one-line-possibly-open-char-list-re
202 "\\|" c-awk-regexp-normal-re "\\)*"
203 "\\(/\\|\\\\?$\\|\\'\\)"))
204;; Matches as much of the head of an AWK regexp which fits on one line,
205;; possibly all of it.
206
207;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
208;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant
209;; whether a '/' at the current position would be a regexp opener or a
210;; division sign.
211(defconst c-awk-neutral-re
212; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
213 "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
214;; A "neutral" char(pair). Doesn't change the "state" of a subsequent /.
215;; This is space/tab, braces, an auto-increment/decrement operator or an
cb5bf6ba 216;; escaped character. Or one of the (invalid) characters @ or `. But NOT an
0386b551
AM
217;; end of line (even if escaped).
218(defconst c-awk-neutrals*-re
219 (concat "\\(" c-awk-neutral-re "\\)*"))
220;; A (possibly empty) string of neutral characters (or character pairs).
221(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
222;; Matches a non-empty run of chars, each a constituent of a variable or
223;; number, or a ket (i.e. closing bracKET), round or square. Assume that all
224;; characters \x80 to \xff are "letters".
225(defconst c-awk-div-sign-re
226 (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
227;; Will match a piece of AWK buffer ending in / which is a division sign, in
228;; a context where an immediate / would be a regexp bracket. It follows a
229;; variable or number (with optional intervening "neutral" characters). This
230;; will only work when there won't be a preceding " or / before the sought /
231;; to foul things up.
232(defconst c-awk-non-arith-op-bra-re
233 "[[\(&=:!><,?;'~|]")
e1dbe924 234;; Matches an opening BRAcket, round or square, or any operator character
0386b551
AM
235;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a
236;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
237;; and "--".
238(defconst c-awk-regexp-sign-re
239 (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
240;; Will match a piece of AWK buffer ending in / which is an opening regexp
241;; bracket, in a context where an immediate / would be a division sign. This
242;; will only work when there won't be a preceding " or / before the sought /
243;; to foul things up.
244
245;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
246(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
d355a0b7 247;; NEW VERSION! (which will be restricted to the current line)
0386b551
AM
248(defconst c-awk-one-line-non-syn-ws*-re
249 (concat "\\([ \t]*"
250 "\\(" c-awk-_-harmless-nonws-char-re "\\|"
251 c-awk-non-eol-esc-pair-re "\\|"
252 c-awk-one-line-possibly-open-string-re "\\|"
253 c-awk-one-line-possibly-open-regexp-re
254 "\\)"
255 "\\)*"))
256
257\f
d9e94c22 258;; ACM, 2002/5/29:
0d26e0b6 259;;
d9e94c22
MS
260;; The next section of code is about determining whether or not an AWK
261;; statement is complete. We use this to indent the following line.
262;; The determination is pretty straightforward in C, where a statement ends
263;; with either a ; or a }. Only "while" really gives any trouble there, since
264;; it might be the end of a do-while. In AWK, on the other hand, semicolons
265;; are rarely used, and EOLs _usually_ act as "virtual semicolons". In
266;; addition, we have the complexity of escaped EOLs. The core of this
267;; analysis is in the middle of the function
268;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
269;;
270;; To avoid continually repeating this expensive analysis, we "cache" its
271;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
272;; the EOL (if any) which terminates that line. Should the property be
273;; required for the very last line (which has no EOL), it is calculated as
274;; required but not cached. The c-awk-NL-prop property should be thought of
275;; as only really valid immediately after a buffer change, not a permanently
276;; set property. (By contrast, the syntax-table text properties (set by an
277;; after-change function) must be constantly updated for the mode to work
278;; properly).
279;;
0386b551
AM
280;; This text property is also used for "syntactic whitespace" movement, this
281;; being where the distinction between the values '$' and '}' is significant.
282;;
d9e94c22
MS
283;; The valid values for c-awk-NL-prop are:
284;;
285;; nil The property is not currently set for this line.
286;; '#' There is NO statement on this line (at most a comment), and no open
287;; statement from a previous line which could have been completed on this
288;; line.
289;; '{' There is an unfinished statement on this (or a previous) line which
290;; doesn't require \s to continue onto another line, e.g. the line ends
291;; with {, or the && operator, or "if (condition)". Note that even if the
292;; newline is redundantly escaped, it remains a '{' line.
293;; '\' There is an escaped newline at the end of this line and this '\' is
294;; essential to the syntax of the program. (i.e. if it had been a
295;; frivolous \, it would have been ignored and the line been given one of
296;; the other property values.)
0386b551
AM
297;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual
298;; semicolon"). This might be a content-free line terminating a statement
299;; from the preceding (continued) line (which has property \).
300;; '}' A statement, being the last thing (aside from ws/comments) is
301;; explicitly terminated on this line by a closing brace (or sometimes a
302;; semicolon).
d9e94c22
MS
303;;
304;; This set of values has been chosen so that the property's value on a line
305;; is completely determined by the contents of the line and the property on
306;; the previous line, EXCEPT for where a "while" might be the closing
307;; statement of a do-while.
308
(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
  ;; Return non-nil when point is directly after the closing paren of the
  ;; condition in "if (...)", "for (...)" or "while (...)".
  ;;
  ;; The "while (...)" which ends a "do ... while (...)" is deliberately
  ;; excluded: the caller wants to know whether the next line needs
  ;; indenting as a continuation.
  ;;
  ;; DO-LIM bounds the backward search for the "do" of a possible do-while.
  ;;
  ;; This function might do hidden buffer changes.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; Back over the "(...)".
          (c-backward-token-1)          ; Hitting BOB is harmless here.
          (cond
           ((looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
            t)
           ((looking-at "while\\>\\([^_]\\|$\\)")
            ;; Only a free-standing while counts, not the tail of a do-while.
            (not (eq (c-beginning-of-statement-1 do-lim)
                     'beginning)))))))))
331
(defun c-awk-after-function-decl-param-list ()
  ;; Return non-nil when point is directly after the closing paren of the
  ;; parameter list in "function foo (bar)" (or "func foo (bar)").
  ;;
  ;; This function might do hidden buffer changes.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; Back over the "(...)".
          (c-backward-token-1)          ; Hitting BOB is harmless here.
          ;; There must be an identifier, itself preceded by the keyword.
          (when (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
            (c-backward-token-1)
            (looking-at "func\\(tion\\)?\\>")))))))
345
346;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code).
(defun c-awk-after-continue-token ()
  ;; Return non-nil when the token just before point is one after which a
  ;; statement may run on to the next line without a backslash: one of
  ;; , { ? : && || do else.
  ;;
  ;; FIXME (2002/10/27): the result of c-backward-token-1 is not checked.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (c-backward-token-1)
    ;; c-backward-token-1 does not step back over the first character of a
    ;; two character && or ||, so do that by hand.
    (when (and (looking-at "[&|]") (not (bobp)))
      (backward-char))
    (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
357
(defun c-awk-after-rbrace-or-statement-semicolon ()
  ;; Return non-nil when point is directly after a } or after a ; which
  ;; terminates a statement.  The ;s inside the control part of a
  ;; "for (...; ...; ...)" do not count.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((prev-char (char-before)))
    (cond
     ((eq prev-char ?\}) t)
     ((eq prev-char ?\;)
      (save-excursion
        (let ((container (c-safe (scan-lists (point) -1 1))))
          (when container
            (goto-char container)       ; Opening bracket containing the ;.
            ;; Reject the ; only if that bracket is the ( of a "for".
            (not (and (looking-at "(")
                      (c-backward-token-1) ; Hitting BOB is harmless here.
                      (looking-at "for\\>"))))))))))
373
374(defun c-awk-back-to-contentful-text-or-NL-prop ()
375 ;; Move back to just after the first found of either (i) an EOL which has
376 ;; the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
377 ;; We return either the value of c-awk-NL-prop (in case (i)) or nil.
0386b551 378 ;; Calling functions can best distinguish cases (ii) and (iii) with (bolp).
d9e94c22
MS
379 ;;
380 ;; Note that an escaped eol counts as whitespace here.
381 ;;
382 ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
383 ;; that the previous line contains an unterminated string (without \). In
0386b551 384 ;; this case, assume that the previous line's c-awk-NL-prop is a $.
0d26e0b6 385 ;;
d9e94c22
MS
386 ;; POINT MUST BE AT THE START OF A LINE when calling this function. This
387 ;; is to ensure that the various backward-comment functions will work
388 ;; properly.
0386b551
AM
389 ;;
390 ;; This function might do hidden buffer changes.
d9e94c22
MS
391 (let ((nl-prop nil)
392 bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
393 (while ;; We are at a BOL here. Go back one line each iteration.
394 (and
395 (not (bobp))
 ;; Case (i): stop as soon as the EOL just before point already carries
 ;; a c-awk-NL-prop value; that value becomes the return value.
396 (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
397 (progn (setq bol-pos (c-point 'bopl))
398 (setq bsws-pos (point))
399 ;; N.B. the following function will not go back past an EOL if
400 ;; there is an open string (without \) on the previous line.
0386b551
AM
401 ;; If we find such, set the c-awk-NL-prop on it, too
402 ;; (2004/3/29).
d9e94c22
MS
403 (c-backward-syntactic-ws bol-pos)
404 (or (/= (point) bsws-pos)
 ;; Point did not move: apply the kludge, marking the preceding
 ;; EOL with $ and ending the loop (the progn yields nil).
0386b551
AM
405 (progn (setq nl-prop ?\$)
406 (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
d9e94c22
MS
407 nil)))
408 ;; If we had a backslash at EOL, c-backward-syntactic-ws will
409 ;; have gone backwards over it. Check the backslash was "real".
410 (progn
411 (if (looking-at "[ \t]*\\\\+$")
412 (if (progn
413 (end-of-line)
414 (search-backward-regexp
415 "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
416 bol-pos t))
417 (progn (end-of-line) ; escaped EOL.
418 (backward-char)
419 (c-backward-syntactic-ws bol-pos))
420 (end-of-line))) ; The \ at eol is a fake.
 ;; Go round again only if point has ended up at a BOL; otherwise
 ;; point is left where the scan above stopped and the loop exits.
421 (bolp))))
422 nl-prop))
423
424(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
425 ;; Calculate and set the value of the c-awk-NL-prop on the immediately
426 ;; preceding EOL. This may also involve doing the same for several
427 ;; preceding EOLs.
0d26e0b6 428 ;;
d9e94c22
MS
429 ;; NOTE that if the property was already set, we return it without
430 ;; recalculation. (This is by accident rather than design.)
0d26e0b6 431 ;;
d9e94c22
MS
432 ;; Return the property which got set (or was already set) on the previous
433 ;; line. Return nil if we hit BOB.
0d26e0b6 434 ;;
d9e94c22 435 ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
0386b551
AM
436 ;;
437 ;; This function might do hidden buffer changes.
d9e94c22
MS
438 (save-excursion
439 (save-match-data
440 (beginning-of-line)
 ;; POS is the BOL we started from; the loop further down works
 ;; forward, one line at a time, until point gets back to it.
441 (let* ((pos (point))
442 (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
443 ;; We are either (1) at a BOL (with nl-prop containing the previous
444 ;; line's c-awk-NL-prop) or (2) after contentful text on a line. At
445 ;; the BOB counts as case (1), so we test next for bolp rather than
446 ;; non-nil nl-prop.
447 (when (not (bolp))
448 (setq nl-prop
449 (cond
450 ;; Incomplete statement which doesn't require escaped EOL?
451 ((or (c-awk-after-if-for-while-condition-p do-lim)
452 (c-awk-after-function-decl-param-list)
453 (c-awk-after-continue-token))
454 ?\{)
455 ;; Escaped EOL (where there's also something to continue)?
456 ((and (looking-at "[ \t]*\\\\$")
457 (not (c-awk-after-rbrace-or-statement-semicolon)))
458 ?\\)
0386b551
AM
459 ;; A statement was completed on this line. How?
460 ((memq (char-before) '(?\; ?\})) ?\}) ; Real ; or }
461 (t ?\$))) ; A virtual semicolon.
d9e94c22
MS
 ;; Cache the value on this line's EOL, then step to the next line.
462 (end-of-line)
463 (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
464 (forward-line))
465
466 ;; We are now at a (possibly empty) sequence of content-free lines.
467 ;; Set c-awk-NL-prop on each of these lines' EOLs.
468 (while (< (point) pos) ; one content-free line each iteration.
469 (cond ; recalculate nl-prop from previous line's value.
0386b551 470 ((memq nl-prop '(?\} ?\$ nil)) (setq nl-prop ?\#))
d9e94c22 471 ((eq nl-prop ?\\)
0386b551 472 (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\$)))
d9e94c22
MS
473 ;; ?\# (empty line) and ?\{ (open stmt) don't change.
474 )
475 (forward-line)
476 (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
477 nl-prop))))
478
(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
  ;; Return the c-awk-NL-prop text-property of the previous line's EOL,
  ;; using the cached value when there is one and calculating it otherwise.
  ;; Return nil if point is at BOB.
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (unless (bobp)
    (let ((cached (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)))
      (or cached
          (c-awk-calculate-NL-prop-prev-line do-lim)))))
489
(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
  ;; if necessary.  (As a special case, the property doesn't get set on an
  ;; empty line at EOB (there's no position to set the property on), but the
  ;; function returns the property value an EOL would have got.)
  ;;
  ;; When the current line is the last one and has no terminating EOL, a
  ;; newline is inserted temporarily so that the previous-line machinery can
  ;; be used.  It is removed again in an `unwind-protect' cleanup, so that an
  ;; error or a quit during the calculation cannot leave the stray newline
  ;; behind in the buffer.
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (end-of-line)                ; Necessary for the following test to work.
    (if (/= (forward-line) 1)
        ;; There is a following line: we're now at its start.
        (c-awk-get-NL-prop-prev-line do-lim)
      ;; We were on the last line: an artificial EOL is needed for comment
      ;; detection.
      (insert-char ?\n 1)
      (let ((nl-end (point)))    ; Position just after the artificial EOL.
        (unwind-protect
            (c-awk-get-NL-prop-prev-line do-lim)
          (delete-region (1- nl-end) nl-end))))))
d9e94c22 507
(defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
  ;; Return non-nil if the previous line ends inside an unfinished
  ;; statement, i.e. its c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((prev-prop (c-awk-get-NL-prop-prev-line do-lim)))
    (memq prev-prop '(?\\ ?\{))))
514
(defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
  ;; Return non-nil if the current line ends inside an unfinished
  ;; statement, i.e. its c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((cur-prop (c-awk-get-NL-prop-cur-line do-lim)))
    (memq cur-prop '(?\\ ?\{))))
521
d355a0b7
SM
522;; NOTES ON "VIRTUAL SEMICOLONS"
523;;
524;; A "virtual semicolon" is what terminates a statement when there is no ;
525;; or } to do the job. Like point, it is considered to lie _between_ two
526;; characters. As from mid-March 2004, it is considered to lie just after
527;; the last non-syntactic-whitespace character on the line; (previously, it
528;; was considered an attribute of the EOL on the line). A real semicolon
529;; never counts as a virtual one.
0386b551
AM
530
(defun c-awk-at-vsemi-p (&optional pos)
  ;; Return non-nil if there is a virtual semicolon at POS (default: point).
  (save-excursion
    (when pos (goto-char pos))
    (let ((target (point))
          nl-prop)
      ;; Scan from BOL over the line's content; only a position where that
      ;; scan stops can hold a virtual semicolon.
      (forward-line 0)
      (re-search-forward c-awk-one-line-non-syn-ws*-re)
      (when (eq (point) target)
        ;; Follow \-continued lines for as long as the continuation lines are
        ;; blank or comment-only; the last property fetched decides.
        (while (and (eq (setq nl-prop (c-awk-get-NL-prop-cur-line)) ?\\)
                    (eq (forward-line) 0)
                    (looking-at c-awk-blank-or-comment-line-re)))
        (eq nl-prop ?\$)))))
544
(defun c-awk-vsemi-status-unknown-p ()
  ;; Return non-nil if no c-awk-NL-prop is currently recorded on the current
  ;; line's EOL, i.e. we do not know whether the line has a virtual
  ;; semicolon.
  ;;
  ;; This must NEVER try to calculate the property.  The (admittedly kludgy)
  ;; purpose of the function is to prevent an infinite recursion in
  ;; c-beginning-of-statement-1 when point starts at a `while' token.
  (null (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
552
(defun c-awk-clear-NL-props (beg end)
  ;; Run from before-change-hooks.  Remove the c-awk-NL-prop text property
  ;; from BEG all the way to the end of the (widened) buffer, so that the
  ;; indentation engine never uses stale values.  END is ignored.
  ;;
  ;; This function might do hidden buffer changes.
  (save-restriction
    (widen)
    (let ((buffer-end (point-max)))
      (c-clear-char-properties beg buffer-end 'c-awk-NL-prop))))
563
(defun c-awk-unstick-NL-prop ()
  ;; Make the text property c-awk-NL-prop "non-sticky".  Otherwise a newline
  ;; inserted after an existing newline (e.g. by C-j) would inherit the old
  ;; newline's c-awk-NL-prop, which would be a Bad Thing.  This function's
  ;; action is required by c-put-char-property.
  (when (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
    (unless (assoc 'c-awk-NL-prop text-property-default-nonsticky)
      (setq text-property-default-nonsticky
            (cons '(c-awk-NL-prop . t) text-property-default-nonsticky)))))
573
574;; The following is purely a diagnostic command, to be commented out of the
575;; final release. ACM, 2002/6/1
576;; (defun NL-props ()
577;; (interactive)
578;; (let (pl-prop cl-prop)
579;; (message "Prev-line: %s Cur-line: %s"
580;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
581;; (char-to-string pl-prop)
582;; "nil")
583;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
584;; (char-to-string cl-prop)
585;; "nil"))))
586;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
587;for now. In the byte compiled version, this causes things to crash because
588;awk-mode-map isn't yet defined. :-(
589
590;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
0386b551 591\f
d9e94c22
MS
592;; The following section of the code is to do with font-locking. The biggest
593;; problem for font-locking is deciding whether a / is a regular expression
594;; delimiter or a division sign - determining precisely where strings and
595;; regular expressions start and stop is also troublesome. This is the
596;; purpose of the function c-awk-set-syntax-table-properties and the myriad
597;; elisp regular expressions it uses.
598;;
599;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
600;; for font-locking unterminated strings (i.e. font-locking the buffer up to
601;; the next string delimiter as a string) was inappropriate. Instead,
602;; unbalanced string/regexp delimiters are given the warning font, being
603;; refonted with the string font as soon as the matching delimiter is entered.
604;;
605;; This requires the region processed by the current font-lock after-change
606;; function to have access to the start of the string/regexp, which may be
607;; several lines back. The elisp "advice" feature is used on these functions
608;; to allow this.
609
(defun c-awk-beginning-of-logical-line (&optional pos)
  ;; Move point to the start of the (apparent) logical line containing point,
  ;; or containing POS if that is given, and return that buffer position.
  ;; I.e. go back to the nearest line start which isn't preceded by an
  ;; escaped EOL.
  ;;
  ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
  ;; comment, string or regexp.  IT MAY WELL BE that this function should not
  ;; be executed on a narrowed buffer.
  ;;
  ;; This function might do hidden buffer changes.
  (when pos (goto-char pos))
  (forward-line 0)
  ;; Keep stepping back a line while the EOL before point follows a \.
  (while (and (not (bobp))
              (eq (char-before (1- (point))) ?\\))
    (forward-line -1))
  (point))
626
88a46e21
AM
(defun c-awk-beyond-logical-line (&optional pos)
  ;; Return the position just beyond the (apparent) logical line containing
  ;; point, or containing POS if that is given.  Usually this is the start of
  ;; the next line which doesn't follow an escaped EOL; at EOB it is EOB.
  ;;
  ;; Point is unchanged.
  ;;
  ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
  ;; comment, string or regexp.  IT MAY WELL BE that this function should not
  ;; be executed on a narrowed buffer.
  (save-excursion
    (when pos (goto-char pos))
    (end-of-line)
    ;; Move to the end of the next line for as long as the EOL at point is
    ;; preceded by a \.
    (while (and (not (eobp))
                (eq (char-before) ?\\))
      (end-of-line 2))
    ;; Step over the terminating EOL, unless we are already at EOB.
    (min (1+ (point)) (point-max))))
d9e94c22 646
d9e94c22
MS
647;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
648;; on strings/regexps which are missing their closing delimiter.
649;; 2002/4/28. The default syntax for / has been changed from "string" to
650;; "punctuation", to reduce hassle when this character appears within a string
651;; or comment.
652
(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
  ;; BEG and END bracket a (possibly unterminated) string or regexp.  The
  ;; opening delimiter is after BEG, and the closing delimiter, IF ANY, is
  ;; AFTER END.  Set the appropriate syntax-table properties on the
  ;; delimiters and contents of this string/regexp.
  ;;
  ;; "String" here can also mean a gawk 3.1 "localizable" string which starts
  ;; with _".  In this case, we step over the _ and ignore it; it will get
  ;; its font from an entry in awk-font-lock-keywords.
  ;;
  ;; If the closing delimiter is missing (i.e., there is an EOL there) set
  ;; the STRING-FENCE property on the opening " or / and closing EOL.
  ;;
  ;; This function does hidden buffer changes.
  (when (eq (char-after beg) ?_)
    (setq beg (1+ beg)))

  ;; Properties on the delimiters.  (15) is "string fence", (7) is "string".
  (let ((opener (char-after beg)))
    (cond
     ;; Terminated by EOB: only the opener can be marked.
     ((eq end (point-max))
      (c-put-char-property beg 'syntax-table '(15)))
     ;; Missing end delimiter: fence both the opener and the EOL.
     ((/= opener (char-after end))
      (c-put-char-property beg 'syntax-table '(15))
      (c-put-char-property end 'syntax-table '(15)))
     ;; Properly bracketed regexp.
     ((eq opener ?/)
      (c-put-char-property beg 'syntax-table '(7))
      (c-put-char-property end 'syntax-table '(7)))))
  ;; (A properly bracketed string needs nothing doing to its delimiters.)

  ;; Any "s between the delimiters are escaped ones: make them punctuation.
  (save-excursion
    (goto-char (1+ beg))
    (or (eobp)
        (while (search-forward "\"" end t)
          (c-put-char-property (1- (point)) 'syntax-table '(1))))))
d9e94c22
MS
685
(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " (or at the _ of a _") of a string.  Put the
  ;; syntax-table properties on this string and leave point just after it.
  ;;
  ;; Return t if a / immediately after the string would be a division sign,
  ;; nil if it would be a regexp opener.
  ;;
  ;; This function does hidden buffer changes.
  (re-search-forward c-awk-string-without-end-here-re nil t) ; a (possibly unterminated) string
  (c-awk-set-string-regexp-syntax-table-properties
   (match-beginning 0) (match-end 0))
  (let ((terminated (looking-at "\"")))
    ;; Step over the closing " or, for an unterminated string, over the EOL.
    ;; For an unterminated string at EOB there is nothing to step over.
    (when (or terminated (looking-at "[\n\r]"))
      (forward-char))
    ;; Only after a properly closed string is a / a division sign: in AWK,
    ;; ("15" / 5) gives 3 ;-).  After an EOL, a / would start a regexp.
    terminated))
704
(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Work out whether it is a division sign or a regexp
  ;; opener and, in the latter case, put syntax-table properties on the whole
  ;; regexp.  Point is left immediately after the division sign or the
  ;; regexp, as the case may be.
  ;;
  ;; ANCHOR-STATE-/DIV says whether a / at ANCHOR would have been a division
  ;; sign (value t) or a regexp opener (value nil).  The idea is that we
  ;; analyze the line from ANCHOR up to point to determine what the / at
  ;; point is.
  ;;
  ;; The result is what ANCHOR-STATE-/DIV (see above) is at the place where
  ;; point is left.
  ;;
  ;; This function does hidden buffer changes.
  (let* ((slash-pos (point))
         (search-limit (1+ slash-pos)))
    (goto-char anchor)
    (cond
     ;; Analyze the line to find out what the / is.
     ((if anchor-state-/div
          (not (re-search-forward c-awk-regexp-sign-re search-limit t))
        (re-search-forward c-awk-div-sign-re search-limit t))
      ;; A division sign: just step past it.
      (goto-char search-limit)
      nil)
     (t
      ;; A regexp opener.  Jump over the regexp innards, setting the match
      ;; data.
      (goto-char slash-pos)
      (re-search-forward c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      (let ((closed (looking-at "/")))
        ;; Step over the terminating / or, for an incomplete regexp, over
        ;; the EOL.  An unterminated regexp at EOB leaves nothing to skip.
        (when (or closed (looking-at "[\n\r]"))
          (forward-char))
        ;; After an EOL, a / on the next line would start another regexp.
        closed)))))
740
(defun c-awk-set-syntax-table-properties (lim)
  ;; Scan the buffer text between point and LIM, setting (and clearing) the
  ;; syntax-table property where necessary.
  ;;
  ;; This function is designed to be called as the FUNCTION in a MATCHER in
  ;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
  ;; repeated calls from font-lock: see elisp info page "Search-based
  ;; Fontification").  It also gets called, with a bit of glue, from
  ;; after-change-functions when font-lock isn't active.  Point is left
  ;; "undefined" after this function exits.  THE BUFFER SHOULD HAVE BEEN
  ;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
  ;;
  ;; The syntax-table property needs to be set/cleared on:
  ;; (i) / - it is set to "string" on a / which is the opening or closing
  ;;   delimiter of a properly terminated regexp (and left unset on a
  ;;   division sign).
  ;; (ii) the opener of an unterminated string/regexp: the property "generic
  ;;   string delimiter" goes on both the opening " or / and the end of the
  ;;   line where the closing delimiter is missing.
  ;; (iii) "s inside strings/regexps (these will all be escaped "s).  They
  ;;   are given the property "punctuation".  This will later allow other
  ;;   routines to use the regexp "\\S\"*" to skip over the string innards.
  ;; (iv) Inside a comment, all syntax-table properties are cleared.
  ;;
  ;; This function does hidden buffer changes.
  (let ((div-state nil))      ; t means a following / would be a div sign.
    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
    (c-clear-char-properties (point) lim 'syntax-table)
    ;; Once round the next loop for each string, regexp, or div sign.
    (while (progn
             ;; Skip any "harmless" lines before the next tricky one.
             (when (re-search-forward c-awk-harmless-lines+-here-re nil t)
               (setq div-state nil))
             (< (point) lim))
      (let ((anchor (point)))
        (re-search-forward c-awk-harmless-string*-here-re nil t)
        ;; We are now looking at either a " or a /.  Do our thing on the
        ;; string, regexp or division sign.
        (setq div-state
              (if (looking-at "_?\"")
                  (c-awk-syntax-tablify-string)
                (c-awk-syntax-tablify-/ anchor div-state)))))
    nil))
785
d9e94c22
MS
786;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
787;; the syntax-table properties even when font-lock isn't enabled, for the
788;; subsequent use of movement functions, etc. However, it seems that if font
789;; lock _is_ enabled, we can always leave it to do the job.
88a46e21
AM
(defvar c-awk-old-ByLL 0)
(make-variable-buffer-local 'c-awk-old-ByLL)
;; The position just beyond the logical line containing the end of the region
;; which is about to be changed.  Set in c-awk-record-region-clear-NL and read
;; back in c-awk-end-of-change-region.  The symbol handed to
;; `make-variable-buffer-local' must be spelt exactly like the variable
;; (symbol names are case-sensitive), otherwise the value is shared between
;; all buffers.
d9e94c22 794
(defun c-awk-record-region-clear-NL (beg end)
  ;; This function is called exclusively from the before-change-functions
  ;; hook.  It does two things: it records, in c-awk-old-ByLL, the position
  ;; just beyond the (logical) line on which END lies, and it clears the
  ;; c-awk-NL-prop text properties from END up to the end of the buffer.
  ;; BEG is ignored.
  ;;
  ;; On entry, the buffer will have been widened and match-data will have
  ;; been saved; point is undefined on both entry and exit; the return value
  ;; is ignored.
  ;;
  ;; This function does hidden buffer changes.
  (c-save-buffer-state ()
    (setq c-awk-old-ByLL (c-awk-beyond-logical-line end))
    (c-awk-clear-NL-props end (point-max))))
d9e94c22
MS
810
(defun c-awk-end-of-change-region (beg end old-len)
  ;; Find the end of the region which needs to be font-locked after a change.
  ;; This is the end of the logical line on which the change happened, either
  ;; as it was before the change (shifted by the amount the buffer grew or
  ;; shrank), or as it is now, whichever is later.
  ;; N.B. point is left undefined.
  (let* ((growth (- end beg))           ; length of the new text
         (old-limit (+ (- c-awk-old-ByLL old-len) growth))
         (new-limit (c-awk-beyond-logical-line end)))
    (max old-limit new-limit)))
d9e94c22
MS
818
819;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region
820;; specified by the font-lock after-change function must be expanded to
821;; include ALL of any string or regexp within the region. The simplest way to
822;; do this in practice is to use the beginning/end-of-logical-line functions.
823;; Don't overlook the possibility of the buffer change being the "recapturing"
824;; of a previously escaped newline.
88a46e21 825
0d26e0b6 826;; ACM 2008-02-05:
88a46e21
AM
(defun c-awk-extend-and-syntax-tablify-region (beg end old-len)
  ;; Widen the region (BEG END) as needed, storing the result in
  ;; (c-new-BEG c-new-END), then put `syntax-table' properties on that
  ;; region.
  ;;
  ;; This function is called from an after-change function, BEG, END and
  ;; OLD-LEN being the standard parameters.
  ;;
  ;; Point is undefined both before and after this function call, the buffer
  ;; has been widened, and match-data saved.  The return value is ignored.
  ;;
  ;; It prepares the buffer for font locking, hence must get called before
  ;; `font-lock-after-change-function'.
  ;;
  ;; This function is the AWK value of `c-before-font-lock-function'.
  ;; It does hidden buffer changes.
  (c-save-buffer-state ()
    ;; The end is computed first, then the beginning, as before.
    (setq c-new-END (c-awk-end-of-change-region beg end old-len)
          c-new-BEG (c-awk-beginning-of-logical-line beg))
    (goto-char c-new-BEG)
    (c-awk-set-syntax-table-properties c-new-END)))
d9e94c22 847
3c0ab532
AM
848;; Awk regexps written with help from Peter Galbraith
849;; <galbraith@mixing.qc.dfo.ca>.
c7015153 850;; Take GNU Emacs's 'words out of the following regexp-opts. They don't work
0d26e0b6 851;; in XEmacs 21.4.4. acm 2002/9/19.
3c0ab532
AM
(defconst awk-font-lock-keywords
  (eval-when-compile
    (list
     ;; Function definitions: the "func"/"function" keyword and, when
     ;; present, the name which follows it.
     '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?"
       (1 font-lock-keyword-face)
       (3 font-lock-function-name-face nil t))

     ;; Variable names.
     (cons
      (concat
       "\\<"
       (regexp-opt
        '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON" "ERRNO"
          "FIELDWIDTHS" "FILENAME" "FNR" "FS" "IGNORECASE" "LINT" "NF"
          "NR" "OFMT" "OFS" "ORS" "PROCINFO" "RLENGTH" "RS" "RSTART"
          "RT" "SUBSEP" "TEXTDOMAIN")
        t)
       "\\>")
      'font-lock-variable-name-face)

     ;; Special file names.  (acm, 2002/7/22)
     ;; The regexp below started life as the value, in GNU Emacs 21.1, of
     ;;   (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n"
     ;;                 "/dev/pid" "/dev/ppid" "/dev/pgrpid" "/dev/user")
     ;;               'words)
     ;; It was then edited by hand:
     ;;   - the "?:" was removed from each "\\(?:" (for backward
     ;;     compatibility with older Emacsen);
     ;;   - the "n" in "dev/fd/n" was replaced by "[0-9]+";
     ;;   - the unwanted \\< at the beginning was removed;
     ;;   - it was filled out so that a " must come before, and either a " or
     ;;     heuristic stuff after.
     ;; The surrounding quotes are fontified along with the filename, since,
     ;; semantically, they are an indivisible unit.
     '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\
std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
       (1 font-lock-variable-name-face t)
       (8 font-lock-variable-name-face t t))

     ;; The same treatment (almost) for
     ;;   (regexp-opt '("/inet/tcp/lport/rhost/rport"
     ;;                 "/inet/udp/lport/rhost/rport"
     ;;                 "/inet/raw/lport/rhost/rport") 'words)
     ;; This is kept apart from the /dev/ pattern because, combined, the
     ;; match number for the (optional) closing \" would exceed 9.
     '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
       (1 font-lock-variable-name-face t)
       (6 font-lock-variable-name-face t t))

     ;; Keywords.
     (concat
      "\\<"
      (regexp-opt
       '("BEGIN" "END" "break" "case" "continue" "default" "delete" "do"
         "else" "exit" "for" "getline" "if" "in" "next" "nextfile"
         "return" "switch" "while")
       t)
      "\\>")

     ;; Builtins.  The face is taken from `c-preprocessor-face-name', so the
     ;; entry is wrapped in (eval . ...) to have that variable looked up
     ;; when the keywords are used rather than here.
     `(eval . (list
               ,(concat
                 "\\<"
                 (regexp-opt
                  '("adump" "and" "asort" "atan2" "bindtextdomain" "close"
                    "compl" "cos" "dcgettext" "exp" "extension" "fflush"
                    "gensub" "gsub" "index" "int" "length" "log" "lshift"
                    "match" "mktime" "or" "print" "printf" "rand" "rshift"
                    "sin" "split" "sprintf" "sqrt" "srand" "stopme"
                    "strftime" "strtonum" "sub" "substr" "system"
                    "systime" "tolower" "toupper" "xor")
                  t)
                 "\\>")
               0 c-preprocessor-face-name))

     ;; gawk debugging keywords.  (acm, 2002/7/21)
     ;; (Removed, 2003/6/6.  These functions are now fontified as built-ins)
     ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>")
     ;;       0 'font-lock-warning-face)

     ;; A user defined function name followed by an apparently spurious
     ;; space before the opening parenthesis.  acm, 2002/5/30.
     `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s "
                c-awk-escaped-nls*-with-space* "(")
       (0 'font-lock-warning-face))

     ;; Space after \ in what looks like an escaped newline.  2002/5/31
     '("\\\\\\s +$" 0 font-lock-warning-face t)

     ;; Unbalanced string (") or regexp (/) delimiters.  2002/02/16.
     '("\\s|" 0 font-lock-warning-face t nil)

     ;; gawk 3.1 localizable strings ( _"translate me!").  2002/5/21
     '("\\(_\\)\\s|" 1 font-lock-warning-face)
     '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs. 2002/10/6
     ))
  "Default expressions to highlight in AWK mode.")
0386b551
AM
938\f
939;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e
d9e94c22 940
0386b551
AM
941;; The following three regexps differ from those earlier on in cc-awk.el in
942;; that they assume the syntax-table properties have been set. They are thus
943;; not useful for code which sets these properties.
;; Matches a terminated string/regexp starting at point: an opening
;; character with "string" syntax, any number of characters without it, and
;; the closing one.
(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"")

;; Matches an unterminated string/regexp starting at point, NOT including the
;; eol at the end.
(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$")

;; Matches a (possibly empty) run of "harmless" characters in a pattern
;; and/or escaped character pairs.
(defconst c-awk-harmless-pattern-characters*
  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
953
0386b551
AM
(defun c-awk-at-statement-end-p ()
  ;; Point is not inside a comment or string.  Is it AT the end of a
  ;; statement?  This means immediately after the last non-ws character of
  ;; the statement.  The caller is responsible for widening the buffer, if
  ;; appropriate.
  (unless (bobp)
    (save-excursion
      ;; Everything below looks at the character just before the original
      ;; point.
      (backward-char)
      (cond
       ;; A closing brace or a semicolon always ends a statement.
       ((looking-at "[};]"))
       ;; Otherwise the line's NL property must be ?\$ or ?\\, and only
       ;; escaped newlines and whitespace may come between that character
       ;; and the end of the line or a comment.
       ((memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\))
        (looking-at
         (eval-when-compile
           (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space*
                   "[#\n\r]"))))))))
968
d9e94c22
MS
(defun c-awk-beginning-of-defun (&optional arg)
  "Move backward to the beginning of an AWK \"defun\".
With ARG, do it that many times.  Negative arg -N means move forward to
Nth following beginning of defun.  Returns t unless search stops due to
beginning or end of buffer.

By a \"defun\" is meant either a pattern-action pair or a function.  The
start of a defun is recognized as code starting at column zero which is
neither a closing brace nor a comment nor a continuation of the previous
line.  Unlike in some other modes, having an opening brace at column 0
is neither necessary nor helpful.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state                ; ensures the buffer is writable.
        nil
      (let ((candidate-re "^[^#} \t\n\r]") ; code starting in column 0
            (found t))     ; Has the most recent regexp search found b-of-defun?
        (cond
         ((>= arg 0)
          ;; Go back one defun each time round the following loop.  (For +ve
          ;; arg)
          (while (and found (> arg 0) (not (bobp)))
            ;; Go back one "candidate" each time round the next loop until
            ;; one is genuinely a beginning-of-defun.
            (while (and (setq found (re-search-backward
                                     candidate-re (point-min) 'stop-at-limit))
                        (not (memq (c-awk-get-NL-prop-prev-line)
                                   '(?\$ ?\} ?\#)))))
            (setq arg (1- arg))))
         (t
          ;; The same for a -ve arg, this time searching forwards.
          (unless (eobp)
            (forward-char 1))
          (while (and found (< arg 0) (not (eobp)))
            (while (and (setq found (re-search-forward
                                     candidate-re (point-max) 'stop-at-limit))
                        (not (memq (c-awk-get-NL-prop-prev-line)
                                   '(?\$ ?\} ?\#)))))
            (setq arg (1+ arg)))
          ;; The forward search leaves point after the matched character;
          ;; go back to the start of the match.
          ;; NOTE(review): if the loop above never ran (point was already at
          ;; EOB), FOUND is still t and this uses whatever match data was
          ;; current on entry -- confirm that this is intended.
          (when found
            (goto-char (match-beginning 0)))))
        (eq arg 0)))))
1006
(defun c-awk-forward-awk-pattern ()
  ;; Point is at the start of an AWK pattern (which may be null) or function
  ;; declaration.  Move to the pattern's end, and past any trailing space or
  ;; comment.  Typically, we stop at the { which denotes the corresponding
  ;; AWK action/function body.  Otherwise we stop at the EOL (or ;) marking
  ;; the absence of an explicit action.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((more t))
    (while more
      (re-search-forward c-awk-harmless-pattern-characters*)
      ;; A comment runs to the end of the line; skip it whole.
      (when (looking-at "#")
        (end-of-line))
      (setq more
            (cond
             ;; End of buffer, or the { or ; we were after: we've finished!
             ((or (eobp) (looking-at "[{;]")) nil)
             ;; At an EOL, carry on only if the line is continued.
             ((eolp)
              (and (c-awk-cur-line-incomplete-p)
                   (forward-line)))     ; returns non-nil
             ;; Step over a complete string/regexp ...
             ((re-search-forward c-awk-terminated-regexp-or-string-here-re nil t))
             ;; ... or over an unterminated one.
             ((re-search-forward c-awk-unterminated-regexp-or-string-here-re nil t))
             ;; What remains is a division sign.
             ((looking-at "/")
              (forward-char)
              t))))))
1029
(defun c-awk-end-of-defun1 ()
  ;; Point is at the start of a "defun".  Move to its end and return the end
  ;; position.
  ;;
  ;; This function might do hidden buffer changes.
  (c-awk-forward-awk-pattern)
  (cond
   ;; An explicit action or function body: jump over the brace block.
   ((looking-at "{")
    (goto-char (scan-sexps (point) 1)))
   ;; A pattern terminated by a semicolon.
   ((looking-at ";")
    (forward-char))
   ;; The only other legitimate stopping place is an EOL, where we stay put.
   ((not (eolp))
    (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern")))
  (point))
1041
(defun c-awk-beginning-of-defun-p ()
  ;; Are we already at the beginning of a defun?  (I.e. at code in column 0
  ;; which isn't a }, and isn't a continuation line of any sort.)
  ;;
  ;; This function might do hidden buffer changes.
  (when (looking-at "^[^#} \t\n\r]")
    (not (c-awk-prev-line-incomplete-p))))
1049
(defun c-awk-end-of-defun (&optional arg)
  "Move forward to next end of defun.  With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.

An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state
        nil
      (let ((start-point (point))
            end-point)
        (cond
         ;; Strategy for +ve ARG: if we're not already at a
         ;; beginning-of-defun, move backwards to one.  Then repeat
         ;;   (i) move forward to the end of the current defun;
         ;;   (ii) if this isn't it, move forward to the next
         ;;        beginning-of-defun.
         ;; ARG is counted down only once step (i) has passed the original
         ;; point.
         ((> arg 0)
          ;; Try to get to a beginning-of-defun at or before point.  When
          ;; there isn't one, take the first one after the starting position
          ;; instead; if that fails too, we're at EOB, tough!
          (unless (or (c-awk-beginning-of-defun-p)
                      (c-awk-beginning-of-defun 1))
            (goto-char start-point)
            (c-awk-beginning-of-defun -1))
          ;; Now count forward, one defun at a time.
          (while (and (not (eobp))
                      (c-awk-end-of-defun1)
                      (if (> (point) start-point)
                          (setq arg (1- arg))
                        t)
                      (> arg 0)
                      (c-awk-beginning-of-defun -1))))

         ;; -ve ARG: walk backwards over the beginnings of defuns, counting
         ;; ARG up only for those defuns which end before the original point.
         ((< arg 0)
          (setq end-point start-point)
          (while (and (not (bobp))
                      (c-awk-beginning-of-defun 1)
                      (if (< (setq end-point
                                   (if (bobp)
                                       (point)
                                     (save-excursion (c-awk-end-of-defun1))))
                             start-point)
                          (setq arg (1+ arg))
                        t)
                      (< arg 0)))
          ;; Never finish beyond where we started.
          (goto-char (min start-point end-point))))))))
1094
0386b551 1095\f
d9e94c22 1096(cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21
ab5796a9 1097
;;; cc-awk.el ends here