Switch to recommended form of GPLv3 permissions notice.
[bpt/emacs.git] / lisp / progmodes / cc-awk.el
CommitLineData
d9e94c22
MS
1;;; cc-awk.el --- AWK specific code within cc-mode.
2
92ab3834 3;; Copyright (C) 1988, 1994, 1996, 2000, 2001, 2002, 2003, 2004, 2005,
4e643dd2 4;; 2006, 2007, 2008 Free Software Foundation, Inc.
d9e94c22 5
3efc2cd7 6;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
d9e94c22
MS
7;; Maintainer: FSF
8;; Keywords: AWK, cc-mode, unix, languages
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
1a484753 14;; the Free Software Foundation; either version 3, or (at your option)
d9e94c22
MS
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
0386b551 23;; along with this program; see the file COPYING. If not, write to the
3a35cf56
LK
24;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25;; Boston, MA 02110-1301, USA.
d9e94c22
MS
26
27;;; Commentary:
28
29;; This file contains (most of) the adaptations to cc-mode required for the
30;; integration of AWK Mode.
0386b551 31;; It is organised thusly, the sections being separated by page breaks:
d9e94c22 32;; 1. The AWK Mode syntax table.
0386b551
AM
33;; 2. Regular expressions for analysing AWK code.
34;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property").
3c0ab532 35;; 4. Syntax-table property/font-locking stuff, including the
d9e94c22 36;; font-lock-keywords setting.
0386b551
AM
37;; 5. The AWK Mode before/after-change-functions.
38;; 6. AWK Mode specific versions of commands like beginning-of-defun.
d9e94c22
MS
39;; The AWK Mode keymap, abbreviation table, and the mode function itself are
40;; in cc-mode.el.
41
42;;; Code:
43
44(eval-when-compile
45 (let ((load-path
46 (if (and (boundp 'byte-compile-dest-file)
47 (stringp byte-compile-dest-file))
48 (cons (file-name-directory byte-compile-dest-file) load-path)
49 load-path)))
50 (load "cc-bytecomp" nil t)))
51
52(cc-require 'cc-defs)
53
54;; Silence the byte compiler.
55(cc-bytecomp-defvar font-lock-mode) ; Checked with boundp before use.
88a46e21
AM
56(cc-bytecomp-defvar c-new-BEG)
57(cc-bytecomp-defvar c-new-END)
d9e94c22
MS
58
59;; Some functions in cc-engine that are used below. There's a cyclic
60;; dependency so it can't be required here. (Perhaps some functions
61;; could be moved to cc-engine to avoid it.)
62(cc-bytecomp-defun c-backward-token-1)
63(cc-bytecomp-defun c-beginning-of-statement-1)
64(cc-bytecomp-defun c-backward-sws)
65
(defvar awk-mode-syntax-table
  (let ((table (make-syntax-table))
        ;; (CHARACTER . SYNTAX-DESCRIPTOR) pairs, applied in list order.
        (entries '((?\\ . "\\")         ; backslash is the escape character
                   ;; A comment runs from # up to the end of the line.
                   (?\n . "> ")
                   (?\r . "> ")
                   (?\f . "> ")
                   (?\# . "< ")
                   ;; / can delimit regexps or be a division operator.  By
                   ;; default we assume that it is a division sign, and fix
                   ;; the regexp operator cases with
                   ;; `font-lock-syntactic-keywords'.
                   (?/ . ".")           ; ACM 2002/4/27.
                   ;; The remaining operator characters are punctuation.
                   (?* . ".")
                   (?+ . ".")
                   (?- . ".")
                   (?= . ".")
                   (?% . ".")
                   (?< . ".")
                   (?> . ".")
                   (?& . ".")
                   (?| . ".")
                   ;; _ is a symbol constituent.
                   (?_ . "_")
                   (?\' . "."))))
    (while entries
      (modify-syntax-entry (car (car entries)) (cdr (car entries)) table)
      (setq entries (cdr entries)))
    table)
  "Syntax table in use in AWK Mode buffers.")
90
0386b551
AM
91\f
92;; This section defines regular expressions used in the analysis of AWK code.
93
94;; N.B. In the following regexps, an EOL is either \n OR \r. This is because
95;; Emacs has in the past used \r to mark hidden lines in some fashion (and
96;; maybe still does).
97
98(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
99;; Matches any escaped (with \) character-pair, including an escaped newline.
;; A lone \ at the very end of the buffer is matched too (the \\' branch).
100(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
101;; Matches any escaped (with \) character-pair, apart from an escaped newline.
;; A lone \ at the very end of the buffer is matched too.  N.B. `.' excludes
;; only \n, so a \ followed by \r is still matched by this regexp.
102(defconst c-awk-comment-without-nl "#.*")
103;; Matches an AWK comment, not including the terminating NL (if any). Note
104;; that the "enclosing" (elisp) regexp must ensure the # is real.
105(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
106;; Matches a newline (\n or \r), or the end of buffer.
107
108;; "Space" regular expressions.
109(eval-and-compile
110 (defconst c-awk-escaped-nl "\\\\[\n\r]"))
111;; Matches an escaped newline, i.e. a \ immediately followed by \n or \r.
ca331935
CY
112(eval-and-compile
113 (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*")))
0386b551
AM
114;; Matches a possibly empty sequence of escaped newlines. Used in
115;; awk-font-lock-keywords.
116;; (defconst c-awk-escaped-nls*-with-space*
117;; (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
118;; The above RE was very slow. Its runtime was doubling with each additional
119;; space :-( Reformulate it as below, with exactly one escaped newline or
;; one blank per iteration of the outer repetition:
120(eval-and-compile
121 (defconst c-awk-escaped-nls*-with-space*
122 (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))
123;; Matches a possibly empty sequence of escaped newlines with optional
124;; interspersed spaces and tabs. Used in awk-font-lock-keywords.
125(defconst c-awk-blank-or-comment-line-re
126 (concat "[ \t]*\\(#\\|\\\\?$\\)"))
127;; Matches (the tail of) a line containing at most either a comment or an
128;; escaped EOL.
129
130;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
131(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
132;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a
133;; localisation string in gawk 3.1
134(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
135;; Matches an underline NOT followed by ".  The character after the underline
;; (if there is one) is part of the match.
136(defconst c-awk-harmless-string*-re
137 (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
138;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
139;; #, or newlines.
140(defconst c-awk-harmless-string*-here-re
141 (concat "\\=" c-awk-harmless-string*-re))
142;; Matches the (possibly empty) sequence of chars without unescaped /, ", \,
143;; #, or newlines, at point.
144(defconst c-awk-harmless-line-re
145 (concat c-awk-harmless-string*-re
146 "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
147;; Matches (the tail of) an AWK "logical" line not containing an unescaped
148;; " or /. "logical" means "possibly containing escaped newlines". A comment
149;; is matched as part of the line even if it contains a " or a /. The End of
150;; buffer is also an end of line.
151(defconst c-awk-harmless-lines+-here-re
152 (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
153;; Matches a sequence of (at least one) "harmless-line" at point.
154
155
156;; REGEXPS FOR AWK STRINGS.
157(defconst c-awk-string-ch-re "[^\"\\\n\r]")
158;; Matches any character which can appear unescaped in a string, i.e.
;; anything but ", \, \n and \r.
159(defconst c-awk-string-innards-re
160 (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
161;; Matches the inside of an AWK string (i.e. without the enclosing quotes).
162(defconst c-awk-string-without-end-here-re
163 (concat "\\=_?\"" c-awk-string-innards-re))
164;; Matches an AWK string at point up to, but not including, any terminator.
165;; A gawk 3.1+ string may look like _"localisable string".
166(defconst c-awk-one-line-possibly-open-string-re
167 (concat "\"\\(" c-awk-string-ch-re "\\|" c-awk-non-eol-esc-pair-re "\\)*"
168 "\\(\"\\|\\\\?$\\|\\'\\)"))
;; Matches as much of an AWK string as fits on one line, possibly all of it:
;; the opening ", the contents, and then one of the closing ", an optional \
;; at the end of the line, or the end of the buffer.
169
170;; REGEXPS FOR AWK REGEXPS.
171(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
172;; Matches any AWK regexp character which doesn't require special analysis,
;; i.e. anything but [, /, \, \n and \r.
173(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
174;; Matches a (possibly empty) sequence of escaped newlines.
175
176;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
177;; list", and "[:alpha:]" inside a character list will be known as a
178;; "character class". The terms for these things vary between regexp
179;; descriptions.
180(defconst c-awk-regexp-char-class-re
181 "\\[:[a-z]+:\\]")
182 ;; Matches a character class spec (e.g. [:alpha:]).
183(defconst c-awk-regexp-char-list-re
184 (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
185 "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
186 "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
187;; Matches a regexp char list, up to (but not including) EOL if the ] is
188;; missing.  An optional ^ and an optional ] directly after the opening [
;; (each possibly preceded by escaped newlines) are taken as part of the list.
189(defconst c-awk-regexp-one-line-possibly-open-char-list-re
190 (concat "\\[\\]?\\(" c-awk-non-eol-esc-pair-re "\\|" "[^]\n\r]" "\\)*"
191 "\\(]\\|\\\\?$\\|\\'\\)"))
192;; Matches the head (or all) of a regexp char list, up to (but not
193;; including) the first EOL.
194(defconst c-awk-regexp-innards-re
195 (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re
196 "\\|" c-awk-regexp-normal-re "\\)*"))
197;; Matches the inside of an AWK regexp (i.e. without the enclosing /s).
198(defconst c-awk-regexp-without-end-re
199 (concat "/" c-awk-regexp-innards-re))
200;; Matches an AWK regexp up to, but not including, any terminating /.
201(defconst c-awk-one-line-possibly-open-regexp-re
202 (concat "/\\(" c-awk-non-eol-esc-pair-re
203 "\\|" c-awk-regexp-one-line-possibly-open-char-list-re
204 "\\|" c-awk-regexp-normal-re "\\)*"
205 "\\(/\\|\\\\?$\\|\\'\\)"))
206;; Matches as much of the head of an AWK regexp as fits on one line,
207;; possibly all of it.
208
209;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
210;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant
211;; whether a '/' at the current position would be a regexp opener or a
212;; division sign.
213(defconst c-awk-neutral-re
214; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
215 "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
216;; A "neutral" char(pair). Doesn't change the "state" of a subsequent /.
217;; This is space/tab, braces, an auto-increment/decrement operator or an
cb5bf6ba 218;; escaped character. Or one of the (invalid) characters @ or `. But NOT an
0386b551
AM
219;; end of line (even if escaped).
220(defconst c-awk-neutrals*-re
221 (concat "\\(" c-awk-neutral-re "\\)*"))
222;; A (possibly empty) string of neutral characters (or character pairs).
223(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
224;; Matches a sequence of one or more chars, each of which is a constituent of
;; a variable or number, or a ket
225;; (i.e. closing bracKET), round or square. Assume that all characters \x80 to
226;; \xff are "letters".
227(defconst c-awk-div-sign-re
228 (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
229;; Will match a piece of AWK buffer ending in / which is a division sign, in
230;; a context where an immediate / would be a regexp bracket. It follows a
231;; variable or number (with optional intervening "neutral" characters). This
232;; will only work when there won't be a preceding " or / before the sought /
233;; to foul things up.
234(defconst c-awk-non-arith-op-bra-re
235 "[[\(&=:!><,?;'~|]")
236;; Matches an opening BRAcket, round or square, or any operator character
237;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a
238;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
239;; and "--".
240(defconst c-awk-regexp-sign-re
241 (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
242;; Will match a piece of AWK buffer ending in / which is an opening regexp
243;; bracket, in a context where an immediate / would be a division sign. This
244;; will only work when there won't be a preceding " or / before the sought /
245;; to foul things up.
246
247;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
248(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
;; Matches any character but a #, /, ", \, newline, space or tab.  Unlike
;; c-awk-harmless-char-re, this one does match a _.
249;;;; NEW VERSION! (which will be restricted to the current line)
250(defconst c-awk-one-line-non-syn-ws*-re
251 (concat "\\([ \t]*"
252 "\\(" c-awk-_-harmless-nonws-char-re "\\|"
253 c-awk-non-eol-esc-pair-re "\\|"
254 c-awk-one-line-possibly-open-string-re "\\|"
255 c-awk-one-line-possibly-open-regexp-re
256 "\\)"
257 "\\)*"))
;; Matches a (possibly empty) sequence of items on one line, each item being
;; optional spaces/tabs followed by a harmless non-whitespace char, a
;; non-EOL escape pair, a (possibly open) string or a (possibly open) regexp.
;; The match never ends in a space or tab, and it stops in front of a
;; comment.  c-awk-at-vsemi-p searches for this from the beginning of a line.
258
259\f
d9e94c22 260;; ACM, 2002/5/29:
0386b551 261;;
d9e94c22
MS
262;; The next section of code is about determining whether or not an AWK
263;; statement is complete. We use this to indent the following line.
264;; The determination is pretty straightforward in C, where a statement ends
265;; with either a ; or a }. Only "while" really gives any trouble there, since
266;; it might be the end of a do-while. In AWK, on the other hand, semicolons
267;; are rarely used, and EOLs _usually_ act as "virtual semicolons". In
268;; addition, we have the complexity of escaped EOLs. The core of this
269;; analysis is in the middle of the function
270;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
271;;
272;; To avoid continually repeating this expensive analysis, we "cache" its
273;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
274;; the EOL (if any) which terminates that line. Should the property be
275;; required for the very last line (which has no EOL), it is calculated as
276;; required but not cached. The c-awk-NL-prop property should be thought of
277;; as only really valid immediately after a buffer change, not a permanently
278;; set property. (By contrast, the syntax-table text properties (set by an
279;; after-change function) must be constantly updated for the mode to work
280;; properly).
281;;
0386b551
AM
282;; This text property is also used for "syntactic whitespace" movement, this
283;; being where the distinction between the values '$' and '}' is significant.
284;;
d9e94c22
MS
285;; The valid values for c-awk-NL-prop are:
286;;
287;; nil The property is not currently set for this line.
288;; '#' There is NO statement on this line (at most a comment), and no open
289;; statement from a previous line which could have been completed on this
290;; line.
291;; '{' There is an unfinished statement on this (or a previous) line which
292;; doesn't require \s to continue onto another line, e.g. the line ends
293;; with {, or the && operator, or "if (condition)". Note that even if the
294;; newline is redundantly escaped, it remains a '{' line.
295;; '\' There is an escaped newline at the end of this line and this '\' is
296;; essential to the syntax of the program. (i.e. if it had been a
297;; frivolous \, it would have been ignored and the line been given one of
298;; the other property values.)
0386b551
AM
299;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual
300;; semicolon"). This might be a content-free line terminating a statement
301;; from the preceding (continued) line (which has property \).
302;; '}' A statement, being the last thing (aside from ws/comments) is
303;; explicitly terminated on this line by a closing brace (or sometimes a
304;; semicolon).
d9e94c22
MS
305;;
306;; This set of values has been chosen so that the property's value on a line
307;; is completely determined by the contents of the line and the property on
308;; the previous line, EXCEPT for where a "while" might be the closing
309;; statement of a do-while.
310
(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
  ;; Return non-nil if point is just after the ) which closes the condition
  ;; of an "if (...)", "for (...)" or "while (...)".
  ;;
  ;; The ) which ends "do .... while (<condition>)" is deliberately excluded:
  ;; this predicate exists essentially to decide whether the next line is to
  ;; be indented.
  ;;
  ;; DO-LIM is passed to `c-beginning-of-statement-1' and limits how far back
  ;; we search for the "do" of a possible do-while.
  ;;
  ;; This function might do hidden buffer changes.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; back over "(...)"
          (c-backward-token-1)          ; BOB isn't a problem.
          (cond
           ((looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)"))
           ((looking-at "while\\>\\([^_]\\|$\\)")
            ;; A "while" only counts when it is not the tail of a do-while.
            (not (eq (c-beginning-of-statement-1 do-lim)
                     'beginning)))))))))
333
(defun c-awk-after-function-decl-param-list ()
  ;; Return non-nil if point is just after the ) which closes the parameter
  ;; list in "function foo (bar)" (or "func foo (bar)").
  ;;
  ;; This function might do hidden buffer changes.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; back over "(...)"
          (c-backward-token-1)          ; BOB isn't a problem
          ;; We need an identifier here, with the keyword just before it.
          (when (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
            (c-backward-token-1)
            (looking-at "func\\(tion\\)?\\>")))))))
347
348;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code).
(defun c-awk-after-continue-token ()
  ;; Return non-nil if point is just after a token which allows the statement
  ;; to be continued onto the next line without a backslash: one of
  ;; , { ? : && || do else.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (c-backward-token-1)          ; FIXME 2002/10/27.  What if this fails?
    ;; c-backward-token-1 doesn't step back to the start of && or ||, so take
    ;; one more character back when we are looking at a & or a |.
    (when (and (looking-at "[&|]")
               (not (bobp)))
      (backward-char))
    (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
359
(defun c-awk-after-rbrace-or-statement-semicolon ()
  ;; Return non-nil if point is just after a } or after a ; which closes a
  ;; statement.  The ;s in the control part of a "for (...; ...; ...)" don't
  ;; count!
  ;;
  ;; This function might do hidden buffer changes.
  (let ((prev-char (char-before)))
    (cond
     ((eq prev-char ?\}))
     ((eq prev-char ?\;)
      (save-excursion
        (let ((container (c-safe (scan-lists (point) -1 1))))
          ;; With no enclosing bracket at all the result is nil.
          (when container
            (goto-char container)       ; go back to containing (
            (not (and (looking-at "(")
                      (c-backward-token-1) ; BOB isn't a problem
                      (looking-at "for\\>"))))))))))
375
376(defun c-awk-back-to-contentful-text-or-NL-prop ()
377 ;; Move back to just after the first found of either (i) an EOL which has
378 ;; the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
379 ;; We return either the value of c-awk-NL-prop (in case (i)) or nil.
0386b551 380 ;; Calling functions can best distinguish cases (ii) and (iii) with (bolp).
d9e94c22
MS
381 ;;
382 ;; Note that an escaped eol counts as whitespace here.
383 ;;
384 ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
385 ;; that the previous line contains an unterminated string (without \). In
0386b551
AM
386 ;; this case, assume that the previous line's c-awk-NL-prop is a $.
387 ;;
d9e94c22
MS
388 ;; POINT MUST BE AT THE START OF A LINE when calling this function. This
389 ;; is to ensure that the various backward-comment functions will work
390 ;; properly.
0386b551
AM
391 ;;
392 ;; This function might do hidden buffer changes.
d9e94c22
MS
;;
;; Implementation note: the `while' below has NO body.  All the work is done
;; by the conjuncts of its `and' condition, which are evaluated in order; the
;; loop stops as soon as one of them yields nil.
393 (let ((nl-prop nil)
394 bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
395 (while ;; We are at a BOL here. Go back one line each iteration.
396 (and
;; Conjunct 1: stop at BOB (case (iii)).
397 (not (bobp))
;; Conjunct 2: stop if the EOL just before point already carries the
;; property (case (i)); its value is left in nl-prop.
398 (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
;; Conjunct 3: go back over syntactic whitespace, but no further than the
;; beginning of the previous line.  If point didn't move, apply the kludge
;; described above: put $ on the preceding EOL and stop.
399 (progn (setq bol-pos (c-point 'bopl))
400 (setq bsws-pos (point))
401 ;; N.B. the following function will not go back past an EOL if
402 ;; there is an open string (without \) on the previous line.
0386b551
AM
403 ;; If we find such, set the c-awk-NL-prop on it, too
404 ;; (2004/3/29).
d9e94c22
MS
405 (c-backward-syntactic-ws bol-pos)
406 (or (/= (point) bsws-pos)
0386b551
AM
407 (progn (setq nl-prop ?\$)
408 (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
d9e94c22
MS
409 nil)))
410 ;; If we had a backslash at EOL, c-backward-syntactic-ws will
411 ;; have gone backwards over it. Check the backslash was "real".
;; Conjunct 4: its value is (bolp), so the loop carries on only when the
;; line just crossed had nothing but whitespace/comments on it.
412 (progn
413 (if (looking-at "[ \t]*\\\\+$")
414 (if (progn
415 (end-of-line)
416 (search-backward-regexp
417 "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
418 bol-pos t))
;; Odd number of \s: step back over the final \ and skip whitespace again.
419 (progn (end-of-line) ; escaped EOL.
420 (backward-char)
421 (c-backward-syntactic-ws bol-pos))
;; Even number of \s: they are text, so point stays at the end of the line.
422 (end-of-line))) ; The \ at eol is a fake.
423 (bolp))))
424 nl-prop))
425
426(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
427 ;; Calculate and set the value of the c-awk-NL-prop on the immediately
428 ;; preceding EOL. This may also involve doing the same for several
429 ;; preceding EOLs.
0386b551 430 ;;
d9e94c22
MS
431 ;; NOTE that if the property was already set, we return it without
432 ;; recalculation. (This is by accident rather than design.)
0386b551 433 ;;
d9e94c22
MS
434 ;; Return the property which got set (or was already set) on the previous
435 ;; line. Return nil if we hit BOB.
0386b551 436 ;;
d9e94c22 437 ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
0386b551
AM
438 ;;
439 ;; This function might do hidden buffer changes.
d9e94c22
MS
;;
;; Point and the match data are preserved.
440 (save-excursion
441 (save-match-data
442 (beginning-of-line)
;; POS is the BOL we started from; every EOL between the place we search
;; back to and POS gets its property set below.
443 (let* ((pos (point))
444 (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
445 ;; We are either (1) at a BOL (with nl-prop containing the previous
446 ;; line's c-awk-NL-prop) or (2) after contentful text on a line. At
447 ;; the BOB counts as case (1), so we test next for bolp rather than
448 ;; non-nil nl-prop.
449 (when (not (bolp))
;; Case (2): classify the contentful line point is on.  The order of the
;; `cond' clauses matters; the first one which matches wins.
450 (setq nl-prop
451 (cond
452 ;; Incomplete statement which doesn't require escaped EOL?
453 ((or (c-awk-after-if-for-while-condition-p do-lim)
454 (c-awk-after-function-decl-param-list)
455 (c-awk-after-continue-token))
456 ?\{)
457 ;; Escaped EOL (where there's also something to continue)?
458 ((and (looking-at "[ \t]*\\\\$")
459 (not (c-awk-after-rbrace-or-statement-semicolon)))
460 ?\\)
0386b551
AM
461 ;; A statement was completed on this line. How?
462 ((memq (char-before) '(?\; ?\})) ?\}) ; Real ; or }
463 (t ?\$))) ; A virtual semicolon.
d9e94c22
MS
;; Store the result on this line's EOL and move on to the next line.
464 (end-of-line)
465 (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
466 (forward-line))
467
468 ;; We are now at a (possibly empty) sequence of content-free lines.
469 ;; Set c-awk-NL-prop on each of these lines' EOL.
470 (while (< (point) pos) ; one content-free line each iteration.
471 (cond ; recalculate nl-prop from previous line's value.
0386b551 472 ((memq nl-prop '(?\} ?\$ nil)) (setq nl-prop ?\#))
d9e94c22 473 ((eq nl-prop ?\\)
0386b551 474 (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\$)))
d9e94c22
MS
475 ;; ?\# (empty line) and ?\{ (open stmt) don't change.
476 )
;; After `forward-line', the char before point is the EOL of the line just
;; processed.
477 (forward-line)
478 (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
479 nl-prop))))
480
(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
  ;; Return the c-awk-NL-prop text-property of the previous line.  A value
  ;; already stored on that line's EOL is used as it is; otherwise the
  ;; property is calculated now.  Return nil if we're already at BOB.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (unless (bobp)
    (let ((cached (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)))
      (or cached
          (c-awk-calculate-NL-prop-prev-line do-lim)))))
491
(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
  ;; if necessary.  (As a special case, the property doesn't get set on an
  ;; empty line at EOB (there's no position to set the property on), but the
  ;; function returns the property value an EOL would have got.)
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; When the current line is the last one and has no EOL, a newline is
  ;; inserted temporarily at EOB.  It is removed again inside an
  ;; `unwind-protect', so that it can't be left behind in the buffer should
  ;; the calculation exit non-locally.  The removal is done by position with
  ;; `delete-region' rather than with the interactive command
  ;; `delete-backward-char', so it doesn't depend on where point is.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (let ((extra-nl-pos nil))           ; position of the artificial EOL, if any.
      (end-of-line)             ; Necessary for the following test to work.
      (when (= (forward-line) 1)        ; if we were on the last line....
        (setq extra-nl-pos (point))
        (insert-char ?\n 1)) ; ...artificial eol is needed for comment detection.
      (unwind-protect
          (c-awk-get-NL-prop-prev-line do-lim)
        (when extra-nl-pos
          (delete-region extra-nl-pos (1+ extra-nl-pos)))))))
509
(defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
  ;; Return non-nil if the previous line ends in an incomplete statement,
  ;; i.e. if its c-awk-NL-prop is \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((prev-nl-prop (c-awk-get-NL-prop-prev-line do-lim)))
    (memq prev-nl-prop '(?\\ ?\{))))
516
(defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
  ;; Return non-nil if the current line ends in an incomplete statement,
  ;; i.e. if its c-awk-NL-prop is \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((cur-nl-prop (c-awk-get-NL-prop-cur-line do-lim)))
    (memq cur-nl-prop '(?\\ ?\{))))
523
0386b551
AM
524;;;; NOTES ON "VIRTUAL SEMICOLONS"
525;;;;
526;;;; A "virtual semicolon" is what terminates a statement when there is no ;
527;;;; or } to do the job. Like point, it is considered to lie _between_ two
528;;;; characters. As from mid-March 2004, it is considered to lie just after
529;;;; the last non-syntactic-whitespace character on the line; (previously, it
530;;;; was considered an attribute of the EOL on the line). A real semicolon
531;;;; never counts as a virtual one.
532
(defun c-awk-at-vsemi-p (&optional pos)
  ;; Return non-nil if there is a virtual semicolon at POS (or POINT).
  (save-excursion
    (when pos (goto-char pos))
    (let ((target (point))
          eol-prop)
      ;; From the start of the line, move over everything on it which isn't
      ;; syntactic whitespace.  A virtual semicolon can only be where that
      ;; search ends.
      (forward-line 0)
      (search-forward-regexp c-awk-one-line-non-syn-ws*-re)
      (when (eq (point) target)
        ;; Follow a chain of \ lines for as long as the next line is blank or
        ;; holds only a comment.  The loop has no body; `eol-prop' is left
        ;; holding the property of the last line examined.
        (while (and (eq (setq eol-prop (c-awk-get-NL-prop-cur-line)) ?\\)
                    (eq (forward-line) 0)
                    (looking-at c-awk-blank-or-comment-line-re)))
        (eq eol-prop ?\$)))))
546
(defun c-awk-vsemi-status-unknown-p ()
  ;; Return non-nil if we don't yet know whether there is a virtual semicolon
  ;; on the current line, i.e. if c-awk-NL-prop isn't set on its EOL.
  ;;
  ;; DO NOT under any circumstances attempt to calculate the property here;
  ;; that would defeat the (admittedly kludgey) purpose of this function,
  ;; which is to prevent an infinite recursion in c-beginning-of-statement-1
  ;; when point starts at a `while' token.
  (null (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
554
(defun c-awk-clear-NL-props (beg end)
  ;; Run from before-change-hooks.  Remove the c-awk-NL-prop text property
  ;; from BEG right up to the end of the (widened) buffer, so that the
  ;; indentation engine will never use stale values for this property.  The
  ;; END parameter is ignored.
  ;;
  ;; This function might do hidden buffer changes.
  (save-restriction
    (widen)
    (let ((limit (point-max)))
      (c-clear-char-properties beg limit 'c-awk-NL-prop))))
565
(defun c-awk-unstick-NL-prop ()
  ;; Make the text property c-awk-NL-prop "non-sticky".  Otherwise a new
  ;; newline inserted after an old newline (e.g. by C-j) would inherit any
  ;; c-awk-NL-prop from the old one, which would be a Bad Thing.  This
  ;; function's action is required by c-put-char-property.
  (when (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
    ;; Add our entry only if there isn't one for the property already.
    (unless (assoc 'c-awk-NL-prop text-property-default-nonsticky)
      (setq text-property-default-nonsticky
            (cons '(c-awk-NL-prop . t) text-property-default-nonsticky)))))
575
576;; The following is purely a diagnostic command, to be commented out of the
577;; final release. ACM, 2002/6/1
578;; (defun NL-props ()
579;; (interactive)
580;; (let (pl-prop cl-prop)
581;; (message "Prev-line: %s Cur-line: %s"
582;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
583;; (char-to-string pl-prop)
584;; "nil")
585;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
586;; (char-to-string cl-prop)
587;; "nil"))))
588;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
589;for now. In the byte compiled version, this causes things to crash because
590;awk-mode-map isn't yet defined. :-(
591
592;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
0386b551 593\f
d9e94c22
MS
594;; The following section of the code is to do with font-locking. The biggest
595;; problem for font-locking is deciding whether a / is a regular expression
596;; delimiter or a division sign - determining precisely where strings and
597;; regular expressions start and stop is also troublesome. This is the
598;; purpose of the function c-awk-set-syntax-table-properties and the myriad
599;; elisp regular expressions it uses.
600;;
601;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
602;; for font-locking unterminated strings (i.e. font-locking the buffer up to
603;; the next string delimiter as a string) was inappropriate. Instead,
604;; unbalanced string/regexp delimiters are given the warning font, being
605;; refonted with the string font as soon as the matching delimiter is entered.
606;;
607;; This requires the region processed by the current font-lock after-change
608;; function to have access to the start of the string/regexp, which may be
609;; several lines back. The elisp "advice" feature is used on these functions
610;; to allow this.
611
(defun c-awk-beginning-of-logical-line (&optional pos)
;; Move to the start of the (apparent) logical line containing point (or
;; containing POS, if given) and return that buffer position.  I.e., go back
;; to the last line which doesn't have an escaped EOL before it.
;;
;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
;; comment, string or regexp.  IT MAY WELL BE that this function should not be
;; executed on a narrowed buffer.
;;
;; This function might do hidden buffer changes.
  (when pos (goto-char pos))
  (forward-line 0)
  ;; We are at a BOL.  Whilst the character in front of the preceding EOL is
  ;; a \, that EOL is escaped, so the logical line starts further back.
  (while (and (not (bobp))
              (eq (char-after (- (point) 2)) ?\\))
    (forward-line -1))
  (point))
628
88a46e21
AM
(defun c-awk-beyond-logical-line (&optional pos)
;; Return the position just beyond the (apparent) logical line containing
;; point (or containing POS, if given).  This is usually the beginning of the
;; next line which doesn't follow an escaped EOL.  At EOB, this will be EOB.
;;
;; Point is unchanged.
;;
;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
;; comment, string or regexp.  IT MAY WELL BE that this function should not be
;; executed on a narrowed buffer.
  (save-excursion
    (when pos (goto-char pos))
    (end-of-line)
    ;; Whilst the EOL we are at is escaped, go on to the end of the next line.
    (while (and (not (eobp))
                (eq (char-before) ?\\))
      (end-of-line 2))
    ;; Step over the final EOL, unless we are already at EOB.
    (min (1+ (point)) (point-max))))
d9e94c22 648
d9e94c22
MS
649;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
650;; on strings/regexps which are missing their closing delimiter.
651;; 2002/4/28. The default syntax for / has been changed from "string" to
652;; "punctuation", to reduce hassle when this character appears within a string
653;; or comment.
654
(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
;; BEG and END bracket a (possibly unterminated) string or regexp.  The
;; opening delimiter is after BEG, and the closing delimiter, IF ANY, is AFTER
;; END.  Set the appropriate syntax-table properties on the delimiters and
;; contents of this string/regexp.
;;
;; "String" here can also mean a gawk 3.1 "localizable" string which starts
;; with _".  In this case, we step over the _ and ignore it; it will get its
;; font from an entry in awk-font-lock-keywords.
;;
;; If the closing delimiter is missing (i.e., there is an EOL there) set the
;; STRING-FENCE property on the opening " or / and closing EOL.
;;
;; This function does hidden buffer changes.
  (when (eq (char-after beg) ?_)
    (setq beg (1+ beg)))

  ;; First the delimiters.  (15) = "string fence", (7) = "string".
  (let ((opener (char-after beg)))
    (cond
     ;; String/regexp terminated by EOB: only the opener can be marked.
     ((eq end (point-max))
      (c-put-char-property beg 'syntax-table '(15)))
     ;; Missing end delimiter.
     ((/= opener (char-after end))
      (c-put-char-property beg 'syntax-table '(15))
      (c-put-char-property end 'syntax-table '(15)))
     ;; Properly bracketed regexp.
     ((eq opener ?/)
      (c-put-char-property beg 'syntax-table '(7))
      (c-put-char-property end 'syntax-table '(7)))
     ;; Properly bracketed string: nothing to do.
     ))

  ;; Now give every " found after the opener and before END punctuation
  ;; syntax, (1).
  (save-excursion
    (goto-char (1+ beg))
    (or (eobp)
        (while (search-forward "\"" end t)
          (c-put-char-property (1- (point)) 'syntax-table '(1))))))
d9e94c22
MS
687
(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " or _" of a string.  Set the syntax-table
  ;; properties on this string, leaving point just after the string (and
  ;; after the EOL, if that is what ended an unterminated string).
  ;;
  ;; The result is nil if a / immediately after the string would be a regexp
  ;; opener, t if it would be a division sign.
  ;;
  ;; This function does hidden buffer changes.
  (search-forward-regexp c-awk-string-without-end-here-re nil t) ; a (possibly unterminated) string
  (let ((string-beg (match-beginning 0))
        (string-end (match-end 0)))
    (c-awk-set-string-regexp-syntax-table-properties string-beg string-end))
  ;; TERMINATED is t for a closed string.  In AWK, ("15" / 5) gives 3 ;-)
  ;; Otherwise the string was cut off by an EOL (a / on the next line would
  ;; start a regexp) or by EOB; in both of those cases the result is nil.
  (let ((terminated (looking-at "\"")))
    (when (or terminated
              (looking-at "[\n\r]"))
      (forward-char))
    terminated))
706
(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Determine whether this is a division sign or a regexp
  ;; opener, and if the latter, apply syntax-table properties to the entire
  ;; regexp.  Point is left immediately after the division sign or regexp, as
  ;; the case may be.
  ;;
  ;; ANCHOR-STATE-/DIV identifies whether a / at ANCHOR would have been a
  ;; division sign (value t) or a regexp opener (value nil).  The idea is that
  ;; we analyse the line from ANCHOR up till point to determine what the / at
  ;; point is.
  ;;
  ;; The result is what ANCHOR-STATE-/DIV (see above) is where point is left.
  ;;
  ;; This function does hidden buffer changes.
  (let* ((slash-pos (point))
         (after-slash (1+ slash-pos))
         ;; Analyse the text from ANCHOR to find out what the / is.
         (div-sign-p
          (progn
            (goto-char anchor)
            (if anchor-state-/div
                (not (search-forward-regexp c-awk-regexp-sign-re after-slash t))
              (search-forward-regexp c-awk-div-sign-re after-slash t)))))
    (cond
     ;; A division sign: just step over it.
     (div-sign-p
      (goto-char after-slash)
      nil)
     ;; A regexp opener.
     (t
      ;; Jump over the regexp innards, setting the match data.
      (goto-char slash-pos)
      (search-forward-regexp c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      ;; TERMINATED is t when the closing / is present.  Otherwise the regexp
      ;; was cut off by an EOL (a / on the next line would start another
      ;; regexp) or by EOB, and the result is nil.
      (let ((terminated (looking-at "/")))
        (when (or terminated
                  (looking-at "[\n\r]"))
          (forward-char))
        terminated)))))
742
(defun c-awk-set-syntax-table-properties (lim)
  ;; Scan the buffer text between point and LIM, setting (and clearing) the
  ;; syntax-table property where necessary.
  ;;
  ;; This function is designed to be called as the FUNCTION in a MATCHER in
  ;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
  ;; repeated calls from font-lock: See elisp info page "Search-based
  ;; Fontification").  It also gets called, with a bit of glue, from
  ;; after-change-functions when font-lock isn't active.  Point is left
  ;; "undefined" after this function exits.  THE BUFFER SHOULD HAVE BEEN
  ;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
  ;;
  ;; We need to set/clear the syntax-table property on:
  ;; (i) / - It is set to "string" on a / which is the opening or closing
  ;;     delimiter of the properly terminated regexp (and left unset on a
  ;;     division sign).
  ;; (ii) the opener of an unterminated string/regexp, we set the property
  ;;     "generic string delimiter" on both the opening " or / and the end of
  ;;     the line where the closing delimiter is missing.
  ;; (iii) "s inside strings/regexps (these will all be escaped "s).  They are
  ;;     given the property "punctuation".  This will later allow other
  ;;     routines to use the regexp "\\S\"*" to skip over the string innards.
  ;; (iv) Inside a comment, all syntax-table properties are cleared.
  ;;
  ;; This function does hidden buffer changes.
  (let ((div-state nil)   ; t means a following / would be a div sign.
        anchor)
    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
    (c-clear-char-properties (point) lim 'syntax-table)
    ;; Once round the next loop for each string, regexp, or div sign.
    (while (progn
             ;; Skip any "harmless" lines before the next tricky one.
             (when (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
               (setq div-state nil))
             (< (point) lim))
      (setq anchor (point))
      (search-forward-regexp c-awk-harmless-string*-here-re nil t)
      ;; We are now looking at either a " or a /.
      ;; Do our thing on the string, regexp or division sign.
      (setq div-state
            (if (looking-at "_?\"")
                (c-awk-syntax-tablify-string)
              (c-awk-syntax-tablify-/ anchor div-state))))
    nil))
787
d9e94c22
MS
788;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
789;; the syntax-table properties even when font-lock isn't enabled, for the
790;; subsequent use of movement functions, etc. However, it seems that if font
791;; lock _is_ enabled, we can always leave it to do the job.
88a46e21
AM
(defvar c-awk-old-ByLL 0)
;; The symbol given here must be spelled exactly like the one in the defvar
;; (symbol names are case-sensitive); otherwise the variable stays global and
;; is shared between all AWK buffers.
(make-variable-buffer-local 'c-awk-old-ByLL)
;; Just beyond logical line following the region which is about to be changed.
;; Set in c-awk-record-region-clear-NL and used in c-awk-end-of-change-region.
d9e94c22 796
(defun c-awk-record-region-clear-NL (beg end)
  ;; This function is called exclusively from the before-change-functions
  ;; hook.  It does two things:
  ;;   o - records in `c-awk-old-ByLL' the position just beyond the (logical)
  ;;       line on which END lies;
  ;;   o - clears the c-awk-NL-prop text properties from END up to the end of
  ;;       the buffer.
  ;; BEG is ignored.
  ;;
  ;; On entry, the buffer will have been widened and match-data will have been
  ;; saved; point is undefined on both entry and exit; the return value is
  ;; ignored.
  ;;
  ;; This function does hidden buffer changes.
  (c-save-buffer-state nil
    (setq c-awk-old-ByLL (c-awk-beyond-logical-line end))
    (c-save-buffer-state ()
      (c-awk-clear-NL-props end (point-max)))))
d9e94c22
MS
812
(defun c-awk-end-of-change-region (beg end old-len)
  ;; Find the end of the region which needs to be font-locked after a change.
  ;; This is the end of the logical line on which the change happened, either
  ;; as it was before the change, or as it is now, whichever is later.
  ;; BEG, END and OLD-LEN are the standard after-change parameters.
  ;; N.B. point is left undefined.
  (let (;; The pre-change limit, shifted by the net size of the change
        ;; (inserted length minus deleted length).
        (old-limit-now (+ c-awk-old-ByLL (- end beg old-len)))
        (new-limit (c-awk-beyond-logical-line end)))
    (max old-limit-now new-limit)))
d9e94c22
MS
820
821;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region
822;; specified by the font-lock after-change function must be expanded to
823;; include ALL of any string or regexp within the region. The simplest way to
824;; do this in practice is to use the beginning/end-of-logical-line functions.
825;; Don't overlook the possibility of the buffer change being the "recapturing"
826;; of a previously escaped newline.
88a46e21
AM
827
828;; ACM 2008-02-05:
(defun c-awk-extend-and-syntax-tablify-region (beg end old-len)
  ;; Expand the region (BEG END) as needed to (c-new-BEG c-new-END) then put
  ;; `syntax-table' properties on this region.
  ;;
  ;; This function is called from an after-change function, BEG END and
  ;; OLD-LEN being the standard parameters.
  ;;
  ;; Point is undefined both before and after this function call, the buffer
  ;; has been widened, and match-data saved.  The return value is ignored.
  ;;
  ;; It prepares the buffer for font locking, hence must get called before
  ;; `font-lock-after-change-function'.
  ;;
  ;; This function is the AWK value of `c-before-font-lock-function'.
  ;; It does hidden buffer changes.
  (c-save-buffer-state nil
    ;; The end is calculated before the beginning, as in the original order.
    (setq c-new-END (c-awk-end-of-change-region beg end old-len)
          c-new-BEG (c-awk-beginning-of-logical-line beg))
    (goto-char c-new-BEG)
    (c-awk-set-syntax-table-properties c-new-END)))
d9e94c22 849
3c0ab532
AM
850;; Awk regexps written with help from Peter Galbraith
851;; <galbraith@mixing.qc.dfo.ca>.
852;; Take GNU Emacs's 'words out of the following regexp-opts. They don't work
853;; in XEmacs 21.4.4. acm 2002/9/19.
(defconst awk-font-lock-keywords
  (eval-when-compile
    ;; WHOLE-WORD-RE builds, from a list of strings, a regexp matching any one
    ;; of them as a complete word, with the word as a whole in a single group.
    (let ((whole-word-re
           (lambda (words)
             (concat "\\<" (regexp-opt words t) "\\>"))))
      (list
       ;; Function names.
       '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?"
         (1 font-lock-keyword-face) (3 font-lock-function-name-face nil t))

       ;; Variable names.
       (cons
        (funcall whole-word-re
                 '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON"
                   "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FS" "IGNORECASE"
                   "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PROCINFO" "RLENGTH"
                   "RS" "RSTART" "RT" "SUBSEP" "TEXTDOMAIN"))
        'font-lock-variable-name-face)

       ;; Special file names.  (acm, 2002/7/22)
       ;; The following regexp was created by first evaluating this in GNU
       ;; Emacs 21.1:
       ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n" "/dev/pid"
       ;;               "/dev/ppid" "/dev/pgrpid" "/dev/user") 'words)
       ;; , removing the "?:" from each "\\(?:" (for backward compatibility
       ;;   with older Emacsen)
       ;; , replacing the "n" in "dev/fd/n" with "[0-9]+"
       ;; , removing the unwanted \\< at the beginning, and finally filling
       ;;   out the regexp so that a " must come before, and either a " or
       ;;   heuristic stuff after.
       ;; The surrounding quotes are fontified along with the filename, since,
       ;; semantically, they are an indivisible unit.
       '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\
std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
         (1 font-lock-variable-name-face t)
         (8 font-lock-variable-name-face t t))
       ;; Do the same (almost) with
       ;; (regexp-opt '("/inet/tcp/lport/rhost/rport" "/inet/udp/lport/rhost/rport"
       ;;               "/inet/raw/lport/rhost/rport") 'words)
       ;; This cannot be combined with the above pattern, because the match
       ;; number for the (optional) closing \" would then exceed 9.
       '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\
\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
         (1 font-lock-variable-name-face t)
         (6 font-lock-variable-name-face t t))

       ;; Keywords.
       (funcall whole-word-re
                '("BEGIN" "END" "break" "continue" "delete" "do" "else"
                  "exit" "for" "getline" "if" "in" "next" "nextfile"
                  "return" "while"))

       ;; Builtins.
       `(eval . (list
                 ,(funcall whole-word-re
                           '("adump" "and" "asort" "atan2" "bindtextdomain" "close"
                             "compl" "cos" "dcgettext" "exp" "extension" "fflush"
                             "gensub" "gsub" "index" "int" "length" "log" "lshift"
                             "match" "mktime" "or" "print" "printf" "rand" "rshift"
                             "sin" "split" "sprintf" "sqrt" "srand" "stopme"
                             "strftime" "strtonum" "sub" "substr" "system"
                             "systime" "tolower" "toupper" "xor"))
                 0 c-preprocessor-face-name))

       ;; gawk debugging keywords.  (acm, 2002/7/21)
       ;; (Removed, 2003/6/6.  These functions are now fontified as built-ins)
       ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>")
       ;;       0 'font-lock-warning-face)

       ;; User defined functions with an apparent spurious space before the
       ;; opening parenthesis.  acm, 2002/5/30.
       `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s "
                  c-awk-escaped-nls*-with-space* "(")
         (0 'font-lock-warning-face))

       ;; Space after \ in what looks like an escaped newline.  2002/5/31
       '("\\\\\\s +$" 0 font-lock-warning-face t)

       ;; Unbalanced string (") or regexp (/) delimiters.  2002/02/16.
       '("\\s|" 0 font-lock-warning-face t nil)
       ;; gawk 3.1 localizable strings ( _"translate me!").  2002/5/21
       '("\\(_\\)\\s|" 1 font-lock-warning-face)
       '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs. 2002/10/6
       )))
  "Default expressions to highlight in AWK mode.")
0386b551
AM
940\f
941;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e
d9e94c22 942
0386b551
AM
943;; The following three regexps differ from those earlier on in cc-awk.el in
944;; that they assume the syntax-table properties have been set. They are thus
945;; not useful for code which sets these properties.
;; Matches a terminated string/regexp starting at point.
(defconst c-awk-terminated-regexp-or-string-here-re
  "\\=\\s\"\\S\"*\\s\"")

;; Matches an unterminated string/regexp starting at point, NOT including the
;; eol at the end.
(defconst c-awk-unterminated-regexp-or-string-here-re
  "\\=\\s|\\S|*$")

;; Matches any run of "harmless" characters in a pattern and/or escaped
;; character pairs.
(defconst c-awk-harmless-pattern-characters*
  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
955
0386b551
AM
(defun c-awk-at-statement-end-p ()
  ;; Point is not inside a comment or string.  Is it AT the end of a
  ;; statement?  This means immediately after the last non-ws character of the
  ;; statement.  The caller is responsible for widening the buffer, if
  ;; appropriate.
  (unless (bobp)
    (save-excursion
      ;; Examine the character just before the original point.
      (backward-char)
      (or (looking-at "[};]")
          (and (memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\))
               (looking-at
                (eval-when-compile
                  (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space*
                          "[#\n\r]"))))))))
970
d9e94c22
MS
(defun c-awk-beginning-of-defun (&optional arg)
  "Move backward to the beginning of an AWK \"defun\".  With ARG, do it that
many times.  Negative arg -N means move forward to Nth following beginning of
defun.  Returns t unless search stops due to beginning or end of buffer.

By a \"defun\" is meant either a pattern-action pair or a function.  The start
of a defun is recognized as code starting at column zero which is neither a
closing brace nor a comment nor a continuation of the previous line.  Unlike
in some other modes, having an opening brace at column 0 is neither necessary
nor helpful.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state                ; ensures the buffer is writable.
        nil
      (let (;; Code in column 0 which is neither a comment nor a closing brace.
            (candidate-re "^[^#} \t\n\r]")
            ;; A candidate only counts when the NL property of the line
            ;; before it is one of these.
            (accepted-NL-props '(?\$ ?\} ?\#))
            ;; Has the most recent regexp search found a b-of-defun?
            (found t))
        (cond
         ;; +ve (or zero) ARG: go back one defun each time round the outer
         ;; loop.
         ((>= arg 0)
          (while (and found (> arg 0) (not (bobp)))
            ;; Go back one "candidate" each time round the inner loop until
            ;; one is genuinely a beginning-of-defun.
            (while (and (setq found (search-backward-regexp
                                     candidate-re (point-min) 'stop-at-limit))
                        (not (memq (c-awk-get-NL-prop-prev-line)
                                   accepted-NL-props))))
            (setq arg (1- arg))))
         ;; The same for a -ve ARG, moving forward.
         (t
          ;; Step off the current position so that a defun starting right
          ;; here isn't found again.
          (unless (eobp) (forward-char 1))
          (while (and found (< arg 0) (not (eobp)))
            (while (and (setq found (search-forward-regexp
                                     candidate-re (point-max) 'stop-at-limit))
                        (not (memq (c-awk-get-NL-prop-prev-line)
                                   accepted-NL-props))))
            (setq arg (1+ arg)))
          ;; The forward search leaves point after the match; go to its start.
          (when found (goto-char (match-beginning 0)))))
        (eq arg 0)))))
1008
(defun c-awk-forward-awk-pattern ()
  ;; Point is at the start of an AWK pattern (which may be null) or function
  ;; declaration.  Move to the pattern's end, and past any trailing space or
  ;; comment.  Typically, we stop at the { which denotes the corresponding AWK
  ;; action/function body.  Otherwise we stop at the EOL (or ;) marking the
  ;; absence of an explicit action.
  ;;
  ;; This function might do hidden buffer changes.
  (while
      (progn
        ;; Skip the harmless stuff, and any comment which follows it.
        (search-forward-regexp c-awk-harmless-pattern-characters*)
        (when (looking-at "#") (end-of-line))
        ;; The value of this `cond' says whether to go round the loop again.
        (cond
         ((eobp) nil)
         ((looking-at "[{;]") nil)      ; We've finished!
         ((eolp)
          ;; Carry on onto the next line only if this one is incomplete.
          ;; `forward-line' returns non-nil.
          (and (c-awk-cur-line-incomplete-p)
               (forward-line)))
         ((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t))
         ((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t))
         ((looking-at "/")              ; division sign.
          (forward-char)
          t)))))
1031
(defun c-awk-end-of-defun1 ()
  ;; Point is at the start of a "defun".  Move to its end.  Return the end
  ;; position.
  ;;
  ;; This function might do hidden buffer changes.
  (c-awk-forward-awk-pattern)
  (cond
   ;; An explicit action/function body: jump over the brace block.
   ((looking-at "{")
    (goto-char (scan-sexps (point) 1)))
   ;; A pattern terminated by a semicolon.
   ((looking-at ";")
    (forward-char))
   ;; Anything other than an EOL here means the pattern scan went wrong.
   ((not (eolp))
    (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern")))
  (point))
1043
(defun c-awk-beginning-of-defun-p ()
  ;; Are we already at the beginning of a defun?  (i.e. at code in column 0
  ;; which isn't a }, and isn't a continuation line of any sort.)
  ;;
  ;; This function might do hidden buffer changes.
  (when (looking-at "^[^#} \t\n\r]")
    (not (c-awk-prev-line-incomplete-p))))
1051
(defun c-awk-end-of-defun (&optional arg)
  "Move forward to next end of defun.  With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.

An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state
        nil
      (let ((start-point (point))
            end-point)
        ;; Strategy: (For +ve ARG): If we're not already at a
        ;; beginning-of-defun, move backwards to one.
        ;; Repeat [(i) move forward to end-of-current-defun (see below);
        ;;         (ii) If this isn't it, move forward to beginning-of-defun].
        ;; We start counting ARG only when step (i) has passed the original
        ;; point.
        (cond
         ((> arg 0)
          ;; Try to move back to a beginning-of-defun, if not already at one.
          ;; When there is no bo-defun before point, look for the next one
          ;; after the starting position instead; if this fails too, we're at
          ;; EOB, tough!
          (or (c-awk-beginning-of-defun-p)
              (c-awk-beginning-of-defun 1)
              (progn
                (goto-char start-point)
                (c-awk-beginning-of-defun -1)))
          ;; Now count forward, one defun at a time.
          (while (and (not (eobp))
                      (c-awk-end-of-defun1)
                      (progn
                        (when (> (point) start-point)
                          (setq arg (1- arg)))
                        t)
                      (> arg 0)
                      (c-awk-beginning-of-defun -1))))

         ((< arg 0)
          (setq end-point start-point)
          ;; Count backward, one defun at a time.  A defun is only counted
          ;; when its end lies before the starting position.
          (while (and (not (bobp))
                      (c-awk-beginning-of-defun 1)
                      (progn
                        (setq end-point
                              (if (bobp)
                                  (point)
                                (save-excursion (c-awk-end-of-defun1))))
                        (when (< end-point start-point)
                          (setq arg (1+ arg)))
                        t)
                      (< arg 0)))
          (goto-char (min start-point end-point))))))))
1096
0386b551 1097\f
d9e94c22 1098(cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21
ab5796a9 1099
cbee283d 1100;; arch-tag: c4836289-3aa4-4a59-9934-9ccc2bacccf3
d9e94c22 1101;;; cc-awk.el ends here