(Hungry WS Deletion): Rename c-hungry-backspace to
[bpt/emacs.git] / lisp / progmodes / cc-awk.el
CommitLineData
d9e94c22
MS
1;;; cc-awk.el --- AWK specific code within cc-mode.
2
d91362c9
NR
3;; Copyright (C) 1988,94,96,2000, 2001, 2002, 2003, 2004, 2005, 2006
4;; Free Software Foundation, Inc.
d9e94c22 5
3efc2cd7 6;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
d9e94c22
MS
7;; Maintainer: FSF
8;; Keywords: AWK, cc-mode, unix, languages
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
0386b551 23;; along with this program; see the file COPYING. If not, write to the
3a35cf56
LK
24;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25;; Boston, MA 02110-1301, USA.
d9e94c22
MS
26
27;;; Commentary:
28
29;; This file contains (most of) the adaptations to cc-mode required for the
30;; integration of AWK Mode.
0386b551 31;; It is organised thusly, the sections being separated by page breaks:
d9e94c22 32;; 1. The AWK Mode syntax table.
0386b551
AM
33;; 2. Regular expressions for analysing AWK code.
34;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property").
35;; 4. Syntax-table property/font-locking stuff, but not including the
d9e94c22 36;; font-lock-keywords setting.
0386b551
AM
37;; 5. The AWK Mode before/after-change-functions.
38;; 6. AWK Mode specific versions of commands like beginning-of-defun.
d9e94c22
MS
39;; The AWK Mode keymap, abbreviation table, and the mode function itself are
40;; in cc-mode.el.
41
42;;; Code:
43
;; At compile time only: load cc-bytecomp.  If `byte-compile-dest-file' is
;; bound to a string, the directory of that file is put at the front of
;; `load-path' for the duration of the load.
44(eval-when-compile
45 (let ((load-path
46 (if (and (boundp 'byte-compile-dest-file)
47 (stringp byte-compile-dest-file))
48 (cons (file-name-directory byte-compile-dest-file) load-path)
49 load-path)))
50 (load "cc-bytecomp" nil t)))
51
52(cc-require 'cc-defs)
53
54;; Silence the byte compiler.
55(cc-bytecomp-defvar font-lock-mode) ; Checked with boundp before use.
56
57;; Some functions in cc-engine that are used below. There's a cyclic
58;; dependency so it can't be required here. (Perhaps some functions
59;; could be moved to cc-engine to avoid it.)
60(cc-bytecomp-defun c-backward-token-1)
61(cc-bytecomp-defun c-beginning-of-statement-1)
62(cc-bytecomp-defun c-backward-sws)
63
64(defvar awk-mode-syntax-table
65 (let ((st (make-syntax-table)))
 ;; Backslash gets "escape" syntax.
66 (modify-syntax-entry ?\\ "\\" st)
 ;; A comment is opened by # ("<" = comment start) and closed by a newline,
 ;; carriage return or form feed (">" = comment end).
67 (modify-syntax-entry ?\n "> " st)
68 (modify-syntax-entry ?\r "> " st)
69 (modify-syntax-entry ?\f "> " st)
70 (modify-syntax-entry ?\# "< " st)
71 ;; / can delimit regexes or be a division operator. By default we assume
72 ;; that it is a division sign, and fix the regexp operator cases with
73 ;; `font-lock-syntactic-keywords'.
0386b551 74 (modify-syntax-entry ?/ "." st) ; ACM 2002/4/27.
d9e94c22
MS
 ;; The remaining operator characters, and the single quote, are plain
 ;; punctuation (".").
75 (modify-syntax-entry ?* "." st)
76 (modify-syntax-entry ?+ "." st)
77 (modify-syntax-entry ?- "." st)
78 (modify-syntax-entry ?= "." st)
79 (modify-syntax-entry ?% "." st)
80 (modify-syntax-entry ?< "." st)
81 (modify-syntax-entry ?> "." st)
82 (modify-syntax-entry ?& "." st)
83 (modify-syntax-entry ?| "." st)
 ;; Underscore is a symbol constituent ("_"), not a word constituent.
84 (modify-syntax-entry ?_ "_" st)
85 (modify-syntax-entry ?\' "." st)
86 st)
87 "Syntax table in use in AWK Mode buffers.")
88
0386b551
AM
89\f
90;; This section defines regular expressions used in the analysis of AWK code.
91
92;; N.B. In the following regexps, an EOL is either \n OR \r. This is because
93;; Emacs has in the past used \r to mark hidden lines in some fashion (and
94;; maybe still does).
95
96(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
97;; Matches any escaped (with \) character-pair, including an escaped newline.
98(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
99;; Matches any escaped (with \) character-pair, apart from an escaped newline.
100(defconst c-awk-comment-without-nl "#.*")
101;; Matches an AWK comment, not including the terminating NL (if any). Note
102;; that the "enclosing" (elisp) regexp must ensure the # is real.
103(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
104;; Matches a newline, or the end of buffer.
105
106;; "Space" regular expressions.
107(eval-and-compile
108 (defconst c-awk-escaped-nl "\\\\[\n\r]"))
109;; Matches an escaped newline.
110(defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))
111;; Matches a possibly empty sequence of escaped newlines. Used in
112;; awk-font-lock-keywords.
113;; (defconst c-awk-escaped-nls*-with-space*
114;; (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
115;; The above RE was very slow. Its runtime was doubling with each additional
116;; space :-( Reformulate it as below:
117(eval-and-compile
118 (defconst c-awk-escaped-nls*-with-space*
119 (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))
120;; Matches a possibly empty sequence of escaped newlines with optional
121;; interspersed spaces and tabs. Used in awk-font-lock-keywords.
122(defconst c-awk-blank-or-comment-line-re
123 (concat "[ \t]*\\(#\\|\\\\?$\\)"))
124;; Matches (the tail of) a line containing at most either a comment or an
125;; escaped EOL.
126
127;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
128(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
129;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a
130;; localisation string in gawk 3.1
131(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
132;; Matches an underline NOT followed by ".
133(defconst c-awk-harmless-string*-re
134 (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
135;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
136;; #, or newlines.
137(defconst c-awk-harmless-string*-here-re
138 (concat "\\=" c-awk-harmless-string*-re))
139;; Matches the (possibly empty) sequence of chars without unescaped /, ", \,
140;; #, or newlines, at point.
141(defconst c-awk-harmless-line-re
142 (concat c-awk-harmless-string*-re
143 "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
144;; Matches (the tail of) an AWK "logical" line not containing an unescaped
145;; " or /. "logical" means "possibly containing escaped newlines". A comment
146;; is matched as part of the line even if it contains a " or a /. The End of
147;; buffer is also an end of line.
148(defconst c-awk-harmless-lines+-here-re
149 (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
150;; Matches a sequence of (at least one) "harmless-line" at point.
151
152
153;; REGEXPS FOR AWK STRINGS.
154(defconst c-awk-string-ch-re "[^\"\\\n\r]")
155;; Matches any character which can appear unescaped in a string.
156(defconst c-awk-string-innards-re
157 (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
158;; Matches the inside of an AWK string (i.e. without the enclosing quotes).
159(defconst c-awk-string-without-end-here-re
160 (concat "\\=_?\"" c-awk-string-innards-re))
161;; Matches an AWK string at point up to, but not including, any terminator.
162;; A gawk 3.1+ string may look like _"localisable string".
163(defconst c-awk-one-line-possibly-open-string-re
164 (concat "\"\\(" c-awk-string-ch-re "\\|" c-awk-non-eol-esc-pair-re "\\)*"
165 "\\(\"\\|\\\\?$\\|\\'\\)"))
;; Matches as much of an AWK string, starting at its opening ", as fits on one
;; line: either the whole string including its closing ", or, when there is no
;; closing " on the line, everything up to (but not including) the EOL (an
;; optional \ just before that EOL is included) or up to the end of buffer.
166
167;; REGEXPS FOR AWK REGEXPS.
168(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
169;; Matches any AWK regexp character which doesn't require special analysis.
170(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
171;; Matches a (possibly empty) sequence of escaped newlines.
172
173;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
174;; list", and "[:alpha:]" inside a character list will be known as a
175;; "character class". These terms for these things vary between regexp
176;; descriptions.
177(defconst c-awk-regexp-char-class-re
178 "\\[:[a-z]+:\\]")
179 ;; Matches a character class spec (e.g. [:alpha:]).
180(defconst c-awk-regexp-char-list-re
181 (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
182 "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
183 "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
184;; Matches a regexp char list, up to (but not including) EOL if the ] is
185;; missing.
186(defconst c-awk-regexp-one-line-possibly-open-char-list-re
187 (concat "\\[\\]?\\(" c-awk-non-eol-esc-pair-re "\\|" "[^]\n\r]" "\\)*"
188 "\\(]\\|\\\\?$\\|\\'\\)"))
189;; Matches the head (or all) of a regexp char list, up to (but not
190;; including) the first EOL.
191(defconst c-awk-regexp-innards-re
192 (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re
193 "\\|" c-awk-regexp-normal-re "\\)*"))
194;; Matches the inside of an AWK regexp (i.e. without the enclosing /s)
195(defconst c-awk-regexp-without-end-re
196 (concat "/" c-awk-regexp-innards-re))
197;; Matches an AWK regexp up to, but not including, any terminating /.
198(defconst c-awk-one-line-possibly-open-regexp-re
199 (concat "/\\(" c-awk-non-eol-esc-pair-re
200 "\\|" c-awk-regexp-one-line-possibly-open-char-list-re
201 "\\|" c-awk-regexp-normal-re "\\)*"
202 "\\(/\\|\\\\?$\\|\\'\\)"))
203;; Matches as much of the head of an AWK regexp as fits on one line,
204;; possibly all of it.
205
206;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
207;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant
208;; whether a '/' at the current position would be a regexp opener or a
209;; division sign.
210(defconst c-awk-neutral-re
211; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
212 "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
213;; A "neutral" char(pair). Doesn't change the "state" of a subsequent /.
214;; This is space/tab, braces, an auto-increment/decrement operator or an
215;; escaped character. Or one of the (illegal) characters @ or `. But NOT an
216;; end of line (even if escaped).
217(defconst c-awk-neutrals*-re
218 (concat "\\(" c-awk-neutral-re "\\)*"))
219;; A (possibly empty) string of neutral characters (or character pairs).
220(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
221;; Matches a char which is a constituent of a variable or number, or a ket
222;; (i.e. closing bracKET), round or square. Assume that all characters \x80 to
223;; \xff are "letters".
224(defconst c-awk-div-sign-re
225 (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
226;; Will match a piece of AWK buffer ending in / which is a division sign, in
227;; a context where an immediate / would be a regexp bracket. It follows a
228;; variable or number (with optional intervening "neutral" characters). This
229;; will only work when there won't be a preceding " or / before the sought /
230;; to foul things up.
231(defconst c-awk-non-arith-op-bra-re
232 "[[\(&=:!><,?;'~|]")
233;; Matches an opening BRAcket, round or square, or any operator character
234;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a
235;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
236;; and "--".
237(defconst c-awk-regexp-sign-re
238 (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
239;; Will match a piece of AWK buffer ending in / which is an opening regexp
240;; bracket, in a context where an immediate / would be a division sign. This
241;; will only work when there won't be a preceding " or / before the sought /
242;; to foul things up.
243
244;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
245(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
;; Matches any character but a #, /, ", \, newline, CR, space or tab.  Unlike
;; c-awk-harmless-char-re, an underscore IS matched here.
246;;;; NEW VERSION! (which will be restricted to the current line)
247(defconst c-awk-one-line-non-syn-ws*-re
248 (concat "\\([ \t]*"
249 "\\(" c-awk-_-harmless-nonws-char-re "\\|"
250 c-awk-non-eol-esc-pair-re "\\|"
251 c-awk-one-line-possibly-open-string-re "\\|"
252 c-awk-one-line-possibly-open-regexp-re
253 "\\)"
254 "\\)*"))
;; Matches a (possibly empty) sequence of items, each optionally preceded by
;; spaces/tabs, an item being a harmless non-whitespace character, a non-EOL
;; escaped pair, or the one-line part of a string or regexp.  The match thus
;; never extends over trailing spaces/tabs, into a comment, or past the EOL.
;; It is used by c-awk-at-vsemi-p, which searches with it from the start of
;; a line.
255
256\f
d9e94c22 257;; ACM, 2002/5/29:
0386b551 258;;
d9e94c22
MS
259;; The next section of code is about determining whether or not an AWK
260;; statement is complete or not. We use this to indent the following line.
261;; The determination is pretty straightforward in C, where a statement ends
262;; with either a ; or a }. Only "while" really gives any trouble there, since
263;; it might be the end of a do-while. In AWK, on the other hand, semicolons
264;; are rarely used, and EOLs _usually_ act as "virtual semicolons". In
265;; addition, we have the complexity of escaped EOLs. The core of this
266;; analysis is in the middle of the function
267;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
268;;
269;; To avoid continually repeating this expensive analysis, we "cache" its
270;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
271;; the EOL (if any) which terminates that line. Should the property be
272;; required for the very last line (which has no EOL), it is calculated as
273;; required but not cached. The c-awk-NL-prop property should be thought of
274;; as only really valid immediately after a buffer change, not a permanently
275;; set property. (By contrast, the syntax-table text properties (set by an
276;; after-change function) must be constantly updated for the mode to work
277;; properly).
278;;
0386b551
AM
279;; This text property is also used for "syntactic whitespace" movement, this
280;; being where the distinction between the values '$' and '}' is significant.
281;;
d9e94c22
MS
282;; The valid values for c-awk-NL-prop are:
283;;
284;; nil The property is not currently set for this line.
285;; '#' There is NO statement on this line (at most a comment), and no open
286;; statement from a previous line which could have been completed on this
287;; line.
288;; '{' There is an unfinished statement on this (or a previous) line which
289;; doesn't require \s to continue onto another line, e.g. the line ends
290;; with {, or the && operator, or "if (condition)". Note that even if the
291;; newline is redundantly escaped, it remains a '{' line.
292;; '\' There is an escaped newline at the end of this line and this '\' is
293;; essential to the syntax of the program. (i.e. if it had been a
294;; frivolous \, it would have been ignored and the line been given one of
295;; the other property values.)
0386b551
AM
296;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual
297;; semicolon"). This might be a content-free line terminating a statement
298;; from the preceding (continued) line (which has property \).
299;; '}' A statement, being the last thing (aside from ws/comments) is
300;; explicitly terminated on this line by a closing brace (or sometimes a
301;; semicolon).
d9e94c22
MS
302;;
303;; This set of values has been chosen so that the property's value on a line
304;; is completely determined by the contents of the line and the property on
305;; the previous line, EXCEPT for where a "while" might be the closing
306;; statement of a do-while.
307
308(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
309 ;; Are we just after the ) in "if/for/while (<condition>)"?
310 ;;
311 ;; Note that the end of the ) in a do .... while (<condition>) doesn't
312 ;; count, since the purpose of this routine is essentially to decide
313 ;; whether to indent the next line.
314 ;;
315 ;; DO-LIM sets a limit on how far back we search for the "do" of a possible
316 ;; do-while.
0386b551
AM
317 ;;
 ;; The result is non-nil for "yes", nil for "no".  Point is not moved.
 ;;
318 ;; This function might do hidden buffer changes.
d9e94c22
MS
319 (and
320 (eq (char-before) ?\))
321 (save-excursion
 ;; par-pos is the position of the ( matching the ) before point, or nil
 ;; if there is none.
322 (let ((par-pos (c-safe (scan-lists (point) -1 0))))
323 (when par-pos
324 (goto-char par-pos) ; back over "(...)"
325 (c-backward-token-1) ; BOB isn't a problem.
 ;; The "\\([^_]\\|$\\)" tail rejects identifiers such as if_x: _ has
 ;; symbol syntax in AWK Mode, so "\\>" alone would match before it.
326 (or (looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
327 (and (looking-at "while\\>\\([^_]\\|$\\)") ; Ensure this isn't a do-while.
 ;; NOTE(review): presumably 'beginning is what cc-engine returns
 ;; when it had to move back to a "do" - confirm in cc-engine.el.
328 (not (eq (c-beginning-of-statement-1 do-lim)
329 'beginning)))))))))
330
331(defun c-awk-after-function-decl-param-list ()
332 ;; Are we just after the ) in "function foo (bar)" ?
0386b551
AM
333 ;;
 ;; The keyword may be spelt either "function" or "func".  The result is
 ;; non-nil for "yes", nil for "no".  Point is not moved.
 ;;
334 ;; This function might do hidden buffer changes.
d9e94c22
MS
335 (and (eq (char-before) ?\))
336 (save-excursion
 ;; par-pos is the position of the ( matching the ) before point, or nil
 ;; if there is none.
337 (let ((par-pos (c-safe (scan-lists (point) -1 0))))
338 (when par-pos
339 (goto-char par-pos) ; back over "(...)"
340 (c-backward-token-1) ; BOB isn't a problem
 ;; First the function's name, then, one token further back, the keyword.
341 (and (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
342 (progn (c-backward-token-1)
343 (looking-at "func\\(tion\\)?\\>"))))))))
344
345;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code).
346(defun c-awk-after-continue-token ()
347;; Are we just after a token after which a statement can be continued onto the
348;; next line without a backslash?
0386b551
AM
349;;
;; The tokens recognised are  ,  {  ?  :  &&  ||  do  and  else.  The result
;; is non-nil for "yes", nil for "no".  Point is not moved.
;;
350;; This function might do hidden buffer changes.
d9e94c22
MS
351 (save-excursion
352 (c-backward-token-1) ; FIXME 2002/10/27. What if this fails?
 ;; If we landed on a & or |, step back one more character so that a two
 ;; character && or || can be recognised below.
353 (if (and (looking-at "[&|]") (not (bobp)))
354 (backward-char)) ; c-backward-token-1 doesn't do this :-(
355 (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
356
357(defun c-awk-after-rbrace-or-statement-semicolon ()
358 ;; Are we just after a } or a ; which closes a statement?
359 ;; Be careful about ;s in for loop control bits. They don't count!
0386b551
AM
360 ;;
 ;; The result is non-nil for "yes", nil for "no".  Point is not moved.
 ;; Note that for a ; with no enclosing paren or brace at all (par-pos is
 ;; nil below), the result is nil.
 ;;
361 ;; This function might do hidden buffer changes.
d9e94c22
MS
362 (or (eq (char-before) ?\})
363 (and
364 (eq (char-before) ?\;)
365 (save-excursion
 ;; par-pos is the position of the innermost opening paren/brace
 ;; enclosing point, or nil if there is none.
366 (let ((par-pos (c-safe (scan-lists (point) -1 1))))
367 (when par-pos
368 (goto-char par-pos) ; go back to containing (
 ;; Only a ( immediately preceded by the token "for" disqualifies
 ;; the ;.
369 (not (and (looking-at "(")
370 (c-backward-token-1) ; BOB isn't a problem
371 (looking-at "for\\>")))))))))
372
373(defun c-awk-back-to-contentful-text-or-NL-prop ()
374 ;; Move back to just after the first found of either (i) an EOL which has
375 ;; the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
376 ;; We return either the value of c-awk-NL-prop (in case (i)) or nil.
0386b551 377 ;; Calling functions can best distinguish cases (ii) and (iii) with (bolp).
d9e94c22
MS
378 ;;
379 ;; Note that an escaped eol counts as whitespace here.
380 ;;
381 ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
382 ;; that the previous line contains an unterminated string (without \). In
0386b551
AM
383 ;; this case, assume that the previous line's c-awk-NL-prop is a $.
384 ;;
d9e94c22
MS
385 ;; POINT MUST BE AT THE START OF A LINE when calling this function. This
386 ;; is to ensure that the various backward-comment functions will work
387 ;; properly.
0386b551
AM
388 ;;
389 ;; This function might do hidden buffer changes.
d9e94c22
MS
390 (let ((nl-prop nil)
391 bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
 ;; The whole loop is in the `while' condition; there is no loop body.  It
 ;; stops at BOB, at an EOL carrying a c-awk-NL-prop, when the kludge
 ;; above fires, or when the final (bolp) is nil, i.e. point has been left
 ;; after contentful text.
392 (while ;; We are at a BOL here. Go back one line each iteration.
393 (and
394 (not (bobp))
395 (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
396 (progn (setq bol-pos (c-point 'bopl))
397 (setq bsws-pos (point))
398 ;; N.B. the following function will not go back past an EOL if
399 ;; there is an open string (without \) on the previous line.
0386b551
AM
400 ;; If we find such, set the c-awk-NL-prop on it, too
401 ;; (2004/3/29).
d9e94c22
MS
402 (c-backward-syntactic-ws bol-pos)
403 (or (/= (point) bsws-pos)
0386b551
AM
404 (progn (setq nl-prop ?\$)
405 (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
d9e94c22
MS
406 nil)))
407 ;; If we had a backslash at EOL, c-backward-syntactic-ws will
408 ;; have gone backwards over it. Check the backslash was "real".
409 (progn
410 (if (looking-at "[ \t]*\\\\+$")
411 (if (progn
412 (end-of-line)
413 (search-backward-regexp
414 "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
415 bol-pos t))
416 (progn (end-of-line) ; escaped EOL.
417 (backward-char)
418 (c-backward-syntactic-ws bol-pos))
419 (end-of-line))) ; The \ at eol is a fake.
420 (bolp))))
421 nl-prop))
422
423(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
424 ;; Calculate and set the value of the c-awk-NL-prop on the immediately
425 ;; preceding EOL. This may also involve doing the same for several
426 ;; preceding EOLs.
0386b551 427 ;;
d9e94c22
MS
428 ;; NOTE that if the property was already set, we return it without
429 ;; recalculation. (This is by accident rather than design.)
0386b551 430 ;;
d9e94c22
MS
431 ;; Return the property which got set (or was already set) on the previous
432 ;; line. Return nil if we hit BOB.
0386b551 433 ;;
d9e94c22 434 ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
0386b551
AM
435 ;;
 ;; Point and the match data are preserved.
 ;;
436 ;; This function might do hidden buffer changes.
d9e94c22
MS
437 (save-excursion
438 (save-match-data
439 (beginning-of-line)
 ;; pos is the start of the current line; the EOL just before it is the
 ;; one whose property we have been asked for.
440 (let* ((pos (point))
441 (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
442 ;; We are either (1) at a BOL (with nl-prop containing the previous
443 ;; line's c-awk-NL-prop) or (2) after contentful text on a line. At
444 ;; the BOB counts as case (1), so we test next for bolp rather than
445 ;; non-nil nl-prop.
446 (when (not (bolp))
447 (setq nl-prop
448 (cond
449 ;; Incomplete statement which doesn't require escaped EOL?
450 ((or (c-awk-after-if-for-while-condition-p do-lim)
451 (c-awk-after-function-decl-param-list)
452 (c-awk-after-continue-token))
453 ?\{)
454 ;; Escaped EOL (where there's also something to continue)?
455 ((and (looking-at "[ \t]*\\\\$")
456 (not (c-awk-after-rbrace-or-statement-semicolon)))
457 ?\\)
0386b551
AM
458 ;; A statement was completed on this line. How?
459 ((memq (char-before) '(?\; ?\})) ?\}) ; Real ; or }
460 (t ?\$))) ; A virtual semicolon.
d9e94c22
MS
 ;; Cache the value on the contentful line's EOL, then move on to the
 ;; line after it.
461 (end-of-line)
462 (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
463 (forward-line))
464
465 ;; We are now at a (possibly empty) sequence of content-free lines.
466 ;; Set c-awk-NL-prop on each of these lines' EOLs.
467 (while (< (point) pos) ; one content-free line each iteration.
468 (cond ; recalculate nl-prop from previous line's value.
0386b551 469 ((memq nl-prop '(?\} ?\$ nil)) (setq nl-prop ?\#))
d9e94c22 470 ((eq nl-prop ?\\)
0386b551 471 (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\$)))
d9e94c22
MS
472 ;; ?\# (empty line) and ?\{ (open stmt) don't change.
473 )
474 (forward-line)
475 (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
476 nl-prop))))
477
478(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
479 ;; Get the c-awk-NL-prop text-property from the previous line, calculating
480 ;; it if necessary. Return nil iff we're already at BOB.
481 ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
0386b551
AM
482 ;;
483 ;; This function might do hidden buffer changes.
d9e94c22
MS
484 (if (bobp)
485 nil
 ;; Use the cached value if there is one; only otherwise do the (expensive)
 ;; calculation, which also caches its result.
486 (or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)
487 (c-awk-calculate-NL-prop-prev-line do-lim))))
488
489(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
490 ;; Get the c-awk-NL-prop text-property from the current line, calculating it
491 ;; if necessary. (As a special case, the property doesn't get set on an
492 ;; empty line at EOB (there's no position to set the property on), but the
493 ;; function returns the property value an EOL would have got.)
0386b551 494 ;;
d9e94c22 495 ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
0386b551
AM
496 ;;
 ;; When the current line is the last one and has no EOL, a newline is
 ;; temporarily inserted at the end of the buffer and removed again before
 ;; returning.  Point is preserved.
 ;;
497 ;; This function might do hidden buffer changes.
d9e94c22
MS
498 (save-excursion
499 (let ((extra-nl nil))
500 (end-of-line) ; Necessary for the following test to work.
501 (when (= (forward-line) 1) ; if we were on the last line....
502 (insert-char ?\n 1) ; ...artificial eol is needed for comment detection.
503 (setq extra-nl t))
 ;; Point is now at the start of the line following the one of interest,
 ;; so that line's property is the "previous line" property from here.
504 (prog1 (c-awk-get-NL-prop-prev-line do-lim)
 ;; Remove the artificial newline with `delete-char', not with the
 ;; interactive command `delete-backward-char', whose behaviour depends
 ;; on user state such as an active region or overwrite mode.
505 (if extra-nl (delete-char -1))))))
506
0386b551 507(defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
d9e94c22
MS
508 ;; Is there an incomplete statement at the end of the previous line?
509 ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
0386b551
AM
510 ;;
 ;; "Incomplete" means the previous line's c-awk-NL-prop is either \ (an
 ;; essential escaped newline) or { (an open statement needing no \).  The
 ;; result is non-nil in those cases, otherwise nil.
 ;;
511 ;; This function might do hidden buffer changes.
d9e94c22
MS
512 (memq (c-awk-get-NL-prop-prev-line do-lim) '(?\\ ?\{)))
513
0386b551 514(defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
d9e94c22
MS
515 ;; Is there an incomplete statement at the end of the current line?
516 ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
0386b551
AM
517 ;;
 ;; "Incomplete" means the current line's c-awk-NL-prop is either \ (an
 ;; essential escaped newline) or { (an open statement needing no \).  The
 ;; result is non-nil in those cases, otherwise nil.
 ;;
518 ;; This function might do hidden buffer changes.
d9e94c22
MS
519 (memq (c-awk-get-NL-prop-cur-line do-lim) '(?\\ ?\{)))
520
0386b551
AM
521;;;; NOTES ON "VIRTUAL SEMICOLONS"
522;;;;
523;;;; A "virtual semicolon" is what terminates a statement when there is no ;
524;;;; or } to do the job. Like point, it is considered to lie _between_ two
525;;;; characters. As from mid-March 2004, it is considered to lie just after
526;;;; the last non-syntactic-whitespace character on the line; (previously, it
527;;;; was considered an attribute of the EOL on the line). A real semicolon
528;;;; never counts as a virtual one.
529
530(defun c-awk-at-vsemi-p (&optional pos)
531 ;; Is there a virtual semicolon at POS (or POINT)?
 ;;
 ;; The answer is "yes" (t) only if POS is exactly where the last
 ;; non-whitespace, non-comment text on its line ends AND the statement is
 ;; terminated there by the EOL (c-awk-NL-prop of $), possibly by way of
 ;; following blank or comment lines joined on by escaped newlines.  Point is
 ;; not moved.
 ;;
 ;; Since this calls c-awk-get-NL-prop-cur-line, it might do hidden buffer
 ;; changes.
d9e94c22 532 (save-excursion
0386b551
AM
533 (let (nl-prop
534 (pos-or-point (progn (if pos (goto-char pos)) (point))))
 ;; From the start of the line, move over everything up to the end of its
 ;; last contentful item.
535 (forward-line 0)
536 (search-forward-regexp c-awk-one-line-non-syn-ws*-re)
537 (and (eq (point) pos-or-point)
538 (progn
 ;; Whilst the line ends in an essential escaped newline and the
 ;; next line is blank or a comment, move on to that next line.
539 (while (and (eq (setq nl-prop (c-awk-get-NL-prop-cur-line)) ?\\)
540 (eq (forward-line) 0)
541 (looking-at c-awk-blank-or-comment-line-re)))
542 (eq nl-prop ?\$))))))
543
544(defun c-awk-vsemi-status-unknown-p ()
545 ;; Are we unsure whether there is a virtual semicolon on the current line?
546 ;; DO NOT under any circumstances attempt to calculate this; that would
547 ;; defeat the (admittedly kludgey) purpose of this function, which is to
548 ;; prevent an infinite recursion in c-beginning-of-statement-1 when point
549 ;; starts at a `while' token.
 ;;
 ;; The result is t when no c-awk-NL-prop is currently cached at the
 ;; position given by (c-point 'eol), nil when one is.
d9e94c22
MS
550 (not (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
551
552(defun c-awk-clear-NL-props (beg end)
553 ;; This function is run from before-change-hooks. It clears the
554 ;; c-awk-NL-prop text property from beg to the end of the buffer (The END
555 ;; parameter is ignored). This ensures that the indentation engine will
556 ;; never use stale values for this property.
0386b551
AM
557 ;;
558 ;; This function might do hidden buffer changes.
d9e94c22
MS
559 (save-restriction
 ;; Widen, so that (point-max) is the real end of the buffer even when the
 ;; buffer is narrowed.
560 (widen)
561 (c-clear-char-properties beg (point-max) 'c-awk-NL-prop)))
562
563(defun c-awk-unstick-NL-prop ()
564 ;; Ensure that the text property c-awk-NL-prop is "non-sticky". Without
565 ;; this, a new newline inserted after an old newline (e.g. by C-j) would
566 ;; inherit any c-awk-NL-prop from the old newline. This would be a Bad
567 ;; Thing. This function's action is required by c-put-char-property.
 ;;
 ;; This is done by adding (c-awk-NL-prop . t) to
 ;; `text-property-default-nonsticky', unless an entry for the property is
 ;; already there.  Where that variable isn't bound, nothing is done.
568 (if (and (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
569 (not (assoc 'c-awk-NL-prop text-property-default-nonsticky)))
570 (setq text-property-default-nonsticky
571 (cons '(c-awk-NL-prop . t) text-property-default-nonsticky))))
572
573;; The following is purely a diagnostic command, to be commented out of the
574;; final release. ACM, 2002/6/1
575;; (defun NL-props ()
576;; (interactive)
577;; (let (pl-prop cl-prop)
578;; (message "Prev-line: %s Cur-line: %s"
579;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
580;; (char-to-string pl-prop)
581;; "nil")
582;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
583;; (char-to-string cl-prop)
584;; "nil"))))
585;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
586;for now. In the byte compiled version, this causes things to crash because
587;awk-mode-map isn't yet defined. :-(
588
589;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
0386b551 590\f
d9e94c22
MS
591;; The following section of the code is to do with font-locking. The biggest
592;; problem for font-locking is deciding whether a / is a regular expression
593;; delimiter or a division sign - determining precisely where strings and
594;; regular expressions start and stop is also troublesome. This is the
595;; purpose of the function c-awk-set-syntax-table-properties and the myriad
596;; elisp regular expressions it uses.
597;;
598;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
599;; for font-locking unterminated strings (i.e. font-locking the buffer up to
600;; the next string delimiter as a string) was inappropriate. Instead,
601;; unbalanced string/regexp delimiters are given the warning font, being
602;; refonted with the string font as soon as the matching delimiter is entered.
603;;
604;; This requires the region processed by the current font-lock after-change
605;; function to have access to the start of the string/regexp, which may be
606;; several lines back. The elisp "advice" feature is used on these functions
607;; to allow this.
608
609(defun c-awk-beginning-of-logical-line (&optional pos)
610;; Go back to the start of the (apparent) current line (or the start of the
611;; line containing POS), returning the buffer position of that point. I.e.,
612;; go back to the last line which doesn't have an escaped EOL before it.
0386b551 613;;
;; "Apparent", because only the single character before each EOL is examined:
;; any line ending in a \ is taken to be continued, even if that \ is itself
;; escaped.
;;
d9e94c22
MS
614;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
615;; comment, string or regexp. IT MAY WELL BE that this function should not be
616;; executed on a narrowed buffer.
0386b551
AM
617;;
618;; This function might do hidden buffer changes.
d9e94c22
MS
619 (if pos (goto-char pos))
620 (forward-line 0)
 ;; Point is at a BOL here, so (1- (point)) is the preceding EOL and the
 ;; character before that is the last character of the previous line.
621 (while (and (> (point) (point-min))
622 (eq (char-before (1- (point))) ?\\))
623 (forward-line -1))
624 (point))
625
626(defun c-awk-end-of-logical-line (&optional pos)
627;; Go forward to the end of the (apparent) current logical line (or the end of
628;; the line containing POS), returning the buffer position of that point. I.e.,
629;; go to the end of the next line which doesn't have an escaped EOL.
630;;
;; "Apparent", because only the single character before each EOL is examined:
;; any line ending in a \ is taken to be continued, even if that \ is itself
;; escaped.
;;
631;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
632;; comment, string or regexp. IT MAY WELL BE that this function should not be
633;; executed on a narrowed buffer.
0386b551
AM
634;;
635;; This function might do hidden buffer changes.
d9e94c22
MS
636 (if pos (goto-char pos))
637 (end-of-line)
 ;; (end-of-line 2) moves to the end of the following line.
638 (while (and (< (point) (point-max))
639 (eq (char-before) ?\\))
640 (end-of-line 2))
641 (point))
642
d9e94c22
MS
643;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
644;; on strings/regexps which are missing their closing delimiter.
645;; 2002/4/28. The default syntax for / has been changed from "string" to
646;; "punctuation", to reduce hassle when this character appears within a string
647;; or comment.
648
649(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
650;; BEG and END bracket a (possibly unterminated) string or regexp. The
651;; opening delimiter is after BEG, and the closing delimiter, IF ANY, is AFTER
652;; END. Set the appropriate syntax-table properties on the delimiters and
653;; contents of this string/regex.
654;;
655;; "String" here can also mean a gawk 3.1 "localizable" string which starts
656;; with _". In this case, we step over the _ and ignore it; it will get its
657;; font from an entry in awk-font-lock-keywords.
658;;
659;; If the closing delimiter is missing (i.e., there is an EOL there) set the
660;; STRING-FENCE property on the opening " or / and closing EOL.
0386b551
AM
661;;
662;; This function does hidden buffer changes.
d9e94c22
MS
663 (if (eq (char-after beg) ?_) (setq beg (1+ beg)))
664
665 ;; First put the properties on the delimiters.
666 (cond ((eq end (point-max)) ; string/regexp terminated by EOB
0386b551 667 (c-put-char-property beg 'syntax-table '(15))) ; (15) = "string fence"
d9e94c22 668 ((/= (char-after beg) (char-after end)) ; missing end delimiter
0386b551
AM
669 (c-put-char-property beg 'syntax-table '(15))
670 (c-put-char-property end 'syntax-table '(15)))
d9e94c22 671 ((eq (char-after beg) ?/) ; Properly bracketed regexp
0386b551
AM
672 (c-put-char-property beg 'syntax-table '(7)) ; (7) = "string"
673 (c-put-char-property end 'syntax-table '(7)))
d9e94c22
MS
674 (t)) ; Properly bracketed string: Nothing to do.
675 ;; Now change the properties of any escaped "s in the string to punctuation.
 ;; (In fact every " strictly between the opening delimiter and END gets the
 ;; punctuation property, (1); inside a regexp that includes unescaped ones.)
676 (save-excursion
677 (goto-char (1+ beg))
678 (or (eobp)
679 (while (search-forward "\"" end t)
0386b551 680 (c-put-char-property (1- (point)) 'syntax-table '(1))))))
d9e94c22
MS
681
(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " or _" of a string.  Set the syntax-table
  ;; properties on this string and leave point just after it (after the
  ;; closing ", or after the EOL which cuts an unterminated string short).
  ;;
  ;; Return t if a / immediately after the string would be a division sign,
  ;; nil if it would be a regexp opener.
  ;;
  ;; This function does hidden buffer changes.

  ;; Step over the (possibly unterminated) string, up to but excluding its
  ;; closing delimiter, and hand its extent to the property setter.
  (re-search-forward c-awk-string-without-end-here-re nil t)
  (let ((string-beg (match-beginning 0))
        (string-end (match-end 0)))
    (c-awk-set-string-regexp-syntax-table-properties string-beg string-end))
  (if (looking-at "\"")
      ;; Terminated string.  In AWK, ("15" / 5) gives 3 ;-)
      (progn (forward-char) t)
    ;; Unterminated string: step over a terminating EOL, if there is one (at
    ;; EOB there isn't).  A / on the next line would start a regexp.
    (when (looking-at "[\n\r]")
      (forward-char))
    nil))
700
(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Decide whether it is a division sign or a regexp
  ;; opener; in the latter case put syntax-table properties on the entire
  ;; regexp.  Point is left immediately after the division sign or the regexp,
  ;; whichever it was.
  ;;
  ;; ANCHOR is an earlier position, and ANCHOR-STATE-/DIV says what a / at
  ;; ANCHOR would have been: a division sign (t) or a regexp opener (nil).
  ;; The text from ANCHOR up to the / is analysed to classify the /.
  ;;
  ;; The result is the corresponding state for the place where point is left:
  ;; t if a / there would be a division sign, nil if it would open a regexp.
  ;;
  ;; This function might do hidden buffer changes.
  (let* ((slash-pos (point))
         (after-slash (1+ slash-pos))
         (div-sign-p
          (progn
            (goto-char anchor)
            ;; Analyse the line to find out what the / is.
            (if anchor-state-/div
                (not (re-search-forward c-awk-regexp-sign-re after-slash t))
              (re-search-forward c-awk-div-sign-re after-slash t)))))
    (cond
     (div-sign-p
      ;; A division sign: just step over it.
      (goto-char after-slash)
      nil)
     (t
      ;; A regexp opener.  Jump over the regexp innards, setting the match
      ;; data, and hand the extent to the property setter.
      (goto-char slash-pos)
      (re-search-forward c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      (if (looking-at "/")
          ;; Terminating /.
          (progn (forward-char) t)
        ;; Incomplete regexp: step over a terminating EOL, if any (there is
        ;; none at EOB).  A / on the next line would start another regexp.
        (when (looking-at "[\n\r]")
          (forward-char))
        nil)))))
736
(defun c-awk-set-syntax-table-properties (lim)
  ;; Scan the buffer text between point and LIM, setting (and clearing) the
  ;; syntax-table property where necessary.  Always return nil.
  ;;
  ;; This function is designed to be called as the FUNCTION in a MATCHER in
  ;; font-lock-syntactic-keywords; the nil result inhibits repeated calls from
  ;; font-lock (see elisp info page "Search-based Fontification").  It is also
  ;; called, with a bit of glue, from after-change-functions when font-lock
  ;; isn't active.  Point is left "undefined" on exit.  THE BUFFER SHOULD HAVE
  ;; BEEN WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS
  ;; ROUTINE.
  ;;
  ;; The syntax-table property is set/cleared on:
  ;; (i) / - set to "string" on a / which opens or closes a properly
  ;;   terminated regexp (and left unset on a division sign).
  ;; (ii) the opener of an unterminated string/regexp - "generic string
  ;;   delimiter" is put on both the opening " or / and the end of the line
  ;;   where the closing delimiter is missing.
  ;; (iii) "s inside strings/regexps (these will all be escaped "s) - they
  ;;   get "punctuation", which later allows other routines to use the regexp
  ;;   "\\S\"*" to skip over the string innards.
  ;; (iv) comments - all syntax-table properties inside them are cleared.
  ;;
  ;; This function does hidden buffer changes.
  (let ((div-expected nil)   ; t means a following / would be a div sign.
        scan-start)
    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
    (c-clear-char-properties (point) lim 'syntax-table)
    ;; Once round this loop for each string, regexp, or division sign.
    (while (progn
             ;; Skip any "harmless" lines before the next tricky one.  Having
             ;; skipped some, a / can no longer be a division sign.
             (when (re-search-forward c-awk-harmless-lines+-here-re nil t)
               (setq div-expected nil))
             (< (point) lim))
      (setq scan-start (point))
      (re-search-forward c-awk-harmless-string*-here-re nil t)
      ;; We are now looking at either a " or a /.  Do our thing on the
      ;; string, regexp or division sign, remembering what a / straight
      ;; after it would mean.
      (setq div-expected
            (if (looking-at "_?\"")
                (c-awk-syntax-tablify-string)
              (c-awk-syntax-tablify-/ scan-start div-expected))))
    nil))
781
782
783;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
784;; the syntax-table properties even when font-lock isn't enabled, for the
785;; subsequent use of movement functions, etc. However, it seems that if font
786;; lock _is_ enabled, we can always leave it to do the job.
(defvar c-awk-old-EOLL 0
  "End of logical line following the region which is about to be changed.
Set in `c-awk-before-change' and read by `c-awk-end-of-change-region'
on behalf of `c-awk-after-change'.")
(make-variable-buffer-local 'c-awk-old-EOLL)
791
(defun c-awk-before-change (beg end)
  ;; This function is called exclusively from the before-change-functions
  ;; hook.  It does two things:
  ;;   o - records, in c-awk-old-EOLL, the end of the (logical) line on which
  ;;       END lies, as it is before the change;
  ;;   o - clears the c-awk-NL-prop text properties between END and the end
  ;;       of the buffer.
  ;; BEG is not used.
  ;;
  ;; This function might do hidden buffer changes.
  (save-restriction
    (save-excursion
      (let ((eoll (c-awk-end-of-logical-line end)))
        (setq c-awk-old-EOLL eoll))
      (c-save-buffer-state ()
        (c-awk-clear-NL-props end (point-max))))))
803
(defun c-awk-end-of-change-region (beg end old-len)
  ;; Return the end of the region which needs to be font-locked after a
  ;; change.  This is the end of the logical line on which the change
  ;; happened, either as it was before the change or as it is now, whichever
  ;; is later.
  ;;
  ;; BEG, END and OLD-LEN are the arguments given to an after-change
  ;; function.  N.B. point is left undefined.
  ;;
  ;; This function might do hidden buffer changes.
  (let* ((size-delta (- end beg))
         ;; The old end of logical line (saved by the before-change
         ;; function), shifted by the amount the text has grown or shrunk.
         (old-eoll-now (+ (- c-awk-old-EOLL old-len) size-delta))
         ;; The end of the logical line as it is now.
         (new-eoll (c-awk-end-of-logical-line end)))
    (max old-eoll-now new-eoll)))
813
(defun c-awk-after-change (beg end old-len)
  ;; This function is called exclusively as an after-change function in
  ;; AWK Mode.  It ensures that the syntax-table properties get set in the
  ;; changed region.  However, if font-lock is enabled, this function does
  ;; nothing, since an enabled font-lock after-change function will always do
  ;; this.
  ;;
  ;; BEG, END and OLD-LEN are the standard after-change-functions arguments.
  ;; The region actually processed runs from the beginning of the logical
  ;; line containing BEG to the position given by c-awk-end-of-change-region.
  ;;
  ;; The match data is preserved: c-awk-set-syntax-table-properties does
  ;; regexp searches and requires its caller to save any precious match data.
  ;; Without this, a command which changes the buffer and then uses the
  ;; result of an earlier search would find that result clobbered by this
  ;; hook.
  ;;
  ;; This function might do hidden buffer changes.
  (unless (and (boundp 'font-lock-mode) font-lock-mode)
    (save-restriction
      (save-excursion
        (save-match-data
          (setq end (c-awk-end-of-change-region beg end old-len))
          (c-awk-beginning-of-logical-line beg)
          (c-save-buffer-state nil ; So that read-only status isn't affected.
                                   ; (e.g. when first loading the buffer)
            (c-awk-set-syntax-table-properties end)))))))
830
831;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region
832;; specified by the font-lock after-change function must be expanded to
833;; include ALL of any string or regexp within the region. The simplest way to
834;; do this in practice is to use the beginning/end-of-logical-line functions.
835;; Don't overlook the possibility of the buffer change being the "recapturing"
836;; of a previously escaped newline.
(defmacro c-awk-advise-fl-for-awk-region (function)
  ;; Expand to a piece of `before' advice, called get-awk-region, on FUNCTION,
  ;; which must be an after-change function taking the arguments (BEG END
  ;; OLD-LEN).  In an AWK Mode buffer the advice widens the region (BEG END)
  ;; to whole logical lines, so that any string/regexp is completely
  ;; font-locked.  In other modes the arguments are left alone.
  `(defadvice ,function (before get-awk-region activate)
     ;; N.B. the arguments are reached only through ad-get-arg/ad-set-arg, and
     ;; END is recalculated first, since that calculation needs the original
     ;; BEG.
     (if (eq major-mode 'awk-mode)
         (save-excursion
           (ad-set-arg 1 (c-awk-end-of-change-region
                          (ad-get-arg 0)   ; beg
                          (ad-get-arg 1)   ; end
                          (ad-get-arg 2))) ; old-len
           (ad-set-arg 0 (c-awk-beginning-of-logical-line (ad-get-arg 0)))))))

;; Install the advice on each of the font-lock after-change functions.
(c-awk-advise-fl-for-awk-region font-lock-after-change-function)
(c-awk-advise-fl-for-awk-region jit-lock-after-change)
(c-awk-advise-fl-for-awk-region lazy-lock-defer-rest-after-change)
(c-awk-advise-fl-for-awk-region lazy-lock-defer-line-after-change)
853
0386b551
AM
854\f
855;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e
d9e94c22 856
0386b551
AM
857;; The following three regexps differ from those earlier on in cc-awk.el in
858;; that they assume the syntax-table properties have been set. They are thus
859;; not useful for code which sets these properties.
(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\""
  "Regexp matching a terminated string/regexp which starts at point.
It works on syntax classes, so it is only of use once the syntax-table
text properties have been set.")

(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$"
  "Regexp matching an unterminated string/regexp which starts at point.
The match does NOT include the eol at the end.  It works on syntax
classes, so it is only of use once the syntax-table text properties
have been set.")

(defconst c-awk-harmless-pattern-characters*
  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*")
  "Regexp matching a run of \"harmless\" pattern characters.
Each element of the run is either a single \"harmless\" character or
an escaped character pair.  The run may be empty.")
869
0386b551
AM
(defun c-awk-at-statement-end-p ()
  ;; Point is not inside a comment or string.  Is it AT the end of a
  ;; statement?  "At the end" means immediately after the last non-ws
  ;; character of the statement.  Return non-nil if so.  Widening the buffer,
  ;; if appropriate, is the caller's job.
  (unless (bobp)
    (save-excursion
      ;; Look at the character just before the original point.
      (backward-char)
      (cond
       ;; A } or ; always ends a statement.
       ((looking-at "[};]"))
       ;; Otherwise the line's NL property must be $ or \, and nothing but
       ;; space and escaped newlines may separate that character from the
       ;; EOL or a comment.
       ((memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\))
        (looking-at
         (eval-when-compile
           (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space*
                   "[#\n\r]"))))))))
884
d9e94c22
MS
(defun c-awk-beginning-of-defun (&optional arg)
  "Move backward to the beginning of an AWK \"defun\".
With ARG, do it that many times; ARG defaults to 1.  Negative arg -N
means move forward to Nth following beginning of defun.  Returns t
unless search stops due to beginning or end of buffer.

By a \"defun\" is meant either a pattern-action pair or a function.  The start
of a defun is recognized as code starting at column zero which is neither a
closing brace nor a comment nor a continuation of the previous line.  Unlike
in some other modes, having an opening brace at column 0 is neither necessary
nor helpful.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  ;; ARG is optional, so a call from Lisp may leave it out.  Default it, as
  ;; `c-awk-end-of-defun' does, rather than passing nil to the arithmetic
  ;; comparisons below.
  (or arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state                ; ensures the buffer is writable.
        nil
      (let ((found t))     ; Has the most recent regexp search found b-of-defun?
        (if (>= arg 0)
            ;; Go back one defun each time round the following loop.  (For
            ;; +ve arg)
            (while (and found (> arg 0) (not (eq (point) (point-min))))
              ;; Go back one "candidate" each time round the next loop until
              ;; one is genuinely a beginning-of-defun, i.e. the NL property
              ;; of the line before it is one of $, } or #.
              (while (and (setq found (search-backward-regexp
                                       "^[^#} \t\n\r]" (point-min) 'stop-at-limit))
                          (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#)))))
              (setq arg (1- arg)))
          ;; The same for a -ve arg.  First step off any candidate at point,
          ;; so that the forward search can't find it again.
          (if (not (eq (point) (point-max))) (forward-char 1))
          (while (and found (< arg 0) (not (eq (point) (point-max))))
            (while (and (setq found (search-forward-regexp
                                     "^[^#} \t\n\r]" (point-max) 'stop-at-limit))
                        (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#)))))
            (setq arg (1+ arg)))
          ;; The forward search leaves point after the matched character; go
          ;; back to the start of the defun.
          (if found (goto-char (match-beginning 0))))
        ;; ARG has reached 0 only if every requested defun was found.
        (eq arg 0)))))
921
(defun c-awk-forward-awk-pattern ()
  ;; Point is at the start of an AWK pattern (which may be null) or function
  ;; declaration.  Move to the pattern's end, and past any trailing space or
  ;; comment.  Typically, we stop at the { which denotes the corresponding AWK
  ;; action/function body.  Otherwise we stop at the EOL (or ;) marking the
  ;; absence of an explicit action.
  ;;
  ;; This function might do hidden buffer changes.

  ;; Each time round the loop: skip a stretch of harmless characters (and a
  ;; comment, if one follows), then deal with whatever stopped us.  The loop
  ;; carries on for as long as the cond form gives non-nil.
  (while
      (progn
        (re-search-forward c-awk-harmless-pattern-characters*)
        (when (looking-at "#")
          (end-of-line))
        (cond
         ;; End of buffer, or the { or ; after the pattern: we've finished!
         ((or (eobp) (looking-at "[{;]"))
          nil)
         ;; End of line: carry on only if the line is continued.
         ((eolp)
          (and (c-awk-cur-line-incomplete-p)
               (forward-line)))         ; returns non-nil
         ;; A string or regexp, terminated or not: step over it.
         ((re-search-forward c-awk-terminated-regexp-or-string-here-re nil t))
         ((re-search-forward c-awk-unterminated-regexp-or-string-here-re nil t))
         ;; A division sign.
         ((looking-at "/")
          (forward-char)
          t)))))
944
(defun c-awk-end-of-defun1 ()
  ;; Point is at the start of a "defun".  Move to its end and return that
  ;; position.  The end is just after the } matching the { of the
  ;; action/function body, just after a ; which follows the pattern, or at
  ;; the EOL when the pattern has no explicit action.  Signal an error if
  ;; c-awk-forward-awk-pattern stopped anywhere else.
  ;;
  ;; This function might do hidden buffer changes.
  (c-awk-forward-awk-pattern)
  (if (looking-at "{")
      ;; Jump over the brace block.
      (goto-char (scan-sexps (point) 1))
    (if (looking-at ";")
        (forward-char)
      (unless (eolp)
        (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern"))))
  (point))
956
(defun c-awk-beginning-of-defun-p ()
  ;; Are we already at the beginning of a defun?  That is, is point at code in
  ;; column 0 which isn't a } or a comment, on a line which isn't a
  ;; continuation line of any sort?  Return t if so, otherwise nil.
  ;;
  ;; This function might do hidden buffer changes.
  (when (looking-at "^[^#} \t\n\r]")
    (not (c-awk-prev-line-incomplete-p))))
964
(defun c-awk-end-of-defun (&optional arg)
  "Move forward to next end of defun.  With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.

An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'.

Note that this function might do hidden buffer changes.  See the
comment at the start of cc-engine.el for more info."
  (interactive "p")
  (unless arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state ()
      (let ((origin (point))
            defun-end)
        (cond
         ;; Strategy for +ve ARG: If we're not already at a
         ;; beginning-of-defun, move backwards to one.  Then repeat
         ;;   [(i) move forward to end-of-current-defun;
         ;;    (ii) if this isn't it, move forward to beginning-of-defun].
         ;; We start counting ARG only when step (i) has passed the original
         ;; point.
         ((> arg 0)
          ;; Get to a beginning-of-defun: stay put if already at one, else
          ;; try backwards, else (no bo-defun before point) go forwards from
          ;; the original point.  If that fails too, we're at EOB, tough!
          (or (c-awk-beginning-of-defun-p)
              (c-awk-beginning-of-defun 1)
              (progn
                (goto-char origin)
                (c-awk-beginning-of-defun -1)))
          ;; Now count forward, one defun at a time.
          (while (and (not (eobp))
                      (c-awk-end-of-defun1)
                      (if (> (point) origin) (setq arg (1- arg)) t)
                      (> arg 0)
                      (c-awk-beginning-of-defun -1))))

         ;; For -ve ARG: step back one beginning-of-defun at a time, finding
         ;; the end of the defun which starts there.  Only an end which lies
         ;; before the original point counts towards ARG.
         ((< arg 0)
          (setq defun-end origin)
          (while (and (not (bobp))
                      (c-awk-beginning-of-defun 1)
                      (progn
                        (setq defun-end
                              (if (bobp)
                                  (point)
                                (save-excursion (c-awk-end-of-defun1))))
                        (if (< defun-end origin) (setq arg (1+ arg)) t))
                      (< arg 0)))
          ;; Never finish up after the place we started from.
          (goto-char (min origin defun-end))))))))
1009
0386b551 1010\f
d9e94c22 1011(cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21
ab5796a9
MB
1012
1013;;; arch-tag: c4836289-3aa4-4a59-9934-9ccc2bacccf3
d9e94c22 1014;;; cc-awk.el ends here