;;; cc-awk.el --- AWK specific code within cc-mode.
-;; Copyright (C) 1988, 1994, 1996, 2000, 2001, 2002, 2003, 2004, 2005,
-;; 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+;; Copyright (C) 1988, 1994, 1996, 2000-2013 Free Software Foundation,
+;; Inc.
;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
;; Maintainer: FSF
;; / can delimit regexes or be a division operator. By default we assume
;; that it is a division sign, and fix the regexp operator cases with
;; `font-lock-syntactic-keywords'.
- (modify-syntax-entry ?/ "." st) ; ACM 2002/4/27.
+ (modify-syntax-entry ?/ "." st) ; ACM 2002/4/27.
(modify-syntax-entry ?* "." st)
(modify-syntax-entry ?+ "." st)
(modify-syntax-entry ?- "." st)
;; escaped EOL.
;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
-(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
-;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a
-;; localisation string in gawk 3.1
(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
;; Matches an underline NOT followed by ".
+(defconst c-awk-harmless-char-re "[^_#/\"{}();\\\\\n\r]")
+;; Matches any character not significant in the state machine applying
+;; syntax-table properties to "s and /s.
(defconst c-awk-harmless-string*-re
(concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
-;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
-;; #, or newlines.
+;; Matches a (possibly empty) sequence of characters insignificant in the
+;; state machine applying syntax-table properties to "s and /s.
(defconst c-awk-harmless-string*-here-re
(concat "\\=" c-awk-harmless-string*-re))
-;; Matches the (possibly empty) sequence of chars without unescaped /, ", \,
-;; at point.
+;; Matches the (possibly empty) sequence of "insignificant" chars at point.
+
+(defconst c-awk-harmless-line-char-re "[^_#/\"\\\\\n\r]")
+;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a
+;; localization string in gawk 3.1
+(defconst c-awk-harmless-line-string*-re
+ (concat "\\(" c-awk-harmless-line-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
+;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
+;; #, or newlines.
(defconst c-awk-harmless-line-re
- (concat c-awk-harmless-string*-re
- "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
+ (concat c-awk-harmless-line-string*-re
+ "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
;; Matches (the tail of) an AWK \"logical\" line not containing an unescaped
;; " or /. "logical" means "possibly containing escaped newlines". A comment
;; is matched as part of the line even if it contains a " or a /. The End of
(defconst c-awk-string-without-end-here-re
(concat "\\=_?\"" c-awk-string-innards-re))
;; Matches an AWK string at point up to, but not including, any terminator.
-;; A gawk 3.1+ string may look like _"localisable string".
+;; A gawk 3.1+ string may look like _"localizable string".
(defconst c-awk-one-line-possibly-open-string-re
(concat "\"\\(" c-awk-string-ch-re "\\|" c-awk-non-eol-esc-pair-re "\\)*"
"\\(\"\\|\\\\?$\\|\\'\\)"))
;; Matches the inside of an AWK regexp (i.e. without the enclosing /s)
(defconst c-awk-regexp-without-end-re
(concat "/" c-awk-regexp-innards-re))
-;; Matches an AWK regexp up to, but not including, any terminating /.
+;; Matches an AWK regexp up to, but not including, any terminating /.
(defconst c-awk-one-line-possibly-open-regexp-re
(concat "/\\(" c-awk-non-eol-esc-pair-re
"\\|" c-awk-regexp-one-line-possibly-open-char-list-re
;; division sign.
(defconst c-awk-neutral-re
; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
- "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
+ "\\([}@` \t]\\|\\+\\+\\|--\\|\\\\\\(.\\|[\n\r]\\)\\)")
;; A "neutral" char(pair). Doesn't change the "state" of a subsequent /.
-;; This is space/tab, braces, an auto-increment/decrement operator or an
+;; This is space/tab, close brace, an auto-increment/decrement operator or an
;; escaped character. Or one of the (invalid) characters @ or `. But NOT an
-;; end of line (even if escaped).
+;; end of line (unless escaped).
(defconst c-awk-neutrals*-re
(concat "\\(" c-awk-neutral-re "\\)*"))
;; A (possibly empty) string of neutral characters (or character pairs).
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
(defconst c-awk-non-arith-op-bra-re
- "[[\(&=:!><,?;'~|]")
-;; Matches an openeing BRAcket ,round or square, or any operator character
+ "[[\({&=:!><,?;'~|]")
+;; Matches an opening BRAcket (of any sort), or any operator character
;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a
;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
;; and "--".
;; bracket, in a context where an immediate / would be a division sign. This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
+(defconst c-awk-pre-exp-alphanum-kwd-re
+ (concat "\\(^\\|\\=\\|[^_\n\r]\\)\\<"
+ (regexp-opt '("print" "return" "case") t)
+ "\\>\\([^_\n\r]\\|$\\)"))
+;; Matches all AWK keywords which can precede expressions (including
+;; /regexp/).
+(defconst c-awk-kwd-regexp-sign-re
+ (concat c-awk-pre-exp-alphanum-kwd-re c-awk-escaped-nls*-with-space* "/"))
+;; Matches a piece of AWK buffer ending in <kwd> /, where <kwd> is a keyword
+;; which can precede an expression.
;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
\f
;; ACM, 2002/5/29:
-;;
+;;
;; The next section of code is about determining whether or not an AWK
;; statement is complete or not. We use this to indent the following line.
;; The determination is pretty straightforward in C, where a statement ends
;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
;; that the previous line contains an unterminated string (without \). In
;; this case, assume that the previous line's c-awk-NL-prop is a $.
- ;;
+ ;;
;; POINT MUST BE AT THE START OF A LINE when calling this function. This
;; is to ensure that the various backward-comment functions will work
;; properly.
;; Calculate and set the value of the c-awk-NL-prop on the immediately
;; preceding EOL. This may also involve doing the same for several
;; preceding EOLs.
- ;;
+ ;;
;; NOTE that if the property was already set, we return it without
;; recalculation. (This is by accident rather than design.)
- ;;
+ ;;
;; Return the property which got set (or was already set) on the previous
;; line. Return nil if we hit BOB.
- ;;
+ ;;
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
;;
;; This function might do hidden buffer changes.
;; if necessary. (As a special case, the property doesn't get set on an
;; empty line at EOB (there's no position to set the property on), but the
;; function returns the property value an EOL would have got.)
- ;;
+ ;;
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
;;
;; This function might do hidden buffer changes.
(defun c-awk-vsemi-status-unknown-p ()
;; Are we unsure whether there is a virtual semicolon on the current line?
;; DO NOT under any circumstances attempt to calculate this; that would
- ;; defeat the (admittedly kludgey) purpose of this function, which is to
+ ;; defeat the (admittedly kludgy) purpose of this function, which is to
;; prevent an infinite recursion in c-beginning-of-statement-1 when point
;; starts at a `while' token.
(not (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
;; this, a new newline inserted after an old newline (e.g. by C-j) would
;; inherit any c-awk-NL-prop from the old newline. This would be a Bad
;; Thing. This function's action is required by c-put-char-property.
- (if (and (boundp 'text-property-default-nonsticky) ; doesn't exist in Xemacs
+ (if (and (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
(not (assoc 'c-awk-NL-prop text-property-default-nonsticky)))
(setq text-property-default-nonsticky
(cons '(c-awk-NL-prop . t) text-property-default-nonsticky))))
;; Go back to the start of the (apparent) current line (or the start of the
;; line containing POS), returning the buffer position of that point. I.e.,
;; go back to the last line which doesn't have an escaped EOL before it.
-;;
+;;
;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
;; comment, string or regexp. IT MAY WELL BE that this function should not be
;; executed on a narrowed buffer.
;; This function does hidden buffer changes.
(let ((/point (point)))
(goto-char anchor)
- ;; Analyse the line to find out what the / is.
+ ;; Analyze the line to find out what the / is.
(if (if anchor-state-/div
- (not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t))
- (search-forward-regexp c-awk-div-sign-re (1+ /point) t))
- ;; A division sign.
+ (not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t))
+ (and (not (search-forward-regexp c-awk-kwd-regexp-sign-re (1+ /point) t))
+ (search-forward-regexp c-awk-div-sign-re (1+ /point) t)))
+ ;; A division sign.
(progn (goto-char (1+ /point)) nil)
;; A regexp opener
;; Jump over the regexp innards, setting the match data.
(< (point) lim))
(setq anchor (point))
(search-forward-regexp c-awk-harmless-string*-here-re nil t)
- ;; We are now looking at either a " or a /.
- ;; Do our thing on the string, regexp or divsion sign.
+ ;; We are now looking at either a " or a / or a brace/paren/semicolon.
+ ;; Do our thing on the string, regexp or division sign or update
+ ;; our state.
(setq anchor-state-/div
- (if (looking-at "_?\"")
- (c-awk-syntax-tablify-string)
- (c-awk-syntax-tablify-/ anchor anchor-state-/div))))
+ (cond
+ ((looking-at "_?\"")
+ (c-awk-syntax-tablify-string))
+ ((eq (char-after) ?/)
+ (c-awk-syntax-tablify-/ anchor anchor-state-/div))
+ ((memq (char-after) '(?{ ?} ?\( ?\;))
+ (forward-char)
+ nil)
+ (t ; ?\)
+ (forward-char)
+ t))))
nil))
;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
;; Don't overlook the possibility of the buffer change being the "recapturing"
;; of a previously escaped newline.
-;; ACM 2008-02-05:
+;; ACM 2008-02-05:
(defun c-awk-extend-and-syntax-tablify-region (beg end old-len)
;; Expand the region (BEG END) as needed to (c-new-BEG c-new-END) then put
;; `syntax-table' properties on this region.
;;
;; This function is called from an after-change function, BEG END and
;; OLD-LEN being the standard parameters.
- ;;
+ ;;
;; Point is undefined both before and after this function call, the buffer
;; has been widened, and match-data saved. The return value is ignored.
;;
;; Awk regexps written with help from Peter Galbraith
;; <galbraith@mixing.qc.dfo.ca>.
-;; Take GNU Emacs's 'words out of the following regexp-opts. They dont work
-;; in Xemacs 21.4.4. acm 2002/9/19.
+;; Take GNU Emacs's 'words out of the following regexp-opts. They don't work
+;; in XEmacs 21.4.4. acm 2002/9/19.
(defconst awk-font-lock-keywords
(eval-when-compile
(list
;; Keywords.
(concat "\\<"
(regexp-opt
- '("BEGIN" "END" "break" "continue" "delete" "do" "else"
- "exit" "for" "getline" "if" "in" "next" "nextfile"
- "return" "while")
+ '("BEGIN" "END" "break" "case" "continue" "default" "delete"
+ "do" "else" "exit" "for" "getline" "if" "in" "next"
+ "nextfile" "return" "switch" "while")
t) "\\>")
;; Builtins.
\f
(cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21
-;; arch-tag: c4836289-3aa4-4a59-9934-9ccc2bacccf3
;;; awk-mode.el ends here