(imenu-default-create-index-function): Detect infinite
[bpt/emacs.git] / lisp / cedet / semantic / wisent / python.el
CommitLineData
52bee098
CY
1;;; wisent-python.el --- Semantic support for Python
2
3;; Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009, 2010
4;; Free Software Foundation, Inc.
5
6;; Author: Richard Kim <emacs18@gmail.com>
7;; Maintainer: Richard Kim <emacs18@gmail.com>
8;; Created: June 2002
9;; Keywords: syntax
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software: you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25
26;;; Commentary:
27;;
28;; Parser support for Python.
29
30;;; Code:
31
32(require 'semantic/wisent)
33(require 'semantic/wisent/python-wy)
34(require 'semantic/dep)
35(require 'semantic/ctxt)
36
37\f
38;;; Lexical analysis
39;;
40
;; Python strings are delimited by either single quotes or double
;; quotes, e.g., "I'm a string" and 'I too am a string'.
;; In addition a string can have an 'r' and/or 'u' prefix.
;; The 'r' prefix means raw, i.e., normal backslash substitutions are
;; to be suppressed.  For example, r"01\n34" is a string with six
;; characters 0, 1, \, n, 3 and 4.  The 'u' prefix means the following
;; string is a unicode string.
(defconst wisent-python-string-re
  (let ((prefixes '("r" "u" "ur" "R" "U" "UR" "Ur" "uR")))
    ;; The prefix alternatives are wrapped in group 1, which is made
    ;; optional; an opening quote character must follow.
    (concat (regexp-opt prefixes t) "?['\"]"))
  "Regexp matching the start of a Python string literal.
An optional raw/unicode prefix, captured as group 1, is followed by a
single or double quote character.")
52
;; Read by `wisent-python-implicit-line-joining-p'.  Nothing in this
;; file assigns it; presumably it is bound by the block-expansion code
;; of the parser -- TODO confirm.
(defvar wisent-python-EXPANDING-block nil
  "Non-nil when expanding a paren block for Python lexical analyzer.")
55
(defun wisent-python-implicit-line-joining-p ()
  "Return non-nil if implicit line joining is active.
That is, if inside an expression in parentheses, square brackets or
curly braces.  The answer is simply the current value of
`wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
61
(defsubst wisent-python-forward-string ()
  "Move point to the end of the python string at point.
Do nothing and return nil when point is not at the start of a string
as recognized by `wisent-python-string-re'."
  (if (not (looking-at wisent-python-string-re))
      nil
    ;; Step over the optional prefix (group 1 of the regexp).
    (let ((prefix-end (match-end 1)))
      (when prefix-end
        (goto-char prefix-end)))
    ;; Step over the quoted part.  For a triple-quoted string the
    ;; second occurrence of the delimiter, counted from point, is the
    ;; closing one.
    (if (looking-at "\"\"\"[^\"]")
        (search-forward "\"\"\"" nil nil 2)
      (if (looking-at "'''[^']")
          (search-forward "'''" nil nil 2)
        ;; Ordinary string: rely on the syntax table.
        (forward-sexp 1)))))
74
(defun wisent-python-forward-line ()
  "Move point to the beginning of the next logical line.
Usually this is simply the next physical line unless strings,
implicit/explicit line continuation, blank lines, or comment lines are
encountered.  This function skips over such items so that the point is
at the beginning of the next logical line.  If the current logical
line ends at the end of the buffer, leave the point there.
Signal an error if scanning fails to make progress."
  (while (not (eolp))
    (let ((origin (point)))
      (cond
       ;; A python string: jump to its end.
       ((looking-at wisent-python-string-re)
        (wisent-python-forward-string))
       ;; A comment starter: the rest of the line is comment text.
       ((looking-at "\\s<")
        (end-of-line))
       ;; An open paren or generic string delimiter: skip the sexp.
       ((looking-at "\\(\\s(\\|\\s\"\\)")
        (forward-sexp 1))
       ;; The explicit line continuation character (backslash):
       ;; continue on the next physical line.
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; Anything else: skip white space, word, symbol, punctuation,
       ;; paired delimiter (backquote) and close paren characters.
       (t
        (skip-syntax-forward "-w_.$)")))
      ;; Every pass must move point, otherwise we would spin forever.
      (when (= origin (point))
        (error "python-forward-line endless loop detected"))))
  ;; Point is now at eol; skip blank and comment lines.
  (forward-comment (point-max))
  ;; Settle at the beginning of the line reached.
  (or (eobp) (beginning-of-line)))
108
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line, skipping indented lines.
That is the next line whose indentation is less than or equal to the
indentation of the current line."
  (let ((base-column (current-indentation)))
    ;; Always advance at least one logical line...
    (wisent-python-forward-line)
    ;; ...then keep going while the lines reached are indented deeper
    ;; than the starting line.
    (while (and (not (eobp))
                (> (current-indentation) base-column))
      (wisent-python-forward-line))))
117
(defun wisent-python-end-of-block ()
  "Move point to the end of the current block.
The block consists of the following logical lines indented at least
as much as the line at point."
  (let ((block-indent (current-indentation)))
    (while (not (or (eobp)
                    (< (current-indentation) block-indent)))
      (wisent-python-forward-line-skip-indented))
    ;; Trailing comments are not part of the block: back up over them.
    (forward-comment (- (point-max)))
    ;; Finish at the beginning of a line.
    (if (bolp)
        t
      (forward-line 1))))
127
128;; Indentation stack, what the Python (2.3) language spec. says:
129;;
130;; The indentation levels of consecutive lines are used to generate
131;; INDENT and DEDENT tokens, using a stack, as follows.
132;;
133;; Before the first line of the file is read, a single zero is pushed
134;; on the stack; this will never be popped off again. The numbers
135;; pushed on the stack will always be strictly increasing from bottom
136;; to top. At the beginning of each logical line, the line's
137;; indentation level is compared to the top of the stack. If it is
138;; equal, nothing happens. If it is larger, it is pushed on the stack,
139;; and one INDENT token is generated. If it is smaller, it must be one
140;; of the numbers occurring on the stack; all numbers on the stack
141;; that are larger are popped off, and for each number popped off a
142;; DEDENT token is generated. At the end of the file, a DEDENT token
143;; is generated for each number remaining on the stack that is larger
144;; than zero.
;; Declared without a value: the `semantic-lex' override for
;; `python-mode' binds it to (list 0) around each lexer run, and
;; `wisent-python-lex-beginning-of-line' pushes and pops indentation
;; levels on it.
(defvar wisent-python-indent-stack)
146
(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Detect and create python indentation tokens at beginning of line."
  ;; All of the work happens in this matching condition; its value
  ;; tells the lexer whether the analyzer consumed something.
  (and
   (bolp)
   (not (wisent-python-implicit-line-joining-p))
   (let ((stack-top (car wisent-python-indent-stack))
         (line-start (point))
         (line-indent (current-indentation)))
     (skip-syntax-forward "-")
     (cond
      ;; Blank or comment-only line: indentation is irrelevant.  Skip
      ;; ahead and let the lexer loop on the next line.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (or (eobp) (beginning-of-line))
       (setq semantic-lex-end-point (point))
       t)
      ;; Same indentation as the enclosing level: nothing to emit,
      ;; hand over to the next analyzers.
      ((= line-indent stack-top)
       (setq semantic-lex-end-point (point))
       nil)
      ;; Indentation decreased: pop one level, and emit one DEDENT,
      ;; for every stacked level deeper than this line.
      ((< line-indent stack-top)
       (while (< line-indent stack-top)
         (pop wisent-python-indent-stack)
         (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
               stack-top (car wisent-python-indent-stack))
         (semantic-lex-push-token
          (semantic-lex-token 'DEDENT line-start (point))))
       ;; If point did not move we must return nil so that the other
       ;; lexical analyzers get a chance to run.
       (/= line-start (point)))
      ;; Indentation increased.
      (t
       (cond
        ;; Still within the depth limit: open a new level and emit an
        ;; INDENT token.
        ((or (not semantic-lex-maximum-depth)
             (< semantic-lex-current-depth semantic-lex-maximum-depth))
         (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
         (push line-indent wisent-python-indent-stack)
         (semantic-lex-push-token
          (semantic-lex-token 'INDENT line-start (point))))
        ;; Depth limit reached: cover the whole block with a single
        ;; INDENT_BLOCK token instead.
        (t
         (semantic-lex-push-token
          (semantic-lex-token
           'INDENT_BLOCK
           (progn (beginning-of-line) (point))
           (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
             (wisent-python-end-of-block)
             (point))))))
       ;; Loop lexer to handle tokens in current line.
       t))))
  ;; No action forms: everything was done in the condition above.
  )
202
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Detect and create python newline tokens.
Just skip the newline character if the following line is an implicit
continuation of current line."
  "\\(\n\\|\\s>\\)"
  (cond
   ;; Inside brackets the newline is not significant: step over it.
   ((wisent-python-implicit-line-joining-p)
    (setq semantic-lex-end-point (match-end 0)))
   ;; Otherwise it terminates a logical line.
   (t
    (semantic-lex-push-token
     (semantic-lex-token 'NEWLINE (point) (match-end 0))))))
212
(define-lex-regex-analyzer wisent-python-lex-string
  "Detect and create python string tokens."
  wisent-python-string-re
  (let ((string-start (point)))
    (semantic-lex-push-token
     (semantic-lex-token
      'STRING_LITERAL
      string-start
      ;; The end position is wherever `wisent-python-forward-string'
      ;; leaves point; errors raised while scanning are handled by the
      ;; unterminated syntax protection.
      (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
        (wisent-python-forward-string)
        (point))))))
223
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Detect and skip over backslash (explicit line joining) tokens.
A backslash must be the last token of a physical line, it is illegal
elsewhere on a line outside a string literal."
  "\\s\\\\s-*$"
  ;; Resume lexing at the first non-whitespace character of the next
  ;; physical line.
  (setq semantic-lex-end-point
        (progn
          (forward-line 1)
          (skip-syntax-forward "-")
          (point))))
234
(define-lex wisent-python-lexer
  "Lexical Analyzer for Python code."
  ;; The order of the analyzers below is significant.
  ;; Must analyze beginning of line first to handle indentation
  ;; (INDENT, INDENT_BLOCK and DEDENT tokens).
  wisent-python-lex-beginning-of-line
  ;; NEWLINE tokens, unless implicit line joining is active.
  wisent-python-lex-end-of-line
  ;; Must analyze string before symbol to handle string prefix.
  wisent-python-lex-string
  ;; Analyzers auto-generated from grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Ignored things.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Signal error on unhandled syntax.
  semantic-lex-default-action)
254\f
255;;; Overridden Semantic API.
256;;
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze python code in current buffer.
See the function `semantic-lex' for the meaning of the START, END,
DEPTH and LENGTH arguments.
This function calls `wisent-python-lexer' to actually perform the
lexical analysis, then emits the necessary python DEDENT tokens from
what remains in the `wisent-python-indent-stack'."
  (let* ((wisent-python-indent-stack (list 0))
         (lexed (wisent-python-lexer start end depth length))
         ;; Fresh stream that collects only the trailing DEDENT tokens.
         (semantic-lex-token-stream nil)
         (level (pop wisent-python-indent-stack)))
    ;; Every level still open at END (i.e. greater than zero) is
    ;; closed with a zero-length DEDENT token located at END.
    (while (> level 0)
      (semantic-lex-push-token (semantic-lex-token 'DEDENT end end))
      (setq level (pop wisent-python-indent-stack)))
    (nconc lexed (nreverse semantic-lex-token-stream))))
272
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Get the local variables based on point's context.
Not yet implemented for python; for now this always returns nil."
  nil)
277
;; Customizable system include path for `python-mode'; the default is
;; nil (no directories).
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  nil
  "The system include path used by Python language.")
282
283;;; Enable Semantic in `python-mode'.
284;;
285
286;;;###autoload
(defun wisent-python-default-setup ()
  "Setup buffer for parse."
  (wisent-python-wy--install-parser)
  (make-local-variable 'parse-sexp-ignore-comments)
  (setq parse-sexp-ignore-comments t)
  ;; Character used to separate a parent/child relationship, and the
  ;; command separator.
  (setq semantic-type-relation-separator-character '(".")
        semantic-command-separation-character ";")
  ;; Setting `semantic-lex-analyzer' to `wisent-python-lexer' is no
  ;; longer necessary, as `semantic-lex' is overridden in python-mode.

  ;; Semantic to take over from the one provided by python.
  ;; The python one, if it uses the senator advice, will hang
  ;; Emacs unrecoverably.
  (setq imenu-create-index-function 'semantic-create-imenu-index)
  ;; I need a python guru to update these lists:
  (setq semantic-symbol->name-assoc-list-for-type-parts
        '((variable . "Variables")
          (function . "Methods")))
  (setq semantic-symbol->name-assoc-list
        '((type     . "Classes")
          (variable . "Variables")
          (function . "Functions")
          (include  . "Imports")
          (package  . "Package")
          (code     . "Code"))))
314
;; Run the Semantic setup in every buffer that enters `python-mode'.
;;;###autoload
(add-hook 'python-mode-hook 'wisent-python-default-setup)
317
;; Make sure the newer python modes pull in the same python
;; mode overrides: both are declared as child modes of `python-mode'.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
322
323\f
324;;; Test
325;;
(defun wisent-python-lex-buffer ()
  "Run `wisent-python-lexer' on current buffer.
The resulting token stream is pretty-printed into the buffer
\"*wisent-python-lexer*\", which is then displayed."
  (interactive)
  (semantic-lex-init)
  (let ((tokens (semantic-lex (point-min) (point-max) 0))
        (output (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer output
      ;; Replace whatever an earlier run left behind.
      (erase-buffer)
      (pp tokens output)
      (goto-char (point-min))
      (pop-to-buffer output))))
336
337(provide 'semantic/wisent/python)
338
339;; Local variables:
340;; generated-autoload-file: "../loaddefs.el"
341;; generated-autoload-load-name: "semantic/wisent/python"
342;; End:
343
344;;; semantic/wisent/python.el ends here