Commit | Line | Data |
---|---|---|
52bee098 CY |
1 | ;;; wisent-python.el --- Semantic support for Python |
2 | ||
95df8112 | 3 | ;; Copyright (C) 2002, 2004, 2006-2011 Free Software Foundation, Inc. |
52bee098 CY |
4 | |
5 | ;; Author: Richard Kim <emacs18@gmail.com> | |
6 | ;; Maintainer: Richard Kim <emacs18@gmail.com> | |
7 | ;; Created: June 2002 | |
8 | ;; Keywords: syntax | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation, either version 3 of the License, or | |
15 | ;; (at your option) any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
24 | ||
25 | ;;; Commentary: | |
26 | ;; | |
27 | ;; Parser support for Python. | |
28 | ||
29 | ;;; Code: | |
30 | ||
31 | (require 'semantic/wisent) | |
32 | (require 'semantic/wisent/python-wy) | |
33 | (require 'semantic/dep) | |
34 | (require 'semantic/ctxt) | |
35 | ||
36 | \f | |
37 | ;;; Lexical analysis | |
38 | ;; | |
39 | ||
;; Python string literals are delimited by either single quotes or
;; double quotes, e.g., "I'm a string" and 'I too am a string'.
;; In addition a literal can carry a prefix, in either letter case:
;;   - 'r' means raw, i.e., normal backslash substitutions are to be
;;     suppressed.  For example, r"01\n34" is a string with six
;;     characters 0, 1, \, n, 3 and 4.
;;   - 'u' means the following string is a unicode string; 'ur'
;;     combines it with raw.
;;   - 'b' means the following literal is a bytes literal; 'br' and
;;     'rb' combine it with raw.
(defconst wisent-python-string-re
  (concat (regexp-opt '("r" "R" "u" "U" "b" "B"
                        "ur" "uR" "Ur" "UR"
                        "br" "bR" "Br" "BR"
                        "rb" "rB" "Rb" "RB")
                      t)
          "?['\"]")
  "Regexp matching beginning of a Python string.
Group 1, when it takes part in the match, is the optional string
prefix.  The opening quote character follows it.")
52bee098 CY |
51 | |
(defvar wisent-python-EXPANDING-block nil
  "Flag for the Python lexical analyzer.
Non-nil while a paren block is being expanded.")
54 | ||
(defun wisent-python-implicit-line-joining-p ()
  "Return non-nil when implicit line joining is in effect.
That is the case inside an expression enclosed in parentheses,
square brackets or curly braces.  The answer is the current value
of `wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
60 | ||
(defsubst wisent-python-forward-string ()
  "Move point past the Python string literal that starts at point.
Do nothing and return nil unless `wisent-python-string-re' matches
at point.  The optional string prefix is skipped first, then the
quoted part: a triple-quoted string is skipped by searching for
its closing delimiter, any other string with `forward-sexp'."
  (when (looking-at wisent-python-string-re)
    ;; Group 1 of the regexp is the optional prefix; step over it.
    (let ((prefix-end (match-end 1)))
      (when prefix-end
        (goto-char prefix-end)))
    ;; Point is now on the opening quote; skip the quoted part.
    (if (looking-at "\"\"\"[^\"]")
        ;; The first occurrence found is the opening delimiter
        ;; itself, hence the count of 2 to reach the closing one.
        (search-forward "\"\"\"" nil nil 2)
      (if (looking-at "'''[^']")
          (search-forward "'''" nil nil 2)
        (forward-sexp 1)))))
73 | ||
(defun wisent-python-forward-line ()
  "Move point to the beginning of the next logical line.
Most of the time that is just the next physical line.  Strings,
implicit or explicit line continuations, blank lines and comment
lines are skipped over, so that point ends up at the beginning of
the next logical line.  If the current logical line ends at the
end of the buffer, point is left there.
Signal an error if a scanning step fails to move point."
  ;; Scan the rest of the logical line, one syntactic item at a time.
  (while (not (eolp))
    (let ((origin (point)))
      (cond
       ;; A Python string: jump to its end.
       ((looking-at wisent-python-string-re)
        (wisent-python-forward-string))
       ;; A comment start: the rest of the line is comment.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A generic list or string: skip it as one expression.
       ((looking-at "\\(\\s(\\|\\s\"\\)")
        (forward-sexp 1))
       ;; The explicit line continuation character (backslash):
       ;; continue on the next physical line.
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; Anything else: skip white space, word, symbol,
       ;; punctuation, paired delimiter (backquote) and close
       ;; parenthesis characters.
       (t
        (skip-syntax-forward "-w_.$)")))
      ;; Every step must make progress, otherwise we would spin.
      (when (= origin (point))
        (error "python-forward-line endless loop detected"))))
  ;; Point is at end of line: skip blank and comment lines.
  (forward-comment (point-max))
  ;; Go to the beginning of the line reached, unless at end of buffer.
  (or (eobp) (beginning-of-line)))
107 | ||
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line, skipping indented lines.
Point ends up on the next logical line whose indentation is less
than or equal to the indentation of the current line, or at the
end of the buffer when it is reached first."
  (let ((base-indent (current-indentation))
        (keep-going t))
    (while keep-going
      (wisent-python-forward-line)
      ;; Continue only while the line reached is indented deeper
      ;; than the line we started from.
      (setq keep-going (and (not (eobp))
                            (> (current-indentation) base-indent))))))
116 | ||
(defun wisent-python-end-of-block ()
  "Move point to the end of the current block.
The block extends over the following logical lines that are
indented at least as much as the current line."
  (let ((block-indent (current-indentation)))
    ;; Advance until the end of the buffer or a line that is
    ;; indented less than the line we started from.
    (while (not (or (eobp)
                    (< (current-indentation) block-indent)))
      (wisent-python-forward-line-skip-indented))
    ;; Final comments are not part of the block: back up over them.
    (forward-comment (- (point-max)))
    ;; Leave point at a beginning of line.
    (or (bolp) (forward-line 1))))
126 | ||
127 | ;; Indentation stack, what the Python (2.3) language spec. says: | |
128 | ;; | |
129 | ;; The indentation levels of consecutive lines are used to generate | |
130 | ;; INDENT and DEDENT tokens, using a stack, as follows. | |
131 | ;; | |
132 | ;; Before the first line of the file is read, a single zero is pushed | |
133 | ;; on the stack; this will never be popped off again. The numbers | |
134 | ;; pushed on the stack will always be strictly increasing from bottom | |
135 | ;; to top. At the beginning of each logical line, the line's | |
136 | ;; indentation level is compared to the top of the stack. If it is | |
137 | ;; equal, nothing happens. If it is larger, it is pushed on the stack, | |
138 | ;; and one INDENT token is generated. If it is smaller, it must be one | |
139 | ;; of the numbers occurring on the stack; all numbers on the stack | |
140 | ;; that are larger are popped off, and for each number popped off a | |
141 | ;; DEDENT token is generated. At the end of the file, a DEDENT token | |
142 | ;; is generated for each number remaining on the stack that is larger | |
143 | ;; than zero. | |
(defvar wisent-python-indent-stack)

(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Detect and create Python indentation tokens at beginning of line.
Compare the indentation of the line at point with the top of
`wisent-python-indent-stack' and push INDENT, INDENT_BLOCK or
DEDENT tokens accordingly.  Blank and comment lines are skipped."
  (and
   (bolp)
   (not (wisent-python-implicit-line-joining-p))
   (let ((bol-pos (point))
         (this-indent (current-indentation))
         (top-indent (car wisent-python-indent-stack)))
     (skip-syntax-forward "-")
     (cond
      ;; Blank or comment line: skip it, indentation is unchanged.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (or (eobp) (beginning-of-line))
       (setq semantic-lex-end-point (point))
       ;; Loop the lexer so that it handles the next line.
       t)
      ;; Same indentation as the top of the stack.
      ((= this-indent top-indent)
       (setq semantic-lex-end-point (point))
       ;; Let the next analyzers have a go.
       nil)
      ;; Deeper indentation.
      ((> this-indent top-indent)
       (cond
        ;; The depth limit is not reached: emit an INDENT token.
        ((or (not semantic-lex-maximum-depth)
             (< semantic-lex-current-depth semantic-lex-maximum-depth))
         (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
         (push this-indent wisent-python-indent-stack)
         (semantic-lex-push-token
          (semantic-lex-token 'INDENT bol-pos (point))))
        ;; Otherwise emit one INDENT_BLOCK token that runs from the
        ;; beginning of this line to the end of the block.
        (t
         (semantic-lex-push-token
          (semantic-lex-token
           'INDENT_BLOCK
           (progn (beginning-of-line) (point))
           (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
             (wisent-python-end-of-block)
             (point))))))
       ;; Loop the lexer so that it handles tokens in this line.
       t)
      ;; Shallower indentation.
      (t
       ;; Pop every stack entry larger than the new indentation,
       ;; emitting one DEDENT token per entry popped.
       (while (< this-indent top-indent)
         (pop wisent-python-indent-stack)
         (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
               top-indent (car wisent-python-indent-stack))
         (semantic-lex-push-token
          (semantic-lex-token 'DEDENT bol-pos (point))))
       ;; When point did not move, return nil so that the other
       ;; lexical analyzers can be run.
       (/= bol-pos (point))))))
  ;; No body forms: all the work is done in the matching condition.
  )
201 | ||
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Detect and create Python newline tokens.
When the following line is an implicit continuation of the current
one, just skip the newline character instead of creating a token."
  "\\(\n\\|\\s>\\)"
  (if (not (wisent-python-implicit-line-joining-p))
      ;; A real end of logical line.
      (semantic-lex-push-token
       (semantic-lex-token 'NEWLINE (point) (match-end 0)))
    ;; Implicit line joining is active: step over the newline only.
    (setq semantic-lex-end-point (match-end 0))))
211 | ||
(define-lex-regex-analyzer wisent-python-lex-string
  "Detect and create python string tokens.
The token starts at point and ends where
`wisent-python-forward-string' leaves point."
  wisent-python-string-re
  (let ((string-start (point)))
    (semantic-lex-push-token
     (semantic-lex-token
      'STRING_LITERAL
      string-start
      (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
        (wisent-python-forward-string)
        (point))))))
222 | ||
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Detect and skip over backslash (explicit line joining) tokens.
A backslash must be the last token of a physical line.  Outside a
string literal it is illegal anywhere else on a line."
  "\\s\\\\s-*$"
  ;; Resume lexing at the first non-whitespace character of the
  ;; next physical line; no token is created for the backslash.
  (setq semantic-lex-end-point
        (progn
          (forward-line 1)
          (skip-syntax-forward "-")
          (point))))
233 | ||
(define-lex wisent-python-lexer
  "Lexical analyzer for Python code.
The analyzers are listed in the order in which they are tried."
  ;; Indentation is handled at beginning of line, so this analyzer
  ;; has to come first.
  wisent-python-lex-beginning-of-line
  wisent-python-lex-end-of-line
  ;; Strings go before symbols so that a string prefix is not
  ;; taken for a symbol.
  wisent-python-lex-string
  ;; Analyzers auto-generated from the grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Things that are skipped without creating a token.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Anything left unhandled is a syntax error.
  semantic-lex-default-action)
253 | \f | |
254 | ;;; Overridden Semantic API. | |
255 | ;; | |
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in current buffer.
See the function `semantic-lex' for the meaning of the START, END,
DEPTH and LENGTH arguments.
The lexical analysis itself is done by `wisent-python-lexer'.
Afterwards one Python DEDENT token, located at END, is appended
for each entry greater than zero that remains in the
`wisent-python-indent-stack'."
  (let* ((wisent-python-indent-stack (list 0))
         (lexed (wisent-python-lexer start end depth length))
         ;; Fresh stream that collects only the trailing DEDENT tokens.
         (semantic-lex-token-stream nil))
    ;; Close every indentation level that is still open.
    (while (< 0 (pop wisent-python-indent-stack))
      (semantic-lex-push-token (semantic-lex-token 'DEDENT end end)))
    ;; Tokens are pushed in reverse order, so flip them before
    ;; appending them to the lexer output.
    (nconc lexed (nreverse semantic-lex-token-stream))))
271 | ||
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Get the local variables based on point's context.
Not implemented for Python yet!  Always return nil for now."
  nil)
276 | ||
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  nil
  "The system include path used by the Python language.")
52bee098 CY |
281 | |
282 | ;;; Enable Semantic in `python-mode'. | |
283 | ;; | |
284 | ||
;;;###autoload
(defun wisent-python-default-setup ()
  "Setup buffer for parse.
Install the Python parser and set the Semantic variables used in
Python buffers."
  (wisent-python-wy--install-parser)
  (make-local-variable 'parse-sexp-ignore-comments)
  (setq parse-sexp-ignore-comments t)
  ;; Character used to separate a parent/child relationship.
  (setq semantic-type-relation-separator-character '("."))
  (setq semantic-command-separation-character ";")
  ;; There is no need to set `semantic-lex-analyzer' to
  ;; `wisent-python-lexer' any more, because `semantic-lex' is
  ;; overridden in python-mode.

  ;; Let Semantic take over the imenu index from the one provided by
  ;; python.  The python one, if it uses the senator advice, will
  ;; hang Emacs unrecoverably.
  (setq imenu-create-index-function 'semantic-create-imenu-index)

  ;; I need a python guru to update these lists:
  (setq semantic-symbol->name-assoc-list-for-type-parts
        '((variable . "Variables")
          (function . "Methods")))
  (setq semantic-symbol->name-assoc-list
        '((type     . "Classes")
          (variable . "Variables")
          (function . "Functions")
          (include  . "Imports")
          (package  . "Package")
          (code     . "Code"))))
313 | ||
;;;###autoload
(add-hook 'python-mode-hook 'wisent-python-default-setup)

;; The newer python modes are declared as children of `python-mode'
;; so that they pull in the same python mode overrides.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
321 | ||
322 | \f | |
323 | ;;; Test | |
324 | ;; | |
(defun wisent-python-lex-buffer ()
  "Run `wisent-python-lexer' on current buffer.
The resulting token stream is pretty-printed into the buffer
\"*wisent-python-lexer*\", which is then displayed."
  (interactive)
  (semantic-lex-init)
  (let* ((tokens (semantic-lex (point-min) (point-max) 0))
         (output (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer output
      ;; Replace whatever a previous run left in the buffer.
      (erase-buffer)
      (pp tokens output)
      (goto-char (point-min))
      (pop-to-buffer output))))
335 | ||
336 | (provide 'semantic/wisent/python) | |
337 | ||
338 | ;; Local variables: | |
339 | ;; generated-autoload-file: "../loaddefs.el" | |
340 | ;; generated-autoload-load-name: "semantic/wisent/python" | |
341 | ;; End: | |
342 | ||
343 | ;;; semantic/wisent/python.el ends here |