Commit | Line | Data |
---|---|---|
52bee098 CY |
1 | ;;; wisent-python.el --- Semantic support for Python |
2 | ||
3 | ;; Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009, 2010 | |
4 | ;; Free Software Foundation, Inc. | |
5 | ||
6 | ;; Author: Richard Kim <emacs18@gmail.com> | |
7 | ;; Maintainer: Richard Kim <emacs18@gmail.com> | |
8 | ;; Created: June 2002 | |
9 | ;; Keywords: syntax | |
10 | ||
11 | ;; This file is part of GNU Emacs. | |
12 | ||
13 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
14 | ;; it under the terms of the GNU General Public License as published by | |
15 | ;; the Free Software Foundation, either version 3 of the License, or | |
16 | ;; (at your option) any later version. | |
17 | ||
18 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | ;; GNU General Public License for more details. | |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
25 | ||
26 | ;;; Commentary: | |
27 | ;; | |
28 | ;; Parser support for Python. | |
29 | ||
30 | ;;; Code: | |
31 | ||
32 | (require 'semantic/wisent) | |
33 | (require 'semantic/wisent/python-wy) | |
34 | (require 'semantic/dep) | |
35 | (require 'semantic/ctxt) | |
36 | ||
37 | \f | |
38 | ;;; Lexical analysis | |
39 | ;; | |
40 | ||
41 | ;; Python strings are delimited by either single quotes or double | |
42 | ;; quotes, e.g., "I'm a string" and 'I too am a string'. | |
43 | ;; In addition, a string can have an 'r' and/or a 'u' prefix. | |
44 | ;; The 'r' prefix means raw, i.e., normal backslash substitutions are | |
45 | ;; to be suppressed. For example, r"01\n34" is a string with six | |
46 | ;; characters 0, 1, \, n, 3 and 4. The 'u' prefix means the following | |
47 | ;; string is a Unicode string. | |
(defconst wisent-python-string-re
  (let ((prefixes '("r" "u" "ur" "R" "U" "UR" "Ur" "uR")))
    ;; The prefix alternatives form group 1, which is optional; the
    ;; regexp then requires an opening single or double quote.
    (concat (regexp-opt prefixes t)
            "?['\"]"))
  "Regexp matching the beginning of a Python string.
Group 1, when it matched, is the string prefix.")
52 | ||
(defvar wisent-python-EXPANDING-block nil
  "Non-nil while the Python lexical analyzer is expanding a paren block.
`wisent-python-implicit-line-joining-p' returns the value of this
variable.")
55 | ||
(defun wisent-python-implicit-line-joining-p ()
  "Return non-nil if implicit line joining is active.
That is, if inside an expression in parentheses, square brackets or
curly braces.  The answer is simply the current value of
`wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
61 | ||
(defsubst wisent-python-forward-string ()
  "Move point to the end of the Python string at point.
Do nothing unless `wisent-python-string-re' matches at point.  An
optional string prefix is stepped over first; a triple-quoted string
is then skipped by searching for its closing delimiter, and any other
string with `forward-sexp'."
  (when (looking-at wisent-python-string-re)
    ;; Group 1 of the regexp is the optional prefix; step over it.
    (let ((prefix-end (match-end 1)))
      (and prefix-end (goto-char prefix-end)))
    ;; Point is now on the opening quote: step over the quoted part.
    (cond
     ((looking-at "\"\"\"[^\"]")
      ;; The first match is the opening delimiter at point, the second
      ;; one closes the string.
      (search-forward "\"\"\"" nil nil 2))
     ((looking-at "'''[^']")
      (search-forward "'''" nil nil 2))
     (t
      (forward-sexp 1)))))
74 | ||
(defun wisent-python-forward-line ()
  "Move point to the beginning of the next logical line.
Usually this is simply the next physical line unless strings,
implicit/explicit line continuation, blank lines, or comment lines are
encountered.  This function skips over such items so that the point is
at the beginning of the next logical line.  If the current logical
line ends at the end of the buffer, leave the point there.
Signal an error if an iteration fails to move point before the end of
line is reached."
  (while (not (eolp))
    ;; Remember where this step started so a stuck scan can be detected.
    (let ((step-origin (point)))
      (cond
       ;; A Python string: step over it as a whole.
       ((looking-at wisent-python-string-re)
        (wisent-python-forward-string))
       ;; A comment starter: the rest of the line is comment text.
       ((looking-at "\\s<")
        (end-of-line))
       ;; An open paren or a generic string delimiter: skip the
       ;; balanced expression.
       ((looking-at "\\(\\s(\\|\\s\"\\)")
        (forward-sexp 1))
       ;; The explicit line continuation character (backslash):
       ;; continue on the next physical line.
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; Otherwise skip white space, word, symbol, punctuation,
       ;; paired delimiter (backquote) and close paren characters.
       (t
        (skip-syntax-forward "-w_.$)")))
      (when (= step-origin (point))
        (error "python-forward-line endless loop detected"))))
  ;; Point is at end of line: skip blank and comment lines.
  (forward-comment (point-max))
  ;; Go to the beginning of the line reached, unless at end of buffer.
  (or (eobp) (beginning-of-line)))
108 | ||
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line, skipping indented lines.
Advance with `wisent-python-forward-line' until reaching a line whose
indentation is less than or equal to the indentation of the line
point started on, or until the end of the buffer."
  (let ((base-indent (current-indentation))
        (deeper t))
    (while deeper
      (wisent-python-forward-line)
      ;; Keep going only while the line reached is indented more than
      ;; the starting line.
      (setq deeper (and (not (eobp))
                        (> (current-indentation) base-indent))))))
117 | ||
(defun wisent-python-end-of-block ()
  "Move point to the end of the current block.
The block consists of the following logical lines whose indentation
is at least that of the current line.  Trailing comments are left
outside of the block."
  (let ((block-indent (current-indentation)))
    ;; Stop at end of buffer or at the first line indented less than
    ;; the line the block started on.
    (while (not (or (eobp)
                    (< (current-indentation) block-indent)))
      (wisent-python-forward-line-skip-indented))
    ;; Move back over final comments so they are not included in the
    ;; bounds of the current block.
    (forward-comment (- (point-max)))
    ;; Finish at a beginning of line.
    (or (bolp) (forward-line 1))))
127 | ||
128 | ;; Indentation stack, what the Python (2.3) language spec. says: | |
129 | ;; | |
130 | ;; The indentation levels of consecutive lines are used to generate | |
131 | ;; INDENT and DEDENT tokens, using a stack, as follows. | |
132 | ;; | |
133 | ;; Before the first line of the file is read, a single zero is pushed | |
134 | ;; on the stack; this will never be popped off again. The numbers | |
135 | ;; pushed on the stack will always be strictly increasing from bottom | |
136 | ;; to top. At the beginning of each logical line, the line's | |
137 | ;; indentation level is compared to the top of the stack. If it is | |
138 | ;; equal, nothing happens. If it is larger, it is pushed on the stack, | |
139 | ;; and one INDENT token is generated. If it is smaller, it must be one | |
140 | ;; of the numbers occurring on the stack; all numbers on the stack | |
141 | ;; that are larger are popped off, and for each number popped off a | |
142 | ;; DEDENT token is generated. At the end of the file, a DEDENT token | |
143 | ;; is generated for each number remaining on the stack that is larger | |
144 | ;; than zero. | |
;; Stack of indentation levels.  Declared here without a value; the
;; `semantic-lex' override for `python-mode' let-binds it to (0) around
;; each run of the lexer, and `wisent-python-lex-beginning-of-line'
;; pushes and pops levels as it emits INDENT and DEDENT tokens.
(defvar wisent-python-indent-stack)
146 | ||
(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Detect and create Python indentation tokens at beginning of line.
Only active at the beginning of a line and when implicit line joining
is not in effect.  The indentation of the line is compared with the
top of `wisent-python-indent-stack': blank and comment lines are
skipped, a deeper line produces an INDENT token (or an INDENT_BLOCK
token once `semantic-lex-maximum-depth' is reached), and a shallower
line produces one DEDENT token per level popped from the stack."
  ;; The whole analyzer is its matching condition; its value tells the
  ;; lexer whether to loop (non-nil) or to try the next analyzers (nil).
  (and
   (bolp)
   (not (wisent-python-implicit-line-joining-p))
   (let ((stack-top (car wisent-python-indent-stack))
         (bol-pos (point))
         (line-indent (current-indentation)))
     (skip-syntax-forward "-")
     (cond
      ;; Blank line or comment line: skip it, indentation is unchanged.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (or (eobp) (beginning-of-line))
       (setq semantic-lex-end-point (point))
       ;; Loop lexer to handle the next line.
       t)
      ;; Same indentation as the enclosing level.
      ((= line-indent stack-top)
       (setq semantic-lex-end-point (point))
       ;; Try next analyzers.
       nil)
      ;; Deeper indentation.
      ((> line-indent stack-top)
       (if (and semantic-lex-maximum-depth
                (>= semantic-lex-current-depth semantic-lex-maximum-depth))
           ;; Depth limit reached: produce a single INDENT_BLOCK token
           ;; spanning from the beginning of this line to the end of
           ;; the block.
           (semantic-lex-push-token
            (semantic-lex-token
             'INDENT_BLOCK
             (progn (beginning-of-line) (point))
             (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
               (wisent-python-end-of-block)
               (point))))
         ;; Otherwise record the new level and produce an INDENT token.
         (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
         (push line-indent wisent-python-indent-stack)
         (semantic-lex-push-token
          (semantic-lex-token 'INDENT bol-pos (point))))
       ;; Loop lexer to handle tokens in current line.
       t)
      ;; Shallower indentation.
      (t
       ;; Pop every level deeper than this line, one DEDENT for each.
       (while (< line-indent stack-top)
         (pop wisent-python-indent-stack)
         (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
         (setq stack-top (car wisent-python-indent-stack))
         (semantic-lex-push-token
          (semantic-lex-token 'DEDENT bol-pos (point))))
       ;; If point did not move, return nil so that the other lexical
       ;; analyzers can be run.
       (/= bol-pos (point)))))))
202 | ||
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Detect and create Python NEWLINE tokens.
When implicit line joining is active, the following line continues
the current one: the matched text is skipped and no token is created."
  "\\(\n\\|\\s>\\)"
  (if (not (wisent-python-implicit-line-joining-p))
      (semantic-lex-push-token
       (semantic-lex-token 'NEWLINE (point) (match-end 0)))
    ;; Inside an implicit continuation: just step over the match.
    (setq semantic-lex-end-point (match-end 0))))
212 | ||
(define-lex-regex-analyzer wisent-python-lex-string
  "Detect and create Python string tokens.
The STRING_LITERAL token spans from point to wherever
`wisent-python-forward-string' leaves point."
  wisent-python-string-re
  (let* ((literal-beg (point))
         ;; An error while skipping the string is handled by the
         ;; unterminated syntax protection.
         (literal-fin
          (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
            (wisent-python-forward-string)
            (point))))
    (semantic-lex-push-token
     (semantic-lex-token 'STRING_LITERAL literal-beg literal-fin))))
223 | ||
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Detect and skip over backslash (explicit line joining) tokens.
A backslash must be the last token of a physical line; it is illegal
elsewhere on a line outside a string literal."
  "\\s\\\\s-*$"
  ;; Leave the line holding the backslash, then move to the first
  ;; non-whitespace character of the next physical line.
  (forward-line 1)
  (skip-syntax-forward "-")
  (setq semantic-lex-end-point (point)))
234 | ||
(define-lex wisent-python-lexer
  "Lexical analyzer for Python code.
The order of the analyzers below matters."
  ;; Indentation has to be handled first, at the beginning of a line.
  wisent-python-lex-beginning-of-line
  wisent-python-lex-end-of-line
  ;; Strings go before symbols so that a string prefix is not taken
  ;; for a symbol.
  wisent-python-lex-string
  ;; Analyzers auto-generated from the grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Things that are skipped without producing a token.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Anything left unhandled signals an error.
  semantic-lex-default-action)
254 | \f | |
255 | ;;; Overridden Semantic API. | |
256 | ;; | |
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in current buffer.
See the function `semantic-lex' for the meaning of the START, END,
DEPTH and LENGTH arguments.
The analysis itself is done by `wisent-python-lexer', run with a
fresh `wisent-python-indent-stack'.  One DEDENT token located at END
is then appended for each indentation level left on that stack."
  (let* ((wisent-python-indent-stack (list 0))
         (lexed-tokens (wisent-python-lexer start end depth length))
         ;; Collect the closing DEDENT tokens in a stream of their own.
         (semantic-lex-token-stream nil))
    ;; The bottom of the stack is 0, which ends the loop.
    (while (< 0 (pop wisent-python-indent-stack))
      (semantic-lex-push-token (semantic-lex-token 'DEDENT end end)))
    (nconc lexed-tokens (nreverse semantic-lex-token-stream))))
272 | ||
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Get the local variables based on point's context.
Not implemented for Python yet: this override always returns nil."
  nil)
277 | ||
;; Declare the `python-mode' system include path option, with a default
;; value of nil.
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  nil
  "The system include path used by the Python language.")
282 | ||
283 | ;;; Enable Semantic in `python-mode'. | |
284 | ;; | |
285 | ||
286 | ;;;###autoload | |
(defun wisent-python-default-setup ()
  "Set up the current buffer for parsing Python code."
  (wisent-python-wy--install-parser)
  (set (make-local-variable 'parse-sexp-ignore-comments) t)
  ;; Character used to separate a parent from its child.
  (setq semantic-type-relation-separator-character '("."))
  (setq semantic-command-separation-character ";")
  ;; Setting the lexer is no longer necessary, as `semantic-lex' is
  ;; overridden in python-mode:
  ;;   semantic-lex-analyzer 'wisent-python-lexer
  ;;
  ;; Let Semantic take over the imenu index from the one provided by
  ;; python.  The python one, if it uses the senator advice, will hang
  ;; Emacs unrecoverably.
  (setq imenu-create-index-function 'semantic-create-imenu-index)
  ;; I need a python guru to update these lists:
  (setq semantic-symbol->name-assoc-list-for-type-parts
        '((variable . "Variables")
          (function . "Methods")))
  (setq semantic-symbol->name-assoc-list
        '((type . "Classes")
          (variable . "Variables")
          (function . "Functions")
          (include . "Imports")
          (package . "Package")
          (code . "Code"))))
314 | ||
315 | ;;;###autoload | |
(add-hook 'python-mode-hook #'wisent-python-default-setup)
317 | ||
;; Declare `python-2-mode' and `python-3-mode' as child modes of
;; `python-mode', so that the newer python modes pull in the same
;; python mode overrides.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
322 | ||
323 | \f | |
324 | ;;; Test | |
325 | ;; | |
(defun wisent-python-lex-buffer ()
  "Lex the whole current buffer and display the resulting tokens.
The buffer is analyzed with `semantic-lex' and the token stream is
pretty-printed into the buffer \"*wisent-python-lexer*\", which is
then shown."
  (interactive)
  (semantic-lex-init)
  ;; Lex first; the output buffer is only created once that succeeded.
  (let* ((lexed-tokens (semantic-lex (point-min) (point-max) 0))
         (dump-buffer (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer dump-buffer
      (erase-buffer)
      (pp lexed-tokens dump-buffer)
      (goto-char (point-min))
      (pop-to-buffer dump-buffer))))
336 | ||
337 | (provide 'semantic/wisent/python) | |
338 | ||
339 | ;; Local variables: | |
340 | ;; generated-autoload-file: "../loaddefs.el" | |
341 | ;; generated-autoload-load-name: "semantic/wisent/python" | |
342 | ;; End: | |
343 | ||
344 | ;;; semantic/wisent/python.el ends here |