89c0499b7dabb394f04278e76a3cd073fef7ee2f
[bpt/emacs.git] / lisp / cedet / semantic / wisent / python.el
1 ;;; wisent-python.el --- Semantic support for Python
2
3 ;; Copyright (C) 2002, 2004, 2006-2012 Free Software Foundation, Inc.
4
5 ;; Author: Richard Kim <emacs18@gmail.com>
6 ;; Maintainer: Richard Kim <emacs18@gmail.com>
7 ;; Created: June 2002
8 ;; Keywords: syntax
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24
25 ;;; Commentary:
26 ;;
27 ;; Parser support for Python.
28
29 ;;; Code:
30
31 (require 'rx)
32
33 ;; Try to load python support, but fail silently since it is only used
34 ;; for optional functionality
35 (require 'python nil t)
36
37 (require 'semantic/wisent)
38 (require 'semantic/wisent/python-wy)
39 (require 'semantic/find)
40 (require 'semantic/dep)
41 (require 'semantic/ctxt)
42
43 (eval-when-compile
44 (require 'cl))
45
46 ;;; Customization
47 ;;
48
(defun semantic-python-get-system-include-path ()
  "Ask the Python interpreter for `sys.path' and return its directories.
The interpreter output is split on newlines, and only the entries
that name an existing directory are kept, in their original order."
  (let ((existing-dirs nil))
    (dolist (entry (split-string
                    (python-shell-internal-send-string
                     "import sys;print ('\\n'.join(sys.path))")
                    "\n" t))
      ;; Drop entries that are not directories on this machine.
      (when (file-directory-p entry)
        (push entry existing-dirs)))
    (nreverse existing-dirs)))
60
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  ;; The default is computed only when python support is loaded and
  ;; Emacs runs interactively; otherwise it is nil.
  (and (featurep 'python)
       ;; python-mode and batch somehow often hangs.
       (not noninteractive)
       (semantic-python-get-system-include-path))
  "The system include path used by Python language.")
68 \f
69 ;;; Lexical analysis
70 ;;
71
72 ;; Python strings are delimited by either single quotes or double
73 ;; quotes, e.g., "I'm a string" and 'I too am a string'.
;; In addition, a string can have an 'r' and/or a 'u' prefix.
75 ;; The 'r' prefix means raw, i.e., normal backslash substitutions are
76 ;; to be suppressed. For example, r"01\n34" is a string with six
77 ;; characters 0, 1, \, n, 3 and 4. The 'u' prefix means the following
78 ;; string is Unicode.
(defconst wisent-python-string-start-re "[uU]?[rR]?['\"]"
  "Regexp matching beginning of a Python string.
That is an optional `u' or `U' prefix, then an optional `r' or `R'
prefix, followed by an opening apostrophe or quotation mark.")
81
(defconst wisent-python-string-re
  (rx
   ;; Optional Unicode and raw prefixes, in that order.
   (zero-or-one (any "uU")) (zero-or-one (any "rR"))
   (or
    ;; '''...''': the body may contain escaped apostrophes, any other
    ;; character, or one or two apostrophes followed by something else.
    (seq "'''"
         (0+ (or "\\'"
                 (not (any "'"))
                 (seq (repeat 1 2 "'") (not (any "'")))))
         "'''")
    ;; '...'
    (seq "'"
         (0+ (or "\\'"
                 (not (any "'"))))
         "'")
    ;; """...""": same structure as the apostrophe variant.
    (seq "\"\"\""
         (0+ (or "\\\""
                 (not (any "\""))
                 (seq (repeat 1 2 "\"") (not (any "\"")))))
         "\"\"\"")
    ;; "..."
    (seq "\""
         (0+ (or "\\\""
                 (not (any "\""))))
         "\"")))
  "Regexp matching a complete Python string.
The triple-quoted alternatives are listed before the single-quoted
ones so that they are tried first.")
105
;; This flag is only read in this file (by
;; `wisent-python-implicit-line-joining-p'); it is presumably bound
;; by the block expansion code of the generated grammar -- TODO confirm.
(defvar wisent-python-EXPANDING-block nil
  "Non-nil when expanding a paren block for Python lexical analyzer.")
108
(defun wisent-python-implicit-line-joining-p ()
  "Return non-nil if implicit line joining is active.
That is, if inside an expression in parentheses, square brackets or
curly braces.
The answer is simply the current value of
`wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
114
(defsubst wisent-python-forward-string ()
  "Move point to the end of the Python string at point.
Point must be at the start of the string, including any `u' or `r'
prefix.  Signal an error with the message \"unterminated syntax\"
if no complete string starts at point."
  (if (looking-at wisent-python-string-re)
      (let* ((end (match-end 0))
             ;; Position of the opening quote, after the optional
             ;; string prefix.  Since the regexp matched, everything
             ;; between point and the first quote is prefix.
             (quote-start (save-excursion
                            (skip-chars-forward "uUrR" end)
                            (point))))
        ;; Incomplete triple-quoted string gets matched instead as a
        ;; complete, empty single quoted string immediately followed
        ;; by a third quote of the same kind.  (This special case
        ;; would be unnecessary if Emacs regular expressions had
        ;; negative look-ahead assertions.)  Measuring from the
        ;; opening quote instead of from point makes the check work
        ;; for prefixed strings such as u\"\"\"... too.
        (when (and (= (- end quote-start) 2)
                   (eq (char-after end) (char-before end)))
          (error "unterminated syntax"))
        (goto-char end))
    (error "unterminated syntax")))
129
(defun wisent-python-forward-balanced-expression ()
  "Move point past the balanced expression that starts at point.
A balanced expression is whatever Emacs' open/close parenthesis
syntax classes delimit.  `forward-sexp' is not used because it
cannot parse Python's triple-quoted strings."
  ;; The syntax descriptor of the opening character carries its
  ;; matching closing character.
  (let ((closer (cdr (syntax-after (point)))))
    (forward-char 1)
    (while (and (not (eobp))
                (not (eq (char-after (point)) closer)))
      (cond
       ;; A Python string: jump to its end.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; A comment: the rest of the line is irrelevant.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A nested balanced expression: recurse.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; Otherwise skip white space, words, symbols, punctuation,
       ;; paired delimiters (backquote), line continuations and
       ;; comment enders (newlines in Python).  If nothing could be
       ;; skipped we are stuck.
       ((= 0 (skip-syntax-forward "-w_.$\\>"))
        (error "can't figure out how to go forward from here"))))
    ;; Step over the closing character.  At end of buffer this
    ;; signals an error, which is the intended last resort.
    (forward-char 1)))
157
(defun wisent-python-forward-line ()
  "Move point to the start of the next logical line.
Strings, implicit and explicit line continuations, blank lines and
comment lines are skipped, so the next logical line is not always
the next physical one.  When the current logical line ends at the
end of the buffer, point is left there."
  (while (not (eolp))
    (let ((origin (point)))
      (cond
       ;; A Python string: jump to its end.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; A comment: go to the end of the line.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A parenthesized expression: skip it as a whole.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; An explicit continuation (backslash): go on with the next
       ;; physical line.
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; Anything else: white space, words, symbols, punctuation
       ;; and paired delimiters (backquote).
       ((skip-syntax-forward "-w_.$)")))
      ;; Every iteration must make progress.
      (when (= origin (point))
        (error "python-forward-line endless loop detected"))))
  ;; Point is at end of line: skip blank and comment lines.
  (forward-comment (point-max))
  ;; Go to the beginning of the line reached.
  (or (eobp) (beginning-of-line)))
191
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line that is not indented deeper.
Logical lines are skipped until one is found whose indentation is
less than or equal to that of the current line, or until the end
of the buffer is reached."
  (let ((base-indent (current-indentation))
        (searching t))
    (while searching
      (wisent-python-forward-line)
      (setq searching (and (not (eobp))
                           (> (current-indentation) base-indent))))))
200
(defun wisent-python-end-of-block ()
  "Move point to the end of the block the current line belongs to.
The block extends over the following logical lines as long as their
indentation is not less than that of the current line."
  (let ((block-indent (current-indentation)))
    (while (not (or (eobp)
                    (< (current-indentation) block-indent)))
      (wisent-python-forward-line-skip-indented)))
  ;; Trailing comments are not part of the block: back up over them,
  ;; then settle at the beginning of a line.
  (forward-comment (- (point-max)))
  (or (bolp) (forward-line 1)))
210
211 ;; Indentation stack, what the Python (2.3) language spec. says:
212 ;;
213 ;; The indentation levels of consecutive lines are used to generate
214 ;; INDENT and DEDENT tokens, using a stack, as follows.
215 ;;
216 ;; Before the first line of the file is read, a single zero is pushed
217 ;; on the stack; this will never be popped off again. The numbers
218 ;; pushed on the stack will always be strictly increasing from bottom
219 ;; to top. At the beginning of each logical line, the line's
220 ;; indentation level is compared to the top of the stack. If it is
221 ;; equal, nothing happens. If it is larger, it is pushed on the stack,
222 ;; and one INDENT token is generated. If it is smaller, it must be one
223 ;; of the numbers occurring on the stack; all numbers on the stack
224 ;; that are larger are popped off, and for each number popped off a
225 ;; DEDENT token is generated. At the end of the file, a DEDENT token
226 ;; is generated for each number remaining on the stack that is larger
227 ;; than zero.
;; Stack of indentation columns, innermost (largest) first.  It is
;; declared without a value here; the `semantic-lex' override in this
;; file binds it to (0) around each run of the lexer, and
;; `wisent-python-lex-beginning-of-line' pushes and pops entries.
(defvar wisent-python-indent-stack)
229
(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Detect and create Python indentation tokens at beginning of line.
The analyzer only fires at the beginning of a line, outside implicit
line joining.  It compares the indentation of the line with the top
of `wisent-python-indent-stack' and pushes INDENT, INDENT_BLOCK or
DEDENT tokens accordingly.  Blank and comment lines are skipped."
  (and
   (bolp) (not (wisent-python-implicit-line-joining-p))
   (let ((last-indent (car wisent-python-indent-stack))
         (last-pos (point))
         (curr-indent (current-indentation)))
     ;; Move over the indentation itself.
     (skip-syntax-forward "-")
     (cond
      ;; Skip comments and blank lines. No change in indentation.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (or (eobp) (beginning-of-line))
       (setq semantic-lex-end-point (point))
       ;; Loop lexer to handle the next line.
       t)
      ;; No change in indentation.
      ((= curr-indent last-indent)
       (setq semantic-lex-end-point (point))
       ;; Try next analyzers.
       nil)
      ;; Indentation increased
      ((> curr-indent last-indent)
       ;; Below the maximum lexing depth (or without a limit) the new
       ;; level is entered; otherwise the whole indented block is
       ;; collapsed into a single INDENT_BLOCK token.
       (if (or (not semantic-lex-maximum-depth)
               (< semantic-lex-current-depth semantic-lex-maximum-depth))
           (progn
             ;; Return an INDENT lexical token
             (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
             (push curr-indent wisent-python-indent-stack)
             (semantic-lex-push-token
              (semantic-lex-token 'INDENT last-pos (point))))
         ;; Add an INDENT_BLOCK token
         (semantic-lex-push-token
          (semantic-lex-token
           'INDENT_BLOCK
           (progn (beginning-of-line) (point))
           (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
             (wisent-python-end-of-block)
             (point)))))
       ;; Loop lexer to handle tokens in current line.
       t)
      ;; Indentation decreased
      ((progn
         ;; Pop items from indentation stack
         ;; One DEDENT token is pushed for every level popped.
         (while (< curr-indent last-indent)
           (pop wisent-python-indent-stack)
           (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
                 last-indent (car wisent-python-indent-stack))
           (semantic-lex-push-token
            (semantic-lex-token 'DEDENT last-pos (point))))
         (= last-pos (point)))
       ;; If pos did not change, then we must return nil so that
       ;; other lexical analyzers can be run.
       nil))))
  ;; All the work was done in the above analyzer matching condition.
  )
286
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Detect line ends and create Python NEWLINE tokens.
While implicit line joining is active the line end is only skipped,
since the next line then continues the current logical line."
  "\\(\n\\|\\s>\\)"
  (cond
   ;; Inside brackets: just step over the line end.
   ((wisent-python-implicit-line-joining-p)
    (setq semantic-lex-end-point (match-end 0)))
   ;; Otherwise the line end terminates a logical line.
   (t
    (semantic-lex-push-token
     (semantic-lex-token 'NEWLINE (point) (match-end 0))))))
296
(define-lex-regex-analyzer wisent-python-lex-string
  "Detect and create python string tokens.
The analyzer fires on `wisent-python-string-start-re' and pushes a
STRING_LITERAL token that runs from point to the end of the string
found by `wisent-python-forward-string'."
  wisent-python-string-start-re
  (semantic-lex-push-token
   (semantic-lex-token
    'STRING_LITERAL
    (point)
    ;; `wisent-python-forward-string' signals an error on an
    ;; unterminated string; the call is wrapped in the lexer's
    ;; unterminated syntax protection for that reason.
    (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
      (wisent-python-forward-string)
      (point)))))
307
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Detect and skip over backslash (explicit line joining) tokens.
A backslash must be the last token of a physical line, it is illegal
elsewhere on a line outside a string literal.
No token is pushed; lexing simply resumes on the next line."
  ;; An escape-syntax character followed only by white space up to
  ;; the end of the line.
  "\\s\\\\s-*$"
  ;; Skip over the detected backslash and go to the first
  ;; non-whitespace character in the next physical line.
  (forward-line)
  (skip-syntax-forward "-")
  (setq semantic-lex-end-point (point)))
318
(define-lex wisent-python-lexer
  "Lexical Analyzer for Python code.
The analyzers below are listed in the order in which they are given
to `define-lex'; the comments explain the ordering constraints."
  ;; Must analyze beginning of line first to handle indentation.
  wisent-python-lex-beginning-of-line
  wisent-python-lex-end-of-line
  ;; Must analyze string before symbol to handle string prefix.
  wisent-python-lex-string
  ;; Analyzers auto-generated from grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Ignored things.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Signal error on unhandled syntax.
  semantic-lex-default-action)
338
339 \f
340 ;;; Parsing
341 ;;
342
(defun wisent-python-reconstitute-function-tag (tag suite)
  "Complete the function TAG using the statements of its body, SUITE.
If the first statement of SUITE is a documentation string, store its
text in the :documentation attribute of TAG.
Set attributes for constructors, special, private and static methods.
Return TAG."
  ;; Analyze first statement to see whether it is a documentation
  ;; string.
  (let ((first-statement (car suite)))
    (when (semantic-python-docstring-p first-statement)
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-statement))))

  ;; TODO HACK: we try to identify methods using the following
  ;; heuristic:
  ;; + at least one argument
  ;; + first argument is self
  ;; `car' is used rather than the `cl' function `first', because
  ;; this file only requires `cl' at compile time.
  (let ((first-argument (car (semantic-tag-function-arguments tag))))
    (when (and first-argument
               (string= (semantic-tag-name first-argument) "self"))
      (semantic-tag-put-attribute tag :parent "dummy")))

  ;; Identify constructors, special and private functions
  (cond
   ;; TODO only valid when the function resides inside a class
   ((string= (semantic-tag-name tag) "__init__")
    (semantic-tag-put-attribute tag :constructor-flag t)
    ;; The body is kept so that
    ;; `wisent-python-reconstitute-class-tag' can look for instance
    ;; variable assignments in it.
    (semantic-tag-put-attribute tag :suite suite))

   ((semantic-python-special-p tag)
    (semantic-tag-put-attribute tag :special-flag t))

   ((semantic-python-private-p tag)
    (semantic-tag-put-attribute tag :protection "private")))

  ;; If there is a staticmethod decorator, add a static typemodifier
  ;; for the function.
  (when (semantic-find-tags-by-name
         "staticmethod"
         (semantic-tag-get-attribute tag :decorators))
    (semantic-tag-put-attribute
     tag :typemodifiers
     (cons "static"
           (semantic-tag-get-attribute tag :typemodifiers))))

  ;; TODO
  ;; + check for decorators classmethod
  ;; + check for operators
  tag)
391
(defun wisent-python-reconstitute-class-tag (tag)
  "Move a docstring from TAG's members into its :documentation attribute.
Also add a variable tag to the members of TAG for every instance
variable assigned in the body of a constructor of TAG.
Return TAG."
  ;; The first member of TAG may be a documentation string. If that is
  ;; the case, remove it from the members list and stick its
  ;; content into the :documentation attribute.
  (let ((first-member (car (semantic-tag-type-members tag))))
    (when (semantic-python-docstring-p first-member)
      (semantic-tag-put-attribute
       tag :members
       (cdr (semantic-tag-type-members tag)))
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-member))))

  ;; Try to find the constructor, determine the name of the instance
  ;; parameter, find assignments to instance variables and add
  ;; corresponding variable tags to the list of members.
  (dolist (member (semantic-tag-type-members tag))
    (when (semantic-tag-function-constructor-p member)
      (let* ((self (or (semantic-tag-name
                        (car (semantic-tag-function-arguments member)))
                       "self"))
             ;; Length of the "SELF." prefix to strip from the names
             ;; of instance variables.  It depends on the actual name
             ;; of the instance parameter, which need not be "self".
             (prefix-length (1+ (length self))))
        (dolist (statement (semantic-tag-get-attribute member :suite))
          (when (semantic-python-instance-variable-p statement self)
            (let ((variable (semantic-tag-clone
                             statement
                             (substring (semantic-tag-name statement)
                                        prefix-length)))
                  (members (semantic-tag-get-attribute tag :members)))
              (when (semantic-python-private-p variable)
                (semantic-tag-put-attribute variable :protection "private"))
              ;; Append destructively.  MEMBERS is never empty here,
              ;; since it contains at least the constructor.
              (setcdr (last members) (list variable))))))))

  ;; TODO remove the :suite attribute
  tag)
425
(defun semantic-python-expand-tag (tag)
  "Split the compound declaration in TAG into a list of separate tags.
TAG is a compound declaration when its NAME part is a list, which in
Python happens with `import' statements.  Return one clone of TAG per
name, in order, or nil when TAG needs no expansion."
  (let ((names (semantic-tag-name tag)))
    (when (and (eq (semantic-tag-class tag) 'include)
               (listp names))
      (mapcar (lambda (name) (semantic-tag-clone tag name))
              names))))
439
440
441 \f
442 ;;; Overridden Semantic API.
443 ;;
444
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in current buffer.
START, END, DEPTH and LENGTH have the meaning documented for the
function `semantic-lex'.
The real work is done by `wisent-python-lexer'.  Afterwards one
DEDENT token is appended for every indentation level still open in
`wisent-python-indent-stack'."
  (let* ((wisent-python-indent-stack (list 0))
         (tokens (wisent-python-lexer start end depth length))
         ;; Bound after the lexer has run, to collect only the
         ;; closing DEDENT tokens.
         (semantic-lex-token-stream nil))
    ;; Pop levels until the initial 0 at the bottom of the stack.
    (while (< 0 (pop wisent-python-indent-stack))
      (semantic-lex-push-token (semantic-lex-token 'DEDENT end end)))
    (nconc tokens (nreverse semantic-lex-token-stream))))
460
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Get the local variables based on point's context.
To be implemented for Python! For now just return nil."
  ;; Placeholder: no local variables are ever reported.
  nil)
465
466 ;; Adapted from the semantic Java support by Andrey Torba
(define-mode-local-override semantic-tag-include-filename python-mode (tag)
  "Return a relative file name for the module imported by TAG.
The dots of the module name become directory separators and the
extension \".py\" is appended."
  (let* ((components (split-string (semantic-tag-name tag) "\\."))
         (relative-path (mapconcat 'identity components "/")))
    (concat relative-path ".py")))
471
472 ;; Override ctxt-current-function/assignment defaults, since they do
473 ;; not work properly with Python code, even leading to endless loops
474 ;; (see bug #xxxxx).
(define-mode-local-override semantic-ctxt-current-function python-mode (&optional point)
  "Return the current function call the cursor is in at POINT.
The function returned is the one accepting the arguments that
the cursor is currently in. It will not return function symbol if the
cursor is on the text representing that function.
This Python override is a stub: it ignores POINT and always
returns nil."
  nil)
481
(define-mode-local-override semantic-ctxt-current-assignment python-mode (&optional point)
  "Return the current assignment near the cursor at POINT.
Return a list as per `semantic-ctxt-current-symbol'.
Return nil if there is nothing relevant.
This Python override is a stub: it ignores POINT and always
returns nil."
  nil)
487
488 ;;; Enable Semantic in `python-mode'.
489 ;;
490
491 ;;;###autoload
(defun wisent-python-default-setup ()
  "Prepare the current buffer for parsing Python code with Semantic.
Install the Python parser, then set up the comment handling and the
Semantic and Imenu variables used in Python buffers."
  (wisent-python-wy--install-parser)
  (set (make-local-variable 'parse-sexp-ignore-comments) t)
  ;; Give python modes the possibility to overwrite this:
  (unless comment-start-skip
    (set (make-local-variable 'comment-start-skip) "#+\\s-*"))
  ;; Separators: the character separating a parent from its child,
  ;; and the one separating commands.
  (setq semantic-type-relation-separator-character '(".")
        semantic-command-separation-character ";")
  ;; Parsing
  (setq semantic-tag-expand-function 'semantic-python-expand-tag)
  ;; Semantic to take over from the one provided by python.
  ;; The python one, if it uses the senator advice, will hang
  ;; Emacs unrecoverably.
  (setq imenu-create-index-function 'semantic-create-imenu-index)
  ;; I need a python guru to update this list:
  (setq semantic-symbol->name-assoc-list-for-type-parts
        '((variable . "Variables")
          (function . "Methods")))
  (setq semantic-symbol->name-assoc-list
        '((type . "Classes")
          (variable . "Variables")
          (function . "Functions")
          (include . "Imports")
          (package . "Package")
          (code . "Code"))))
521
522 ;;;###autoload
523 (add-hook 'python-mode-hook 'wisent-python-default-setup)
524
525 ;; Make sure the newer python modes pull in the same python
526 ;; mode overrides.
;; Both modes are declared as children of `python-mode'.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
529
530 \f
531 ;;; Utility functions
532 ;;
533
(defun semantic-python-special-p (tag)
  "Return non-nil if the name of TAG is a special identifier.
Special identifiers have the form __NAME__, for example __init__."
  (string-match
   ;; NAME may contain word constituents (letters, digits) as well as
   ;; symbol constituents (such as the underscore).  Matching only
   ;; `(syntax symbol)' would reject ordinary names like __init__,
   ;; because letters have word syntax, not symbol syntax.
   (rx (seq string-start
            "__" (1+ (or (syntax word) (syntax symbol))) "__"
            string-end))
   (semantic-tag-name tag)))
540
(defun semantic-python-private-p (tag)
  "Return non-nil if the name of TAG follows the private convention.
By convention private names have the form _NAME."
  (string-match
   ;; NAME may contain word constituents (letters, digits) as well as
   ;; symbol constituents (such as the underscore).  Matching only
   ;; `(syntax symbol)' would reject ordinary names like _helper,
   ;; because letters have word syntax, not symbol syntax.
   (rx (seq string-start
            "_" (0+ (or (syntax word) (syntax symbol)))
            string-end))
   (semantic-tag-name tag)))
547
(defun semantic-python-instance-variable-p (tag &optional self)
  "Return non-nil if TAG is an instance variable of the instance SELF.
SELF is the name of the instance, and defaults to \"self\" when nil.
TAG qualifies when it is a variable tag whose name is SELF, a dot,
and a remainder that contains no further dot."
  (when (semantic-tag-of-class-p tag 'variable)
    (let ((name (semantic-tag-name tag)))
      (when (string-match
             (rx-to-string
              `(seq string-start ,(or self "self") "."))
             name)
        ;; Look for another dot only after the matched \"SELF.\"
        ;; prefix, whatever the length of SELF is.
        (not (string-match "\\." name (match-end 0)))))))
558
(defun semantic-python-docstring-p (tag)
  "Return non-nil, when TAG is a Python documentation string.
TAG is considered to be a documentation string if it is of class
`code' and its name is a complete triple-quoted string, delimited
either by quotation marks or by apostrophes."
  (let ((class (semantic-tag-class tag))
        (name (semantic-tag-name tag)))
    (and (eq class 'code)
         ;; Do not signal an error on a name that is not a string.
         (stringp name)
         (string-match
          ;; The closing delimiter must be the same as the opening
          ;; one, hence the back reference.
          (rx (seq string-start
                   (group (or "\"\"\"" "'''"))
                   (0+ anything)
                   (backref 1)
                   string-end))
          name))))
570
(defun semantic-python-extract-docstring (tag)
  "Return the text of the Python documentation string held by TAG.
The text is the name of TAG without its first three and its last
three characters, that is, without the triple quotes."
  (let* ((quoted (semantic-tag-name tag))
         (text-end (- (length quoted) 3)))
    (substring quoted 3 text-end)))
576
577 \f
578 ;;; Test
579 ;;
580
(defun wisent-python-lex-buffer ()
  "Lex the current buffer and display the resulting token stream.
The whole buffer is analyzed with `semantic-lex', and the tokens are
pretty-printed into the buffer \"*wisent-python-lexer*\", which is
then displayed."
  (interactive)
  (semantic-lex-init)
  (let* ((tokens (semantic-lex (point-min) (point-max) 0))
         (output (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer output
      (erase-buffer)
      (pp tokens output)
      (goto-char (point-min))
      (pop-to-buffer output))))
591
592 (provide 'semantic/wisent/python)
593
594 ;; Local variables:
595 ;; generated-autoload-file: "../loaddefs.el"
596 ;; generated-autoload-load-name: "semantic/wisent/python"
597 ;; End:
598
599 ;;; semantic/wisent/python.el ends here