1 ;;; wisent-python.el --- Semantic support for Python
3 ;; Copyright (C) 2002, 2004, 2006-2012 Free Software Foundation, Inc.
5 ;; Author: Richard Kim <emacs18@gmail.com>
6 ;; Maintainer: Richard Kim <emacs18@gmail.com>
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 ;; Parser support for Python.
33 ;; Try to load python support, but fail silently since it is only used
34 ;; for optional functionality
;; `python' is optional: NOERROR is t, so a missing library is ignored.
(require 'python nil t)

;; Semantic libraries this file depends on.  Each feature name is a
;; single symbol containing slashes.
(require 'semantic/wisent)
(require 'semantic/wisent/python-wy)
(require 'semantic/find)
(require 'semantic/dep)
(require 'semantic/ctxt)
;; Sends a one-line Python program to the Python shell that prints the
;; entries of `sys.path', one per line (see the string literal below).
;; NOTE(review): the embedded line numbers jump (50 -> 54 -> 57 -> 58),
;; so the binding of `dir' and the closing forms are not visible in this
;; copy; confirm against the upstream file.
49 (defun semantic-python-get-system-include-path ()
50 "Evaluate some Python code that determines the system include path."
54 (when (file-directory-p dir
)
57 (python-shell-internal-send-string
58 "import sys;print ('\\n'.join(sys.path))")
;; Mode-local (`python-mode') system include path.  The default value is
;; computed by `semantic-python-get-system-include-path', and only when
;; the `python' feature is loaded (see the `featurep' test).
;; NOTE(review): a second condition after the `featurep' test (embedded
;; line 65) is not visible in this copy.
61 (defcustom-mode-local-semantic-dependency-system-include-path
62 python-mode semantic-python-dependency-system-include-path
63 (when (and (featurep 'python
)
64 ;; python-mode and batch somehow often hangs.
66 (semantic-python-get-system-include-path))
67 "The system include path used by Python language.")
72 ;; Python strings are delimited by either single quotes or double
73 ;; quotes, e.g., "I'm a string" and 'I too am a string'.
74 ;; In addition a string can have an 'r' and/or a 'u' prefix.
75 ;; The 'r' prefix means raw, i.e., normal backslash substitutions are
76 ;; to be suppressed. For example, r"01\n34" is a string with six
77 ;; characters 0, 1, \, n, 3 and 4. The 'u' prefix means the following
;; Opening of a Python string literal: an optional `u'/`U' prefix, then
;; an optional `r'/`R' prefix (in that order), then one quote character,
;; either an apostrophe or a quotation mark.
(defconst wisent-python-string-start-re "[uU]?[rR]?['\"]"
  "Regexp matching beginning of a Python string.")
;; Regexp for a complete string literal: an optional u/U and r/R prefix
;; followed by one of four quoting styles (triple or single apostrophes,
;; triple or single quotation marks).  In each style the visible
;; alternatives start with the backslash-escaped quote character.
;; NOTE(review): the sub-forms use `rx' notation, so this is presumably
;; wrapped in an `rx' form; that wrapper and several alternatives are
;; not visible in this copy (embedded line numbers skip).
82 (defconst wisent-python-string-re
84 (opt (any "uU")) (opt (any "rR"))
86 ;; Triple-quoted string using apostrophes
87 (: "'''" (zero-or-more (or "\\'"
89 (: (repeat 1 2 "'") (not (any "'")))))
91 ;; String using apostrophes
92 (: "'" (zero-or-more (or "\\'"
95 ;; Triple-quoted string using quotation marks.
96 (: "\"\"\"" (zero-or-more (or "\\\""
98 (: (repeat 1 2 "\"") (not (any "\"")))))
100 ;; String using quotation marks.
101 (: "\"" (zero-or-more (or "\\\""
104 "Regexp matching a complete Python string.")
;; Read by `wisent-python-implicit-line-joining-p'; defaults to nil.
(defvar wisent-python-EXPANDING-block nil
  "Non-nil when expanding a paren block for Python lexical analyzer.")
;; The only body form visible here is the variable
;; `wisent-python-EXPANDING-block', so the predicate returns its value.
;; NOTE(review): the closing line of the docstring (embedded line 112)
;; is not visible in this copy.
109 (defun wisent-python-implicit-line-joining-p ()
110 "Return non-nil if implicit line joining is active.
111 That is, if inside an expression in parentheses, square brackets or
113 wisent-python-EXPANDING-block
)
;; Tries `wisent-python-string-re' at point.  Signals
;; (error "unterminated syntax") in two visible cases: when a match only
;; 2 characters long is found where a triple quote starts, and in the
;; final form, which appears to be the no-match branch of the `if'.
;; NOTE(review): the binding of `end' and the form that moves point
;; (embedded lines 119 and 127) are not visible in this copy.
115 (defsubst wisent-python-forward-string
()
116 "Move point at the end of the Python string at point."
117 (if (looking-at wisent-python-string-re
)
118 (let ((start (match-beginning 0))
120 ;; Incomplete triple-quoted string gets matched instead as a
121 ;; complete single quoted string. (This special case would be
122 ;; unnecessary if Emacs regular expressions had negative
123 ;; look-ahead assertions.)
124 (when (and (= (- end start
) 2)
125 (looking-at "\"\\{3\\}\\|'\\{3\\}"))
126 (error "unterminated syntax"))
128 (error "unterminated syntax")))
;; END-CHAR is the cdr of the syntax descriptor of the character at
;; point (for an open-paren character, its matching close character).
;; The loop runs until end of buffer or until END-CHAR is the next
;; character.  Strings are skipped with `wisent-python-forward-string'
;; and nested expressions by recursion.  If `skip-syntax-forward' makes
;; no progress, an error is signaled.
;; NOTE(review): the `cond' head, the comment-start clause, the
;; nested-expression test and the final closing-character step are not
;; visible in this copy (embedded line numbers skip).
130 (defun wisent-python-forward-balanced-expression ()
131 "Move point to the end of the balanced expression at point.
132 Here 'balanced expression' means anything matched by Emacs'
133 open/close parenthesis syntax classes. We can't use forward-sexp
134 for this because that Emacs built-in can't parse Python's
135 triple-quoted string syntax."
136 (let ((end-char (cdr (syntax-after (point)))))
138 (while (not (or (eobp) (eq (char-after (point)) end-char
)))
140 ;; Skip over python strings.
141 ((looking-at wisent-python-string-start-re
)
142 (wisent-python-forward-string))
143 ;; At a comment start just goto end of line.
146 ;; Skip over balanced expressions.
148 (wisent-python-forward-balanced-expression))
149 ;; Skip over white space, word, symbol, punctuation, paired
150 ;; delimiter (backquote) characters, line continuation, and end
151 ;; of comment characters (AKA newline characters in Python).
152 ((zerop (skip-syntax-forward "-w_.$\\>"))
153 (error "can't figure out how to go forward from here"))))
154 ;; Skip closing character. As a last resort this should raise an
155 ;; error if we hit EOB before we find our closing character..
;; Visible steps: skip strings (`wisent-python-forward-string') and
;; balanced expressions (`wisent-python-forward-balanced-expression'),
;; handle a backslash continuation, and otherwise advance with
;; `skip-syntax-forward'.  After the scan, `forward-comment' skips
;; trailing blank and comment lines, and point is moved to the beginning
;; of the line unless at end of buffer.
;; NOTE(review): the loop head, the comment-start clause and the actions
;; of several clauses are not visible in this copy (embedded line
;; numbers skip); the "endless loop" error guard cannot be fully read
;; from here.
158 (defun wisent-python-forward-line ()
159 "Move point to the beginning of the next logical line.
160 Usually this is simply the next physical line unless strings,
161 implicit/explicit line continuation, blank lines, or comment lines are
162 encountered. This function skips over such items so that the point is
163 at the beginning of the next logical line. If the current logical
164 line ends at the end of the buffer, leave the point there."
169 ;; Skip over python strings.
170 ((looking-at wisent-python-string-start-re
)
171 (wisent-python-forward-string))
172 ;; At a comment start just goto end of line.
175 ;; Skip over balanced expressions.
177 (wisent-python-forward-balanced-expression))
178 ;; At the explicit line continuation character
179 ;; (backslash) move to next line.
180 ((looking-at "\\s\\")
182 ;; Skip over white space, word, symbol, punctuation,
183 ;; and paired delimiter (backquote) characters.
184 ((skip-syntax-forward "-w_.$)")))
186 (error "python-forward-line endless loop detected")))
187 ;; The point is at eol, skip blank and comment lines.
188 (forward-comment (point-max))
189 ;; Goto the beginning of the next line.
190 (or (eobp) (beginning-of-line)))
;; Records the indentation of the starting line, then keeps calling
;; `wisent-python-forward-line' while the line reached is indented
;; deeper than that.
;; NOTE(review): embedded line 198 is not visible in this copy, and the
;; closing parentheses do not balance without it, so part of the loop
;; condition is missing here.
192 (defun wisent-python-forward-line-skip-indented ()
193 "Move point to the next logical line, skipping indented lines.
194 That is the next line whose indentation is less than or equal to
195 the indentation of the current line."
196 (let ((indent (current-indentation)))
197 (while (progn (wisent-python-forward-line)
199 (> (current-indentation) indent
))))))
;; Advances with `wisent-python-forward-line-skip-indented' while not at
;; end of buffer and the current line is indented at least as much as
;; the starting line.  It then backs up over trailing comments and
;; whitespace (`forward-comment' with a negative count) and, if that
;; leaves point mid-line, moves to the start of the next line.
;; NOTE(review): the closing of the `let'/`defun' forms (embedded line
;; 209) is not visible in this copy.
201 (defun wisent-python-end-of-block ()
202 "Move point to the end of the current block."
203 (let ((indent (current-indentation)))
204 (while (and (not (eobp)) (>= (current-indentation) indent
))
205 (wisent-python-forward-line-skip-indented))
206 ;; Don't include final comments in current block bounds
207 (forward-comment (- (point-max)))
208 (or (bolp) (forward-line 1))
211 ;; Indentation stack, what the Python (2.3) language spec. says:
213 ;; The indentation levels of consecutive lines are used to generate
214 ;; INDENT and DEDENT tokens, using a stack, as follows.
216 ;; Before the first line of the file is read, a single zero is pushed
217 ;; on the stack; this will never be popped off again. The numbers
218 ;; pushed on the stack will always be strictly increasing from bottom
219 ;; to top. At the beginning of each logical line, the line's
220 ;; indentation level is compared to the top of the stack. If it is
221 ;; equal, nothing happens. If it is larger, it is pushed on the stack,
222 ;; and one INDENT token is generated. If it is smaller, it must be one
223 ;; of the numbers occurring on the stack; all numbers on the stack
224 ;; that are larger are popped off, and for each number popped off a
225 ;; DEDENT token is generated. At the end of the file, a DEDENT token
226 ;; is generated for each number remaining on the stack that is larger
;; than zero.
;; Stack of indentation columns used while lexing.  Declared without a
;; value: the `semantic-lex' override for `python-mode' binds it to
;; (list 0) around each lexer run, and the beginning-of-line analyzer
;; pushes and pops columns on it.
(defvar wisent-python-indent-stack)
;; Runs only at beginning of line and when implicit line joining is not
;; active (see the condition line).  Compares the current line's
;; indentation with the top of `wisent-python-indent-stack':
;;  - blank or comment line: skip ahead, no token;
;;  - equal: no token, let the next analyzers run;
;;  - larger: push the column and emit INDENT if the depth limit allows,
;;    otherwise emit an INDENT_BLOCK token extending to the end of the
;;    block (`wisent-python-end-of-block');
;;  - smaller: pop the stack and emit one DEDENT per popped level.
;; `semantic-lex-current-depth' is incremented on INDENT and decremented
;; on each DEDENT.
;; NOTE(review): the binding of `last-pos', the `cond' head and parts of
;; the INDENT_BLOCK and DEDENT clauses are not visible in this copy
;; (embedded line numbers skip).
230 (define-lex-analyzer wisent-python-lex-beginning-of-line
231 "Detect and create Python indentation tokens at beginning of line."
233 (bolp) (not (wisent-python-implicit-line-joining-p))
234 (let ((last-indent (car wisent-python-indent-stack
))
236 (curr-indent (current-indentation)))
237 (skip-syntax-forward "-")
239 ;; Skip comments and blank lines. No change in indentation.
240 ((or (eolp) (looking-at semantic-lex-comment-regex
))
241 (forward-comment (point-max))
242 (or (eobp) (beginning-of-line))
243 (setq semantic-lex-end-point
(point))
244 ;; Loop lexer to handle the next line.
246 ;; No change in indentation.
247 ((= curr-indent last-indent
)
248 (setq semantic-lex-end-point
(point))
249 ;; Try next analyzers.
251 ;; Indentation increased
252 ((> curr-indent last-indent
)
253 (if (or (not semantic-lex-maximum-depth
)
254 (< semantic-lex-current-depth semantic-lex-maximum-depth
))
256 ;; Return an INDENT lexical token
257 (setq semantic-lex-current-depth
(1+ semantic-lex-current-depth
))
258 (push curr-indent wisent-python-indent-stack
)
259 (semantic-lex-push-token
260 (semantic-lex-token 'INDENT last-pos
(point))))
261 ;; Add an INDENT_BLOCK token
262 (semantic-lex-push-token
265 (progn (beginning-of-line) (point))
266 (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
267 (wisent-python-end-of-block)
269 ;; Loop lexer to handle tokens in current line.
271 ;; Indentation decreased
273 ;; Pop items from indentation stack
274 (while (< curr-indent last-indent
)
275 (pop wisent-python-indent-stack
)
276 (setq semantic-lex-current-depth
(1- semantic-lex-current-depth
)
277 last-indent
(car wisent-python-indent-stack
))
278 (semantic-lex-push-token
279 (semantic-lex-token 'DEDENT last-pos
(point))))
280 (= last-pos
(point)))
281 ;; If pos did not change, then we must return nil so that
282 ;; other lexical analyzers can be run.
284 ;; All the work was done in the above analyzer matching condition.
;; When implicit line joining is active the matched text is skipped by
;; moving `semantic-lex-end-point' to the end of the match; otherwise a
;; NEWLINE token spanning point..match-end is pushed.
;; NOTE(review): the regexp argument of the analyzer (embedded line 291)
;; is not visible in this copy.
287 (define-lex-regex-analyzer wisent-python-lex-end-of-line
288 "Detect and create Python newline tokens.
289 Just skip the newline character if the following line is an implicit
290 continuation of current line."
292 (if (wisent-python-implicit-line-joining-p)
293 (setq semantic-lex-end-point
(match-end 0))
294 (semantic-lex-push-token
295 (semantic-lex-token 'NEWLINE
(point) (match-end 0)))))
;; Triggered by `wisent-python-string-start-re'.  The string is scanned
;; with `wisent-python-forward-string' inside
;; `semantic-lex-unterminated-syntax-protection' for STRING_LITERAL.
;; NOTE(review): the token construction around the protected form
;; (embedded lines 301-303, 306-307) is not visible in this copy.
297 (define-lex-regex-analyzer wisent-python-lex-string
298 "Detect and create python string tokens."
299 wisent-python-string-start-re
300 (semantic-lex-push-token
304 (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
305 (wisent-python-forward-string)
;; Produces no token in the visible code: after skipping whitespace it
;; only advances `semantic-lex-end-point' to point.
;; NOTE(review): the regexp argument and the form that steps over the
;; backslash (embedded lines 312 and 315) are not visible in this copy.
308 (define-lex-regex-analyzer wisent-python-lex-ignore-backslash
309 "Detect and skip over backslash (explicit line joining) tokens.
310 A backslash must be the last token of a physical line, it is illegal
311 elsewhere on a line outside a string literal."
313 ;; Skip over the detected backslash and go to the first
314 ;; non-whitespace character in the next physical line.
316 (skip-syntax-forward "-")
317 (setq semantic-lex-end-point
(point)))
(define-lex wisent-python-lexer
  "Lexical Analyzer for Python code."
  ;; The order of the analyzers below is significant.
  ;; Must analyze beginning of line first to handle indentation.
  wisent-python-lex-beginning-of-line
  wisent-python-lex-end-of-line
  ;; Must analyze string before symbol to handle string prefix.
  wisent-python-lex-string
  ;; Analyzers auto-generated from grammar.  Each name is one symbol;
  ;; the angle brackets are part of the symbol name.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Input that is skipped: backslash line continuations, whitespace
  ;; and comments.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Signal error on unhandled syntax.
  semantic-lex-default-action)
;; Post-processes a function TAG using SUITE, the list of statements in
;; its body (only the car of SUITE is inspected for the docstring):
;;  - if the first statement is a docstring, its text is extracted with
;;    `semantic-python-extract-docstring' and stored on a tag attribute;
;;  - a function with at least one argument gets :parent "dummy" when
;;    the name test on its first argument succeeds;
;;  - `__init__' gets :constructor-flag and keeps SUITE under :suite
;;    (read later by `wisent-python-reconstitute-class-tag');
;;  - special names get :special-flag, private names get
;;    :protection "private";
;;  - the :decorators attribute is searched by name and :typemodifiers
;;    is updated when a match is found.
;; NOTE(review): several lines (the `cond' head, the compared strings,
;; the new attribute values, the return value) are not visible in this
;; copy; confirm the details against upstream.
343 (defun wisent-python-reconstitute-function-tag (tag suite
)
344 "Move a docstring from TAG's members into its :documentation attribute.
345 Set attributes for constructors, special, private and static methods."
346 ;; Analyze first statement to see whether it is a documentation
348 (let ((first-statement (car suite
)))
349 (when (semantic-python-docstring-p first-statement
)
350 (semantic-tag-put-attribute
352 (semantic-python-extract-docstring first-statement
))))
354 ;; TODO HACK: we try to identify methods using the following
356 ;; + at least one argument
357 ;; + first argument is self
358 (when (and (> (length (semantic-tag-function-arguments tag
)) 0)
359 (string= (semantic-tag-name
360 (first (semantic-tag-function-arguments tag
)))
362 (semantic-tag-put-attribute tag
:parent
"dummy"))
364 ;; Identify constructors, special and private functions
366 ;; TODO only valid when the function resides inside a class
367 ((string= (semantic-tag-name tag
) "__init__")
368 (semantic-tag-put-attribute tag
:constructor-flag t
)
369 (semantic-tag-put-attribute tag
:suite suite
))
371 ((semantic-python-special-p tag
)
372 (semantic-tag-put-attribute tag
:special-flag t
))
374 ((semantic-python-private-p tag
)
375 (semantic-tag-put-attribute tag
:protection
"private")))
377 ;; If there is a staticmethod decorator, add a static typemodifier
379 (when (semantic-find-tags-by-name
381 (semantic-tag-get-attribute tag
:decorators
))
382 (semantic-tag-put-attribute
385 (semantic-tag-get-attribute tag
:typemodifiers
))))
388 ;; + check for decorators classmethod
389 ;; + check for operators
;; Post-processes a class TAG in two steps:
;;  1. If the first member is a docstring, an attribute is set from the
;;     cdr of the member list (dropping that member) and another from
;;     the extracted docstring text.
;;  2. For each member flagged as a constructor, the name of its first
;;     argument is taken as the instance name.  The statements stored
;;     under its :suite attribute are scanned, and every instance
;;     variable found is cloned under a new name (the statement name
;;     minus its first 5 characters), marked private if applicable, and
;;     appended destructively (`setcdr' on the last cell) to the list
;;     read from the tag's :members attribute.
;; NOTE(review): the offset 5 equals (length "self."); with an instance
;; name of a different length it would not line up -- confirm.
;; NOTE(review): attribute names passed to `semantic-tag-put-attribute'
;; and the return value are not visible in this copy.
392 (defun wisent-python-reconstitute-class-tag (tag)
393 "Move a docstring from TAG's members into its :documentation attribute."
394 ;; The first member of TAG may be a documentation string. If that is
395 ;; the case, remove it from the members list and stick its
396 ;; content into the :documentation attribute.
397 (let ((first-member (car (semantic-tag-type-members tag
))))
398 (when (semantic-python-docstring-p first-member
)
399 (semantic-tag-put-attribute
401 (cdr (semantic-tag-type-members tag
)))
402 (semantic-tag-put-attribute
404 (semantic-python-extract-docstring first-member
))))
406 ;; Try to find the constructor, determine the name of the instance
407 ;; parameter, find assignments to instance variables and add
408 ;; corresponding variable tags to the list of members.
409 (dolist (member (semantic-tag-type-members tag
))
410 (when (semantic-tag-function-constructor-p member
)
411 (let ((self (semantic-tag-name
412 (car (semantic-tag-function-arguments member
)))))
413 (dolist (statement (semantic-tag-get-attribute member
:suite
))
414 (when (semantic-python-instance-variable-p statement self
)
415 (let ((variable (semantic-tag-clone
417 (substring (semantic-tag-name statement
) 5)))
418 (members (semantic-tag-get-attribute tag
:members
)))
419 (when (semantic-python-private-p variable
)
420 (semantic-tag-put-attribute variable
:protection
"private"))
421 (setcdr (last members
) (list variable
))))))))
423 ;; TODO remove the :suite attribute
;; Applies only to tags of class `include' whose name is a list.  Each
;; element E yields a clone of TAG named E; the clones are accumulated
;; in reverse and then put back in order with `nreverse'.
;; NOTE(review): the binding of `expand', the `cond' head, the loop over
;; the name elements and the return form are not visible in this copy
;; (embedded line numbers skip).
426 (defun semantic-python-expand-tag (tag)
427 "Expand compound declarations found in TAG into separate tags.
428 TAG contains compound declaration if the NAME part of the tag is
429 a list. In python, this can happen with `import' statements."
430 (let ((class (semantic-tag-class tag
))
431 (elts (semantic-tag-name tag
))
434 ((and (eq class
'include
) (listp elts
))
436 (setq expand
(cons (semantic-tag-clone tag E
) expand
)))
437 (setq expand
(nreverse expand
)))
442 ;;; Overridden Semantic API.
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in current buffer.
See the function `semantic-lex' for the meaning of the START, END,
DEPTH and LENGTH arguments.
This function calls `wisent-python-lexer' to actually perform the
lexical analysis, then emits the necessary Python DEDENT tokens from
what remains in the `wisent-python-indent-stack'.
Return the token stream with the trailing DEDENT tokens appended."
  (let* (;; Fresh indentation stack for this run; the bottom element 0
         ;; is what ends the `while' loop below.
         (wisent-python-indent-stack (list 0))
         (stream (wisent-python-lexer start end depth length))
         ;; Bound to nil only after the lexer has run, so this list
         ;; collects just the DEDENT tokens pushed below.
         (semantic-lex-token-stream nil))
    ;; Emit DEDENT tokens if something remains in the INDENT stack.
    ;; Every such token is zero-width and located at END.
    (while (> (pop wisent-python-indent-stack) 0)
      (semantic-lex-push-token (semantic-lex-token 'DEDENT end end)))
    ;; The pushed tokens are reversed before being appended to STREAM.
    (nconc stream (nreverse semantic-lex-token-stream))))
;; Placeholder override for `python-mode'; per its docstring it returns
;; nil for now.
;; NOTE(review): the body form (embedded line 464) is not visible in
;; this copy.
461 (define-mode-local-override semantic-get-local-variables python-mode
()
462 "Get the local variables based on point's context.
463 To be implemented for Python! For now just return nil."
466 ;; Adapted from the semantic Java support by Andrey Torba
(define-mode-local-override semantic-tag-include-filename python-mode (tag)
  "Return a suitable path for (some) Python imports.
The dotted module name of TAG is turned into a relative file name by
replacing each \".\" with \"/\" and appending \".py\", so that
\"os.path\" becomes \"os/path.py\"."
  ;; `subst-char-in-string' returns a modified copy; the tag's own name
  ;; string is left untouched.
  (concat (subst-char-in-string ?. ?/ (semantic-tag-name tag)) ".py"))
472 ;; Override ctxt-current-function/assignment defaults, since they do
473 ;; not work properly with Python code, even leading to endless loops
;; `python-mode' override of `semantic-ctxt-current-function'; it takes
;; an optional POINT argument.
;; NOTE(review): the body form (embedded line 480) is not visible in
;; this copy, so what it returns cannot be confirmed from here.
475 (define-mode-local-override semantic-ctxt-current-function python-mode
(&optional point
)
476 "Return the current function call the cursor is in at POINT.
477 The function returned is the one accepting the arguments that
478 the cursor is currently in. It will not return function symbol if the
479 cursor is on the text representing that function."
;; `python-mode' override of `semantic-ctxt-current-assignment'; it
;; takes an optional POINT argument.
;; NOTE(review): the body form (embedded line 486) is not visible in
;; this copy, so what it returns cannot be confirmed from here.
482 (define-mode-local-override semantic-ctxt-current-assignment python-mode
(&optional point
)
483 "Return the current assignment near the cursor at POINT.
484 Return a list as per `semantic-ctxt-current-symbol'.
485 Return nil if there is nothing relevant."
488 ;;; Enable Semantic in `python-mode'.
;; Buffer setup for parsing Python: installs the generated parser, makes
;; `parse-sexp-ignore-comments' buffer-local and t, gives
;; `comment-start-skip' a buffer-local default only if it is unset, and
;; then assigns a series of Semantic and imenu variables (the
;; variable/value pairs listed below).
;; NOTE(review): the head of the form that assigns those pairs and the
;; end of the function (embedded lines 499, 503, 505, 510, 519-521) are
;; not visible in this copy.
492 (defun wisent-python-default-setup ()
493 "Setup buffer for parse."
494 (wisent-python-wy--install-parser)
495 (set (make-local-variable 'parse-sexp-ignore-comments
) t
)
496 ;; Give python modes the possibility to overwrite this:
497 (if (not comment-start-skip
)
498 (set (make-local-variable 'comment-start-skip
) "#+\\s-*"))
500 ;; Character used to separate a parent/child relationship
501 semantic-type-relation-separator-character
'(".")
502 semantic-command-separation-character
";"
504 semantic-tag-expand-function
'semantic-python-expand-tag
506 ;; Semantic to take over from the one provided by python.
507 ;; The python one, if it uses the senator advice, will hang
508 ;; Emacs unrecoverably.
509 imenu-create-index-function
'semantic-create-imenu-index
511 ;; I need a python guru to update this list:
512 semantic-symbol-
>name-assoc-list-for-type-parts
'((variable .
"Variables")
513 (function .
"Methods"))
514 semantic-symbol-
>name-assoc-list
'((type .
"Classes")
515 (variable .
"Variables")
516 (function .
"Functions")
517 (include .
"Imports")
518 (package .
"Package")
;; Run the Semantic setup whenever `python-mode' is enabled.
(add-hook 'python-mode-hook 'wisent-python-default-setup)

;; Declare `python-2-mode' and `python-3-mode' as child modes of
;; `python-mode'.
;; NOTE(review): presumably so that they inherit the mode-local
;; definitions made for `python-mode' in this file -- confirm against
;; the `define-child-mode' documentation.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
531 ;;; Utility functions
;; Tests the name of TAG against an anchored pattern: "__", one or more
;; characters of symbol syntax class, then "__".
;; NOTE(review): `(syntax symbol)' matches only characters whose syntax
;; class is "symbol", not word constituents such as letters; verify that
;; names like "__str__" actually match under the syntax table in effect.
;; NOTE(review): the end of the docstring and the matching call
;; (embedded lines 536-537) are not visible in this copy.
534 (defun semantic-python-special-p (tag)
535 "Return non-nil if the name of TAG is a special identifier of
538 (rx (seq string-start
"__" (1+ (syntax symbol
)) "__" string-end
))
539 (semantic-tag-name tag
)))
;; Tests the name of TAG against an anchored pattern: a leading "_"
;; followed by zero or more characters of symbol syntax class.
;; NOTE(review): `(syntax symbol)' does not match word constituents such
;; as letters; verify that names like "_foo" actually match under the
;; syntax table in effect.
;; NOTE(review): the end of the docstring and the matching call
;; (embedded lines 543-544) are not visible in this copy.
541 (defun semantic-python-private-p (tag)
542 "Return non-nil if the name of TAG follows the convention _NAME
545 (rx (seq string-start
"_" (0+ (syntax symbol
)) string-end
))
546 (semantic-tag-name tag
)))
;; Only tags of class `variable' are considered.  The name is compared
;; with a pattern built from SELF (or "self" when SELF is nil) followed
;; by ".", and the remainder of the name after its first 5 characters
;; must not contain another ".".
;; NOTE(review): the offset 5 equals (length "self."); for a SELF of a
;; different length the `substring' would start at the wrong place --
;; confirm whether that matters for callers.
;; NOTE(review): the forms that build and apply the regexp (embedded
;; lines 553-554, 556) are not visible in this copy.
548 (defun semantic-python-instance-variable-p (tag &optional self
)
549 "Return non-nil if TAG is an instance variable of the instance
550 SELF or the instance name \"self\" if SELF is nil."
551 (when (semantic-tag-of-class-p tag
'variable
)
552 (let ((name (semantic-tag-name tag
)))
555 `(seq string-start
,(or self
"self") "."))
557 (not (string-match "\\." (substring name
5)))))))
;; TAG must be of class `code'.  The visible pattern spans the whole
;; string and requires three quotation marks at both ends, so only
;; \"\"\"-delimited names are described by it (not '''-delimited ones).
;; NOTE(review): the form that applies the pattern to NAME and the
;; closing forms (embedded lines 567, 569) are not visible in this copy.
559 (defun semantic-python-docstring-p (tag)
560 "Return non-nil, when TAG is a Python documentation string."
561 ;; TAG is considered to be a documentation string if the first
562 ;; member is of class 'code and its name looks like a documentation
564 (let ((class (semantic-tag-class tag
))
565 (name (semantic-tag-name tag
)))
566 (and (eq class
'code
)
568 (rx (seq string-start
"\"\"\"" (0+ anything
) "\"\"\"" string-end
))
(defun semantic-python-extract-docstring (tag)
  "Return the Python documentation string contained in TAG.
The name of TAG is expected to be a triple-quoted string; the three
leading and the three trailing quote characters are stripped.
Signal an `args-out-of-range' error if the name is shorter than six
characters."
  ;; Strip leading and trailing """
  (substring (semantic-tag-name tag) 3 -3))
;; Debugging aid: lexes the whole buffer with `semantic-lex' (depth 0),
;; pretty-prints the token stream into the buffer
;; "*wisent-python-lexer*", moves to its start and displays it.
;; NOTE(review): forms between the docstring and the `let', and one
;; inside `with-current-buffer' (embedded lines 583-584, 587), are not
;; visible in this copy.
581 (defun wisent-python-lex-buffer ()
582 "Run `wisent-python-lexer' on current buffer."
585 (let ((token-stream (semantic-lex (point-min) (point-max) 0)))
586 (with-current-buffer (get-buffer-create "*wisent-python-lexer*")
588 (pp token-stream
(current-buffer))
589 (goto-char (point-min))
590 (pop-to-buffer (current-buffer)))))
;; The feature name is one symbol; it matches the
;; `generated-autoload-load-name' given in the file-local variables.
(provide 'semantic/wisent/python)
595 ;; generated-autoload-file: "../loaddefs.el"
596 ;; generated-autoload-load-name: "semantic/wisent/python"
599 ;;; semantic/wisent/python.el ends here