Update copyright notices for 2013.
[bpt/emacs.git] / lisp / cedet / semantic / wisent / python.el
CommitLineData
52bee098
CY
1;;; wisent-python.el --- Semantic support for Python
2
ab422c4d 3;; Copyright (C) 2002, 2004, 2006-2013 Free Software Foundation, Inc.
52bee098
CY
4
5;; Author: Richard Kim <emacs18@gmail.com>
6;; Maintainer: Richard Kim <emacs18@gmail.com>
7;; Created: June 2002
8;; Keywords: syntax
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software: you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24
25;;; Commentary:
26;;
27;; Parser support for Python.
28
29;;; Code:
30
62a81506
CY
31(require 'rx)
32
33;; Try to load python support, but fail silently since it is only used
34;; for optional functionality
35(require 'python nil t)
36
52bee098
CY
37(require 'semantic/wisent)
38(require 'semantic/wisent/python-wy)
62a81506 39(require 'semantic/find)
52bee098
CY
40(require 'semantic/dep)
41(require 'semantic/ctxt)
42
62a81506
CY
43(eval-when-compile
44 (require 'cl))
45
46;;; Customization
47;;
48
(defun semantic-python-get-system-include-path ()
  "Return the entries of Python's `sys.path' that are existing directories.
The path is obtained by sending a short script through
`python-shell-internal-send-string' and splitting its output into
lines.  Entries that are not directories on disk are dropped; the
original order is preserved."
  (let ((output (python-shell-internal-send-string
                 "import sys;print ('\\n'.join(sys.path))"))
        (existing nil))
    (dolist (entry (split-string output "\n" t))
      (when (file-directory-p entry)
        (push entry existing)))
    (nreverse existing)))
62a81506
CY
60
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  ;; Only query Python when the `python' feature is loaded and the
  ;; session is interactive: python-mode and batch somehow often hangs.
  (and (featurep 'python)
       (not noninteractive)
       (semantic-python-get-system-include-path))
  "The system include path used by Python language.")
52bee098
CY
68\f
69;;; Lexical analysis
70;;
71
72;; Python strings are delimited by either single quotes or double
62a81506 73;; quotes, e.g., "I'm a string" and 'I too am a string'.
52bee098
CY
74;; In addition a string can have either a 'r' and/or 'u' prefix.
75;; The 'r' prefix means raw, i.e., normal backslash substitutions are
76;; to be suppressed. For example, r"01\n34" is a string with six
77;; characters 0, 1, \, n, 3 and 4. The 'u' prefix means the following
fe7a3057 78;; string is Unicode.
(defconst wisent-python-string-start-re "[uU]?[rR]?['\"]"
  "Regexp matching the opening of a Python string literal.
It accepts an optional `u' or `U' prefix, then an optional `r' or
`R' prefix, followed by a single or a double quote character.")
52bee098 81
62a81506
CY
(defconst wisent-python-string-re
  (rx
   ;; Optional prefixes: u/U first, then r/R.
   (optional (any "uU")) (optional (any "rR"))
   (or
    ;; '''...''' -- body may hold escaped apostrophes, any other
    ;; character, or one or two apostrophes not followed by another.
    (seq "'''"
         (0+ (or "\\'"
                 (not (any "'"))
                 (seq (repeat 1 2 "'") (not (any "'")))))
         "'''")
    ;; '...'
    (seq "'"
         (0+ (or "\\'"
                 (not (any "'"))))
         "'")
    ;; """...""" -- same structure as the apostrophe variant.
    (seq "\"\"\""
         (0+ (or "\\\""
                 (not (any "\""))
                 (seq (repeat 1 2 "\"") (not (any "\"")))))
         "\"\"\"")
    ;; "..."
    (seq "\""
         (0+ (or "\\\""
                 (not (any "\""))))
         "\"")))
  "Regexp matching one complete Python string literal.
Covers single-quoted, double-quoted and both triple-quoted forms,
each with optional `u' and `r' prefixes.")
105
52bee098
CY
(defvar wisent-python-EXPANDING-block nil
  "Flag for the Python lexical analyzer.
Non-nil while a paren block is being expanded.")

(defun wisent-python-implicit-line-joining-p ()
  "Tell whether implicit line joining is currently in effect.
This is the case while lexing inside an expression enclosed in
parentheses, square brackets or curly braces.  The value returned
is that of `wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
114
(defsubst wisent-python-forward-string ()
  "Move point to the end of the Python string at point.
Point must be at the start of a string literal, including any
`u'/`r' prefix.  Signal an error with the message \"unterminated
syntax\" if no complete string matching `wisent-python-string-re'
starts at point, or if the string is an unterminated triple-quoted
string."
  (if (looking-at wisent-python-string-re)
      (let* ((end (match-end 0))
             ;; Position of the opening quote, i.e. just after any
             ;; u/U/r/R prefix characters that belong to the match.
             (quote-start (save-excursion
                            (skip-chars-forward "uUrR" end)
                            (point))))
        ;; Incomplete triple-quoted string gets matched instead as a
        ;; complete (empty) single quoted string.  (This special case
        ;; would be unnecessary if Emacs regular expressions had
        ;; negative look-ahead assertions.)  The test is made relative
        ;; to the opening quote so that prefixed strings such as
        ;; u''' are detected as well.
        (when (and (= (- end quote-start) 2)
                   (save-excursion
                     (goto-char quote-start)
                     (looking-at "\"\\{3\\}\\|'\\{3\\}")))
          (error "unterminated syntax"))
        (goto-char end))
    (error "unterminated syntax")))
129
(defun wisent-python-forward-balanced-expression ()
  "Move point past the balanced expression that starts at point.
A balanced expression is whatever Emacs' open/close parenthesis
syntax classes delimit.  `forward-sexp' is not used because it
cannot parse Python's triple-quoted strings.  Point must be on the
opening character; its matching closer is read from the syntax
table."
  (let ((closer (cdr (syntax-after (point)))))
    ;; Step over the opening character.
    (forward-char 1)
    (while (and (not (eobp))
                (not (eq (char-after) closer)))
      (cond
       ;; A Python string: jump to its end.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; A comment start: the rest of the line is irrelevant.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A nested balanced expression.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; Otherwise skip white space, word, symbol, punctuation,
       ;; paired delimiter (backquote), line continuation and end of
       ;; comment (newline in Python) characters.  Making no progress
       ;; at all is an error.
       ((= 0 (skip-syntax-forward "-w_.$\\>"))
        (error "can't figure out how to go forward from here"))))
    ;; Step over the closing character.  At end of buffer this
    ;; signals an error, which is the intended last resort when the
    ;; closing character was never found.
    (forward-char 1)))
52bee098
CY
157
(defun wisent-python-forward-line ()
  "Move point to the start of the next logical line.
Most of the time that is the next physical line.  Strings,
implicit or explicit line continuations, blank lines and comment
lines are skipped so that point ends up at the beginning of the
next logical line.  When the current logical line ends at the end
of the buffer, point is left there."
  (while (not (eolp))
    (let ((before (point)))
      (cond
       ;; A Python string: jump to its end.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; A comment start: go straight to the end of the line.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A balanced expression: jump over it as a whole.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; An explicit line continuation (backslash): continue on the
       ;; next physical line.
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; Anything else: skip white space, word, symbol, punctuation,
       ;; paired delimiter (backquote) and close parenthesis
       ;; characters.
       (t
        (skip-syntax-forward "-w_.$)")))
      ;; Every iteration must advance point.
      (when (= before (point))
        (error "python-forward-line endless loop detected"))))
  ;; Point is at end of line: skip following blank and comment lines.
  (forward-comment (point-max))
  ;; Settle at the beginning of the line reached.
  (or (eobp) (beginning-of-line)))
191
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line that is not indented deeper.
Starting from the current line, advance by logical lines until
reaching end of buffer or a line whose indentation is less than or
equal to the indentation of the starting line."
  (let ((base-indent (current-indentation))
        (again t))
    (while again
      (wisent-python-forward-line)
      (setq again (and (not (eobp))
                       (> (current-indentation) base-indent))))))
200
(defun wisent-python-end-of-block ()
  "Move point to the end of the block that starts on the current line.
The block extends over the following logical lines indented at
least as much as the current line.  Trailing comments are not
counted as part of the block."
  (let ((block-indent (current-indentation)))
    (while (not (or (eobp)
                    (< (current-indentation) block-indent)))
      (wisent-python-forward-line-skip-indented))
    ;; Back up over final comments so they stay outside the block
    ;; bounds, then move to the start of the following line.
    (forward-comment (- (point-max)))
    (or (bolp) (forward-line 1))))
210
211;; Indentation stack, what the Python (2.3) language spec. says:
212;;
213;; The indentation levels of consecutive lines are used to generate
214;; INDENT and DEDENT tokens, using a stack, as follows.
215;;
216;; Before the first line of the file is read, a single zero is pushed
217;; on the stack; this will never be popped off again. The numbers
218;; pushed on the stack will always be strictly increasing from bottom
219;; to top. At the beginning of each logical line, the line's
220;; indentation level is compared to the top of the stack. If it is
221;; equal, nothing happens. If it is larger, it is pushed on the stack,
222;; and one INDENT token is generated. If it is smaller, it must be one
223;; of the numbers occurring on the stack; all numbers on the stack
224;; that are larger are popped off, and for each number popped off a
225;; DEDENT token is generated. At the end of the file, a DEDENT token
226;; is generated for each number remaining on the stack that is larger
227;; than zero.
(defvar wisent-python-indent-stack)

(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Produce Python indentation tokens when point is at beginning of line.
Compare the indentation of the current line with the top of
`wisent-python-indent-stack' and push INDENT, INDENT_BLOCK or DEDENT
tokens accordingly.  Blank lines and comment lines are skipped
without changing the indentation stack.  Nothing is done while
implicit line joining is active."
  (and
   (bolp)
   (not (wisent-python-implicit-line-joining-p))
   (let ((top-indent (car wisent-python-indent-stack))
         (bol-pos (point))
         (line-indent (current-indentation)))
     (skip-syntax-forward "-")
     (cond
      ;; Blank line or comment line: skip it; indentation is
      ;; unchanged.  Returning t loops the lexer on the next line.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (or (eobp) (beginning-of-line))
       (setq semantic-lex-end-point (point))
       t)
      ;; Same indentation: record the position and let the following
      ;; analyzers run.
      ((= line-indent top-indent)
       (setq semantic-lex-end-point (point))
       nil)
      ;; Deeper indentation.
      ((> line-indent top-indent)
       (if (or (not semantic-lex-maximum-depth)
               (< semantic-lex-current-depth semantic-lex-maximum-depth))
           ;; Depth limit not reached: descend one level and emit an
           ;; INDENT token.
           (progn
             (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
             (push line-indent wisent-python-indent-stack)
             (semantic-lex-push-token
              (semantic-lex-token 'INDENT bol-pos (point))))
         ;; Depth limit reached: emit a single INDENT_BLOCK token that
         ;; spans from the start of this line to the end of the block.
         (semantic-lex-push-token
          (semantic-lex-token
           'INDENT_BLOCK
           (progn (beginning-of-line) (point))
           (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
             (wisent-python-end-of-block)
             (point)))))
       ;; Loop the lexer to handle the tokens of the current line.
       t)
      ;; Shallower indentation: pop every stack entry larger than the
      ;; current indentation, emitting one DEDENT token for each, then
      ;; return nil so that the other analyzers can run.
      (t
       (while (< line-indent top-indent)
         (pop wisent-python-indent-stack)
         (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
               top-indent (car wisent-python-indent-stack))
         (semantic-lex-push-token
          (semantic-lex-token 'DEDENT bol-pos (point))))
       nil))))
  ;; The matching condition above performs all the work.
  )
286
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Produce Python NEWLINE tokens at the end of a line.
When implicit line joining is active the newline character is
merely skipped, because the next line continues the current one."
  "\\(\n\\|\\s>\\)"
  (cond
   ((wisent-python-implicit-line-joining-p)
    (setq semantic-lex-end-point (match-end 0)))
   (t
    (semantic-lex-push-token
     (semantic-lex-token 'NEWLINE (point) (match-end 0))))))
296
(define-lex-regex-analyzer wisent-python-lex-string
  "Produce a STRING_LITERAL token for the Python string at point.
The token runs from point to the position reached by
`wisent-python-forward-string'."
  wisent-python-string-start-re
  (let ((string-start (point)))
    (semantic-lex-push-token
     (semantic-lex-token
      'STRING_LITERAL
      string-start
      (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
        (wisent-python-forward-string)
        (point))))))
307
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Skip a backslash used for explicit line joining.
Such a backslash has to be the last token of a physical line;
anywhere else outside a string literal it is illegal."
  "\\s\\\\s-*$"
  ;; Go past the backslash to the next physical line and stop on its
  ;; first non-whitespace character.
  (forward-line 1)
  (skip-syntax-forward "-")
  (setq semantic-lex-end-point (point)))
318
(define-lex wisent-python-lexer
  "Lexical Analyzer for Python code."
  ;; Indentation handling comes first, so that it is considered at
  ;; every beginning of line before anything else.
  wisent-python-lex-beginning-of-line
  wisent-python-lex-end-of-line
  ;; Strings come before symbols, so that a string prefix is not
  ;; taken for a symbol.
  wisent-python-lex-string
  ;; Analyzers generated automatically from the grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Things to skip.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Anything left over is unhandled syntax: signal an error.
  semantic-lex-default-action)
62a81506
CY
338
339\f
340;;; Parsing
341;;
342
(defun wisent-python-reconstitute-function-tag (tag suite)
  "Complete the function TAG using the statements in SUITE.
If the first statement of SUITE is a documentation string, store
its text in the :documentation attribute of TAG.  Also set
attributes for methods, constructors, special, private and static
functions.  Return TAG."
  ;; Analyze first statement to see whether it is a documentation
  ;; string.
  (let ((first-statement (car suite)))
    (when (semantic-python-docstring-p first-statement)
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-statement))))

  ;; TODO HACK: we try to identify methods using the following
  ;; heuristic:
  ;; + at least one argument
  ;; + first argument is self
  ;; `car' is used rather than the cl function `first': this file
  ;; only requires `cl' at compile time, so `first' may be undefined
  ;; when the code runs interpreted.
  (let ((first-arg (car (semantic-tag-function-arguments tag))))
    (when (and first-arg
               (string= (semantic-tag-name first-arg) "self"))
      (semantic-tag-put-attribute tag :parent "dummy")))

  ;; Identify constructors, special and private functions
  (cond
   ;; TODO only valid when the function resides inside a class
   ((string= (semantic-tag-name tag) "__init__")
    (semantic-tag-put-attribute tag :constructor-flag t)
    ;; Keep the statements so that the enclosing class can look for
    ;; instance variable assignments.
    (semantic-tag-put-attribute tag :suite suite))

   ((semantic-python-special-p tag)
    (semantic-tag-put-attribute tag :special-flag t))

   ((semantic-python-private-p tag)
    (semantic-tag-put-attribute tag :protection "private")))

  ;; If there is a staticmethod decorator, add a static typemodifier
  ;; for the function.
  (when (semantic-find-tags-by-name
         "staticmethod"
         (semantic-tag-get-attribute tag :decorators))
    (semantic-tag-put-attribute
     tag :typemodifiers
     (cons "static"
           (semantic-tag-get-attribute tag :typemodifiers))))

  ;; TODO
  ;; + check for decorators classmethod
  ;; + check for operators
  tag)
391
(defun wisent-python-reconstitute-class-tag (tag)
  "Complete the class TAG from its members.
If the first member is a documentation string, remove it from the
members and store its text in the :documentation attribute.  Then,
for each constructor among the members, add a variable tag to the
members for every instance variable assigned in the constructor's
:suite.  Return TAG."
  ;; The first member of TAG may be a documentation string.  If that is
  ;; the case, remove it from the members list and stick its
  ;; content into the :documentation attribute.
  (let ((first-member (car (semantic-tag-type-members tag))))
    (when (semantic-python-docstring-p first-member)
      (semantic-tag-put-attribute
       tag :members
       (cdr (semantic-tag-type-members tag)))
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-member))))

  ;; Try to find the constructor, determine the name of the instance
  ;; parameter, find assignments to instance variables and add
  ;; corresponding variable tags to the list of members.
  (dolist (member (semantic-tag-type-members tag))
    (when (semantic-tag-function-constructor-p member)
      (let* ((self (semantic-tag-name
                    (car (semantic-tag-function-arguments member))))
             ;; Number of characters in the "SELF." prefix to strip
             ;; from instance variable names.  This must follow the
             ;; actual name of the instance parameter, which is not
             ;; necessarily "self"; "self" is the fallback that
             ;; `semantic-python-instance-variable-p' uses when SELF
             ;; is nil.
             (prefix-length (1+ (length (or self "self")))))
        (dolist (statement (semantic-tag-get-attribute member :suite))
          (when (semantic-python-instance-variable-p statement self)
            (let ((variable (semantic-tag-clone
                             statement
                             (substring (semantic-tag-name statement)
                                        prefix-length)))
                  (members  (semantic-tag-get-attribute tag :members)))
              (when (semantic-python-private-p variable)
                (semantic-tag-put-attribute variable :protection "private"))
              ;; Append the new variable tag to the members in place.
              (setcdr (last members) (list variable))))))))

  ;; TODO remove the :suite attribute
  tag)
425
(defun semantic-python-expand-tag (tag)
  "Split a compound declaration in TAG into one tag per name.
TAG is a compound declaration when its NAME part is a list, which
in Python can happen with `import' statements.  For such an
`include' tag return a list of clones of TAG, one per name, in the
same order.  Return nil for any other tag."
  (let ((names (semantic-tag-name tag)))
    (when (and (eq (semantic-tag-class tag) 'include)
               (listp names))
      (mapcar (lambda (name) (semantic-tag-clone tag name))
              names))))
439
440
52bee098
CY
441\f
442;;; Overridden Semantic API.
443;;
62a81506 444
52bee098
CY
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in the current buffer.
START, END, DEPTH and LENGTH have the meaning documented for the
function `semantic-lex'.
The analysis itself is done by `wisent-python-lexer'.  Afterwards
one Python DEDENT token is appended for each indentation level
still open in `wisent-python-indent-stack'."
  (let* ((wisent-python-indent-stack (list 0))
         (stream (wisent-python-lexer start end depth length))
         (semantic-lex-token-stream nil)
         (level (pop wisent-python-indent-stack)))
    ;; Close every indentation level still open at END.
    (while (> level 0)
      (semantic-lex-push-token (semantic-lex-token 'DEDENT end end))
      (setq level (pop wisent-python-indent-stack)))
    (nconc stream (nreverse semantic-lex-token-stream))))
460
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Return the local variables for the context at point.
Not implemented for Python yet; this override always returns nil."
  nil)
465
62a81506
CY
466;; Adapted from the semantic Java support by Andrey Torba
(define-mode-local-override semantic-tag-include-filename python-mode (tag)
  "Return a file name for the Python import described by TAG.
The dotted module name of TAG is turned into a relative path by
replacing each dot with a slash and appending \".py\"."
  (let* ((module (semantic-tag-name tag))
         (components (split-string module "\\.")))
    (concat (mapconcat 'identity components "/") ".py")))
52bee098 471
740877cd
DE
472;; Override ctxt-current-function/assignment defaults, since they do
473;; not work properly with Python code, even leading to endless loops
474;; (see bug #xxxxx).
(define-mode-local-override semantic-ctxt-current-function python-mode (&optional point)
  "Return the function call the cursor is in at POINT.
This Python override ignores POINT and always returns nil: it
replaces the default implementation, which does not work properly
with Python code."
  nil)
481
(define-mode-local-override semantic-ctxt-current-assignment python-mode (&optional point)
  "Return the assignment near the cursor at POINT.
This Python override ignores POINT and always returns nil: it
replaces the default implementation, which does not work properly
with Python code."
  nil)
487
52bee098
CY
488;;; Enable Semantic in `python-mode'.
489;;
490
;;;###autoload
(defun wisent-python-default-setup ()
  "Set up the current buffer for parsing Python with Semantic.
Install the Wisent Python parser and configure the buffer-level
Semantic and imenu settings used for Python."
  (wisent-python-wy--install-parser)
  (set (make-local-variable 'parse-sexp-ignore-comments) t)
  ;; Python modes may provide their own value; only set a default
  ;; when none is present.
  (unless comment-start-skip
    (set (make-local-variable 'comment-start-skip) "#+\\s-*"))
  ;; Character used to separate a parent/child relationship.
  (setq semantic-type-relation-separator-character '("."))
  (setq semantic-command-separation-character ";")
  ;; Parsing
  (setq semantic-tag-expand-function 'semantic-python-expand-tag)
  ;; Semantic to take over from the one provided by python.
  ;; The python one, if it uses the senator advice, will hang
  ;; Emacs unrecoverably.
  (setq imenu-create-index-function 'semantic-create-imenu-index)
  ;; I need a python guru to update this list:
  (setq semantic-symbol->name-assoc-list-for-type-parts
        '((variable . "Variables")
          (function . "Methods")))
  (setq semantic-symbol->name-assoc-list
        '((type     . "Classes")
          (variable . "Variables")
          (function . "Functions")
          (include  . "Imports")
          (package  . "Package")
          (code     . "Code"))))
521
;;;###autoload
(add-hook 'python-mode-hook #'wisent-python-default-setup)

;; Declare the newer Python modes as children of `python-mode' so
;; that they pick up the same mode-local overrides.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
529
530\f
62a81506
CY
531;;; Utility functions
532;;
533
(defun semantic-python-special-p (tag)
  "Return non-nil if the name of TAG is a special identifier.
Special identifiers have the form __NAME__."
  (string-match
   ;; NAME is made of word and symbol constituents.  Matching symbol
   ;; syntax alone is not enough: letters and digits have word
   ;; syntax, so a name such as __init__ would never match.
   (rx (seq string-start "__"
            (1+ (or (syntax word) (syntax symbol)))
            "__" string-end))
   (semantic-tag-name tag)))
540
(defun semantic-python-private-p (tag)
  "Return non-nil if the name of TAG follows the private convention.
By convention private names have the form _NAME."
  (string-match
   ;; NAME is made of word and symbol constituents.  Matching symbol
   ;; syntax alone is not enough: letters and digits have word
   ;; syntax, so a name such as _value would never match.
   (rx (seq string-start "_"
            (0+ (or (syntax word) (syntax symbol)))
            string-end))
   (semantic-tag-name tag)))
547
(defun semantic-python-instance-variable-p (tag &optional self)
  "Return non-nil if TAG is an instance variable of the instance SELF.
SELF is the name of the instance; it defaults to \"self\" when nil.
TAG qualifies when it is a variable tag whose name is SELF, a dot,
and a remainder that contains no further dot."
  (when (semantic-tag-of-class-p tag 'variable)
    (let ((name (semantic-tag-name tag)))
      (when (string-match
             (rx-to-string
              `(seq string-start ,(or self "self") "."))
             name)
        ;; Search for another dot starting right after the matched
        ;; \"SELF.\" prefix.  Using the end of the match rather than
        ;; a fixed offset keeps this correct for any instance name.
        (not (string-match "\\." name (match-end 0)))))))
558
(defun semantic-python-docstring-p (tag)
  "Return non-nil when TAG looks like a Python documentation string.
That is the case when TAG is of class `code' and its name starts
and ends with three quotation marks."
  (if (eq (semantic-tag-class tag) 'code)
      (string-match
       (rx (seq string-start "\"\"\"" (0+ anything) "\"\"\"" string-end))
       (semantic-tag-name tag))
    nil))
570
(defun semantic-python-extract-docstring (tag)
  "Return the text of the Python documentation string held by TAG.
The result is the name of TAG without its first three and last
three characters, i.e. without the enclosing triple quotes."
  (substring (semantic-tag-name tag) 3 -3))
576
577\f
52bee098
CY
578;;; Test
579;;
62a81506 580
52bee098
CY
(defun wisent-python-lex-buffer ()
  "Lex the current buffer with `wisent-python-lexer' and show the result.
The token stream is pretty-printed into the buffer
\"*wisent-python-lexer*\", which is then displayed."
  (interactive)
  (semantic-lex-init)
  (let* ((tokens (semantic-lex (point-min) (point-max) 0))
         (output (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer output
      (erase-buffer)
      (pp tokens output)
      (goto-char (point-min))
      (pop-to-buffer output))))
591
592(provide 'semantic/wisent/python)
593
594;; Local variables:
595;; generated-autoload-file: "../loaddefs.el"
596;; generated-autoload-load-name: "semantic/wisent/python"
597;; End:
598
599;;; semantic/wisent/python.el ends here