* semantic/ede-grammar.el (project-compile-target): Fix grammar
[bpt/emacs.git] / lisp / cedet / semantic / wisent / python.el
CommitLineData
52bee098
CY
1;;; wisent-python.el --- Semantic support for Python
2
acaf905b 3;; Copyright (C) 2002, 2004, 2006-2012 Free Software Foundation, Inc.
52bee098
CY
4
5;; Author: Richard Kim <emacs18@gmail.com>
6;; Maintainer: Richard Kim <emacs18@gmail.com>
7;; Created: June 2002
8;; Keywords: syntax
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software: you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24
25;;; Commentary:
26;;
27;; Parser support for Python.
28
29;;; Code:
30
62a81506
CY
31(require 'rx)
32
33;; Try to load python support, but fail silently since it is only used
34;; for optional functionality
35(require 'python nil t)
36
52bee098
CY
37(require 'semantic/wisent)
38(require 'semantic/wisent/python-wy)
62a81506 39(require 'semantic/find)
52bee098
CY
40(require 'semantic/dep)
41(require 'semantic/ctxt)
42
62a81506
CY
43(eval-when-compile
44 (require 'cl))
45
46;;; Customization
47;;
48
(defun semantic-python-get-system-include-path ()
  "Evaluate some Python code that determines the system include path.
Start (or reuse) the inferior Python process via `python-proc' and ask
it to print `sys.path' joined with NUL characters, first with Python 2
`print' statement syntax and, if that produces no result, with
Python 3 `print' function syntax.  Each attempt waits up to two
seconds for output.

Return the path as a list of strings, or nil (after displaying a
message) when no Python buffer is available or no answer arrives."
  (python-proc)
  (if (not python-buffer)
      (progn
        (message "Python seems to be unavailable on this system.")
        ;; `message' returns its text; callers expect a list or nil.
        nil)
    (with-current-buffer python-buffer
      (set (make-local-variable 'python-preoutput-result) nil)
      ;; Try the Python 2 form first, then a Python3k compatible shot.
      (let ((queries
             '("import sys; print '_emacs_out ' + '\\0'.join(sys.path)"
               "import sys; print('_emacs_out ' + '\\0'.join(sys.path))")))
        (while (and queries (not python-preoutput-result))
          (python-send-string (pop queries))
          (accept-process-output (python-proc) 2)))
      (if python-preoutput-result
          (split-string python-preoutput-result "[\0\n]" t)
        (message "Timeout while querying Python for system include path.")
        nil))))
69
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  ;; Only query Python when python.el is loaded and Emacs is running
  ;; interactively: python-mode and batch somehow often hangs.
  (and (featurep 'python)
       (not noninteractive)
       (semantic-python-get-system-include-path))
  "The system include path used by Python language.")
52bee098
CY
77\f
78;;; Lexical analysis
79;;
80
;; Python strings are delimited by either single quotes or double
;; quotes, e.g., "I'm a string" and 'I too am a string'.
;; In addition, a string can have an `r' and/or a `u' prefix.
;; The `r' prefix means raw, i.e., normal backslash substitutions are
;; to be suppressed.  For example, r"01\n34" is a string with six
;; characters 0, 1, \, n, 3 and 4.  The `u' prefix means the following
;; string is Unicode.
(defconst wisent-python-string-start-re
  ;; Optional `u'/`U', then optional `r'/`R', then the opening quote.
  "[uU]?[rR]?['\"]"
  "Regexp matching the start of a Python string literal.
It covers the optional prefix letters and the first quote character
only, not the rest of the string.")
52bee098 90
62a81506
CY
(defconst wisent-python-string-re
  (rx
   ;; Optional prefix letters.
   (optional (any "uU")) (optional (any "rR"))
   (or
    ;; '''...''' : an escaped apostrophe, any non-apostrophe, or one or
    ;; two apostrophes followed by a non-apostrophe may appear inside.
    (seq "'''"
         (* (or "\\'"
                (not (any "'"))
                (seq (repeat 1 2 "'") (not (any "'")))))
         "'''")
    ;; '...'
    (seq "'"
         (* (or "\\'"
                (not (any "'"))))
         "'")
    ;; """...""" : same structure as above with quotation marks.
    (seq "\"\"\""
         (* (or "\\\""
                (not (any "\""))
                (seq (repeat 1 2 "\"") (not (any "\"")))))
         "\"\"\"")
    ;; "..."
    (seq "\""
         (* (or "\\\""
                (not (any "\""))))
         "\"")))
  "Regexp matching a complete Python string literal.
The triple-quoted alternatives are listed before the single-quoted
ones so that they are tried first.")
114
52bee098
CY
(defvar wisent-python-EXPANDING-block nil
  "Non-nil while the Python lexer is expanding a parenthesized block.
`wisent-python-implicit-line-joining-p' returns this value.")
117
(defun wisent-python-implicit-line-joining-p ()
  "Return non-nil when Python's implicit line joining is in effect.
Implicit joining applies inside an expression enclosed in parentheses,
square brackets or curly braces.  The answer is simply the current
value of `wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
123
(defsubst wisent-python-forward-string ()
  "Move point to the end of the Python string literal at point.
Return the new position.  Signal an error with the message
\"unterminated syntax\" when no complete string literal (as matched by
`wisent-python-string-re') starts at point, or when point is at the
opening of a triple-quoted string that is never closed."
  (unless (looking-at wisent-python-string-re)
    (error "unterminated syntax"))
  (let ((end (match-end 0)))
    ;; An incomplete triple-quoted string gets matched instead as a
    ;; complete, empty, single-quoted string: optional prefix plus two
    ;; quotes.  Detect that by checking that a triple-quote opener
    ;; starts at point and that the match stopped one character short
    ;; of the opener's end.  The prefix letters must be allowed here,
    ;; otherwise r\"\"\"... and u'''... escape the check.  (This special
    ;; case would be unnecessary if Emacs regular expressions had
    ;; negative look-ahead assertions.)
    (when (and (looking-at "[uU]?[rR]?\\(?:\"\\{3\\}\\|'\\{3\\}\\)")
               (= end (1- (match-end 0))))
      (error "unterminated syntax"))
    (goto-char end)))
138
(defun wisent-python-forward-balanced-expression ()
  "Move point past the balanced expression that starts at point.
A balanced expression is whatever Emacs' open/close parenthesis
syntax classes delimit.  `forward-sexp' is not usable here because it
cannot parse Python's triple-quoted strings."
  ;; The syntax descriptor of the opening character carries its
  ;; matching closing character in the cdr.
  (let ((closer (cdr (syntax-after (point)))))
    (forward-char 1)
    (while (and (not (eobp))
                (not (eq (char-after) closer)))
      (cond
       ;; A string literal: jump to its end.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; A comment: the rest of the line is irrelevant.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A nested balanced expression: recurse.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; Otherwise skip white space, word, symbol, punctuation, paired
       ;; delimiter (backquote), line continuation and end-of-comment
       ;; (newline) characters.  Not moving at all means we are stuck.
       ((= 0 (skip-syntax-forward "-w_.$\\>"))
        (error "can't figure out how to go forward from here"))))
    ;; Step over the closing character.  If the loop stopped at end of
    ;; buffer instead, this signals an error, which is the intended
    ;; last resort.
    (forward-char 1)))
52bee098
CY
166
(defun wisent-python-forward-line ()
  "Move point to the beginning of the next logical line.
Normally that is the next physical line, but strings, implicit or
explicit line continuations, blank lines and comment lines are all
skipped so that point ends up where the next logical line begins.
When the current logical line runs to the end of the buffer, point is
left there."
  (while (not (eolp))
    (let ((origin (point)))
      (cond
       ;; String literal.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; Comment start: nothing more of interest on this line.
       ((looking-at "\\s<")
        (end-of-line))
       ;; Parenthesized/bracketed/braced expression.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; Explicit line continuation character (backslash).
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; White space, word, symbol, punctuation and paired delimiter
       ;; (backquote) characters.
       (t
        (skip-syntax-forward "-w_.$)")))
      ;; Every iteration must make progress.
      (when (= origin (point))
        (error "python-forward-line endless loop detected"))))
  ;; Point is at end of line: skip blank and comment lines.
  (forward-comment (point-max))
  ;; Unless the buffer ended, go to the start of the line reached.
  (if (eobp) t (beginning-of-line)))
200
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line that is not indented deeper.
That is, advance at least one logical line and keep advancing until
reaching a line whose indentation is less than or equal to that of the
line point started on, or the end of the buffer."
  (let ((base-indent (current-indentation)))
    ;; Always move once, then continue past deeper-indented lines.
    (wisent-python-forward-line)
    (while (and (not (eobp))
                (> (current-indentation) base-indent))
      (wisent-python-forward-line))))
209
(defun wisent-python-end-of-block ()
  "Move point to the end of the block containing the current line.
The block is the run of logical lines indented at least as much as the
current line."
  (let ((block-indent (current-indentation)))
    (while (not (or (eobp)
                    (< (current-indentation) block-indent)))
      (wisent-python-forward-line-skip-indented))
    ;; Back up over trailing comments and white space so they are not
    ;; counted as part of the block.
    (forward-comment (- (point-max)))
    ;; Finish at a line beginning.
    (if (bolp) t (forward-line 1))))
219
;; The indentation stack, as described by the Python (2.3) language
;; specification:
;;
;; INDENT and DEDENT tokens are generated from the indentation levels
;; of consecutive lines, using a stack.
;;
;; A single zero is pushed on the stack before the first line of the
;; file is read; it is never popped off again.  The numbers on the
;; stack are always strictly increasing from bottom to top.  At the
;; beginning of each logical line, the line's indentation level is
;; compared to the top of the stack:
;;   - if it is equal, nothing happens;
;;   - if it is larger, it is pushed on the stack and one INDENT token
;;     is generated;
;;   - if it is smaller, it must be one of the numbers occurring on
;;     the stack; all larger numbers are popped off, and one DEDENT
;;     token is generated for each number popped.
;; At the end of the file, a DEDENT token is generated for each number
;; remaining on the stack that is larger than zero.
(defvar wisent-python-indent-stack)
238
(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Produce Python indentation tokens at the beginning of a line.
Compare the indentation of the line at point with the top of
`wisent-python-indent-stack' and push INDENT, INDENT_BLOCK or DEDENT
tokens accordingly.  Blank and comment-only lines are skipped without
touching the stack.  Nothing is done while implicit line joining is
active."
  (and
   (bolp)
   (not (wisent-python-implicit-line-joining-p))
   (let ((last-indent (car wisent-python-indent-stack))
         (last-pos (point))
         (curr-indent (current-indentation)))
     (skip-syntax-forward "-")
     (cond
      ;; Blank or comment-only line: skip it, indentation unchanged.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (unless (eobp)
         (beginning-of-line))
       (setq semantic-lex-end-point (point))
       ;; Non-nil: loop the lexer so it handles the next line.
       t)
      ;; Same indentation as the enclosing level.
      ((= curr-indent last-indent)
       (setq semantic-lex-end-point (point))
       ;; Nil: let the following analyzers have a go.
       nil)
      ;; Deeper indentation.
      ((> curr-indent last-indent)
       (if (and semantic-lex-maximum-depth
                (>= semantic-lex-current-depth semantic-lex-maximum-depth))
           ;; Depth limit reached: cover the whole block with a single
           ;; INDENT_BLOCK token.
           (semantic-lex-push-token
            (semantic-lex-token
             'INDENT_BLOCK
             (progn (beginning-of-line) (point))
             (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
               (wisent-python-end-of-block)
               (point))))
         ;; Otherwise descend one level and emit an INDENT token.
         (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
         (push curr-indent wisent-python-indent-stack)
         (semantic-lex-push-token
          (semantic-lex-token 'INDENT last-pos (point))))
       ;; Non-nil: loop the lexer to handle tokens in the current line.
       t)
      ;; Shallower indentation: pop one stack entry, and emit one DEDENT
      ;; token, per level left.
      ((progn
         (while (< curr-indent last-indent)
           (pop wisent-python-indent-stack)
           (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
                 last-indent (car wisent-python-indent-stack))
           (semantic-lex-push-token
            (semantic-lex-token 'DEDENT last-pos (point))))
         (= last-pos (point)))
       ;; Point did not move, so return nil to let the other lexical
       ;; analyzers run.
       nil))))
  ;; No action forms: all the work happens in the condition above.
  )
295
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Produce Python NEWLINE tokens.
When the next line is an implicit continuation of the current one, the
newline is merely skipped and no token is produced."
  "\\(\n\\|\\s>\\)"
  (cond
   ;; Inside brackets: step over the newline silently.
   ((wisent-python-implicit-line-joining-p)
    (setq semantic-lex-end-point (match-end 0)))
   ;; Otherwise the newline ends a logical line.
   (t
    (semantic-lex-push-token
     (semantic-lex-token 'NEWLINE (point) (match-end 0))))))
305
(define-lex-regex-analyzer wisent-python-lex-string
  "Produce a STRING_LITERAL token for the Python string at point.
The analyzer triggers on `wisent-python-string-start-re'; the end of
the token is wherever `wisent-python-forward-string' leaves point."
  wisent-python-string-start-re
  ;; NOTE(review): the local is deliberately not named `start' or `end';
  ;; the Semantic lexer macros appear to refer to variables with those
  ;; names -- confirm against semantic/lex.el.
  (let ((literal-beg (point)))
    (semantic-lex-push-token
     (semantic-lex-token
      'STRING_LITERAL
      literal-beg
      (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
        (wisent-python-forward-string)
        (point))))))
316
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Skip a backslash used for explicit line joining.
Such a backslash must be the last token on a physical line; anywhere
else outside a string literal it is illegal."
  "\\s\\\\s-*$"
  ;; Continue lexing at the first non-whitespace character of the next
  ;; physical line.
  (forward-line 1)
  (skip-syntax-forward "-")
  (setq semantic-lex-end-point (point)))
327
(define-lex wisent-python-lexer
  "Lexical analyzer for Python source code.
The analyzers are listed in the order given below."
  ;; Indentation handling has to come first.
  wisent-python-lex-beginning-of-line
  wisent-python-lex-end-of-line
  ;; Strings go before symbols so that a string prefix such as `u' or
  ;; `r' is not taken for a symbol.
  wisent-python-lex-string
  ;; Analyzers generated automatically from the grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Things that produce no token.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Anything left over is unhandled syntax: signal an error.
  semantic-lex-default-action)
62a81506
CY
347
348\f
349;;; Parsing
350;;
351
(defun wisent-python-reconstitute-function-tag (tag suite)
  "Move a docstring from TAG's members into its :documentation attribute.
Set attributes for constructors, special, private and static methods.
SUITE is the list of statements making up the function body; its first
element is examined for a docstring.  Return TAG."
  ;; Analyze the first statement to see whether it is a documentation
  ;; string.
  (let ((first-statement (car suite)))
    (when (semantic-python-docstring-p first-statement)
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-statement))))

  ;; TODO HACK: we try to identify methods using the following
  ;; heuristic:
  ;; + at least one argument
  ;; + first argument is self
  ;; `car' is used rather than `first' because `cl' is only loaded at
  ;; compile time in this file, so `first' may be undefined at run time
  ;; when the code is evaluated uncompiled.
  (let ((first-arg (car (semantic-tag-function-arguments tag))))
    (when (and first-arg
               (string= (semantic-tag-name first-arg) "self"))
      (semantic-tag-put-attribute tag :parent "dummy")))

  ;; Identify constructors, special and private functions.  The order
  ;; matters: the tests are tried from most to least specific.
  (cond
   ;; TODO only valid when the function resides inside a class
   ((string= (semantic-tag-name tag) "__init__")
    (semantic-tag-put-attribute tag :constructor-flag t)
    ;; Keep the body so the class tag can look for instance variables.
    (semantic-tag-put-attribute tag :suite suite))

   ((semantic-python-special-p tag)
    (semantic-tag-put-attribute tag :special-flag t))

   ((semantic-python-private-p tag)
    (semantic-tag-put-attribute tag :protection "private")))

  ;; If there is a staticmethod decorator, add a static typemodifier
  ;; for the function.
  (when (semantic-find-tags-by-name
         "staticmethod"
         (semantic-tag-get-attribute tag :decorators))
    (semantic-tag-put-attribute
     tag :typemodifiers
     (cons "static"
           (semantic-tag-get-attribute tag :typemodifiers))))

  ;; TODO
  ;; + check for decorators classmethod
  ;; + check for operators
  tag)
400
(defun wisent-python-reconstitute-class-tag (tag)
  "Move a docstring from TAG's members into its :documentation attribute.
Also add a variable tag to TAG's members for every instance variable
assigned in the body of a constructor member.  Return TAG."
  ;; The first member of TAG may be a documentation string.  If that is
  ;; the case, remove it from the members list and stick its content
  ;; into the :documentation attribute.
  (let ((first-member (car (semantic-tag-type-members tag))))
    (when (semantic-python-docstring-p first-member)
      (semantic-tag-put-attribute
       tag :members
       (cdr (semantic-tag-type-members tag)))
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-member))))

  ;; Try to find the constructor, determine the name of the instance
  ;; parameter, find assignments to instance variables and add
  ;; corresponding variable tags to the list of members.
  (dolist (member (semantic-tag-type-members tag))
    (when (semantic-tag-function-constructor-p member)
      (let* ((self (semantic-tag-name
                    (car (semantic-tag-function-arguments member))))
             ;; Length of the \"SELF.\" prefix to strip from variable
             ;; names.  The instance parameter need not be called
             ;; \"self\", so the length must not be hard-coded.  When the
             ;; constructor has no argument, fall back to \"self\" as
             ;; `semantic-python-instance-variable-p' does.
             (prefix-length (1+ (length (or self "self")))))
        (dolist (statement (semantic-tag-get-attribute member :suite))
          (when (semantic-python-instance-variable-p statement self)
            (let ((variable (semantic-tag-clone
                             statement
                             (substring (semantic-tag-name statement)
                                        prefix-length)))
                  (members (semantic-tag-get-attribute tag :members)))
              (when (semantic-python-private-p variable)
                (semantic-tag-put-attribute variable :protection "private"))
              ;; Append destructively to the end of the members list.
              (setcdr (last members) (list variable))))))))

  ;; TODO remove the :suite attribute
  tag)
434
(defun semantic-python-expand-tag (tag)
  "Expand a compound declaration in TAG into a list of separate tags.
TAG is compound when its name is a list rather than a single name,
which in Python can happen with `import' statements.  Return one clone
of TAG per name, in order, or nil when TAG is not a compound include."
  (let ((names (semantic-tag-name tag)))
    (when (and (eq (semantic-tag-class tag) 'include)
               (listp names))
      (mapcar (lambda (name) (semantic-tag-clone tag name))
              names))))
448
449
52bee098
CY
450\f
451;;; Overridden Semantic API.
452;;
62a81506 453
52bee098
CY
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in the current buffer.
START, END, DEPTH and LENGTH have the meaning documented for the
function `semantic-lex'.
The analysis itself is done by `wisent-python-lexer'; afterwards one
Python DEDENT token is appended for each indentation level still on
`wisent-python-indent-stack'."
  (let* ((wisent-python-indent-stack (list 0))
         (stream (wisent-python-lexer start end depth length))
         ;; Fresh stream to collect the trailing DEDENT tokens.
         (semantic-lex-token-stream nil))
    ;; The bottom of the stack is always 0, which ends the loop.
    (while (< 0 (pop wisent-python-indent-stack))
      (semantic-lex-push-token
       (semantic-lex-token 'DEDENT end end)))
    (nconc stream (nreverse semantic-lex-token-stream))))
469
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Return the local variables in scope at point.
Not implemented for Python yet; this override always returns nil."
  nil)
474
62a81506
CY
;; Adapted from the semantic Java support by Andrey Torba
(define-mode-local-override semantic-tag-include-filename python-mode (tag)
  "Return a suitable path for (some) Python imports.
The dotted module name of TAG is turned into a relative file name by
replacing each dot with a slash and appending \".py\"."
  (let* ((components (split-string (semantic-tag-name tag) "\\."))
         (relative (mapconcat 'identity components "/")))
    (concat relative ".py")))
52bee098
CY
480
481;;; Enable Semantic in `python-mode'.
482;;
483
;;;###autoload
(defun wisent-python-default-setup ()
  "Set up the current buffer for parsing Python with Semantic."
  (wisent-python-wy--install-parser)
  (set (make-local-variable 'parse-sexp-ignore-comments) t)
  ;; Python modes may provide their own value; only supply one when
  ;; none has been set.
  (unless comment-start-skip
    (set (make-local-variable 'comment-start-skip) "#+\\s-*"))
  ;; Separators: parent/child relationship and command separation.
  (setq semantic-type-relation-separator-character '(".")
        semantic-command-separation-character ";")
  ;; Parsing.
  (setq semantic-tag-expand-function 'semantic-python-expand-tag)
  ;; Let Semantic take over from the imenu function provided by python.
  ;; The python one, if it uses the senator advice, will hang Emacs
  ;; unrecoverably.
  (setq imenu-create-index-function 'semantic-create-imenu-index)
  ;; I need a python guru to update this list:
  (setq semantic-symbol->name-assoc-list-for-type-parts
        '((variable . "Variables")
          (function . "Methods")))
  (setq semantic-symbol->name-assoc-list
        '((type . "Classes")
          (variable . "Variables")
          (function . "Functions")
          (include . "Imports")
          (package . "Package")
          (code . "Code"))))
514
;;;###autoload
(add-hook 'python-mode-hook #'wisent-python-default-setup)
517
;; Declare the newer Python modes as children of `python-mode' so that
;; they pull in the same mode-local overrides.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
522
523\f
62a81506
CY
524;;; Utility functions
525;;
526
(defun semantic-python-special-p (tag)
  "Return non-nil if the name of TAG is a special identifier.
Special identifiers have the form __NAME__, where NAME is one or more
word or symbol constituent characters."
  (string-match
   ;; Both syntax classes are needed: `(syntax symbol)' alone is \\s_,
   ;; which does not match word-constituent letters and digits, so
   ;; names such as __str__ would never match.
   (rx (seq string-start "__"
            (1+ (or (syntax word) (syntax symbol)))
            "__" string-end))
   (semantic-tag-name tag)))
533
(defun semantic-python-private-p (tag)
  "Return non-nil if the name of TAG follows the private convention.
By convention private names have the form _NAME: an underscore
followed by zero or more word or symbol constituent characters."
  (string-match
   ;; Both syntax classes are needed: `(syntax symbol)' alone is \\s_,
   ;; which does not match word-constituent letters and digits, so
   ;; names such as _helper would never match.
   (rx (seq string-start "_"
            (0+ (or (syntax word) (syntax symbol)))
            string-end))
   (semantic-tag-name tag)))
540
(defun semantic-python-instance-variable-p (tag &optional self)
  "Return non-nil if TAG is an instance variable of the instance SELF.
SELF is the name of the instance; it defaults to \"self\" when nil.
TAG qualifies when it is a variable tag whose name is SELF, a dot, and
a remainder that contains no further dot."
  (when (semantic-tag-of-class-p tag 'variable)
    (let ((name (semantic-tag-name tag)))
      (when (string-match
             (rx-to-string
              `(seq string-start ,(or self "self") "."))
             name)
        ;; Look for another dot after the matched \"SELF.\" prefix.  Use
        ;; the end of the match rather than a fixed offset, since SELF
        ;; is not necessarily four characters long.
        (not (string-match "\\." name (match-end 0)))))))
551
(defun semantic-python-docstring-p (tag)
  "Return non-nil when TAG is a Python documentation string.
TAG counts as one when its class is `code' and its whole name is text
enclosed in triple quotation marks."
  (when (eq (semantic-tag-class tag) 'code)
    (string-match
     (rx (seq string-start "\"\"\"" (0+ anything) "\"\"\"" string-end))
     (semantic-tag-name tag))))
563
(defun semantic-python-extract-docstring (tag)
  "Return the Python documentation string contained in TAG.
This is the name of TAG without its first and last three characters,
i.e. without the surrounding triple quotes."
  (substring (semantic-tag-name tag) 3 -3))
569
570\f
52bee098
CY
571;;; Test
572;;
62a81506 573
52bee098
CY
(defun wisent-python-lex-buffer ()
  "Run `wisent-python-lexer' on current buffer.
Pretty-print the resulting token stream into the buffer
\"*wisent-python-lexer*\" and display that buffer."
  (interactive)
  (semantic-lex-init)
  (let ((tokens (semantic-lex (point-min) (point-max) 0))
        (output (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer output
      (erase-buffer)
      (pp tokens output)
      (goto-char (point-min))
      (pop-to-buffer output))))
584
585(provide 'semantic/wisent/python)
586
587;; Local variables:
588;; generated-autoload-file: "../loaddefs.el"
589;; generated-autoload-load-name: "semantic/wisent/python"
590;; End:
591
592;;; semantic/wisent/python.el ends here