Update copyright notices for 2013.
[bpt/emacs.git] / lisp / thingatpt.el
1 ;;; thingatpt.el --- get the `thing' at point
2
3 ;; Copyright (C) 1991-1998, 2000-2013 Free Software Foundation, Inc.
4
5 ;; Author: Mike Williams <mikew@gopher.dosli.govt.nz>
6 ;; Maintainer: FSF
7 ;; Keywords: extensions, matching, mouse
8 ;; Created: Thu Mar 28 13:48:23 1991
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24
25 ;;; Commentary:
26
27 ;; This file provides routines for getting the "thing" at the location of
28 ;; point, whatever that "thing" happens to be. The "thing" is defined by
29 ;; its beginning and end positions in the buffer.
30 ;;
31 ;; The function bounds-of-thing-at-point finds the beginning and end
32 ;; positions by moving first forward to the end of the "thing", and then
33 ;; backwards to the beginning. By default, it uses the corresponding
34 ;; forward-"thing" operator (eg. forward-word, forward-line).
35 ;;
36 ;; Special cases are allowed for using properties associated with the named
37 ;; "thing":
38 ;;
39 ;; forward-op Function to call to skip forward over a "thing" (or
40 ;; with a negative argument, backward).
41 ;;
42 ;; beginning-op Function to call to skip to the beginning of a "thing".
43 ;; end-op Function to call to skip to the end of a "thing".
44 ;;
45 ;; Reliance on existing operators means that many `things' can be accessed
46 ;; without further code: eg.
47 ;; (thing-at-point 'line)
48 ;; (thing-at-point 'page)
49
50 ;;; Code:
51
52 (provide 'thingatpt)
53
54 ;; Basic movement
55
56 ;;;###autoload
57 (defun forward-thing (thing &optional n)
58 "Move forward to the end of the Nth next THING.
59 THING should be a symbol specifying a type of syntactic entity.
60 Possibilities include `symbol', `list', `sexp', `defun',
61 `filename', `url', `email', `word', `sentence', `whitespace',
62 `line', and `page'."
63 (let ((forward-op (or (get thing 'forward-op)
64 (intern-soft (format "forward-%s" thing)))))
65 (if (functionp forward-op)
66 (funcall forward-op (or n 1))
67 (error "Can't determine how to move over a %s" thing))))
68
69 ;; General routines
70
71 ;;;###autoload
72 (defun bounds-of-thing-at-point (thing)
73 "Determine the start and end buffer locations for the THING at point.
74 THING should be a symbol specifying a type of syntactic entity.
75 Possibilities include `symbol', `list', `sexp', `defun',
76 `filename', `url', `email', `word', `sentence', `whitespace',
77 `line', and `page'.
78
79 See the file `thingatpt.el' for documentation on how to define a
80 valid THING.
81
82 Return a cons cell (START . END) giving the start and end
83 positions of the thing found."
84 (if (get thing 'bounds-of-thing-at-point)
85 (funcall (get thing 'bounds-of-thing-at-point))
86 (let ((orig (point)))
87 (condition-case nil
88 (save-excursion
89 ;; Try moving forward, then back.
90 (funcall ;; First move to end.
91 (or (get thing 'end-op)
92 (lambda () (forward-thing thing 1))))
93 (funcall ;; Then move to beg.
94 (or (get thing 'beginning-op)
95 (lambda () (forward-thing thing -1))))
96 (let ((beg (point)))
97 (if (<= beg orig)
98 ;; If that brings us all the way back to ORIG,
99 ;; it worked. But END may not be the real end.
100 ;; So find the real end that corresponds to BEG.
101 ;; FIXME: in which cases can `real-end' differ from `end'?
102 (let ((real-end
103 (progn
104 (funcall
105 (or (get thing 'end-op)
106 (lambda () (forward-thing thing 1))))
107 (point))))
108 (when (and (<= orig real-end) (< beg real-end))
109 (cons beg real-end)))
110 (goto-char orig)
111 ;; Try a second time, moving backward first and then forward,
112 ;; so that we can find a thing that ends at ORIG.
113 (funcall ;; First, move to beg.
114 (or (get thing 'beginning-op)
115 (lambda () (forward-thing thing -1))))
116 (funcall ;; Then move to end.
117 (or (get thing 'end-op)
118 (lambda () (forward-thing thing 1))))
119 (let ((end (point))
120 (real-beg
121 (progn
122 (funcall
123 (or (get thing 'beginning-op)
124 (lambda () (forward-thing thing -1))))
125 (point))))
126 (if (and (<= real-beg orig) (<= orig end) (< real-beg end))
127 (cons real-beg end))))))
128 (error nil)))))
129
130 ;;;###autoload
131 (defun thing-at-point (thing)
132 "Return the THING at point.
133 THING should be a symbol specifying a type of syntactic entity.
134 Possibilities include `symbol', `list', `sexp', `defun',
135 `filename', `url', `email', `word', `sentence', `whitespace',
136 `line', `number', and `page'.
137
138 See the file `thingatpt.el' for documentation on how to define
139 a symbol as a valid THING."
140 (if (get thing 'thing-at-point)
141 (funcall (get thing 'thing-at-point))
142 (let ((bounds (bounds-of-thing-at-point thing)))
143 (if bounds
144 (buffer-substring (car bounds) (cdr bounds))))))
145
146 ;; Go to beginning/end
147
148 (defun beginning-of-thing (thing)
149 "Move point to the beginning of THING.
150 The bounds of THING are determined by `bounds-of-thing-at-point'."
151 (let ((bounds (bounds-of-thing-at-point thing)))
152 (or bounds (error "No %s here" thing))
153 (goto-char (car bounds))))
154
155 (defun end-of-thing (thing)
156 "Move point to the end of THING.
157 The bounds of THING are determined by `bounds-of-thing-at-point'."
158 (let ((bounds (bounds-of-thing-at-point thing)))
159 (or bounds (error "No %s here" thing))
160 (goto-char (cdr bounds))))
161
162 ;; Special cases
163
164 ;; Lines
165
166 ;; bolp will be false when you click on the last line in the buffer
167 ;; and it has no final newline.
168
169 (put 'line 'beginning-op
170 (lambda () (if (bolp) (forward-line -1) (beginning-of-line))))
171
172 ;; Sexps
173
174 (defun in-string-p ()
175 "Return non-nil if point is in a string.
176 \[This is an internal function.]"
177 (let ((orig (point)))
178 (save-excursion
179 (beginning-of-defun)
180 (nth 3 (parse-partial-sexp (point) orig)))))
181
182 (defun end-of-sexp ()
183 "Move point to the end of the current sexp.
184 \[This is an internal function.]"
185 (let ((char-syntax (char-syntax (char-after))))
186 (if (or (eq char-syntax ?\))
187 (and (eq char-syntax ?\") (in-string-p)))
188 (forward-char 1)
189 (forward-sexp 1))))
190
191 (put 'sexp 'end-op 'end-of-sexp)
192
193 (defun beginning-of-sexp ()
194 "Move point to the beginning of the current sexp.
195 \[This is an internal function.]"
196 (let ((char-syntax (char-syntax (char-before))))
197 (if (or (eq char-syntax ?\()
198 (and (eq char-syntax ?\") (in-string-p)))
199 (forward-char -1)
200 (forward-sexp -1))))
201
202 (put 'sexp 'beginning-op 'beginning-of-sexp)
203
204 ;; Lists
205
206 (put 'list 'bounds-of-thing-at-point 'thing-at-point-bounds-of-list-at-point)
207
208 (defun thing-at-point-bounds-of-list-at-point ()
209 "Return the bounds of the list at point.
210 \[Internal function used by `bounds-of-thing-at-point'.]"
211 (save-excursion
212 (let ((opoint (point))
213 (beg (condition-case nil
214 (progn (up-list -1)
215 (point))
216 (error nil))))
217 (condition-case nil
218 (if beg
219 (progn (forward-sexp)
220 (cons beg (point)))
221 ;; Are we are at the beginning of a top-level sexp?
222 (forward-sexp)
223 (let ((end (point)))
224 (backward-sexp)
225 (if (>= opoint (point))
226 (cons opoint end))))
227 (error nil)))))
228
229 ;; Defuns
230
231 (put 'defun 'beginning-op 'beginning-of-defun)
232 (put 'defun 'end-op 'end-of-defun)
233 (put 'defun 'forward-op 'end-of-defun)
234
235 ;; Filenames and URLs www.com/foo%32bar
236
237 (defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:"
238 "Characters allowable in filenames.")
239
240 (put 'filename 'end-op
241 (lambda ()
242 (re-search-forward (concat "\\=[" thing-at-point-file-name-chars "]*")
243 nil t)))
244 (put 'filename 'beginning-op
245 (lambda ()
246 (if (re-search-backward (concat "[^" thing-at-point-file-name-chars "]")
247 nil t)
248 (forward-char)
249 (goto-char (point-min)))))
250
251 (defvar thing-at-point-url-path-regexp
252 "[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+"
253 "A regular expression probably matching the host and filename or e-mail part of a URL.")
254
255 (defvar thing-at-point-short-url-regexp
256 (concat "[-A-Za-z0-9]+\\.[-A-Za-z0-9.]+" thing-at-point-url-path-regexp)
257 "A regular expression probably matching a URL without an access scheme.
258 Hostname matching is stricter in this case than for
259 ``thing-at-point-url-regexp''.")
260
261 (defvar thing-at-point-uri-schemes
262 ;; Officials from http://www.iana.org/assignments/uri-schemes.html
263 '("ftp://" "http://" "gopher://" "mailto:" "news:" "nntp:"
264 "telnet://" "wais://" "file:/" "prospero:" "z39.50s:" "z39.50r:"
265 "cid:" "mid:" "vemmi:" "service:" "imap:" "nfs:" "acap:" "rtsp:"
266 "tip:" "pop:" "data:" "dav:" "opaquelocktoken:" "sip:" "tel:" "fax:"
267 "modem:" "ldap:" "https://" "soap.beep:" "soap.beeps:" "urn:" "go:"
268 "afs:" "tn3270:" "mailserver:"
269 "crid:" "dict:" "dns:" "dtn:" "h323:" "im:" "info:" "ipp:"
270 "iris.beep:" "mtqp:" "mupdate:" "pres:" "sips:" "snmp:" "tag:"
271 "tftp:" "xmlrpc.beep:" "xmlrpc.beeps:" "xmpp:"
272 ;; Compatibility
273 "snews:" "irc:" "mms://" "mmsh://")
274 "Uniform Resource Identifier (URI) Schemes.")
275
276 (defvar thing-at-point-url-regexp
277 (concat "\\<\\(" (mapconcat 'identity thing-at-point-uri-schemes "\\|") "\\)"
278 thing-at-point-url-path-regexp)
279 "A regular expression probably matching a complete URL.")
280
281 (defvar thing-at-point-markedup-url-regexp
282 "<URL:[^>]+>"
283 "A regular expression matching a URL marked up per RFC1738.
284 This may contain whitespace (including newlines) .")
285
286 (put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point)
287 (defun thing-at-point-bounds-of-url-at-point ()
288 (let ((strip (thing-at-point-looking-at
289 thing-at-point-markedup-url-regexp))) ;; (url "") short
290 (if (or strip
291 (thing-at-point-looking-at thing-at-point-url-regexp)
292 ;; Access scheme omitted?
293 ;; (setq short (thing-at-point-looking-at
294 ;; thing-at-point-short-url-regexp))
295 )
296 (let ((beginning (match-beginning 0))
297 (end (match-end 0)))
298 (when strip
299 (setq beginning (+ beginning 5))
300 (setq end (- end 1)))
301 (cons beginning end)))))
302
303 (put 'url 'thing-at-point 'thing-at-point-url-at-point)
304 (defun thing-at-point-url-at-point ()
305 "Return the URL around or before point.
306
307 Search backwards for the start of a URL ending at or after point. If
308 no URL found, return nil. The access scheme will be prepended if
309 absent: \"mailto:\" if the string contains \"@\", \"ftp://\" if it
310 starts with \"ftp\" and not \"ftp:/\", or \"http://\" by default."
311
312 (let ((url "") short strip)
313 (if (or (setq strip (thing-at-point-looking-at
314 thing-at-point-markedup-url-regexp))
315 (thing-at-point-looking-at thing-at-point-url-regexp)
316 ;; Access scheme omitted?
317 (setq short (thing-at-point-looking-at
318 thing-at-point-short-url-regexp)))
319 (progn
320 (setq url (buffer-substring-no-properties (match-beginning 0)
321 (match-end 0)))
322 (and strip (setq url (substring url 5 -1))) ; Drop "<URL:" & ">"
323 ;; strip whitespace
324 (while (string-match "[ \t\n\r]+" url)
325 (setq url (replace-match "" t t url)))
326 (and short (setq url (concat (cond ((string-match "^[a-zA-Z]+:" url)
327 ;; already has a URL scheme.
328 "")
329 ((string-match "@" url)
330 "mailto:")
331 ;; e.g. ftp.swiss... or ftp-swiss...
332 ((string-match "^ftp" url)
333 "ftp://")
334 (t "http://"))
335 url)))
336 (if (string-equal "" url)
337 nil
338 url)))))
339
340 ;; The normal thingatpt mechanism doesn't work for complex regexps.
341 ;; This should work for almost any regexp wherever we are in the
342 ;; match. To do a perfect job for any arbitrary regexp would mean
343 ;; testing every position before point. Regexp searches won't find
344 ;; matches that straddle the start position so we search forwards once
345 ;; and then back repeatedly and then back up a char at a time.
346
347 (defun thing-at-point-looking-at (regexp)
348 "Return non-nil if point is in or just after a match for REGEXP.
349 Set the match data from the earliest such match ending at or after
350 point."
351 (save-excursion
352 (let ((old-point (point)) match)
353 (and (looking-at regexp)
354 (>= (match-end 0) old-point)
355 (setq match (point)))
356 ;; Search back repeatedly from end of next match.
357 ;; This may fail if next match ends before this match does.
358 (re-search-forward regexp nil 'limit)
359 (while (and (re-search-backward regexp nil t)
360 (or (> (match-beginning 0) old-point)
361 (and (looking-at regexp) ; Extend match-end past search start
362 (>= (match-end 0) old-point)
363 (setq match (point))))))
364 (if (not match) nil
365 (goto-char match)
366 ;; Back up a char at a time in case search skipped
367 ;; intermediate match straddling search start pos.
368 (while (and (not (bobp))
369 (progn (backward-char 1) (looking-at regexp))
370 (>= (match-end 0) old-point)
371 (setq match (point))))
372 (goto-char match)
373 (looking-at regexp)))))
374
375 (put 'url 'end-op
376 (lambda ()
377 (let ((bounds (thing-at-point-bounds-of-url-at-point)))
378 (if bounds
379 (goto-char (cdr bounds))
380 (error "No URL here")))))
381 (put 'url 'beginning-op
382 (lambda ()
383 (let ((bounds (thing-at-point-bounds-of-url-at-point)))
384 (if bounds
385 (goto-char (car bounds))
386 (error "No URL here")))))
387
388 ;; Email addresses
389 (defvar thing-at-point-email-regexp
390 "<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?"
391 "A regular expression probably matching an email address.
392 This does not match the real name portion, only the address, optionally
393 with angle brackets.")
394
395 ;; Haven't set 'forward-op on 'email nor defined 'forward-email' because
396 ;; not sure they're actually needed, and URL seems to skip them too.
397 ;; Note that (end-of-thing 'email) and (beginning-of-thing 'email)
398 ;; work automagically, though.
399
400 (put 'email 'bounds-of-thing-at-point
401 (lambda ()
402 (let ((thing (thing-at-point-looking-at thing-at-point-email-regexp)))
403 (if thing
404 (let ((beginning (match-beginning 0))
405 (end (match-end 0)))
406 (cons beginning end))))))
407
408 (put 'email 'thing-at-point
409 (lambda ()
410 (let ((boundary-pair (bounds-of-thing-at-point 'email)))
411 (if boundary-pair
412 (buffer-substring-no-properties
413 (car boundary-pair) (cdr boundary-pair))))))
414
415 ;; Whitespace
416
417 (defun forward-whitespace (arg)
418 "Move point to the end of the next sequence of whitespace chars.
419 Each such sequence may be a single newline, or a sequence of
420 consecutive space and/or tab characters.
421 With prefix argument ARG, do it ARG times if positive, or move
422 backwards ARG times if negative."
423 (interactive "p")
424 (if (natnump arg)
425 (re-search-forward "[ \t]+\\|\n" nil 'move arg)
426 (while (< arg 0)
427 (if (re-search-backward "[ \t]+\\|\n" nil 'move)
428 (or (eq (char-after (match-beginning 0)) ?\n)
429 (skip-chars-backward " \t")))
430 (setq arg (1+ arg)))))
431
432 ;; Buffer
433
434 (put 'buffer 'end-op (lambda () (goto-char (point-max))))
435 (put 'buffer 'beginning-op (lambda () (goto-char (point-min))))
436
437 ;; Symbols
438
439 (defun forward-symbol (arg)
440 "Move point to the next position that is the end of a symbol.
441 A symbol is any sequence of characters that are in either the
442 word constituent or symbol constituent syntax class.
443 With prefix argument ARG, do it ARG times if positive, or move
444 backwards ARG times if negative."
445 (interactive "p")
446 (if (natnump arg)
447 (re-search-forward "\\(\\sw\\|\\s_\\)+" nil 'move arg)
448 (while (< arg 0)
449 (if (re-search-backward "\\(\\sw\\|\\s_\\)+" nil 'move)
450 (skip-syntax-backward "w_"))
451 (setq arg (1+ arg)))))
452
453 ;; Syntax blocks
454
455 (defun forward-same-syntax (&optional arg)
456 "Move point past all characters with the same syntax class.
457 With prefix argument ARG, do it ARG times if positive, or move
458 backwards ARG times if negative."
459 (interactive "p")
460 (or arg (setq arg 1))
461 (while (< arg 0)
462 (skip-syntax-backward
463 (char-to-string (char-syntax (char-before))))
464 (setq arg (1+ arg)))
465 (while (> arg 0)
466 (skip-syntax-forward (char-to-string (char-syntax (char-after))))
467 (setq arg (1- arg))))
468
469 ;; Aliases
470
471 (defun word-at-point ()
472 "Return the word at point. See `thing-at-point'."
473 (thing-at-point 'word))
474
475 (defun sentence-at-point ()
476 "Return the sentence at point. See `thing-at-point'."
477 (thing-at-point 'sentence))
478
479 (defun read-from-whole-string (str)
480 "Read a Lisp expression from STR.
481 Signal an error if the entire string was not used."
482 (let* ((read-data (read-from-string str))
483 (more-left
484 (condition-case nil
485 ;; The call to `ignore' suppresses a compiler warning.
486 (progn (ignore (read-from-string (substring str (cdr read-data))))
487 t)
488 (end-of-file nil))))
489 (if more-left
490 (error "Can't read whole string")
491 (car read-data))))
492
493 (defun form-at-point (&optional thing pred)
494 (let ((sexp (condition-case nil
495 (read-from-whole-string (thing-at-point (or thing 'sexp)))
496 (error nil))))
497 (if (or (not pred) (funcall pred sexp)) sexp)))
498
499 ;;;###autoload
500 (defun sexp-at-point ()
501 "Return the sexp at point, or nil if none is found."
502 (form-at-point 'sexp))
503 ;;;###autoload
504 (defun symbol-at-point ()
505 "Return the symbol at point, or nil if none is found."
506 (let ((thing (thing-at-point 'symbol)))
507 (if thing (intern thing))))
508 ;;;###autoload
509 (defun number-at-point ()
510 "Return the number at point, or nil if none is found."
511 (form-at-point 'sexp 'numberp))
512 (put 'number 'thing-at-point 'number-at-point)
513 ;;;###autoload
514 (defun list-at-point ()
515 "Return the Lisp list at point, or nil if none is found."
516 (form-at-point 'list 'listp))
517
518 ;;; thingatpt.el ends here