(texinfo-section-types-regexp): Define here.
[bpt/emacs.git] / lisp / sort.el
CommitLineData
c88ab9ce
ER
1;;; sort.el --- commands to sort text in an Emacs buffer.
2
eea8d4ef
ER
3;; Copyright (C) 1986, 1987 Free Software Foundation, Inc.
4
e5167999
ER
5;; Author: Howie Kaye
6;; Maintainer: FSF
d7b4d18f 7;; Keywords: unix
e5167999 8
d32200ac
RS
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
e5167999 13;; the Free Software Foundation; either version 2, or (at your option)
d32200ac
RS
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to
23;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24
d9ecc911
ER
25;;; Commentary:
26
27;;; This package provides the sorting facilities documented in the Emacs
28;;; user's manual.
29
e5167999 30;;; Code:
d32200ac
RS
31
32(defun sort-subr (reverse nextrecfun endrecfun &optional startkeyfun endkeyfun)
33 "General text sorting routine to divide buffer into records and sort them.
34Arguments are REVERSE NEXTRECFUN ENDRECFUN &optional STARTKEYFUN ENDKEYFUN.
35
a08caf95 36We divide the accessible portion of the buffer into disjoint pieces
fbfed6f0
JB
37called sort records. A portion of each sort record (perhaps all of
38it) is designated as the sort key. The records are rearranged in the
39buffer in order by their sort keys. The records may or may not be
40contiguous.
d32200ac
RS
41
42Usually the records are rearranged in order of ascending sort key.
43If REVERSE is non-nil, they are rearranged in order of descending sort key.
44
45The next four arguments are functions to be called to move point
46across a sort record. They will be called many times from within sort-subr.
47
48NEXTRECFUN is called with point at the end of the previous record.
49It moves point to the start of the next record.
50It should move point to the end of the buffer if there are no more records.
51The first record is assumed to start at the position of point when sort-subr
52is called.
53
13f4b6b3 54ENDRECFUN is called with point within the record.
d32200ac
RS
55It should move point to the end of the record.
56
13f4b6b3
RS
57STARTKEYFUN moves from the start of the record to the start of the key.
58It may return either a non-nil value to be used as the key, or
a08caf95 59else the key is the substring between the values of point after
d9a55d32
RS
60STARTKEYFUN and ENDKEYFUN are called. If STARTKEYFUN is nil, the key
61starts at the beginning of the record.
d32200ac
RS
62
63ENDKEYFUN moves from the start of the sort key to the end of the sort key.
64ENDKEYFUN may be nil if STARTKEYFUN returns a value or if it would be the
65same as ENDRECFUN."
a08caf95
RS
66 ;; Heuristically try to avoid messages if sorting a small amt of text.
67 (let ((messages (> (- (point-max) (point-min)) 50000)))
68 (save-excursion
69 (if messages (message "Finding sort keys..."))
70 (let* ((sort-lists (sort-build-lists nextrecfun endrecfun
71 startkeyfun endkeyfun))
72 (old (reverse sort-lists)))
73 (if (null sort-lists)
74 ()
75 (or reverse (setq sort-lists (nreverse sort-lists)))
76 (if messages (message "Sorting records..."))
77 (setq sort-lists
78 (if (fboundp 'sortcar)
79 (sortcar sort-lists
80 (cond ((numberp (car (car sort-lists)))
81 ;; This handles both ints and floats.
82 '<)
83 ((consp (car (car sort-lists)))
4f1b6c54
RS
84 (function
85 (lambda (a b)
86 (> 0 (compare-buffer-substrings
b5ff8004
RS
87 nil (car a) (cdr a)
88 nil (car b) (cdr b))))))
a08caf95
RS
89 (t
90 'string<)))
fbfed6f0
JB
91 (sort sort-lists
92 (cond ((numberp (car (car sort-lists)))
93 (function
94 (lambda (a b)
95 (< (car a) (car b)))))
96 ((consp (car (car sort-lists)))
97 (function
98 (lambda (a b)
4f1b6c54 99 (> 0 (compare-buffer-substrings
b5ff8004
RS
100 nil (car (car a)) (cdr (car a))
101 nil (car (car b)) (cdr (car b)))))))
fbfed6f0
JB
102 (t
103 (function
104 (lambda (a b)
105 (string< (car a) (car b)))))))))
a08caf95
RS
106 (if reverse (setq sort-lists (nreverse sort-lists)))
107 (if messages (message "Reordering buffer..."))
108 (sort-reorder-buffer sort-lists old)))
109 (if messages (message "Reordering buffer... Done"))))
d32200ac
RS
110 nil)
111
112;; Parse buffer into records using the arguments as Lisp expressions;
e453d35a 113;; return a list of records. Each record looks like (KEY STARTPOS . ENDPOS)
d32200ac
RS
114;; where KEY is the sort key (a number or string),
115;; and STARTPOS and ENDPOS are the bounds of this record in the buffer.
116
117;; The records appear in the list lastmost first!
118
119(defun sort-build-lists (nextrecfun endrecfun startkeyfun endkeyfun)
120 (let ((sort-lists ())
121 (start-rec nil)
122 done key)
123 ;; Loop over sort records.
124 ;(goto-char (point-min)) -- it is the caller's responsibility to
125 ;arrange this if necessary
126 (while (not (eobp))
127 (setq start-rec (point)) ;save record start
128 (setq done nil)
129 ;; Get key value, or move to start of key.
130 (setq key (catch 'key
131 (or (and startkeyfun (funcall startkeyfun))
132 ;; If key was not returned as value,
133 ;; move to end of key and get key from the buffer.
134 (let ((start (point)))
135 (funcall (or endkeyfun
136 (prog1 endrecfun (setq done t))))
4f1b6c54 137 (cons start (point))))))
d32200ac
RS
138 ;; Move to end of this record (start of next one, or end of buffer).
139 (cond ((prog1 done (setq done nil)))
140 (endrecfun (funcall endrecfun))
141 (nextrecfun (funcall nextrecfun) (setq done t)))
142 (if key (setq sort-lists (cons
143 ;; consing optimization in case in which key
144 ;; is same as record.
145 (if (and (consp key)
146 (equal (car key) start-rec)
147 (equal (cdr key) (point)))
148 (cons key key)
d9a55d32
RS
149 (cons key (cons start-rec (point))))
150 sort-lists)))
d32200ac
RS
151 (and (not done) nextrecfun (funcall nextrecfun)))
152 sort-lists))
153
154(defun sort-reorder-buffer (sort-lists old)
155 (let ((inhibit-quit t)
156 (last (point-min))
157 (min (point-min)) (max (point-max)))
158 ;; Make sure insertions done for reordering
159 ;; do not go after any markers at the end of the sorted region,
160 ;; by inserting a space to separate them.
161 (goto-char (point-max))
162 (insert-before-markers " ")
163 (narrow-to-region min (1- (point-max)))
164 (while sort-lists
165 (goto-char (point-max))
166 (insert-buffer-substring (current-buffer)
167 last
168 (nth 1 (car old)))
169 (goto-char (point-max))
170 (insert-buffer-substring (current-buffer)
171 (nth 1 (car sort-lists))
d9a55d32
RS
172 (cdr (cdr (car sort-lists))))
173 (setq last (cdr (cdr (car old)))
d32200ac
RS
174 sort-lists (cdr sort-lists)
175 old (cdr old)))
176 (goto-char (point-max))
177 (insert-buffer-substring (current-buffer)
178 last
179 max)
180 ;; Delete the original copy of the text.
181 (delete-region min max)
182 ;; Get rid of the separator " ".
183 (goto-char (point-max))
184 (narrow-to-region min (1+ (point)))
185 (delete-region (point) (1+ (point)))))
186
f9f9507e 187;;;###autoload
d32200ac
RS
188(defun sort-lines (reverse beg end)
189 "Sort lines in region alphabetically; argument means descending order.
190Called from a program, there are three arguments:
191REVERSE (non-nil means reverse order), BEG and END (region to sort)."
192 (interactive "P\nr")
193 (save-excursion
194 (save-restriction
195 (narrow-to-region beg end)
196 (goto-char (point-min))
197 (sort-subr reverse 'forward-line 'end-of-line))))
198
f9f9507e 199;;;###autoload
d32200ac
RS
200(defun sort-paragraphs (reverse beg end)
201 "Sort paragraphs in region alphabetically; argument means descending order.
202Called from a program, there are three arguments:
203REVERSE (non-nil means reverse order), BEG and END (region to sort)."
204 (interactive "P\nr")
205 (save-excursion
206 (save-restriction
207 (narrow-to-region beg end)
208 (goto-char (point-min))
209 (sort-subr reverse
210 (function (lambda () (skip-chars-forward "\n \t\f")))
211 'forward-paragraph))))
212
f9f9507e 213;;;###autoload
d32200ac
RS
214(defun sort-pages (reverse beg end)
215 "Sort pages in region alphabetically; argument means descending order.
216Called from a program, there are three arguments:
217REVERSE (non-nil means reverse order), BEG and END (region to sort)."
218 (interactive "P\nr")
219 (save-excursion
220 (save-restriction
221 (narrow-to-region beg end)
222 (goto-char (point-min))
223 (sort-subr reverse
224 (function (lambda () (skip-chars-forward "\n")))
225 'forward-page))))
226\f
227(defvar sort-fields-syntax-table nil)
228(if sort-fields-syntax-table nil
229 (let ((table (make-syntax-table))
230 (i 0))
231 (while (< i 256)
232 (modify-syntax-entry i "w" table)
233 (setq i (1+ i)))
234 (modify-syntax-entry ?\ " " table)
235 (modify-syntax-entry ?\t " " table)
236 (modify-syntax-entry ?\n " " table)
237 (modify-syntax-entry ?\. "_" table) ; for floating pt. numbers. -wsr
238 (setq sort-fields-syntax-table table)))
239
f9f9507e 240;;;###autoload
d32200ac
RS
241(defun sort-numeric-fields (field beg end)
242 "Sort lines in region numerically by the ARGth field of each line.
243Fields are separated by whitespace and numbered from 1 up.
244Specified field must contain a number in each line of the region.
2af3a0e3 245With a negative arg, sorts by the ARGth field counted from the right.
d32200ac 246Called from a program, there are three arguments:
a4b13e10
JB
247FIELD, BEG and END. BEG and END specify region to sort.
248If you want to sort floating-point numbers, try `sort-float-fields'."
d32200ac
RS
249 (interactive "p\nr")
250 (sort-fields-1 field beg end
251 (function (lambda ()
252 (sort-skip-fields (1- field))
a4b13e10 253 (string-to-number
d32200ac
RS
254 (buffer-substring
255 (point)
256 (save-excursion
257 ;; This is just wrong! Even without floats...
258 ;; (skip-chars-forward "[0-9]")
259 (forward-sexp 1)
260 (point))))))
261 nil))
262
a4b13e10 263;;;###autoload
2af3a0e3 264(defun sort-float-fields (field beg end)
265 "Sort lines in region numerically by the ARGth field of each line.
266Fields are separated by whitespace and numbered from 1 up. Specified field
267must contain a floating point number in each line of the region. With a
268negative arg, sorts by the ARGth field counted from the right. Called from a
269program, there are three arguments: FIELD, BEG and END. BEG and END specify
270region to sort."
271 (interactive "p\nr")
272 (sort-fields-1 field beg end
273 (function (lambda ()
274 (sort-skip-fields (1- field))
6da3b16b 275 (string-to-number
2af3a0e3 276 (buffer-substring
277 (point)
278 (save-excursion
279 (re-search-forward
280 "[+-]?[0-9]*\.?[0-9]*\\([eE][+-]?[0-9]+\\)?")
281 (point))))))
282 nil))
283
f9f9507e 284;;;###autoload
d32200ac
RS
285(defun sort-fields (field beg end)
286 "Sort lines in region lexicographically by the ARGth field of each line.
287Fields are separated by whitespace and numbered from 1 up.
2af3a0e3 288With a negative arg, sorts by the ARGth field counted from the right.
d32200ac
RS
289Called from a program, there are three arguments:
290FIELD, BEG and END. BEG and END specify region to sort."
291 (interactive "p\nr")
292 (sort-fields-1 field beg end
293 (function (lambda ()
294 (sort-skip-fields (1- field))
295 nil))
296 (function (lambda () (skip-chars-forward "^ \t\n")))))
297
298(defun sort-fields-1 (field beg end startkeyfun endkeyfun)
2af3a0e3 299 (let ((tbl (syntax-table)))
300 (if (zerop field) (setq field 1))
d32200ac
RS
301 (unwind-protect
302 (save-excursion
303 (save-restriction
304 (narrow-to-region beg end)
305 (goto-char (point-min))
306 (set-syntax-table sort-fields-syntax-table)
2af3a0e3 307 (sort-subr nil
d32200ac
RS
308 'forward-line 'end-of-line
309 startkeyfun endkeyfun)))
310 (set-syntax-table tbl))))
311
312(defun sort-skip-fields (n)
2af3a0e3 313 (let ((bol (point))
314 (eol (save-excursion (end-of-line 1) (point))))
315 (if (> n 0) (forward-word n)
316 (end-of-line)
317 (forward-word (1+ n)))
318 (if (or (and (>= (point) eol) (> n 0))
319 ;; this is marginally wrong; if the first line of the sort
320 ;; at bob has the wrong number of fields the error won't be
321 ;; reported until the next short line.
322 (and (< (point) bol) (< n 0)))
d32200ac 323 (error "Line has too few fields: %s"
2af3a0e3 324 (buffer-substring bol eol)))
d32200ac
RS
325 (skip-chars-forward " \t")))
326
327\f
f9f9507e 328;;;###autoload
d32200ac
RS
329(defun sort-regexp-fields (reverse record-regexp key-regexp beg end)
330 "Sort the region lexicographically as specifed by RECORD-REGEXP and KEY.
331RECORD-REGEXP specifies the textual units which should be sorted.
332 For example, to sort lines RECORD-REGEXP would be \"^.*$\"
333KEY specifies the part of each record (ie each match for RECORD-REGEXP)
334 is to be used for sorting.
335 If it is \"\\digit\" then the digit'th \"\\(...\\)\" match field from
336 RECORD-REGEXP is used.
337 If it is \"\\&\" then the whole record is used.
338 Otherwise, it is a regular-expression for which to search within the record.
339If a match for KEY is not found within a record then that record is ignored.
340
341With a negative prefix arg sorts in reverse order.
342
343For example: to sort lines in the region by the first word on each line
344 starting with the letter \"f\",
345 RECORD-REGEXP would be \"^.*$\" and KEY would be \"\\=\\<f\\w*\\>\""
2af3a0e3 346 ;; using negative prefix arg to mean "reverse" is now inconsistent with
347 ;; other sort-.*fields functions but then again this was before, since it
348 ;; didn't use the magnitude of the arg to specify anything.
d32200ac
RS
349 (interactive "P\nsRegexp specifying records to sort:
350sRegexp specifying key within record: \nr")
351 (cond ((or (equal key-regexp "") (equal key-regexp "\\&"))
352 (setq key-regexp 0))
353 ((string-match "\\`\\\\[1-9]\\'" key-regexp)
354 (setq key-regexp (- (aref key-regexp 1) ?0))))
355 (save-excursion
356 (save-restriction
357 (narrow-to-region beg end)
358 (goto-char (point-min))
359 (let (sort-regexp-record-end) ;isn't dynamic scoping wonderful?
360 (re-search-forward record-regexp)
361 (setq sort-regexp-record-end (point))
362 (goto-char (match-beginning 0))
363 (sort-subr reverse
364 (function (lambda ()
365 (and (re-search-forward record-regexp nil 'move)
366 (setq sort-regexp-record-end (match-end 0))
367 (goto-char (match-beginning 0)))))
368 (function (lambda ()
369 (goto-char sort-regexp-record-end)))
370 (function (lambda ()
371 (let ((n 0))
372 (cond ((numberp key-regexp)
373 (setq n key-regexp))
374 ((re-search-forward
375 key-regexp sort-regexp-record-end t)
376 (setq n 0))
377 (t (throw 'key nil)))
378 (condition-case ()
379 (if (fboundp 'buffer-substring-lessp)
380 (cons (match-beginning n)
381 (match-end n))
382 (buffer-substring (match-beginning n)
383 (match-end n)))
384 ;; if there was no such register
385 (error (throw 'key nil)))))))))))
386
387\f
388(defvar sort-columns-subprocess t)
389
f9f9507e 390;;;###autoload
d32200ac
RS
391(defun sort-columns (reverse &optional beg end)
392 "Sort lines in region alphabetically by a certain range of columns.
393For the purpose of this command, the region includes
394the entire line that point is in and the entire line the mark is in.
395The column positions of point and mark bound the range of columns to sort on.
396A prefix argument means sort into reverse order.
397
398Note that `sort-columns' rejects text that contains tabs,
399because tabs could be split across the specified columns
400and it doesn't know how to handle that. Also, when possible,
401it uses the `sort' utility program, which doesn't understand tabs.
402Use \\[untabify] to convert tabs to spaces before sorting."
403 (interactive "P\nr")
404 (save-excursion
405 (let (beg1 end1 col-beg1 col-end1 col-start col-end)
406 (goto-char (min beg end))
407 (setq col-beg1 (current-column))
408 (beginning-of-line)
409 (setq beg1 (point))
410 (goto-char (max beg end))
411 (setq col-end1 (current-column))
412 (forward-line)
413 (setq end1 (point))
414 (setq col-start (min col-beg1 col-end1))
415 (setq col-end (max col-beg1 col-end1))
416 (if (search-backward "\t" beg1 t)
417 (error "sort-columns does not work with tabs. Use M-x untabify."))
418 (if (not (eq system-type 'vax-vms))
419 ;; Use the sort utility if we can; it is 4 times as fast.
420 (call-process-region beg1 end1 "sort" t t nil
421 (if reverse "-rt\n" "-t\n")
422 (concat "+0." col-start)
423 (concat "-0." col-end))
424 ;; On VMS, use Emacs's own facilities.
425 (save-excursion
426 (save-restriction
427 (narrow-to-region beg1 end1)
428 (goto-char beg1)
429 (sort-subr reverse 'forward-line 'end-of-line
430 (function (lambda () (move-to-column col-start) nil))
431 (function (lambda () (move-to-column col-end) nil)))))))))
2af3a0e3 432
f9f9507e 433;;;###autoload
2af3a0e3 434(defun reverse-region (beg end)
435 "Reverse the order of lines in a region.
436From a program takes two point or marker arguments, BEG and END."
437 (interactive "r")
438 (if (> beg end)
439 (let (mid) (setq mid end end beg beg mid)))
440 (save-excursion
441 ;; put beg at the start of a line and end and the end of one --
442 ;; the largest possible region which fits this criteria
443 (goto-char beg)
444 (or (bolp) (forward-line 1))
445 (setq beg (point))
446 (goto-char end)
447 ;; the test for bolp is for those times when end is on an empty line;
448 ;; it is probably not the case that the line should be included in the
449 ;; reversal; it isn't difficult to add it afterward.
450 (or (and (eolp) (not (bolp))) (progn (forward-line -1) (end-of-line)))
451 (setq end (point-marker))
452 ;; the real work. this thing cranks through memory on large regions.
453 (let (ll (do t))
454 (while do
455 (goto-char beg)
456 (setq ll (cons (buffer-substring (point) (progn (end-of-line) (point)))
457 ll))
458 (setq do (/= (point) end))
459 (delete-region beg (if do (1+ (point)) (point))))
460 (while (cdr ll)
461 (insert (car ll) "\n")
462 (setq ll (cdr ll)))
463 (insert (car ll)))))
49116ac0
JB
464
465(provide 'sort)
466
c88ab9ce 467;;; sort.el ends here