Update copyright year to 2014 by running admin/update-copyright.
[bpt/emacs.git] / lisp / mh-e / mh-scan.el
CommitLineData
dda00b2c
BW
1;;; mh-scan.el --- MH-E scan line constants and utilities
2
ba318903 3;; Copyright (C) 1993, 1995, 1997, 2000-2014 Free Software Foundation,
ab422c4d 4;; Inc.
dda00b2c
BW
5
6;; Author: Bill Wohler <wohler@newt.com>
7;; Maintainer: Bill Wohler <wohler@newt.com>
8;; Keywords: mail
9;; See: mh-e.el
10
11;; This file is part of GNU Emacs.
12
5e809f55 13;; GNU Emacs is free software: you can redistribute it and/or modify
dda00b2c 14;; it under the terms of the GNU General Public License as published by
5e809f55
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
dda00b2c
BW
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
5e809f55 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
dda00b2c
BW
25
26;;; Commentary:
27
28;; This file contains constants and a few functions for interpreting
29;; scan lines.
30
31;;; Change Log:
32
33;;; Code:
34
35(require 'mh-e)
36
37\f
38
39;;; Scan Formats
40
41;; The following scan formats are passed to the scan program if the setting of
42;; `mh-scan-format-file' is t. They are identical except the later one makes
43;; use of the nmh `decode' function to decode RFC 2047 encodings. If you just
44;; want to change the column of the notations, use the `mh-set-cmd-note'
45;; function.
46
(defvar mh-scan-format-mh
  (concat
   "%4(msg)"
   "%<(cur)+%| %>"
   "%<{replied}-"
   "%?(nonnull(comp{to}))%<(mymbox{to})t%>"
   "%?(nonnull(comp{cc}))%<(mymbox{cc})c%>"
   "%?(nonnull(comp{bcc}))%<(mymbox{bcc})b%>"
   "%?(nonnull(comp{newsgroups}))n%>"
   "%<(zero) %>"
   "%02(mon{date})/%02(mday{date})%<{date} %|*%>"
   "%<(mymbox{from})%<{to}To:%14(friendly{to})%>%>"
   ;; The separator after the 17-column From field is two spaces wide so
   ;; that it agrees with `mh-scan-from-mbox-sep-width' (2) and with the
   ;; 19 columns (17 + 2) that `mh-scan-subject-regexp' skips.
   "%<(zero)%17(friendly{from})%>  "
   "%{subject}%<{body}<<%{body}%>")
  "*Scan format string for MH.
This string is passed to the scan program via the -format
argument.  This format is identical to the default except that
additional hints for fontification have been added to the fifth
column (remember that in Emacs, the first column is 0).

The values of the fifth column, in priority order, are: \"-\" if
the message has been replied to, \"t\" if an address on the To: line
matches one of the mailboxes of the current user, \"c\" if the Cc:
line matches, \"b\" if the Bcc: line matches, and \"n\" if a
non-empty Newsgroups: header is present.")
72
(defvar mh-scan-format-nmh
  (concat
   "%4(msg)"
   "%<(cur)+%| %>"
   "%<{replied}-"
   "%?(nonnull(comp{to}))%<(mymbox{to})t%>"
   "%?(nonnull(comp{cc}))%<(mymbox{cc})c%>"
   "%?(nonnull(comp{bcc}))%<(mymbox{bcc})b%>"
   "%?(nonnull(comp{newsgroups}))n%>"
   "%<(zero) %>"
   "%02(mon{date})/%02(mday{date})%<{date} %|*%>"
   "%<(mymbox{from})%<{to}To:%14(decode(friendly{to}))%>%>"
   ;; The separator after the 17-column From field is two spaces wide so
   ;; that it agrees with `mh-scan-from-mbox-sep-width' (2) and with the
   ;; 19 columns (17 + 2) that `mh-scan-subject-regexp' skips.
   "%<(zero)%17(decode(friendly{from}))%>  "
   "%(decode{subject})%<{body}<<%{body}%>")
  "*Scan format string for nmh.
This string is passed to the scan program via the -format arg.
This format is identical to the default except that additional
hints for fontification have been added to the fifth
column (remember that in Emacs, the first column is 0).

The values of the fifth column, in priority order, are: \"-\" if
the message has been replied to, \"t\" if an address on the To: field
matches one of the mailboxes of the current user, \"c\" if the Cc:
field matches, \"b\" if the Bcc: field matches, and \"n\" if a
non-empty Newsgroups: field is present.")
98
99\f
100
101;;; Regular Expressions
102
103;; Alphabetical.
104
(defvar mh-scan-body-regexp "\\(<<\\([^\n]+\\)?\\)"
  "This regular expression matches the message body fragment.

Note that the default setting of `mh-folder-font-lock-keywords'
expects this expression to contain at least one parenthesized
expression which matches the body text as in the default of
\"\\\\(<<\\\\([^\\n]+\\\\)?\\\\)\". If this regular expression is
not correct, the body fragment will not be highlighted with the
face `mh-folder-body'.")

(defvar mh-scan-blacklisted-msg-regexp "^\\( *[0-9]+\\)B"
  "This regular expression matches blacklisted (spam) messages.

It must match from the beginning of the line. Note that the
default setting of `mh-folder-font-lock-keywords' expects this
expression to contain at least one parenthesized expression which
matches the message number as in the default of

    \"^\\\\( *[0-9]+\\\\)B\".

This expression includes the leading space within the parenthesis
since it looks better to highlight it as well. The highlighting
is done with the face `mh-folder-blacklisted'. This regular
expression should be correct as it is needed by non-fontification
functions. See also `mh-note-blacklisted'.")

(defvar mh-scan-cur-msg-number-regexp "^\\( *[0-9]+\\+\\).*"
  "This regular expression matches the current message.

It must match from the beginning of the line. Note that the
default setting of `mh-folder-font-lock-keywords' expects this
expression to contain at least one parenthesized expression which
matches the message number as in the default of

    \"^\\\\( *[0-9]+\\\\+\\\\).*\".

This expression includes the leading space and current message
marker \"+\" within the parenthesis since it looks better to
highlight these items as well. The highlighting is done with the
face `mh-folder-cur-msg-number'. This regular expression should
be correct as it is needed by non-fontification functions. See
also `mh-note-cur'.")
147
(defvar mh-scan-date-regexp "\\([0-9][0-9]/[0-9][0-9]\\)"
  "This regular expression matches a valid date.

It must not be anchored to the beginning or the end of the line.
Note that the default setting of `mh-folder-font-lock-keywords'
expects this expression to contain only one parenthesized
expression which matches the date field as in the default of
\"\\\\([0-9][0-9]/[0-9][0-9]\\\\)\". If this regular expression
is not correct, the date will not be highlighted with the face
`mh-folder-date'.")
158
(defvar mh-scan-deleted-msg-regexp "^\\( *[0-9]+\\)D"
  "This regular expression matches deleted messages.

It must match from the beginning of the line. Note that the
default setting of `mh-folder-font-lock-keywords' expects this
expression to contain at least one parenthesized expression which
matches the message number as in the default of

    \"^\\\\( *[0-9]+\\\\)D\".

This expression includes the leading space within the parenthesis
since it looks better to highlight it as well. The highlighting
is done with the face `mh-folder-deleted'. This regular
expression should be correct as it is needed by non-fontification
functions. See also `mh-note-deleted'.")

(defvar mh-scan-good-msg-regexp "^\\( *[0-9]+\\)[^^DBW0-9]"
  "This regular expression matches \"good\" messages.

It must match from the beginning of the line. Note that the
default setting of `mh-folder-font-lock-keywords' expects this
expression to contain at least one parenthesized expression which
matches the message number as in the default of

    \"^\\\\( *[0-9]+\\\\)[^^DBW0-9]\".

This expression includes the leading space within the parenthesis
since it looks better to highlight it as well. The highlighting
is done with the face `mh-folder-msg-number'. This regular
expression should be correct as it is needed by non-fontification
functions.")

(defvar mh-scan-msg-format-regexp "%\\([0-9]*\\)(msg)"
  "This regular expression finds the message number width in a scan format.

Note that the message number must be placed in a parenthesized
expression as in the default of \"%\\\\([0-9]*\\\\)(msg)\". This
variable is only consulted if `mh-scan-format-file' is set to
\"Use MH-E scan Format\".")

(defvar mh-scan-msg-format-string "%d"
  "This is a format string for width of the message number in a scan format.

Use \"0%d\" for zero-filled message numbers. This variable is only
consulted if `mh-scan-format-file' is set to \"Use MH-E scan
Format\".")

(defvar mh-scan-msg-number-regexp "^ *\\([0-9]+\\)"
  "This regular expression extracts the message number.

It must match from the beginning of the line. Note that the
message number must be placed in a parenthesized expression as in
the default of \"^ *\\\\([0-9]+\\\\)\".")

(defvar mh-scan-msg-overflow-regexp "^[?0-9][0-9]"
  "This regular expression matches overflowed message numbers.")

(defvar mh-scan-msg-search-regexp "^[^0-9]*%d[^0-9]"
  "This regular expression matches a particular message.

It is a format string; use \"%d\" to represent the location of the
message number within the expression as in the default of
\"^[^0-9]*%d[^0-9]\".")
222
(defvar mh-scan-rcpt-regexp "\\(To:\\)\\(..............\\)"
  "This regular expression specifies the recipient in messages you sent.

Note that the default setting of `mh-folder-font-lock-keywords'
expects this expression to contain two parenthesized expressions.
The first is expected to match the \"To:\" that the default scan
format file generates. The second is expected to match the
recipient's name as in the default of
\"\\\\(To:\\\\)\\\\(..............\\\\)\". If this regular
expression is not correct, the \"To:\" string will not be
highlighted with the face `mh-folder-to' and the recipient will
not be highlighted with the face `mh-folder-address'.")
235
(defvar mh-scan-refiled-msg-regexp "^\\( *[0-9]+\\)\\^"
  "This regular expression matches refiled messages.

It must match from the beginning of the line. Note that the
default setting of `mh-folder-font-lock-keywords' expects this
expression to contain at least one parenthesized expression which
matches the message number as in the default of

    \"^\\\\( *[0-9]+\\\\)\\\\^\".

This expression includes the leading space within the parenthesis
since it looks better to highlight it as well. The highlighting
is done with the face `mh-folder-refiled'. This regular
expression should be correct as it is needed by non-fontification
functions. See also `mh-note-refiled'.")
dda00b2c
BW
251
(defvar mh-scan-sent-to-me-sender-regexp
  "^ *[0-9]+.\\([bct]\\).....[ ]*\\(..................\\)"
  "This regular expression matches messages sent to us.

Note that the default setting of `mh-folder-font-lock-keywords'
expects this expression to contain at least two parenthesized
expressions. The first should match the fontification hint (see
`mh-scan-format-nmh') and the second should match the user name
as in the default of

    ^ *[0-9]+.\\\\([bct]\\\\).....[ ]*\\\\(..................\\\\)

If this regular expression is not correct, the notation hints
will not be highlighted with the face
`mh-folder-sent-to-me-hint' and the sender will not be
highlighted with the face `mh-folder-sent-to-me-sender'.")
268
(defvar mh-scan-subject-regexp
  "^ *[0-9]+........[ ]*...................\\([Rr][Ee]\\(\\[[0-9]+\\]\\)?:\\s-*\\)*\\([^<\n]*\\)"
  "This regular expression matches the subject.

It must match from the beginning of the line. Note that the
default setting of `mh-folder-font-lock-keywords' expects this
expression to contain at least three parenthesized expressions.
The first is expected to match the \"Re:\" string, if any, and is
highlighted with the face `mh-folder-followup'. The second
matches an optional bracketed number after \"Re:\", such as in
\"Re[2]:\" (and is thus a sub-expression of the first expression)
and the third is expected to match the subject line itself which
is highlighted with the face `mh-folder-subject'. For example,
the default (broken on multiple lines for readability) is

    ^ *[0-9]+........[ ]*...................
    \\\\([Rr][Ee]\\\\(\\\\\\=[[0-9]+\\\\]\\\\)?:\\\\s-*\\\\)*
    \\\\([^<\\n]*\\\\)

This regular expression should be correct as it is needed by
non-fontification functions.")

(defvar mh-scan-valid-regexp "^ *[0-9]"
  "This regular expression describes a valid scan line.

This is used to eliminate error messages that are occasionally
produced by \"inc\".")

(defvar mh-scan-whitelisted-msg-regexp "^\\( *[0-9]+\\)W"
  "This regular expression matches whitelisted (non-spam) messages.

It must match from the beginning of the line. Note that the
default setting of `mh-folder-font-lock-keywords' expects this
expression to contain at least one parenthesized expression which
matches the message number as in the default of

    \"^\\\\( *[0-9]+\\\\)W\".

This expression includes the leading space within the parenthesis
since it looks better to highlight it as well. The highlighting
is done with the face `mh-folder-whitelisted'. This regular
expression should be correct as it is needed by non-fontification
functions. See also `mh-note-whitelisted'.")
312
dda00b2c
BW
313\f
314
315;;; Widths, Offsets and Columns
316
(defvar mh-cmd-note 4
  "Column for notations.

This variable should be set with the function `mh-set-cmd-note'.
This variable may be updated dynamically if
`mh-adaptive-cmd-note-flag' is on.

Note that columns in Emacs start with 0.")
;; Every buffer gets its own notation column.
(make-variable-buffer-local 'mh-cmd-note)

(defvar mh-scan-cmd-note-width 1
  "Number of columns consumed by the cmd-note field in `mh-scan-format'.

This column will have one of the values: \" \", \"^\", \"D\", \"B\", \"W\", \"+\", where

    \" \" is the default value,
    \"^\" is the `mh-note-refiled' character,
    \"D\" is the `mh-note-deleted' character,
    \"B\" is the `mh-note-blacklisted' character,
    \"W\" is the `mh-note-whitelisted' character, and
    \"+\" is the `mh-note-cur' character.")
338
(defvar mh-scan-destination-width 1
  "Number of columns consumed by the destination field in `mh-scan-format'.

This column will have one of \" \", \"%\", \"-\", \"t\", \"c\", \"b\", or \"n\"
in it.

    \" \" blank space is the default character.
    \"%\" indicates that the message is in a named MH sequence.
    \"-\" indicates that the message has been annotated with a replied field.
    \"t\" indicates that the message contains mymbox in the To: field.
    \"c\" indicates that the message contains mymbox in the Cc: field.
    \"b\" indicates that the message contains mymbox in the Bcc: field.
    \"n\" indicates that the message contains a Newsgroups: field.")
352
(defvar mh-scan-date-width 5
  "Number of columns consumed by the date field in `mh-scan-format'.
This column will typically be of the form mm/dd.")

(defvar mh-scan-date-flag-width 1
  "Number of columns consumed to flag (in)valid dates in `mh-scan-format'.
This column will have \" \" for valid and \"*\" for invalid or
missing dates.")

(defvar mh-scan-from-mbox-width 17
  "Number of columns consumed with the \"From:\" line in `mh-scan-format'.
This column will have a friendly name or e-mail address of the
originator, or a \"To: address\" for outgoing e-mail messages.")

(defvar mh-scan-from-mbox-sep-width 2
  "Number of columns consumed by whitespace after from-mbox in `mh-scan-format'.
This column will only ever have spaces in it.")

;; The offsets below are sums of the field widths above, computed once
;; when this file is loaded.

(defvar mh-scan-field-destination-offset
  (+ mh-scan-cmd-note-width)
  "The offset from the `mh-cmd-note' for the destination column.")

(defvar mh-scan-field-from-start-offset
  (+ mh-scan-cmd-note-width
     mh-scan-destination-width
     mh-scan-date-width
     mh-scan-date-flag-width)
  "The offset from the `mh-cmd-note' to find the start of \"From:\" address.")

(defvar mh-scan-field-from-end-offset
  (+ mh-scan-field-from-start-offset mh-scan-from-mbox-width)
  "The offset from the `mh-cmd-note' to find the end of \"From:\" address.")

(defvar mh-scan-field-subject-start-offset
  (+ mh-scan-cmd-note-width
     mh-scan-destination-width
     mh-scan-date-width
     mh-scan-date-flag-width
     mh-scan-from-mbox-width
     mh-scan-from-mbox-sep-width)
  "The offset from the `mh-cmd-note' to find the start of the subject.")
394
395\f
396
397;;; Notation
398
399;; Alphabetical.
400
41b97610
BW
(defvar mh-note-blacklisted ?B
  "Messages that have been blacklisted are marked by this character.
See also `mh-scan-blacklisted-msg-regexp'.")

(defvar mh-note-cur ?+
  "The current message (in MH, not in MH-E) is marked by this character.
See also `mh-scan-cur-msg-number-regexp'.")

(defvar mh-note-copied ?C
  "Messages that have been copied are marked by this character.")

(defvar mh-note-deleted ?D
  "Messages that have been deleted are marked by this character.
See also `mh-scan-deleted-msg-regexp'.")

(defvar mh-note-dist ?R
  "Messages that have been redistributed are marked by this character.")

(defvar mh-note-forw ?F
  "Messages that have been forwarded are marked by this character.")

(defvar mh-note-printed ?P
  "Messages that have been printed are marked by this character.")

(defvar mh-note-refiled ?^
  "Messages that have been refiled are marked by this character.
See also `mh-scan-refiled-msg-regexp'.")

(defvar mh-note-repl ?-
  "Messages that have been replied to are marked by this character.")

(defvar mh-note-seq ?%
  "Messages in a user-defined sequence are marked by this character.

Messages in the \"search\" sequence are marked by this character as
well.")

(defvar mh-note-whitelisted ?W
  "Messages that have been whitelisted are marked by this character.
See also `mh-scan-whitelisted-msg-regexp'.")
441
dda00b2c
BW
442\f
443
444;;; Utilities
445
446;;;###mh-autoload
(defun mh-scan-msg-number-regexp ()
  "Return the current value of the variable `mh-scan-msg-number-regexp'.
This is a function wrapper around the variable of the same name."
  mh-scan-msg-number-regexp)
450
451;;;###mh-autoload
(defun mh-scan-msg-search-regexp ()
  "Return the current value of the variable `mh-scan-msg-search-regexp'.
This is a function wrapper around the variable of the same name."
  mh-scan-msg-search-regexp)
455
456;;;###mh-autoload
(defun mh-set-cmd-note (column)
  "Store COLUMN in `mh-cmd-note' and return it.
Remember that Emacs counts columns starting at 0."
  (setq mh-cmd-note column))
461
462;;;###mh-autoload
(defun mh-scan-format ()
  "Return the output format argument for the scan program.

When `mh-scan-format-file' is t, the result is a list whose first
element is \"-format\" and whose second element is a one-element
list holding the built-in format (`mh-scan-format-nmh' for the
nmh and gnu-mh variants, `mh-scan-format-mh' otherwise) with its
message number width set from `mh-cmd-note'.  When
`mh-scan-format-file' is any other non-nil value, the result is
the list (\"-form\" FILE).  Otherwise the result is nil."
  (cond
   ((equal mh-scan-format-file t)
    (let ((template (if (mh-variant-p 'nmh 'gnu-mh)
                        mh-scan-format-nmh
                      mh-scan-format-mh)))
      ;; The format string is deliberately wrapped in its own list, so the
      ;; value has the shape ("-format" (STRING)).
      (list "-format"
            (list (mh-update-scan-format template mh-cmd-note)))))
   ((not (equal mh-scan-format-file nil))
    (list "-form" mh-scan-format-file))))
473
(defun mh-update-scan-format (fmt width)
  "Return a copy of the scan format FMT whose (msg) width is WIDTH.

The width portion of FMT is located with
`mh-scan-msg-format-regexp' (its first parenthesized group) and
is replaced by WIDTH rendered through
`mh-scan-msg-format-string'.  If the regexp does not match, FMT
is returned unchanged."
  (if (not (string-match mh-scan-msg-format-regexp fmt))
      fmt
    ;; Capture the group bounds before doing anything else with the
    ;; match data.
    (let ((width-start (match-beginning 1))
          (width-end (match-end 1)))
      (concat (substring fmt 0 width-start)
              (format mh-scan-msg-format-string width)
              (substring fmt width-end)))))
488
489;;;###mh-autoload
(defun mh-msg-num-width (folder)
  "Return the width of the largest message number in this FOLDER.

The scan program is run on the \"last\" message of FOLDER with the
format \"%(msg)\", writing into the buffer named by
`mh-temp-buffer'.  The width is the length of the text matched by
the first group of `mh-scan-msg-number-regexp' in that output, or
0 if nothing matches."
  (or mh-progs (mh-find-path))
  (with-current-buffer (get-buffer-create mh-temp-buffer)
    (erase-buffer)
    ;; Destination (t nil): standard output goes into this buffer and
    ;; standard error is discarded.
    (call-process (expand-file-name mh-scan-prog mh-progs) nil '(t nil) nil
                  folder "last" "-format" "%(msg)")
    (goto-char (point-min))
    (if (re-search-forward mh-scan-msg-number-regexp nil 0 1)
        (- (match-end 1) (match-beginning 1))
      0)))
505
506;;;###mh-autoload
(defun mh-msg-num-width-to-column (width)
  "Return the column for notations given message number WIDTH.
Note that columns in Emacs start with 0.

If `mh-scan-format-file' is set to \"Use MH-E scan Format\" this
means that either `mh-scan-format-mh' or `mh-scan-format-nmh' is
in use.  This function therefore assumes that the first column is
empty (to provide room for the cursor), the following WIDTH
columns contain the message number, and the column for notations
comes after that.

Signal an error if `mh-scan-format-file' is not t."
  (unless (eq mh-scan-format-file t)
    (error "%s %s" "Can't call `mh-msg-num-width-to-column' when"
           "`mh-scan-format-file' is not set to \"Use MH-E scan Format\""))
  ;; The result is never less than 2.
  (max (1+ width) 2))
521
522(provide 'mh-scan)
523
524;; Local Variables:
525;; indent-tabs-mode: nil
526;; sentence-end-double-space: nil
527;; End:
528
529;;; mh-scan.el ends here