(defgroup reftex): Update home page url-link.
[bpt/emacs.git] / lisp / gnus / nndoc.el
CommitLineData
eec82323 1;;; nndoc.el --- single file access for Gnus
e84b4b86
TTN
2
3;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
88e6695f 4;; 2004, 2005, 2006 Free Software Foundation, Inc.
eec82323 5
6748645f 6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
b092f83d 7;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
eec82323
LMI
8;; Keywords: news
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
24;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25;; Boston, MA 02110-1301, USA.
eec82323
LMI
26
27;;; Commentary:
28
b092f83d
SZ
29;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
30
eec82323
LMI
31;;; Code:
32
33(require 'nnheader)
34(require 'message)
35(require 'nnmail)
36(require 'nnoo)
6748645f 37(require 'gnus-util)
16409b0b 38(require 'mm-util)
eec82323
LMI
39(eval-when-compile (require 'cl))
40
41(nnoo-declare nndoc)
42
43(defvoo nndoc-article-type 'guess
44 "*Type of the file.
45One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
16409b0b 46`rfc934', `rfc822-forward', `mime-parts', `standard-digest',
b092f83d
SZ
47`slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
48`mailman', `exim-bounce', or `guess'.")
eec82323
LMI
49
50(defvoo nndoc-post-type 'mail
51 "*Whether the nndoc group is `mail' or `post'.")
52
6748645f
LMI
53(defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
54 "Hook run after opening a document.
55The default function removes all trailing carriage returns
16409b0b 56from the document.")
6748645f 57
eec82323
LMI
58(defvar nndoc-type-alist
59 `((mmdf
60 (article-begin . "^\^A\^A\^A\^A\n")
61 (body-end . "^\^A\^A\^A\^A\n"))
23f87bed
MB
62 (mime-digest
63 (article-begin . "")
64 (head-begin . "^ ?\n")
65 (head-end . "^ ?$")
66 (body-end . "")
67 (file-end . "")
68 (subtype digest guess))
69 (mime-parts
70 (generate-head-function . nndoc-generate-mime-parts-head)
71 (article-transform-function . nndoc-transform-mime-parts))
16409b0b
GM
72 (nsmail
73 (article-begin . "^From - "))
eec82323
LMI
74 (news
75 (article-begin . "^Path:"))
76 (rnews
77 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
78 (body-end-function . nndoc-rnews-body-end))
79 (mbox
80 (article-begin-function . nndoc-mbox-article-begin)
81 (body-end-function . nndoc-mbox-body-end))
82 (babyl
83 (article-begin . "\^_\^L *\n")
84 (body-end . "\^_")
85 (body-begin-function . nndoc-babyl-body-begin)
86 (head-begin-function . nndoc-babyl-head-begin))
23f87bed
MB
87 (exim-bounce
88 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
89 (body-end-function . nndoc-exim-bounce-body-end-function))
eec82323
LMI
90 (rfc934
91 (article-begin . "^--.*\n+")
92 (body-end . "^--.*$")
93 (prepare-body-function . nndoc-unquote-dashes))
b092f83d
SZ
94 (mailman
95 (article-begin . "^--__--__--\n\nMessage:")
96 (body-end . "^--__--__--$")
97 (prepare-body-function . nndoc-unquote-dashes))
eec82323
LMI
98 (clari-briefs
99 (article-begin . "^ \\*")
100 (body-end . "^\t------*[ \t]^*\n^ \\*")
101 (body-begin . "^\t")
102 (head-end . "^\t")
103 (generate-head-function . nndoc-generate-clari-briefs-head)
104 (article-transform-function . nndoc-transform-clari-briefs))
23f87bed 105
eec82323 106 (standard-digest
6748645f
LMI
107 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
108 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
eec82323
LMI
109 (prepare-body-function . nndoc-unquote-dashes)
110 (body-end-function . nndoc-digest-body-end)
6748645f
LMI
111 (head-end . "^ *$")
112 (body-begin . "^ *\n")
eec82323
LMI
113 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
114 (subtype digest guess))
115 (slack-digest
116 (article-begin . "^------------------------------*[\n \t]+")
117 (head-end . "^ ?$")
118 (body-end-function . nndoc-digest-body-end)
119 (body-begin . "^ ?$")
120 (file-end . "^End of")
121 (prepare-body-function . nndoc-unquote-dashes)
122 (subtype digest guess))
123 (lanl-gov-announce
124 (article-begin . "^\\\\\\\\\n")
125 (head-begin . "^Paper.*:")
126 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
127 (body-begin . "")
b092f83d
SZ
128 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
129 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
eec82323
LMI
130 (generate-head-function . nndoc-generate-lanl-gov-head)
131 (article-transform-function . nndoc-transform-lanl-gov-announce)
132 (subtype preprints guess))
133 (rfc822-forward
23f87bed
MB
134 (article-begin . "^\n+")
135 (body-end-function . nndoc-rfc822-forward-body-end-function)
136 (generate-head-function . nndoc-rfc822-forward-generate-head)
137 (generate-article-function . nndoc-rfc822-forward-generate-article))
16409b0b
GM
138 (outlook
139 (article-begin-function . nndoc-outlook-article-begin)
140 (body-end . "\0"))
b092f83d
SZ
141 (oe-dbx ;; Outlook Express DBX format
142 (dissection-function . nndoc-oe-dbx-dissection)
143 (generate-head-function . nndoc-oe-dbx-generate-head)
144 (generate-article-function . nndoc-oe-dbx-generate-article))
145 (forward
146 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
147 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
148 (prepare-body-function . nndoc-unquote-dashes))
149 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
150 (article-begin-function . nndoc-mail-in-mail-article-begin))
eec82323
LMI
151 (guess
152 (guess . t)
153 (subtype nil))
154 (digest
155 (guess . t)
156 (subtype nil))
157 (preprints
158 (guess . t)
159 (subtype nil))))
160
b092f83d
SZ
161(defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
162 "Regexp for binary nndoc file names.")
163
eec82323 164\f
eec82323
LMI
165(defvoo nndoc-file-begin nil)
166(defvoo nndoc-first-article nil)
eec82323
LMI
167(defvoo nndoc-article-begin nil)
168(defvoo nndoc-head-begin nil)
169(defvoo nndoc-head-end nil)
170(defvoo nndoc-file-end nil)
171(defvoo nndoc-body-begin nil)
172(defvoo nndoc-body-end-function nil)
173(defvoo nndoc-body-begin-function nil)
174(defvoo nndoc-head-begin-function nil)
175(defvoo nndoc-body-end nil)
6748645f 176;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
16409b0b
GM
177;; following items. ARTICLE acts as the association key and is an ordinal
178;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
179;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
180;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
181;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
182;; generation, respectively. Other headers usually follow directly from the
183;; buffer. Value `nil' means no insert.
eec82323
LMI
184(defvoo nndoc-dissection-alist nil)
185(defvoo nndoc-prepare-body-function nil)
186(defvoo nndoc-generate-head-function nil)
187(defvoo nndoc-article-transform-function nil)
188(defvoo nndoc-article-begin-function nil)
b092f83d
SZ
189(defvoo nndoc-generate-article-function nil)
190(defvoo nndoc-dissection-function nil)
eec82323
LMI
191
192(defvoo nndoc-status-string "")
193(defvoo nndoc-group-alist nil)
194(defvoo nndoc-current-buffer nil
195 "Current nndoc news buffer.")
196(defvoo nndoc-address nil)
197
198(defconst nndoc-version "nndoc 1.0"
199 "nndoc version.")
200
201\f
202
203;;; Interface functions
204
205(nnoo-define-basics nndoc)
206
207(deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
208 (when (nndoc-possibly-change-buffer newsgroup server)
209 (save-excursion
210 (set-buffer nntp-server-buffer)
211 (erase-buffer)
212 (let (article entry)
213 (if (stringp (car articles))
214 'headers
215 (while articles
216 (when (setq entry (cdr (assq (setq article (pop articles))
217 nndoc-dissection-alist)))
218 (insert (format "221 %d Article retrieved.\n" article))
219 (if nndoc-generate-head-function
220 (funcall nndoc-generate-head-function article)
221 (insert-buffer-substring
222 nndoc-current-buffer (car entry) (nth 1 entry)))
223 (goto-char (point-max))
16409b0b 224 (unless (eq (char-after (1- (point))) ?\n)
eec82323
LMI
225 (insert "\n"))
226 (insert (format "Lines: %d\n" (nth 4 entry)))
227 (insert ".\n")))
228
229 (nnheader-fold-continuation-lines)
230 'headers)))))
231
232(deffoo nndoc-request-article (article &optional newsgroup server buffer)
233 (nndoc-possibly-change-buffer newsgroup server)
234 (save-excursion
235 (let ((buffer (or buffer nntp-server-buffer))
236 (entry (cdr (assq article nndoc-dissection-alist)))
237 beg)
238 (set-buffer buffer)
239 (erase-buffer)
240 (when entry
b092f83d
SZ
241 (cond
242 ((stringp article) nil)
243 (nndoc-generate-article-function
244 (funcall nndoc-generate-article-function article))
245 (t
eec82323
LMI
246 (insert-buffer-substring
247 nndoc-current-buffer (car entry) (nth 1 entry))
248 (insert "\n")
249 (setq beg (point))
250 (insert-buffer-substring
251 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
252 (goto-char beg)
253 (when nndoc-prepare-body-function
254 (funcall nndoc-prepare-body-function))
255 (when nndoc-article-transform-function
256 (funcall nndoc-article-transform-function article))
b092f83d 257 t))))))
eec82323
LMI
258
259(deffoo nndoc-request-group (group &optional server dont-check)
260 "Select news GROUP."
261 (let (number)
262 (cond
263 ((not (nndoc-possibly-change-buffer group server))
264 (nnheader-report 'nndoc "No such file or buffer: %s"
265 nndoc-address))
266 (dont-check
267 (nnheader-report 'nndoc "Selected group %s" group)
268 t)
269 ((zerop (setq number (length nndoc-dissection-alist)))
270 (nndoc-close-group group)
271 (nnheader-report 'nndoc "No articles in group %s" group))
272 (t
273 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
274
275(deffoo nndoc-request-type (group &optional article)
276 (cond ((not article) 'unknown)
b092f83d
SZ
277 (nndoc-post-type nndoc-post-type)
278 (t 'unknown)))
eec82323
LMI
279
280(deffoo nndoc-close-group (group &optional server)
281 (nndoc-possibly-change-buffer group server)
282 (and nndoc-current-buffer
283 (buffer-name nndoc-current-buffer)
284 (kill-buffer nndoc-current-buffer))
285 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
286 nndoc-group-alist))
287 (setq nndoc-current-buffer nil)
288 (nnoo-close-server 'nndoc server)
289 (setq nndoc-dissection-alist nil)
290 t)
291
292(deffoo nndoc-request-list (&optional server)
293 nil)
294
295(deffoo nndoc-request-newgroups (date &optional server)
296 nil)
297
298(deffoo nndoc-request-list-newsgroups (&optional server)
299 nil)
300
301\f
302;;; Internal functions.
303
304(defun nndoc-possibly-change-buffer (group source)
305 (let (buf)
306 (cond
307 ;; The current buffer is this group's buffer.
308 ((and nndoc-current-buffer
309 (buffer-name nndoc-current-buffer)
310 (eq nndoc-current-buffer
311 (setq buf (cdr (assoc group nndoc-group-alist))))))
312 ;; We change buffers by taking an old from the group alist.
313 ;; `source' is either a string (a file name) or a buffer object.
314 (buf
315 (setq nndoc-current-buffer buf))
316 ;; It's a totally new group.
317 ((or (and (bufferp nndoc-address)
318 (buffer-name nndoc-address))
319 (and (stringp nndoc-address)
320 (file-exists-p nndoc-address)
321 (not (file-directory-p nndoc-address))))
322 (push (cons group (setq nndoc-current-buffer
323 (get-buffer-create
324 (concat " *nndoc " group "*"))))
325 nndoc-group-alist)
326 (setq nndoc-dissection-alist nil)
327 (save-excursion
328 (set-buffer nndoc-current-buffer)
eec82323 329 (erase-buffer)
b092f83d
SZ
330 (if (and (stringp nndoc-address)
331 (string-match nndoc-binary-file-names nndoc-address))
332 (let ((coding-system-for-read 'binary))
333 (mm-insert-file-contents nndoc-address))
334 (if (stringp nndoc-address)
335 (nnheader-insert-file-contents nndoc-address)
336 (insert-buffer-substring nndoc-address))
337 (run-hooks 'nndoc-open-document-hook)))))
eec82323
LMI
338 ;; Initialize the nndoc structures according to this new document.
339 (when (and nndoc-current-buffer
340 (not nndoc-dissection-alist))
341 (save-excursion
342 (set-buffer nndoc-current-buffer)
343 (nndoc-set-delims)
6748645f
LMI
344 (if (eq nndoc-article-type 'mime-parts)
345 (nndoc-dissect-mime-parts)
346 (nndoc-dissect-buffer))))
eec82323
LMI
347 (unless nndoc-current-buffer
348 (nndoc-close-server))
349 ;; Return whether we managed to select a file.
350 nndoc-current-buffer))
351
352;;;
353;;; Deciding what document type we have
354;;;
355
356(defun nndoc-set-delims ()
357 "Set the nndoc delimiter variables according to the type of the document."
358 (let ((vars '(nndoc-file-begin
359 nndoc-first-article
6748645f
LMI
360 nndoc-article-begin-function
361 nndoc-head-begin nndoc-head-end
eec82323
LMI
362 nndoc-file-end nndoc-article-begin
363 nndoc-body-begin nndoc-body-end-function nndoc-body-end
364 nndoc-prepare-body-function nndoc-article-transform-function
365 nndoc-generate-head-function nndoc-body-begin-function
b092f83d
SZ
366 nndoc-head-begin-function
367 nndoc-generate-article-function
368 nndoc-dissection-function)))
eec82323
LMI
369 (while vars
370 (set (pop vars) nil)))
371 (let (defs)
372 ;; Guess away until we find the real file type.
373 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
374 nndoc-type-alist))))
375 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
376 ;; Set the nndoc variables.
377 (while defs
378 (set (intern (format "nndoc-%s" (caar defs)))
379 (cdr (pop defs))))))
380
381(defun nndoc-guess-type (subtype)
382 (let ((alist nndoc-type-alist)
383 results result entry)
384 (while (and (not result)
385 (setq entry (pop alist)))
386 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
387 (goto-char (point-min))
16409b0b
GM
388 ;; Remove blank lines.
389 (while (eq (following-char) ?\n)
390 (delete-char 1))
eec82323
LMI
391 (when (numberp (setq result (funcall (intern
392 (format "nndoc-%s-type-p"
393 (car entry))))))
394 (push (cons result entry) results)
395 (setq result nil))))
396 (unless (or result results)
397 (error "Document is not of any recognized type"))
398 (if result
399 (car entry)
23f87bed 400 (cadar (last (sort results 'car-less-than-car))))))
eec82323
LMI
401
402;;;
403;;; Built-in type predicates and functions
404;;;
405
406(defun nndoc-mbox-type-p ()
407 (when (looking-at message-unix-mail-delimiter)
408 t))
409
410(defun nndoc-mbox-article-begin ()
411 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
412 (goto-char (match-beginning 0))))
413
414(defun nndoc-mbox-body-end ()
415 (let ((beg (point))
416 len end)
417 (when
418 (save-excursion
419 (and (re-search-backward
420 (concat "^" message-unix-mail-delimiter) nil t)
421 (setq end (point))
422 (search-forward "\n\n" beg t)
423 (re-search-backward
424 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
e9bd5782 425 (setq len (string-to-number (match-string 1)))
eec82323
LMI
426 (search-forward "\n\n" beg t)
427 (unless (= (setq len (+ (point) len)) (point-max))
428 (and (< len (point-max))
429 (goto-char len)
430 (looking-at message-unix-mail-delimiter)))))
431 (goto-char len))))
432
433(defun nndoc-mmdf-type-p ()
434 (when (looking-at "\^A\^A\^A\^A$")
435 t))
436
437(defun nndoc-news-type-p ()
438 (when (looking-at "^Path:.*\n")
439 t))
440
441(defun nndoc-rnews-type-p ()
442 (when (looking-at "#! *rnews")
443 t))
444
445(defun nndoc-rnews-body-end ()
446 (and (re-search-backward nndoc-article-begin nil t)
447 (forward-line 1)
e9bd5782 448 (goto-char (+ (point) (string-to-number (match-string 1))))))
eec82323
LMI
449
450(defun nndoc-babyl-type-p ()
451 (when (re-search-forward "\^_\^L *\n" nil t)
452 t))
453
454(defun nndoc-babyl-body-begin ()
455 (re-search-forward "^\n" nil t)
6748645f 456 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
457 (let ((next (or (save-excursion
458 (re-search-forward nndoc-article-begin nil t))
459 (point-max))))
460 (unless (re-search-forward "^\n" next t)
461 (goto-char next)
462 (forward-line -1)
463 (insert "\n")
464 (forward-line -1)))))
465
466(defun nndoc-babyl-head-begin ()
467 (when (re-search-forward "^[0-9].*\n" nil t)
6748645f 468 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
469 (forward-line 1))
470 t))
471
472(defun nndoc-forward-type-p ()
b092f83d 473 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
16409b0b 474 nil t)
23f87bed 475 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
eec82323
LMI
476 t))
477
478(defun nndoc-rfc934-type-p ()
479 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
480 (not (re-search-forward "^Subject:.*digest" nil t))
481 (not (re-search-backward "^From:" nil t 2))
482 (not (re-search-forward "^From:" nil t 2)))
483 t))
484
b092f83d
SZ
485(defun nndoc-mailman-type-p ()
486 (when (re-search-forward "^--__--__--\n+" nil t)
487 t))
488
eec82323
LMI
489(defun nndoc-rfc822-forward-type-p ()
490 (save-restriction
491 (message-narrow-to-head)
492 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
493 t)))
494
495(defun nndoc-rfc822-forward-body-end-function ()
496 (goto-char (point-max)))
497
23f87bed
MB
498(defun nndoc-rfc822-forward-generate-article (article &optional head)
499 (let ((entry (cdr (assq article nndoc-dissection-alist)))
500 (begin (point))
501 encoding)
502 (with-current-buffer nndoc-current-buffer
503 (save-restriction
504 (message-narrow-to-head)
505 (setq encoding (message-fetch-field "content-transfer-encoding"))))
506 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
507 (when encoding
508 (save-restriction
509 (narrow-to-region begin (point-max))
510 (mm-decode-content-transfer-encoding
511 (intern (downcase (mail-header-strip encoding))))))
512 (when head
513 (goto-char begin)
514 (when (search-forward "\n\n" nil t)
515 (delete-region (1- (point)) (point-max)))))
516 t)
517
518(defun nndoc-rfc822-forward-generate-head (article)
519 (nndoc-rfc822-forward-generate-article article 'head))
520
6748645f
LMI
521(defun nndoc-mime-parts-type-p ()
522 (let ((case-fold-search t)
523 (limit (search-forward "\n\n" nil t)))
524 (goto-char (point-min))
525 (when (and limit
16409b0b
GM
526 (re-search-forward
527 (concat "\
528^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
529 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
530 limit t))
6748645f
LMI
531 t)))
532
533(defun nndoc-transform-mime-parts (article)
16409b0b
GM
534 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
535 (headers (nth 5 entry)))
536 (when headers
6748645f 537 (goto-char (point-min))
16409b0b
GM
538 (insert headers))))
539
540(defun nndoc-generate-mime-parts-head (article)
541 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
542 (headers (nth 6 entry)))
543 (save-restriction
544 (narrow-to-region (point) (point))
545 (insert-buffer-substring
546 nndoc-current-buffer (car entry) (nth 1 entry))
547 (goto-char (point-max)))
548 (when headers
549 (insert headers))))
6748645f 550
eec82323
LMI
551(defun nndoc-clari-briefs-type-p ()
552 (when (let ((case-fold-search nil))
553 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
554 t))
555
556(defun nndoc-transform-clari-briefs (article)
557 (goto-char (point-min))
558 (when (looking-at " *\\*\\(.*\\)\n")
559 (replace-match "" t t))
560 (nndoc-generate-clari-briefs-head article))
561
562(defun nndoc-generate-clari-briefs-head (article)
563 (let ((entry (cdr (assq article nndoc-dissection-alist)))
564 subject from)
565 (save-excursion
566 (set-buffer nndoc-current-buffer)
567 (save-restriction
568 (narrow-to-region (car entry) (nth 3 entry))
569 (goto-char (point-min))
570 (when (looking-at " *\\*\\(.*\\)$")
571 (setq subject (match-string 1))
572 (when (string-match "[ \t]+$" subject)
573 (setq subject (substring subject 0 (match-beginning 0)))))
574 (when
575 (let ((case-fold-search nil))
576 (re-search-forward
577 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
578 (setq from (match-string 1)))))
579 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
580 "\nSubject: " (or subject "(no subject)") "\n")))
581
b092f83d
SZ
582(defun nndoc-exim-bounce-type-p ()
583 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
584 t))
585
586(defun nndoc-exim-bounce-body-end-function ()
587 (goto-char (point-max)))
588
16409b0b 589
eec82323
LMI
590(defun nndoc-mime-digest-type-p ()
591 (let ((case-fold-search t)
592 boundary-id b-delimiter entry)
593 (when (and
594 (re-search-forward
595 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
6748645f 596 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
eec82323
LMI
597 nil t)
598 (match-beginning 1))
599 (setq boundary-id (match-string 1)
16409b0b 600 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
eec82323
LMI
601 (setq entry (assq 'mime-digest nndoc-type-alist))
602 (setcdr entry
603 (list
16409b0b 604 (cons 'head-begin "^ ?\n")
eec82323
LMI
605 (cons 'head-end "^ ?$")
606 (cons 'body-begin "^ ?\n")
607 (cons 'article-begin b-delimiter)
608 (cons 'body-end-function 'nndoc-digest-body-end)
b092f83d 609 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
eec82323
LMI
610 t)))
611
612(defun nndoc-standard-digest-type-p ()
613 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
614 (re-search-forward
615 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
616 t))
617
618(defun nndoc-digest-body-end ()
619 (and (re-search-forward nndoc-article-begin nil t)
620 (goto-char (match-beginning 0))))
621
622(defun nndoc-slack-digest-type-p ()
623 0)
624
625(defun nndoc-lanl-gov-announce-type-p ()
626 (when (let ((case-fold-search nil))
b092f83d 627 (re-search-forward "^\\\\\\\\\nPaper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+" nil t))
eec82323
LMI
628 t))
629
630(defun nndoc-transform-lanl-gov-announce (article)
631 (goto-char (point-max))
b092f83d
SZ
632 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
633 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
634 (goto-char (point-min))
635 (while (re-search-forward "^\\\\\\\\$" nil t)
636 (replace-match "" t nil))
637 (goto-char (point-min))
638 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
639 (replace-match "Date: \\1 (revised) " t nil))
640 (goto-char (point-min))
641 (unless (re-search-forward "^From" nil t)
642 (goto-char (point-min))
643 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
644 (goto-char (point-min))
645 (insert "From: " (match-string 1) "\n"))))
eec82323
LMI
646
647(defun nndoc-generate-lanl-gov-head (article)
648 (let ((entry (cdr (assq article nndoc-dissection-alist)))
b092f83d
SZ
649 (from "<no address given>")
650 subject date)
eec82323
LMI
651 (save-excursion
652 (set-buffer nndoc-current-buffer)
653 (save-restriction
b092f83d
SZ
654 (narrow-to-region (car entry) (nth 1 entry))
655 (goto-char (point-min))
656 (when (looking-at "^Paper.*: \\([a-zA-Z-\\.]+/[0-9]+\\)")
657 (setq subject (concat " (" (match-string 1) ")"))
658 (when (re-search-forward "^From: \\(.*\\)" nil t)
659 (setq from (concat "<"
a1506d29 660 (cadr (funcall gnus-extract-address-components
b092f83d
SZ
661 (match-string 1))) ">")))
662 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
663 (setq date (match-string 1))
664 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
665 (setq date (match-string 1))))
666 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
667 nil t)
668 (setq subject (concat (match-string 1) subject))
669 (setq from (concat (match-string 2) " " from))))))
eec82323
LMI
670 (while (and from (string-match "(\[^)\]*)" from))
671 (setq from (replace-match "" t t from)))
672 (insert "From: " (or from "unknown")
b092f83d
SZ
673 "\nSubject: " (or subject "(no subject)") "\n")
674 (if date (insert "Date: " date))))
eec82323 675
16409b0b
GM
676(defun nndoc-nsmail-type-p ()
677 (when (looking-at "From - ")
678 t))
679
680(defun nndoc-outlook-article-begin ()
681 (prog1 (re-search-forward "From:\\|Received:" nil t)
682 (goto-char (match-beginning 0))))
683
684(defun nndoc-outlook-type-p ()
685 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
686 (looking-at "JMF"))
687
b092f83d
SZ
688(defun nndoc-oe-dbx-type-p ()
689 (looking-at (mm-string-as-multibyte "\317\255\022\376")))
690
691(defun nndoc-read-little-endian ()
692 (+ (prog1 (char-after) (forward-char 1))
693 (lsh (prog1 (char-after) (forward-char 1)) 8)
694 (lsh (prog1 (char-after) (forward-char 1)) 16)
695 (lsh (prog1 (char-after) (forward-char 1)) 24)))
696
697(defun nndoc-oe-dbx-decode-block ()
698 (list
699 (nndoc-read-little-endian) ;; this address
700 (nndoc-read-little-endian) ;; next address offset
701 (nndoc-read-little-endian) ;; blocksize
702 (nndoc-read-little-endian))) ;; next address
703
704(defun nndoc-oe-dbx-dissection ()
705 (let ((i 0) blk p tp)
706 (goto-char 60117) ;; 0x0000EAD4+1
707 (setq p (point))
708 (unless (eobp)
709 (setq blk (nndoc-oe-dbx-decode-block)))
710 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
711 (> (nth 3 blk) p)))
712 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
713 (while (and (> (car blk) 0) (> (nth 3 blk) p))
714 (goto-char (1+ (nth 3 blk)))
715 (setq blk (nndoc-oe-dbx-decode-block)))
716 (if (or (<= (car blk) p)
717 (<= (nth 1 blk) 0)
718 (not (zerop (nth 3 blk))))
719 (setq blk nil)
720 (setq tp (+ (car blk) (nth 1 blk) 17))
721 (if (or (<= tp p) (>= tp (point-max)))
722 (setq blk nil)
723 (goto-char tp)
724 (setq p tp
725 blk (nndoc-oe-dbx-decode-block)))))))
726
727(defun nndoc-oe-dbx-generate-article (article &optional head)
728 (let ((entry (cdr (assq article nndoc-dissection-alist)))
729 (cur (current-buffer))
730 (begin (point))
731 blk p)
732 (with-current-buffer nndoc-current-buffer
733 (setq p (car entry))
734 (while (> p (point-min))
735 (goto-char p)
736 (setq blk (nndoc-oe-dbx-decode-block))
737 (setq p (point))
738 (with-current-buffer cur
739 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
740 (setq p (1+ (nth 3 blk)))))
741 (goto-char begin)
742 (while (re-search-forward "\r$" nil t)
743 (delete-backward-char 1))
744 (when head
745 (goto-char begin)
746 (when (search-forward "\n\n" nil t)
747 (setcar (cddddr entry) (count-lines (point) (point-max)))
748 (delete-region (1- (point)) (point-max))))
749 t))
750
751(defun nndoc-oe-dbx-generate-head (article)
752 (nndoc-oe-dbx-generate-article article 'head))
753
754(defun nndoc-mail-in-mail-type-p ()
755 (let (found)
756 (save-excursion
757 (catch 'done
758 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
759 (setq found 0)
760 (forward-line)
761 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
762 (if (looking-at "[-A-Za-z0-9]+:")
763 (setq found (1+ found)))
764 (forward-line))
765 (if (and (> found 0) (looking-at "\n"))
766 (throw 'done 9999)))
767 nil))))
768
769(defun nndoc-mail-in-mail-article-begin ()
770 (let (point found)
771 (if (catch 'done
772 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
773 (setq found 0)
774 (setq point (match-beginning 1))
775 (forward-line)
776 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
777 (if (looking-at "[-A-Za-z0-9]+:")
778 (setq found (1+ found)))
779 (forward-line))
780 (if (and (> found 0) (looking-at "\n"))
781 (throw 'done t)))
782 nil)
783 (goto-char point))))
784
6748645f
LMI
785(deffoo nndoc-request-accept-article (group &optional server last)
786 nil)
787
eec82323
LMI
788;;;
789;;; Functions for dissecting the documents
790;;;
791
792(defun nndoc-search (regexp)
793 (prog1
794 (re-search-forward regexp nil t)
795 (beginning-of-line)))
796
797(defun nndoc-dissect-buffer ()
798 "Go through the document and partition it into heads/bodies/articles."
799 (let ((i 0)
800 (first t)
23f87bed 801 art-begin head-begin head-end body-begin body-end)
eec82323
LMI
802 (setq nndoc-dissection-alist nil)
803 (save-excursion
804 (set-buffer nndoc-current-buffer)
805 (goto-char (point-min))
16409b0b
GM
806 ;; Remove blank lines.
807 (while (eq (following-char) ?\n)
808 (delete-char 1))
b092f83d
SZ
809 (if nndoc-dissection-function
810 (funcall nndoc-dissection-function)
811 ;; Find the beginning of the file.
812 (when nndoc-file-begin
813 (nndoc-search nndoc-file-begin))
814 ;; Go through the file.
815 (while (if (and first nndoc-first-article)
816 (nndoc-search nndoc-first-article)
23f87bed
MB
817 (if art-begin
818 (goto-char art-begin)
819 (nndoc-article-begin)))
820 (setq first nil
821 art-begin nil)
b092f83d
SZ
822 (cond (nndoc-head-begin-function
823 (funcall nndoc-head-begin-function))
824 (nndoc-head-begin
825 (nndoc-search nndoc-head-begin)))
826 (if (or (eobp)
827 (and nndoc-file-end
828 (looking-at nndoc-file-end)))
829 (goto-char (point-max))
830 (setq head-begin (point))
831 (nndoc-search (or nndoc-head-end "^$"))
832 (setq head-end (point))
833 (if nndoc-body-begin-function
834 (funcall nndoc-body-begin-function)
835 (nndoc-search (or nndoc-body-begin "^\n")))
836 (setq body-begin (point))
837 (or (and nndoc-body-end-function
838 (funcall nndoc-body-end-function))
839 (and nndoc-body-end
840 (nndoc-search nndoc-body-end))
23f87bed
MB
841 (and (nndoc-article-begin)
842 (setq art-begin (point)))
b092f83d
SZ
843 (progn
844 (goto-char (point-max))
845 (when nndoc-file-end
846 (and (re-search-backward nndoc-file-end nil t)
847 (beginning-of-line)))))
848 (setq body-end (point))
849 (push (list (incf i) head-begin head-end body-begin body-end
850 (count-lines body-begin body-end))
851 nndoc-dissection-alist)))))))
eec82323
LMI
852
853(defun nndoc-article-begin ()
854 (if nndoc-article-begin-function
855 (funcall nndoc-article-begin-function)
856 (ignore-errors
857 (nndoc-search nndoc-article-begin))))
858
859(defun nndoc-unquote-dashes ()
860 "Unquote quoted non-separators in digests."
861 (while (re-search-forward "^- -"nil t)
862 (replace-match "-" t t)))
863
6748645f
LMI
864;; Against compiler warnings.
865(defvar nndoc-mime-split-ordinal)
866
867(defun nndoc-dissect-mime-parts ()
868 "Go through a MIME composite article and partition it into sub-articles.
869When a MIME entity contains sub-entities, dissection produces one article for
870the header of this entity, and one article per sub-entity."
871 (setq nndoc-dissection-alist nil
872 nndoc-mime-split-ordinal 0)
873 (save-excursion
874 (set-buffer nndoc-current-buffer)
16409b0b
GM
875 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
876
877(defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
878 position parent)
879 "Dissect an entity, within a composite MIME message.
880The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
881ARTICLE-INSERT should be added at beginning for generating a full article.
6748645f
LMI
882The string POSITION holds a dotted decimal representation of the article
883position in the hierarchical structure, it is nil for the outer entity.
16409b0b
GM
884PARENT is the message-ID of the parent summary line, or nil for none."
885 (let ((case-fold-search t)
886 (message-id (nnmail-message-id))
887 head-end body-begin summary-insert message-rfc822 multipart-any
888 subject content-type type subtype boundary-regexp)
889 ;; Gracefully handle a missing body.
890 (goto-char head-begin)
2e36fdce
DL
891 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
892 (search-forward "\n\n" body-end t))
16409b0b
GM
893 (setq head-end (1- (point))
894 body-begin (point))
895 (setq head-end body-end
896 body-begin body-end))
897 (narrow-to-region head-begin head-end)
898 ;; Save MIME attributes.
899 (goto-char head-begin)
900 (setq content-type (message-fetch-field "Content-Type"))
901 (when content-type
902 (when (string-match
903 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
904 (setq type (downcase (match-string 1 content-type))
905 subtype (downcase (match-string 2 content-type))
906 message-rfc822 (and (string= type "message")
907 (string= subtype "rfc822"))
908 multipart-any (string= type "multipart")))
909 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
910 (setq subject (match-string 1 content-type)))
911 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
912 (setq boundary-regexp (concat "^--"
913 (regexp-quote
914 (match-string 1 content-type))
915 "\\(--\\)?[ \t]*\n"))))
916 (unless subject
917 (when (or multipart-any (not article-insert))
918 (setq subject (message-fetch-field "Subject"))))
919 (unless type
920 (setq type "text"
921 subtype "plain"))
922 ;; Prepare the article and summary inserts.
923 (unless article-insert
23f87bed 924 (setq article-insert (buffer-string)
16409b0b 925 head-end head-begin))
b092f83d
SZ
926 ;; Fix MIME-Version
927 (unless (string-match "MIME-Version:" article-insert)
928 (setq article-insert
929 (concat article-insert "MIME-Version: 1.0\n")))
16409b0b
GM
930 (setq summary-insert article-insert)
931 ;; - summary Subject.
932 (setq summary-insert
933 (let ((line (concat "Subject: <" position
934 (and position multipart-any ".")
935 (and multipart-any "*")
936 (and (or position multipart-any) " ")
937 (cond ((string= subtype "plain") type)
938 ((string= subtype "basic") type)
939 (t subtype))
940 ">"
941 (and subject " ")
942 subject
943 "\n")))
944 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
945 (replace-match line t t summary-insert)
946 (concat summary-insert line))))
947 ;; - summary Message-ID.
948 (setq summary-insert
949 (let ((line (concat "Message-ID: " message-id "\n")))
950 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
951 (replace-match line t t summary-insert)
952 (concat summary-insert line))))
953 ;; - summary References.
954 (when parent
955 (setq summary-insert
956 (let ((line (concat "References: " parent "\n")))
957 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
958 summary-insert)
959 (replace-match line t t summary-insert)
960 (concat summary-insert line)))))
961 ;; Generate dissection information for this entity.
962 (push (list (incf nndoc-mime-split-ordinal)
963 head-begin head-end body-begin body-end
964 (count-lines body-begin body-end)
965 article-insert summary-insert)
966 nndoc-dissection-alist)
967 ;; Recurse for all sub-entities, if any.
968 (widen)
969 (cond
970 (message-rfc822
971 (save-excursion
972 (nndoc-dissect-mime-parts-sub body-begin body-end nil
973 position message-id)))
974 ((and multipart-any boundary-regexp)
975 (let ((part-counter 0)
976 part-begin part-end eof-flag)
977 (while (string-match "\
978^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
979 article-insert)
980 (setq article-insert (replace-match "" t t article-insert)))
981 (let ((case-fold-search nil))
982 (goto-char body-begin)
983 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
6748645f 984 (while (not eof-flag)
16409b0b
GM
985 (setq part-begin (point))
986 (cond ((re-search-forward boundary-regexp body-end t)
6748645f
LMI
987 (or (not (match-string 1))
988 (string= (match-string 1) "")
989 (setq eof-flag t))
990 (forward-line -1)
16409b0b 991 (setq part-end (point))
6748645f 992 (forward-line 1))
16409b0b 993 (t (setq part-end body-end
6748645f 994 eof-flag t)))
16409b0b
GM
995 (save-excursion
996 (nndoc-dissect-mime-parts-sub
997 part-begin part-end article-insert
998 (concat position
999 (and position ".")
1000 (format "%d" (incf part-counter)))
1001 message-id)))))))))
6748645f 1002
eec82323
LMI
1003;;;###autoload
1004(defun nndoc-add-type (definition &optional position)
1005 "Add document DEFINITION to the list of nndoc document definitions.
1006If POSITION is nil or `last', the definition will be added
1007as the last checked definition, if t or `first', add as the
1008first definition, and if any other symbol, add after that
1009symbol in the alist."
1010 ;; First remove any old instances.
6748645f 1011 (gnus-pull (car definition) nndoc-type-alist)
eec82323
LMI
1012 ;; Then enter the new definition in the proper place.
1013 (cond
1014 ((or (null position) (eq position 'last))
1015 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1016 ((or (eq position t) (eq position 'first))
1017 (push definition nndoc-type-alist))
1018 (t
1019 (let ((list (memq (assq position nndoc-type-alist)
1020 nndoc-type-alist)))
1021 (unless list
1022 (error "No such position: %s" position))
1023 (setcdr list (cons definition (cdr list)))))))
1024
1025(provide 'nndoc)
1026
ab5796a9 1027;;; arch-tag: f5c2970e-0387-47ac-a0b3-6cc317dffabe
eec82323 1028;;; nndoc.el ends here