Trailing whitepace deleted.
[bpt/emacs.git] / lisp / gnus / nndoc.el
CommitLineData
eec82323 1;;; nndoc.el --- single file access for Gnus
b092f83d 2;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
16409b0b 3;; Free Software Foundation, Inc.
eec82323 4
6748645f 5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
b092f83d 6;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
eec82323
LMI
7;; Keywords: news
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
25
26;;; Commentary:
27
b092f83d
SZ
28;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
29
eec82323
LMI
30;;; Code:
31
32(require 'nnheader)
33(require 'message)
34(require 'nnmail)
35(require 'nnoo)
6748645f 36(require 'gnus-util)
16409b0b 37(require 'mm-util)
eec82323
LMI
38(eval-when-compile (require 'cl))
39
40(nnoo-declare nndoc)
41
42(defvoo nndoc-article-type 'guess
43 "*Type of the file.
44One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
16409b0b 45`rfc934', `rfc822-forward', `mime-parts', `standard-digest',
b092f83d
SZ
46`slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
47`mailman', `exim-bounce', or `guess'.")
eec82323
LMI
48
49(defvoo nndoc-post-type 'mail
50 "*Whether the nndoc group is `mail' or `post'.")
51
6748645f
LMI
52(defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
53 "Hook run after opening a document.
54The default function removes all trailing carriage returns
16409b0b 55from the document.")
6748645f 56
eec82323
LMI
57(defvar nndoc-type-alist
58 `((mmdf
59 (article-begin . "^\^A\^A\^A\^A\n")
60 (body-end . "^\^A\^A\^A\^A\n"))
b092f83d
SZ
61 (exim-bounce
62 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
63 (body-end-function . nndoc-exim-bounce-body-end-function))
16409b0b
GM
64 (nsmail
65 (article-begin . "^From - "))
eec82323
LMI
66 (news
67 (article-begin . "^Path:"))
68 (rnews
69 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
70 (body-end-function . nndoc-rnews-body-end))
71 (mbox
72 (article-begin-function . nndoc-mbox-article-begin)
73 (body-end-function . nndoc-mbox-body-end))
74 (babyl
75 (article-begin . "\^_\^L *\n")
76 (body-end . "\^_")
77 (body-begin-function . nndoc-babyl-body-begin)
78 (head-begin-function . nndoc-babyl-head-begin))
eec82323
LMI
79 (rfc934
80 (article-begin . "^--.*\n+")
81 (body-end . "^--.*$")
82 (prepare-body-function . nndoc-unquote-dashes))
b092f83d
SZ
83 (mailman
84 (article-begin . "^--__--__--\n\nMessage:")
85 (body-end . "^--__--__--$")
86 (prepare-body-function . nndoc-unquote-dashes))
eec82323
LMI
87 (clari-briefs
88 (article-begin . "^ \\*")
89 (body-end . "^\t------*[ \t]^*\n^ \\*")
90 (body-begin . "^\t")
91 (head-end . "^\t")
92 (generate-head-function . nndoc-generate-clari-briefs-head)
93 (article-transform-function . nndoc-transform-clari-briefs))
94 (mime-digest
95 (article-begin . "")
16409b0b 96 (head-begin . "^ ?\n")
eec82323
LMI
97 (head-end . "^ ?$")
98 (body-end . "")
99 (file-end . "")
100 (subtype digest guess))
6748645f
LMI
101 (mime-parts
102 (generate-head-function . nndoc-generate-mime-parts-head)
103 (article-transform-function . nndoc-transform-mime-parts))
eec82323 104 (standard-digest
6748645f
LMI
105 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
106 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
eec82323
LMI
107 (prepare-body-function . nndoc-unquote-dashes)
108 (body-end-function . nndoc-digest-body-end)
6748645f
LMI
109 (head-end . "^ *$")
110 (body-begin . "^ *\n")
eec82323
LMI
111 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
112 (subtype digest guess))
113 (slack-digest
114 (article-begin . "^------------------------------*[\n \t]+")
115 (head-end . "^ ?$")
116 (body-end-function . nndoc-digest-body-end)
117 (body-begin . "^ ?$")
118 (file-end . "^End of")
119 (prepare-body-function . nndoc-unquote-dashes)
120 (subtype digest guess))
121 (lanl-gov-announce
122 (article-begin . "^\\\\\\\\\n")
123 (head-begin . "^Paper.*:")
124 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
125 (body-begin . "")
b092f83d
SZ
126 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
127 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
eec82323
LMI
128 (generate-head-function . nndoc-generate-lanl-gov-head)
129 (article-transform-function . nndoc-transform-lanl-gov-announce)
130 (subtype preprints guess))
131 (rfc822-forward
132 (article-begin . "^\n")
133 (body-end-function . nndoc-rfc822-forward-body-end-function))
16409b0b
GM
134 (outlook
135 (article-begin-function . nndoc-outlook-article-begin)
136 (body-end . "\0"))
b092f83d
SZ
137 (oe-dbx ;; Outlook Express DBX format
138 (dissection-function . nndoc-oe-dbx-dissection)
139 (generate-head-function . nndoc-oe-dbx-generate-head)
140 (generate-article-function . nndoc-oe-dbx-generate-article))
141 (forward
142 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
143 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
144 (prepare-body-function . nndoc-unquote-dashes))
145 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
146 (article-begin-function . nndoc-mail-in-mail-article-begin))
eec82323
LMI
147 (guess
148 (guess . t)
149 (subtype nil))
150 (digest
151 (guess . t)
152 (subtype nil))
153 (preprints
154 (guess . t)
155 (subtype nil))))
156
b092f83d
SZ
157(defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
158 "Regexp for binary nndoc file names.")
159
eec82323 160\f
eec82323
LMI
161(defvoo nndoc-file-begin nil)
162(defvoo nndoc-first-article nil)
eec82323
LMI
163(defvoo nndoc-article-begin nil)
164(defvoo nndoc-head-begin nil)
165(defvoo nndoc-head-end nil)
166(defvoo nndoc-file-end nil)
167(defvoo nndoc-body-begin nil)
168(defvoo nndoc-body-end-function nil)
169(defvoo nndoc-body-begin-function nil)
170(defvoo nndoc-head-begin-function nil)
171(defvoo nndoc-body-end nil)
6748645f 172;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
16409b0b
GM
173;; following items. ARTICLE acts as the association key and is an ordinal
174;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
175;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
176;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
177;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
178;; generation, respectively. Other headers usually follow directly from the
179;; buffer. Value `nil' means no insert.
eec82323
LMI
180(defvoo nndoc-dissection-alist nil)
181(defvoo nndoc-prepare-body-function nil)
182(defvoo nndoc-generate-head-function nil)
183(defvoo nndoc-article-transform-function nil)
184(defvoo nndoc-article-begin-function nil)
b092f83d
SZ
185(defvoo nndoc-generate-article-function nil)
186(defvoo nndoc-dissection-function nil)
eec82323
LMI
187
188(defvoo nndoc-status-string "")
189(defvoo nndoc-group-alist nil)
190(defvoo nndoc-current-buffer nil
191 "Current nndoc news buffer.")
192(defvoo nndoc-address nil)
193
194(defconst nndoc-version "nndoc 1.0"
195 "nndoc version.")
196
197\f
198
199;;; Interface functions
200
201(nnoo-define-basics nndoc)
202
203(deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
204 (when (nndoc-possibly-change-buffer newsgroup server)
205 (save-excursion
206 (set-buffer nntp-server-buffer)
207 (erase-buffer)
208 (let (article entry)
209 (if (stringp (car articles))
210 'headers
211 (while articles
212 (when (setq entry (cdr (assq (setq article (pop articles))
213 nndoc-dissection-alist)))
214 (insert (format "221 %d Article retrieved.\n" article))
215 (if nndoc-generate-head-function
216 (funcall nndoc-generate-head-function article)
217 (insert-buffer-substring
218 nndoc-current-buffer (car entry) (nth 1 entry)))
219 (goto-char (point-max))
16409b0b 220 (unless (eq (char-after (1- (point))) ?\n)
eec82323
LMI
221 (insert "\n"))
222 (insert (format "Lines: %d\n" (nth 4 entry)))
223 (insert ".\n")))
224
225 (nnheader-fold-continuation-lines)
226 'headers)))))
227
228(deffoo nndoc-request-article (article &optional newsgroup server buffer)
229 (nndoc-possibly-change-buffer newsgroup server)
230 (save-excursion
231 (let ((buffer (or buffer nntp-server-buffer))
232 (entry (cdr (assq article nndoc-dissection-alist)))
233 beg)
234 (set-buffer buffer)
235 (erase-buffer)
236 (when entry
b092f83d
SZ
237 (cond
238 ((stringp article) nil)
239 (nndoc-generate-article-function
240 (funcall nndoc-generate-article-function article))
241 (t
eec82323
LMI
242 (insert-buffer-substring
243 nndoc-current-buffer (car entry) (nth 1 entry))
244 (insert "\n")
245 (setq beg (point))
246 (insert-buffer-substring
247 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
248 (goto-char beg)
249 (when nndoc-prepare-body-function
250 (funcall nndoc-prepare-body-function))
251 (when nndoc-article-transform-function
252 (funcall nndoc-article-transform-function article))
b092f83d 253 t))))))
eec82323
LMI
254
255(deffoo nndoc-request-group (group &optional server dont-check)
256 "Select news GROUP."
257 (let (number)
258 (cond
259 ((not (nndoc-possibly-change-buffer group server))
260 (nnheader-report 'nndoc "No such file or buffer: %s"
261 nndoc-address))
262 (dont-check
263 (nnheader-report 'nndoc "Selected group %s" group)
264 t)
265 ((zerop (setq number (length nndoc-dissection-alist)))
266 (nndoc-close-group group)
267 (nnheader-report 'nndoc "No articles in group %s" group))
268 (t
269 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
270
271(deffoo nndoc-request-type (group &optional article)
272 (cond ((not article) 'unknown)
b092f83d
SZ
273 (nndoc-post-type nndoc-post-type)
274 (t 'unknown)))
eec82323
LMI
275
276(deffoo nndoc-close-group (group &optional server)
277 (nndoc-possibly-change-buffer group server)
278 (and nndoc-current-buffer
279 (buffer-name nndoc-current-buffer)
280 (kill-buffer nndoc-current-buffer))
281 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
282 nndoc-group-alist))
283 (setq nndoc-current-buffer nil)
284 (nnoo-close-server 'nndoc server)
285 (setq nndoc-dissection-alist nil)
286 t)
287
288(deffoo nndoc-request-list (&optional server)
289 nil)
290
291(deffoo nndoc-request-newgroups (date &optional server)
292 nil)
293
294(deffoo nndoc-request-list-newsgroups (&optional server)
295 nil)
296
297\f
298;;; Internal functions.
299
300(defun nndoc-possibly-change-buffer (group source)
301 (let (buf)
302 (cond
303 ;; The current buffer is this group's buffer.
304 ((and nndoc-current-buffer
305 (buffer-name nndoc-current-buffer)
306 (eq nndoc-current-buffer
307 (setq buf (cdr (assoc group nndoc-group-alist))))))
308 ;; We change buffers by taking an old from the group alist.
309 ;; `source' is either a string (a file name) or a buffer object.
310 (buf
311 (setq nndoc-current-buffer buf))
312 ;; It's a totally new group.
313 ((or (and (bufferp nndoc-address)
314 (buffer-name nndoc-address))
315 (and (stringp nndoc-address)
316 (file-exists-p nndoc-address)
317 (not (file-directory-p nndoc-address))))
318 (push (cons group (setq nndoc-current-buffer
319 (get-buffer-create
320 (concat " *nndoc " group "*"))))
321 nndoc-group-alist)
322 (setq nndoc-dissection-alist nil)
323 (save-excursion
324 (set-buffer nndoc-current-buffer)
eec82323 325 (erase-buffer)
b092f83d
SZ
326 (if (and (stringp nndoc-address)
327 (string-match nndoc-binary-file-names nndoc-address))
328 (let ((coding-system-for-read 'binary))
329 (mm-insert-file-contents nndoc-address))
330 (if (stringp nndoc-address)
331 (nnheader-insert-file-contents nndoc-address)
332 (insert-buffer-substring nndoc-address))
333 (run-hooks 'nndoc-open-document-hook)))))
eec82323
LMI
334 ;; Initialize the nndoc structures according to this new document.
335 (when (and nndoc-current-buffer
336 (not nndoc-dissection-alist))
337 (save-excursion
338 (set-buffer nndoc-current-buffer)
339 (nndoc-set-delims)
6748645f
LMI
340 (if (eq nndoc-article-type 'mime-parts)
341 (nndoc-dissect-mime-parts)
342 (nndoc-dissect-buffer))))
eec82323
LMI
343 (unless nndoc-current-buffer
344 (nndoc-close-server))
345 ;; Return whether we managed to select a file.
346 nndoc-current-buffer))
347
348;;;
349;;; Deciding what document type we have
350;;;
351
352(defun nndoc-set-delims ()
353 "Set the nndoc delimiter variables according to the type of the document."
354 (let ((vars '(nndoc-file-begin
355 nndoc-first-article
6748645f
LMI
356 nndoc-article-begin-function
357 nndoc-head-begin nndoc-head-end
eec82323
LMI
358 nndoc-file-end nndoc-article-begin
359 nndoc-body-begin nndoc-body-end-function nndoc-body-end
360 nndoc-prepare-body-function nndoc-article-transform-function
361 nndoc-generate-head-function nndoc-body-begin-function
b092f83d
SZ
362 nndoc-head-begin-function
363 nndoc-generate-article-function
364 nndoc-dissection-function)))
eec82323
LMI
365 (while vars
366 (set (pop vars) nil)))
367 (let (defs)
368 ;; Guess away until we find the real file type.
369 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
370 nndoc-type-alist))))
371 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
372 ;; Set the nndoc variables.
373 (while defs
374 (set (intern (format "nndoc-%s" (caar defs)))
375 (cdr (pop defs))))))
376
377(defun nndoc-guess-type (subtype)
378 (let ((alist nndoc-type-alist)
379 results result entry)
380 (while (and (not result)
381 (setq entry (pop alist)))
382 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
383 (goto-char (point-min))
16409b0b
GM
384 ;; Remove blank lines.
385 (while (eq (following-char) ?\n)
386 (delete-char 1))
eec82323
LMI
387 (when (numberp (setq result (funcall (intern
388 (format "nndoc-%s-type-p"
389 (car entry))))))
390 (push (cons result entry) results)
391 (setq result nil))))
392 (unless (or result results)
393 (error "Document is not of any recognized type"))
394 (if result
395 (car entry)
6748645f 396 (cadar (sort results 'car-less-than-car)))))
eec82323
LMI
397
398;;;
399;;; Built-in type predicates and functions
400;;;
401
402(defun nndoc-mbox-type-p ()
403 (when (looking-at message-unix-mail-delimiter)
404 t))
405
406(defun nndoc-mbox-article-begin ()
407 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
408 (goto-char (match-beginning 0))))
409
410(defun nndoc-mbox-body-end ()
411 (let ((beg (point))
412 len end)
413 (when
414 (save-excursion
415 (and (re-search-backward
416 (concat "^" message-unix-mail-delimiter) nil t)
417 (setq end (point))
418 (search-forward "\n\n" beg t)
419 (re-search-backward
420 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
421 (setq len (string-to-int (match-string 1)))
422 (search-forward "\n\n" beg t)
423 (unless (= (setq len (+ (point) len)) (point-max))
424 (and (< len (point-max))
425 (goto-char len)
426 (looking-at message-unix-mail-delimiter)))))
427 (goto-char len))))
428
429(defun nndoc-mmdf-type-p ()
430 (when (looking-at "\^A\^A\^A\^A$")
431 t))
432
433(defun nndoc-news-type-p ()
434 (when (looking-at "^Path:.*\n")
435 t))
436
437(defun nndoc-rnews-type-p ()
438 (when (looking-at "#! *rnews")
439 t))
440
441(defun nndoc-rnews-body-end ()
442 (and (re-search-backward nndoc-article-begin nil t)
443 (forward-line 1)
444 (goto-char (+ (point) (string-to-int (match-string 1))))))
445
446(defun nndoc-babyl-type-p ()
447 (when (re-search-forward "\^_\^L *\n" nil t)
448 t))
449
450(defun nndoc-babyl-body-begin ()
451 (re-search-forward "^\n" nil t)
6748645f 452 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
453 (let ((next (or (save-excursion
454 (re-search-forward nndoc-article-begin nil t))
455 (point-max))))
456 (unless (re-search-forward "^\n" next t)
457 (goto-char next)
458 (forward-line -1)
459 (insert "\n")
460 (forward-line -1)))))
461
462(defun nndoc-babyl-head-begin ()
463 (when (re-search-forward "^[0-9].*\n" nil t)
6748645f 464 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
465 (forward-line 1))
466 t))
467
468(defun nndoc-forward-type-p ()
b092f83d 469 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
16409b0b 470 nil t)
b092f83d 471 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:"))
eec82323
LMI
472 t))
473
474(defun nndoc-rfc934-type-p ()
475 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
476 (not (re-search-forward "^Subject:.*digest" nil t))
477 (not (re-search-backward "^From:" nil t 2))
478 (not (re-search-forward "^From:" nil t 2)))
479 t))
480
b092f83d
SZ
481(defun nndoc-mailman-type-p ()
482 (when (re-search-forward "^--__--__--\n+" nil t)
483 t))
484
eec82323
LMI
485(defun nndoc-rfc822-forward-type-p ()
486 (save-restriction
487 (message-narrow-to-head)
488 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
489 t)))
490
491(defun nndoc-rfc822-forward-body-end-function ()
492 (goto-char (point-max)))
493
6748645f
LMI
494(defun nndoc-mime-parts-type-p ()
495 (let ((case-fold-search t)
496 (limit (search-forward "\n\n" nil t)))
497 (goto-char (point-min))
498 (when (and limit
16409b0b
GM
499 (re-search-forward
500 (concat "\
501^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
502 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
503 limit t))
6748645f
LMI
504 t)))
505
506(defun nndoc-transform-mime-parts (article)
16409b0b
GM
507 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
508 (headers (nth 5 entry)))
509 (when headers
6748645f 510 (goto-char (point-min))
16409b0b
GM
511 (insert headers))))
512
513(defun nndoc-generate-mime-parts-head (article)
514 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
515 (headers (nth 6 entry)))
516 (save-restriction
517 (narrow-to-region (point) (point))
518 (insert-buffer-substring
519 nndoc-current-buffer (car entry) (nth 1 entry))
520 (goto-char (point-max)))
521 (when headers
522 (insert headers))))
6748645f 523
eec82323
LMI
524(defun nndoc-clari-briefs-type-p ()
525 (when (let ((case-fold-search nil))
526 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
527 t))
528
529(defun nndoc-transform-clari-briefs (article)
530 (goto-char (point-min))
531 (when (looking-at " *\\*\\(.*\\)\n")
532 (replace-match "" t t))
533 (nndoc-generate-clari-briefs-head article))
534
535(defun nndoc-generate-clari-briefs-head (article)
536 (let ((entry (cdr (assq article nndoc-dissection-alist)))
537 subject from)
538 (save-excursion
539 (set-buffer nndoc-current-buffer)
540 (save-restriction
541 (narrow-to-region (car entry) (nth 3 entry))
542 (goto-char (point-min))
543 (when (looking-at " *\\*\\(.*\\)$")
544 (setq subject (match-string 1))
545 (when (string-match "[ \t]+$" subject)
546 (setq subject (substring subject 0 (match-beginning 0)))))
547 (when
548 (let ((case-fold-search nil))
549 (re-search-forward
550 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
551 (setq from (match-string 1)))))
552 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
553 "\nSubject: " (or subject "(no subject)") "\n")))
554
b092f83d
SZ
555(defun nndoc-exim-bounce-type-p ()
556 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
557 t))
558
559(defun nndoc-exim-bounce-body-end-function ()
560 (goto-char (point-max)))
561
16409b0b 562
eec82323
LMI
563(defun nndoc-mime-digest-type-p ()
564 (let ((case-fold-search t)
565 boundary-id b-delimiter entry)
566 (when (and
567 (re-search-forward
568 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
6748645f 569 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
eec82323
LMI
570 nil t)
571 (match-beginning 1))
572 (setq boundary-id (match-string 1)
16409b0b 573 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
eec82323
LMI
574 (setq entry (assq 'mime-digest nndoc-type-alist))
575 (setcdr entry
576 (list
16409b0b 577 (cons 'head-begin "^ ?\n")
eec82323
LMI
578 (cons 'head-end "^ ?$")
579 (cons 'body-begin "^ ?\n")
580 (cons 'article-begin b-delimiter)
581 (cons 'body-end-function 'nndoc-digest-body-end)
b092f83d 582 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
eec82323
LMI
583 t)))
584
585(defun nndoc-standard-digest-type-p ()
586 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
587 (re-search-forward
588 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
589 t))
590
591(defun nndoc-digest-body-end ()
592 (and (re-search-forward nndoc-article-begin nil t)
593 (goto-char (match-beginning 0))))
594
595(defun nndoc-slack-digest-type-p ()
596 0)
597
598(defun nndoc-lanl-gov-announce-type-p ()
599 (when (let ((case-fold-search nil))
b092f83d 600 (re-search-forward "^\\\\\\\\\nPaper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+" nil t))
eec82323
LMI
601 t))
602
603(defun nndoc-transform-lanl-gov-announce (article)
604 (goto-char (point-max))
b092f83d
SZ
605 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
606 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
607 (goto-char (point-min))
608 (while (re-search-forward "^\\\\\\\\$" nil t)
609 (replace-match "" t nil))
610 (goto-char (point-min))
611 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
612 (replace-match "Date: \\1 (revised) " t nil))
613 (goto-char (point-min))
614 (unless (re-search-forward "^From" nil t)
615 (goto-char (point-min))
616 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
617 (goto-char (point-min))
618 (insert "From: " (match-string 1) "\n"))))
eec82323
LMI
619
620(defun nndoc-generate-lanl-gov-head (article)
621 (let ((entry (cdr (assq article nndoc-dissection-alist)))
b092f83d
SZ
622 (from "<no address given>")
623 subject date)
eec82323
LMI
624 (save-excursion
625 (set-buffer nndoc-current-buffer)
626 (save-restriction
b092f83d
SZ
627 (narrow-to-region (car entry) (nth 1 entry))
628 (goto-char (point-min))
629 (when (looking-at "^Paper.*: \\([a-zA-Z-\\.]+/[0-9]+\\)")
630 (setq subject (concat " (" (match-string 1) ")"))
631 (when (re-search-forward "^From: \\(.*\\)" nil t)
632 (setq from (concat "<"
a1506d29 633 (cadr (funcall gnus-extract-address-components
b092f83d
SZ
634 (match-string 1))) ">")))
635 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
636 (setq date (match-string 1))
637 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
638 (setq date (match-string 1))))
639 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
640 nil t)
641 (setq subject (concat (match-string 1) subject))
642 (setq from (concat (match-string 2) " " from))))))
eec82323
LMI
643 (while (and from (string-match "(\[^)\]*)" from))
644 (setq from (replace-match "" t t from)))
645 (insert "From: " (or from "unknown")
b092f83d
SZ
646 "\nSubject: " (or subject "(no subject)") "\n")
647 (if date (insert "Date: " date))))
eec82323 648
16409b0b
GM
649(defun nndoc-nsmail-type-p ()
650 (when (looking-at "From - ")
651 t))
652
653(defun nndoc-outlook-article-begin ()
654 (prog1 (re-search-forward "From:\\|Received:" nil t)
655 (goto-char (match-beginning 0))))
656
657(defun nndoc-outlook-type-p ()
658 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
659 (looking-at "JMF"))
660
b092f83d
SZ
661(defun nndoc-oe-dbx-type-p ()
662 (looking-at (mm-string-as-multibyte "\317\255\022\376")))
663
664(defun nndoc-read-little-endian ()
665 (+ (prog1 (char-after) (forward-char 1))
666 (lsh (prog1 (char-after) (forward-char 1)) 8)
667 (lsh (prog1 (char-after) (forward-char 1)) 16)
668 (lsh (prog1 (char-after) (forward-char 1)) 24)))
669
670(defun nndoc-oe-dbx-decode-block ()
671 (list
672 (nndoc-read-little-endian) ;; this address
673 (nndoc-read-little-endian) ;; next address offset
674 (nndoc-read-little-endian) ;; blocksize
675 (nndoc-read-little-endian))) ;; next address
676
677(defun nndoc-oe-dbx-dissection ()
678 (let ((i 0) blk p tp)
679 (goto-char 60117) ;; 0x0000EAD4+1
680 (setq p (point))
681 (unless (eobp)
682 (setq blk (nndoc-oe-dbx-decode-block)))
683 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
684 (> (nth 3 blk) p)))
685 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
686 (while (and (> (car blk) 0) (> (nth 3 blk) p))
687 (goto-char (1+ (nth 3 blk)))
688 (setq blk (nndoc-oe-dbx-decode-block)))
689 (if (or (<= (car blk) p)
690 (<= (nth 1 blk) 0)
691 (not (zerop (nth 3 blk))))
692 (setq blk nil)
693 (setq tp (+ (car blk) (nth 1 blk) 17))
694 (if (or (<= tp p) (>= tp (point-max)))
695 (setq blk nil)
696 (goto-char tp)
697 (setq p tp
698 blk (nndoc-oe-dbx-decode-block)))))))
699
700(defun nndoc-oe-dbx-generate-article (article &optional head)
701 (let ((entry (cdr (assq article nndoc-dissection-alist)))
702 (cur (current-buffer))
703 (begin (point))
704 blk p)
705 (with-current-buffer nndoc-current-buffer
706 (setq p (car entry))
707 (while (> p (point-min))
708 (goto-char p)
709 (setq blk (nndoc-oe-dbx-decode-block))
710 (setq p (point))
711 (with-current-buffer cur
712 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
713 (setq p (1+ (nth 3 blk)))))
714 (goto-char begin)
715 (while (re-search-forward "\r$" nil t)
716 (delete-backward-char 1))
717 (when head
718 (goto-char begin)
719 (when (search-forward "\n\n" nil t)
720 (setcar (cddddr entry) (count-lines (point) (point-max)))
721 (delete-region (1- (point)) (point-max))))
722 t))
723
724(defun nndoc-oe-dbx-generate-head (article)
725 (nndoc-oe-dbx-generate-article article 'head))
726
727(defun nndoc-mail-in-mail-type-p ()
728 (let (found)
729 (save-excursion
730 (catch 'done
731 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
732 (setq found 0)
733 (forward-line)
734 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
735 (if (looking-at "[-A-Za-z0-9]+:")
736 (setq found (1+ found)))
737 (forward-line))
738 (if (and (> found 0) (looking-at "\n"))
739 (throw 'done 9999)))
740 nil))))
741
742(defun nndoc-mail-in-mail-article-begin ()
743 (let (point found)
744 (if (catch 'done
745 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
746 (setq found 0)
747 (setq point (match-beginning 1))
748 (forward-line)
749 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
750 (if (looking-at "[-A-Za-z0-9]+:")
751 (setq found (1+ found)))
752 (forward-line))
753 (if (and (> found 0) (looking-at "\n"))
754 (throw 'done t)))
755 nil)
756 (goto-char point))))
757
6748645f
LMI
758(deffoo nndoc-request-accept-article (group &optional server last)
759 nil)
760
eec82323
LMI
761;;;
762;;; Functions for dissecting the documents
763;;;
764
765(defun nndoc-search (regexp)
766 (prog1
767 (re-search-forward regexp nil t)
768 (beginning-of-line)))
769
770(defun nndoc-dissect-buffer ()
771 "Go through the document and partition it into heads/bodies/articles."
772 (let ((i 0)
773 (first t)
774 head-begin head-end body-begin body-end)
775 (setq nndoc-dissection-alist nil)
776 (save-excursion
777 (set-buffer nndoc-current-buffer)
778 (goto-char (point-min))
16409b0b
GM
779 ;; Remove blank lines.
780 (while (eq (following-char) ?\n)
781 (delete-char 1))
b092f83d
SZ
782 (if nndoc-dissection-function
783 (funcall nndoc-dissection-function)
784 ;; Find the beginning of the file.
785 (when nndoc-file-begin
786 (nndoc-search nndoc-file-begin))
787 ;; Go through the file.
788 (while (if (and first nndoc-first-article)
789 (nndoc-search nndoc-first-article)
790 (nndoc-article-begin))
791 (setq first nil)
792 (cond (nndoc-head-begin-function
793 (funcall nndoc-head-begin-function))
794 (nndoc-head-begin
795 (nndoc-search nndoc-head-begin)))
796 (if (or (eobp)
797 (and nndoc-file-end
798 (looking-at nndoc-file-end)))
799 (goto-char (point-max))
800 (setq head-begin (point))
801 (nndoc-search (or nndoc-head-end "^$"))
802 (setq head-end (point))
803 (if nndoc-body-begin-function
804 (funcall nndoc-body-begin-function)
805 (nndoc-search (or nndoc-body-begin "^\n")))
806 (setq body-begin (point))
807 (or (and nndoc-body-end-function
808 (funcall nndoc-body-end-function))
809 (and nndoc-body-end
810 (nndoc-search nndoc-body-end))
811 (nndoc-article-begin)
812 (progn
813 (goto-char (point-max))
814 (when nndoc-file-end
815 (and (re-search-backward nndoc-file-end nil t)
816 (beginning-of-line)))))
817 (setq body-end (point))
818 (push (list (incf i) head-begin head-end body-begin body-end
819 (count-lines body-begin body-end))
820 nndoc-dissection-alist)))))))
eec82323
LMI
821
822(defun nndoc-article-begin ()
823 (if nndoc-article-begin-function
824 (funcall nndoc-article-begin-function)
825 (ignore-errors
826 (nndoc-search nndoc-article-begin))))
827
828(defun nndoc-unquote-dashes ()
829 "Unquote quoted non-separators in digests."
830 (while (re-search-forward "^- -"nil t)
831 (replace-match "-" t t)))
832
6748645f
LMI
833;; Against compiler warnings.
834(defvar nndoc-mime-split-ordinal)
835
836(defun nndoc-dissect-mime-parts ()
837 "Go through a MIME composite article and partition it into sub-articles.
838When a MIME entity contains sub-entities, dissection produces one article for
839the header of this entity, and one article per sub-entity."
840 (setq nndoc-dissection-alist nil
841 nndoc-mime-split-ordinal 0)
842 (save-excursion
843 (set-buffer nndoc-current-buffer)
16409b0b
GM
844 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
845
846(defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
847 position parent)
848 "Dissect an entity, within a composite MIME message.
849The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
850ARTICLE-INSERT should be added at beginning for generating a full article.
6748645f
LMI
851The string POSITION holds a dotted decimal representation of the article
852position in the hierarchical structure, it is nil for the outer entity.
16409b0b
GM
853PARENT is the message-ID of the parent summary line, or nil for none."
854 (let ((case-fold-search t)
855 (message-id (nnmail-message-id))
856 head-end body-begin summary-insert message-rfc822 multipart-any
857 subject content-type type subtype boundary-regexp)
858 ;; Gracefully handle a missing body.
859 (goto-char head-begin)
2e36fdce
DL
860 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
861 (search-forward "\n\n" body-end t))
16409b0b
GM
862 (setq head-end (1- (point))
863 body-begin (point))
864 (setq head-end body-end
865 body-begin body-end))
866 (narrow-to-region head-begin head-end)
867 ;; Save MIME attributes.
868 (goto-char head-begin)
869 (setq content-type (message-fetch-field "Content-Type"))
870 (when content-type
871 (when (string-match
872 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
873 (setq type (downcase (match-string 1 content-type))
874 subtype (downcase (match-string 2 content-type))
875 message-rfc822 (and (string= type "message")
876 (string= subtype "rfc822"))
877 multipart-any (string= type "multipart")))
878 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
879 (setq subject (match-string 1 content-type)))
880 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
881 (setq boundary-regexp (concat "^--"
882 (regexp-quote
883 (match-string 1 content-type))
884 "\\(--\\)?[ \t]*\n"))))
885 (unless subject
886 (when (or multipart-any (not article-insert))
887 (setq subject (message-fetch-field "Subject"))))
888 (unless type
889 (setq type "text"
890 subtype "plain"))
891 ;; Prepare the article and summary inserts.
892 (unless article-insert
893 (setq article-insert (buffer-substring (point-min) (point-max))
894 head-end head-begin))
b092f83d
SZ
895 ;; Fix MIME-Version
896 (unless (string-match "MIME-Version:" article-insert)
897 (setq article-insert
898 (concat article-insert "MIME-Version: 1.0\n")))
16409b0b
GM
899 (setq summary-insert article-insert)
900 ;; - summary Subject.
901 (setq summary-insert
902 (let ((line (concat "Subject: <" position
903 (and position multipart-any ".")
904 (and multipart-any "*")
905 (and (or position multipart-any) " ")
906 (cond ((string= subtype "plain") type)
907 ((string= subtype "basic") type)
908 (t subtype))
909 ">"
910 (and subject " ")
911 subject
912 "\n")))
913 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
914 (replace-match line t t summary-insert)
915 (concat summary-insert line))))
916 ;; - summary Message-ID.
917 (setq summary-insert
918 (let ((line (concat "Message-ID: " message-id "\n")))
919 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
920 (replace-match line t t summary-insert)
921 (concat summary-insert line))))
922 ;; - summary References.
923 (when parent
924 (setq summary-insert
925 (let ((line (concat "References: " parent "\n")))
926 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
927 summary-insert)
928 (replace-match line t t summary-insert)
929 (concat summary-insert line)))))
930 ;; Generate dissection information for this entity.
931 (push (list (incf nndoc-mime-split-ordinal)
932 head-begin head-end body-begin body-end
933 (count-lines body-begin body-end)
934 article-insert summary-insert)
935 nndoc-dissection-alist)
936 ;; Recurse for all sub-entities, if any.
937 (widen)
938 (cond
939 (message-rfc822
940 (save-excursion
941 (nndoc-dissect-mime-parts-sub body-begin body-end nil
942 position message-id)))
943 ((and multipart-any boundary-regexp)
944 (let ((part-counter 0)
945 part-begin part-end eof-flag)
946 (while (string-match "\
947^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
948 article-insert)
949 (setq article-insert (replace-match "" t t article-insert)))
950 (let ((case-fold-search nil))
951 (goto-char body-begin)
952 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
6748645f 953 (while (not eof-flag)
16409b0b
GM
954 (setq part-begin (point))
955 (cond ((re-search-forward boundary-regexp body-end t)
6748645f
LMI
956 (or (not (match-string 1))
957 (string= (match-string 1) "")
958 (setq eof-flag t))
959 (forward-line -1)
16409b0b 960 (setq part-end (point))
6748645f 961 (forward-line 1))
16409b0b 962 (t (setq part-end body-end
6748645f 963 eof-flag t)))
16409b0b
GM
964 (save-excursion
965 (nndoc-dissect-mime-parts-sub
966 part-begin part-end article-insert
967 (concat position
968 (and position ".")
969 (format "%d" (incf part-counter)))
970 message-id)))))))))
6748645f 971
eec82323
LMI
972;;;###autoload
973(defun nndoc-add-type (definition &optional position)
974 "Add document DEFINITION to the list of nndoc document definitions.
975If POSITION is nil or `last', the definition will be added
976as the last checked definition, if t or `first', add as the
977first definition, and if any other symbol, add after that
978symbol in the alist."
979 ;; First remove any old instances.
6748645f 980 (gnus-pull (car definition) nndoc-type-alist)
eec82323
LMI
981 ;; Then enter the new definition in the proper place.
982 (cond
983 ((or (null position) (eq position 'last))
984 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
985 ((or (eq position t) (eq position 'first))
986 (push definition nndoc-type-alist))
987 (t
988 (let ((list (memq (assq position nndoc-type-alist)
989 nndoc-type-alist)))
990 (unless list
991 (error "No such position: %s" position))
992 (setcdr list (cons definition (cdr list)))))))
993
994(provide 'nndoc)
995
996;;; nndoc.el ends here