Revision: miles@gnu.org--gnu-2005/emacs--unicode--0--patch-14
[bpt/emacs.git] / lisp / gnus / nndoc.el
CommitLineData
eec82323 1;;; nndoc.el --- single file access for Gnus
23f87bed 2;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
16409b0b 3;; Free Software Foundation, Inc.
eec82323 4
6748645f 5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
b092f83d 6;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
eec82323
LMI
7;; Keywords: news
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
25
26;;; Commentary:
27
b092f83d
SZ
28;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
29
eec82323
LMI
30;;; Code:
31
32(require 'nnheader)
33(require 'message)
34(require 'nnmail)
35(require 'nnoo)
6748645f 36(require 'gnus-util)
16409b0b 37(require 'mm-util)
eec82323
LMI
38(eval-when-compile (require 'cl))
39
40(nnoo-declare nndoc)
41
42(defvoo nndoc-article-type 'guess
43 "*Type of the file.
44One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
16409b0b 45`rfc934', `rfc822-forward', `mime-parts', `standard-digest',
b092f83d
SZ
46`slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
47`mailman', `exim-bounce', or `guess'.")
eec82323
LMI
48
49(defvoo nndoc-post-type 'mail
50 "*Whether the nndoc group is `mail' or `post'.")
51
6748645f
LMI
52(defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
53 "Hook run after opening a document.
54The default function removes all trailing carriage returns
16409b0b 55from the document.")
6748645f 56
eec82323
LMI
57(defvar nndoc-type-alist
58 `((mmdf
59 (article-begin . "^\^A\^A\^A\^A\n")
60 (body-end . "^\^A\^A\^A\^A\n"))
23f87bed
MB
61 (mime-digest
62 (article-begin . "")
63 (head-begin . "^ ?\n")
64 (head-end . "^ ?$")
65 (body-end . "")
66 (file-end . "")
67 (subtype digest guess))
68 (mime-parts
69 (generate-head-function . nndoc-generate-mime-parts-head)
70 (article-transform-function . nndoc-transform-mime-parts))
16409b0b
GM
71 (nsmail
72 (article-begin . "^From - "))
eec82323
LMI
73 (news
74 (article-begin . "^Path:"))
75 (rnews
76 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
77 (body-end-function . nndoc-rnews-body-end))
78 (mbox
79 (article-begin-function . nndoc-mbox-article-begin)
80 (body-end-function . nndoc-mbox-body-end))
81 (babyl
82 (article-begin . "\^_\^L *\n")
83 (body-end . "\^_")
84 (body-begin-function . nndoc-babyl-body-begin)
85 (head-begin-function . nndoc-babyl-head-begin))
23f87bed
MB
86 (exim-bounce
87 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
88 (body-end-function . nndoc-exim-bounce-body-end-function))
eec82323
LMI
89 (rfc934
90 (article-begin . "^--.*\n+")
91 (body-end . "^--.*$")
92 (prepare-body-function . nndoc-unquote-dashes))
b092f83d
SZ
93 (mailman
94 (article-begin . "^--__--__--\n\nMessage:")
95 (body-end . "^--__--__--$")
96 (prepare-body-function . nndoc-unquote-dashes))
eec82323
LMI
97 (clari-briefs
98 (article-begin . "^ \\*")
99 (body-end . "^\t------*[ \t]^*\n^ \\*")
100 (body-begin . "^\t")
101 (head-end . "^\t")
102 (generate-head-function . nndoc-generate-clari-briefs-head)
103 (article-transform-function . nndoc-transform-clari-briefs))
23f87bed 104
eec82323 105 (standard-digest
6748645f
LMI
106 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
107 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
eec82323
LMI
108 (prepare-body-function . nndoc-unquote-dashes)
109 (body-end-function . nndoc-digest-body-end)
6748645f
LMI
110 (head-end . "^ *$")
111 (body-begin . "^ *\n")
eec82323
LMI
112 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
113 (subtype digest guess))
114 (slack-digest
115 (article-begin . "^------------------------------*[\n \t]+")
116 (head-end . "^ ?$")
117 (body-end-function . nndoc-digest-body-end)
118 (body-begin . "^ ?$")
119 (file-end . "^End of")
120 (prepare-body-function . nndoc-unquote-dashes)
121 (subtype digest guess))
122 (lanl-gov-announce
123 (article-begin . "^\\\\\\\\\n")
124 (head-begin . "^Paper.*:")
125 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
126 (body-begin . "")
b092f83d
SZ
127 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
128 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
eec82323
LMI
129 (generate-head-function . nndoc-generate-lanl-gov-head)
130 (article-transform-function . nndoc-transform-lanl-gov-announce)
131 (subtype preprints guess))
132 (rfc822-forward
23f87bed
MB
133 (article-begin . "^\n+")
134 (body-end-function . nndoc-rfc822-forward-body-end-function)
135 (generate-head-function . nndoc-rfc822-forward-generate-head)
136 (generate-article-function . nndoc-rfc822-forward-generate-article))
16409b0b
GM
137 (outlook
138 (article-begin-function . nndoc-outlook-article-begin)
139 (body-end . "\0"))
b092f83d
SZ
140 (oe-dbx ;; Outlook Express DBX format
141 (dissection-function . nndoc-oe-dbx-dissection)
142 (generate-head-function . nndoc-oe-dbx-generate-head)
143 (generate-article-function . nndoc-oe-dbx-generate-article))
144 (forward
145 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
146 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
147 (prepare-body-function . nndoc-unquote-dashes))
148 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
149 (article-begin-function . nndoc-mail-in-mail-article-begin))
eec82323
LMI
150 (guess
151 (guess . t)
152 (subtype nil))
153 (digest
154 (guess . t)
155 (subtype nil))
156 (preprints
157 (guess . t)
158 (subtype nil))))
159
b092f83d
SZ
160(defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
161 "Regexp for binary nndoc file names.")
162
eec82323 163\f
eec82323
LMI
164(defvoo nndoc-file-begin nil)
165(defvoo nndoc-first-article nil)
eec82323
LMI
166(defvoo nndoc-article-begin nil)
167(defvoo nndoc-head-begin nil)
168(defvoo nndoc-head-end nil)
169(defvoo nndoc-file-end nil)
170(defvoo nndoc-body-begin nil)
171(defvoo nndoc-body-end-function nil)
172(defvoo nndoc-body-begin-function nil)
173(defvoo nndoc-head-begin-function nil)
174(defvoo nndoc-body-end nil)
6748645f 175;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
16409b0b
GM
176;; following items. ARTICLE acts as the association key and is an ordinal
177;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
178;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
179;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
180;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
181;; generation, respectively. Other headers usually follow directly from the
182;; buffer. Value `nil' means no insert.
eec82323
LMI
183(defvoo nndoc-dissection-alist nil)
184(defvoo nndoc-prepare-body-function nil)
185(defvoo nndoc-generate-head-function nil)
186(defvoo nndoc-article-transform-function nil)
187(defvoo nndoc-article-begin-function nil)
b092f83d
SZ
188(defvoo nndoc-generate-article-function nil)
189(defvoo nndoc-dissection-function nil)
eec82323
LMI
190
191(defvoo nndoc-status-string "")
192(defvoo nndoc-group-alist nil)
193(defvoo nndoc-current-buffer nil
194 "Current nndoc news buffer.")
195(defvoo nndoc-address nil)
196
197(defconst nndoc-version "nndoc 1.0"
198 "nndoc version.")
199
200\f
201
202;;; Interface functions
203
204(nnoo-define-basics nndoc)
205
206(deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
207 (when (nndoc-possibly-change-buffer newsgroup server)
208 (save-excursion
209 (set-buffer nntp-server-buffer)
210 (erase-buffer)
211 (let (article entry)
212 (if (stringp (car articles))
213 'headers
214 (while articles
215 (when (setq entry (cdr (assq (setq article (pop articles))
216 nndoc-dissection-alist)))
217 (insert (format "221 %d Article retrieved.\n" article))
218 (if nndoc-generate-head-function
219 (funcall nndoc-generate-head-function article)
220 (insert-buffer-substring
221 nndoc-current-buffer (car entry) (nth 1 entry)))
222 (goto-char (point-max))
16409b0b 223 (unless (eq (char-after (1- (point))) ?\n)
eec82323
LMI
224 (insert "\n"))
225 (insert (format "Lines: %d\n" (nth 4 entry)))
226 (insert ".\n")))
227
228 (nnheader-fold-continuation-lines)
229 'headers)))))
230
231(deffoo nndoc-request-article (article &optional newsgroup server buffer)
232 (nndoc-possibly-change-buffer newsgroup server)
233 (save-excursion
234 (let ((buffer (or buffer nntp-server-buffer))
235 (entry (cdr (assq article nndoc-dissection-alist)))
236 beg)
237 (set-buffer buffer)
238 (erase-buffer)
239 (when entry
b092f83d
SZ
240 (cond
241 ((stringp article) nil)
242 (nndoc-generate-article-function
243 (funcall nndoc-generate-article-function article))
244 (t
eec82323
LMI
245 (insert-buffer-substring
246 nndoc-current-buffer (car entry) (nth 1 entry))
247 (insert "\n")
248 (setq beg (point))
249 (insert-buffer-substring
250 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
251 (goto-char beg)
252 (when nndoc-prepare-body-function
253 (funcall nndoc-prepare-body-function))
254 (when nndoc-article-transform-function
255 (funcall nndoc-article-transform-function article))
b092f83d 256 t))))))
eec82323
LMI
257
258(deffoo nndoc-request-group (group &optional server dont-check)
259 "Select news GROUP."
260 (let (number)
261 (cond
262 ((not (nndoc-possibly-change-buffer group server))
263 (nnheader-report 'nndoc "No such file or buffer: %s"
264 nndoc-address))
265 (dont-check
266 (nnheader-report 'nndoc "Selected group %s" group)
267 t)
268 ((zerop (setq number (length nndoc-dissection-alist)))
269 (nndoc-close-group group)
270 (nnheader-report 'nndoc "No articles in group %s" group))
271 (t
272 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
273
274(deffoo nndoc-request-type (group &optional article)
275 (cond ((not article) 'unknown)
b092f83d
SZ
276 (nndoc-post-type nndoc-post-type)
277 (t 'unknown)))
eec82323
LMI
278
279(deffoo nndoc-close-group (group &optional server)
280 (nndoc-possibly-change-buffer group server)
281 (and nndoc-current-buffer
282 (buffer-name nndoc-current-buffer)
283 (kill-buffer nndoc-current-buffer))
284 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
285 nndoc-group-alist))
286 (setq nndoc-current-buffer nil)
287 (nnoo-close-server 'nndoc server)
288 (setq nndoc-dissection-alist nil)
289 t)
290
291(deffoo nndoc-request-list (&optional server)
292 nil)
293
294(deffoo nndoc-request-newgroups (date &optional server)
295 nil)
296
297(deffoo nndoc-request-list-newsgroups (&optional server)
298 nil)
299
300\f
301;;; Internal functions.
302
303(defun nndoc-possibly-change-buffer (group source)
304 (let (buf)
305 (cond
306 ;; The current buffer is this group's buffer.
307 ((and nndoc-current-buffer
308 (buffer-name nndoc-current-buffer)
309 (eq nndoc-current-buffer
310 (setq buf (cdr (assoc group nndoc-group-alist))))))
311 ;; We change buffers by taking an old from the group alist.
312 ;; `source' is either a string (a file name) or a buffer object.
313 (buf
314 (setq nndoc-current-buffer buf))
315 ;; It's a totally new group.
316 ((or (and (bufferp nndoc-address)
317 (buffer-name nndoc-address))
318 (and (stringp nndoc-address)
319 (file-exists-p nndoc-address)
320 (not (file-directory-p nndoc-address))))
321 (push (cons group (setq nndoc-current-buffer
322 (get-buffer-create
323 (concat " *nndoc " group "*"))))
324 nndoc-group-alist)
325 (setq nndoc-dissection-alist nil)
326 (save-excursion
327 (set-buffer nndoc-current-buffer)
eec82323 328 (erase-buffer)
b092f83d
SZ
329 (if (and (stringp nndoc-address)
330 (string-match nndoc-binary-file-names nndoc-address))
331 (let ((coding-system-for-read 'binary))
332 (mm-insert-file-contents nndoc-address))
333 (if (stringp nndoc-address)
334 (nnheader-insert-file-contents nndoc-address)
335 (insert-buffer-substring nndoc-address))
336 (run-hooks 'nndoc-open-document-hook)))))
eec82323
LMI
337 ;; Initialize the nndoc structures according to this new document.
338 (when (and nndoc-current-buffer
339 (not nndoc-dissection-alist))
340 (save-excursion
341 (set-buffer nndoc-current-buffer)
342 (nndoc-set-delims)
6748645f
LMI
343 (if (eq nndoc-article-type 'mime-parts)
344 (nndoc-dissect-mime-parts)
345 (nndoc-dissect-buffer))))
eec82323
LMI
346 (unless nndoc-current-buffer
347 (nndoc-close-server))
348 ;; Return whether we managed to select a file.
349 nndoc-current-buffer))
350
351;;;
352;;; Deciding what document type we have
353;;;
354
355(defun nndoc-set-delims ()
356 "Set the nndoc delimiter variables according to the type of the document."
357 (let ((vars '(nndoc-file-begin
358 nndoc-first-article
6748645f
LMI
359 nndoc-article-begin-function
360 nndoc-head-begin nndoc-head-end
eec82323
LMI
361 nndoc-file-end nndoc-article-begin
362 nndoc-body-begin nndoc-body-end-function nndoc-body-end
363 nndoc-prepare-body-function nndoc-article-transform-function
364 nndoc-generate-head-function nndoc-body-begin-function
b092f83d
SZ
365 nndoc-head-begin-function
366 nndoc-generate-article-function
367 nndoc-dissection-function)))
eec82323
LMI
368 (while vars
369 (set (pop vars) nil)))
370 (let (defs)
371 ;; Guess away until we find the real file type.
372 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
373 nndoc-type-alist))))
374 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
375 ;; Set the nndoc variables.
376 (while defs
377 (set (intern (format "nndoc-%s" (caar defs)))
378 (cdr (pop defs))))))
379
380(defun nndoc-guess-type (subtype)
381 (let ((alist nndoc-type-alist)
382 results result entry)
383 (while (and (not result)
384 (setq entry (pop alist)))
385 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
386 (goto-char (point-min))
16409b0b
GM
387 ;; Remove blank lines.
388 (while (eq (following-char) ?\n)
389 (delete-char 1))
eec82323
LMI
390 (when (numberp (setq result (funcall (intern
391 (format "nndoc-%s-type-p"
392 (car entry))))))
393 (push (cons result entry) results)
394 (setq result nil))))
395 (unless (or result results)
396 (error "Document is not of any recognized type"))
397 (if result
398 (car entry)
23f87bed 399 (cadar (last (sort results 'car-less-than-car))))))
eec82323
LMI
400
401;;;
402;;; Built-in type predicates and functions
403;;;
404
405(defun nndoc-mbox-type-p ()
406 (when (looking-at message-unix-mail-delimiter)
407 t))
408
409(defun nndoc-mbox-article-begin ()
410 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
411 (goto-char (match-beginning 0))))
412
413(defun nndoc-mbox-body-end ()
414 (let ((beg (point))
415 len end)
416 (when
417 (save-excursion
418 (and (re-search-backward
419 (concat "^" message-unix-mail-delimiter) nil t)
420 (setq end (point))
421 (search-forward "\n\n" beg t)
422 (re-search-backward
423 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
424 (setq len (string-to-int (match-string 1)))
425 (search-forward "\n\n" beg t)
426 (unless (= (setq len (+ (point) len)) (point-max))
427 (and (< len (point-max))
428 (goto-char len)
429 (looking-at message-unix-mail-delimiter)))))
430 (goto-char len))))
431
432(defun nndoc-mmdf-type-p ()
433 (when (looking-at "\^A\^A\^A\^A$")
434 t))
435
436(defun nndoc-news-type-p ()
437 (when (looking-at "^Path:.*\n")
438 t))
439
440(defun nndoc-rnews-type-p ()
441 (when (looking-at "#! *rnews")
442 t))
443
444(defun nndoc-rnews-body-end ()
445 (and (re-search-backward nndoc-article-begin nil t)
446 (forward-line 1)
447 (goto-char (+ (point) (string-to-int (match-string 1))))))
448
449(defun nndoc-babyl-type-p ()
450 (when (re-search-forward "\^_\^L *\n" nil t)
451 t))
452
453(defun nndoc-babyl-body-begin ()
454 (re-search-forward "^\n" nil t)
6748645f 455 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
456 (let ((next (or (save-excursion
457 (re-search-forward nndoc-article-begin nil t))
458 (point-max))))
459 (unless (re-search-forward "^\n" next t)
460 (goto-char next)
461 (forward-line -1)
462 (insert "\n")
463 (forward-line -1)))))
464
465(defun nndoc-babyl-head-begin ()
466 (when (re-search-forward "^[0-9].*\n" nil t)
6748645f 467 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
468 (forward-line 1))
469 t))
470
471(defun nndoc-forward-type-p ()
b092f83d 472 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
16409b0b 473 nil t)
23f87bed 474 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
eec82323
LMI
475 t))
476
477(defun nndoc-rfc934-type-p ()
478 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
479 (not (re-search-forward "^Subject:.*digest" nil t))
480 (not (re-search-backward "^From:" nil t 2))
481 (not (re-search-forward "^From:" nil t 2)))
482 t))
483
b092f83d
SZ
484(defun nndoc-mailman-type-p ()
485 (when (re-search-forward "^--__--__--\n+" nil t)
486 t))
487
eec82323
LMI
488(defun nndoc-rfc822-forward-type-p ()
489 (save-restriction
490 (message-narrow-to-head)
491 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
492 t)))
493
494(defun nndoc-rfc822-forward-body-end-function ()
495 (goto-char (point-max)))
496
23f87bed
MB
497(defun nndoc-rfc822-forward-generate-article (article &optional head)
498 (let ((entry (cdr (assq article nndoc-dissection-alist)))
499 (begin (point))
500 encoding)
501 (with-current-buffer nndoc-current-buffer
502 (save-restriction
503 (message-narrow-to-head)
504 (setq encoding (message-fetch-field "content-transfer-encoding"))))
505 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
506 (when encoding
507 (save-restriction
508 (narrow-to-region begin (point-max))
509 (mm-decode-content-transfer-encoding
510 (intern (downcase (mail-header-strip encoding))))))
511 (when head
512 (goto-char begin)
513 (when (search-forward "\n\n" nil t)
514 (delete-region (1- (point)) (point-max)))))
515 t)
516
517(defun nndoc-rfc822-forward-generate-head (article)
518 (nndoc-rfc822-forward-generate-article article 'head))
519
6748645f
LMI
520(defun nndoc-mime-parts-type-p ()
521 (let ((case-fold-search t)
522 (limit (search-forward "\n\n" nil t)))
523 (goto-char (point-min))
524 (when (and limit
16409b0b
GM
525 (re-search-forward
526 (concat "\
527^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
528 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
529 limit t))
6748645f
LMI
530 t)))
531
532(defun nndoc-transform-mime-parts (article)
16409b0b
GM
533 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
534 (headers (nth 5 entry)))
535 (when headers
6748645f 536 (goto-char (point-min))
16409b0b
GM
537 (insert headers))))
538
539(defun nndoc-generate-mime-parts-head (article)
540 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
541 (headers (nth 6 entry)))
542 (save-restriction
543 (narrow-to-region (point) (point))
544 (insert-buffer-substring
545 nndoc-current-buffer (car entry) (nth 1 entry))
546 (goto-char (point-max)))
547 (when headers
548 (insert headers))))
6748645f 549
eec82323
LMI
550(defun nndoc-clari-briefs-type-p ()
551 (when (let ((case-fold-search nil))
552 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
553 t))
554
555(defun nndoc-transform-clari-briefs (article)
556 (goto-char (point-min))
557 (when (looking-at " *\\*\\(.*\\)\n")
558 (replace-match "" t t))
559 (nndoc-generate-clari-briefs-head article))
560
561(defun nndoc-generate-clari-briefs-head (article)
562 (let ((entry (cdr (assq article nndoc-dissection-alist)))
563 subject from)
564 (save-excursion
565 (set-buffer nndoc-current-buffer)
566 (save-restriction
567 (narrow-to-region (car entry) (nth 3 entry))
568 (goto-char (point-min))
569 (when (looking-at " *\\*\\(.*\\)$")
570 (setq subject (match-string 1))
571 (when (string-match "[ \t]+$" subject)
572 (setq subject (substring subject 0 (match-beginning 0)))))
573 (when
574 (let ((case-fold-search nil))
575 (re-search-forward
576 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
577 (setq from (match-string 1)))))
578 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
579 "\nSubject: " (or subject "(no subject)") "\n")))
580
b092f83d
SZ
581(defun nndoc-exim-bounce-type-p ()
582 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
583 t))
584
585(defun nndoc-exim-bounce-body-end-function ()
586 (goto-char (point-max)))
587
16409b0b 588
eec82323
LMI
589(defun nndoc-mime-digest-type-p ()
590 (let ((case-fold-search t)
591 boundary-id b-delimiter entry)
592 (when (and
593 (re-search-forward
594 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
6748645f 595 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
eec82323
LMI
596 nil t)
597 (match-beginning 1))
598 (setq boundary-id (match-string 1)
16409b0b 599 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
eec82323
LMI
600 (setq entry (assq 'mime-digest nndoc-type-alist))
601 (setcdr entry
602 (list
16409b0b 603 (cons 'head-begin "^ ?\n")
eec82323
LMI
604 (cons 'head-end "^ ?$")
605 (cons 'body-begin "^ ?\n")
606 (cons 'article-begin b-delimiter)
607 (cons 'body-end-function 'nndoc-digest-body-end)
b092f83d 608 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
eec82323
LMI
609 t)))
610
611(defun nndoc-standard-digest-type-p ()
612 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
613 (re-search-forward
614 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
615 t))
616
617(defun nndoc-digest-body-end ()
618 (and (re-search-forward nndoc-article-begin nil t)
619 (goto-char (match-beginning 0))))
620
621(defun nndoc-slack-digest-type-p ()
622 0)
623
624(defun nndoc-lanl-gov-announce-type-p ()
625 (when (let ((case-fold-search nil))
b092f83d 626 (re-search-forward "^\\\\\\\\\nPaper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+" nil t))
eec82323
LMI
627 t))
628
629(defun nndoc-transform-lanl-gov-announce (article)
630 (goto-char (point-max))
b092f83d
SZ
631 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
632 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
633 (goto-char (point-min))
634 (while (re-search-forward "^\\\\\\\\$" nil t)
635 (replace-match "" t nil))
636 (goto-char (point-min))
637 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
638 (replace-match "Date: \\1 (revised) " t nil))
639 (goto-char (point-min))
640 (unless (re-search-forward "^From" nil t)
641 (goto-char (point-min))
642 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
643 (goto-char (point-min))
644 (insert "From: " (match-string 1) "\n"))))
eec82323
LMI
645
646(defun nndoc-generate-lanl-gov-head (article)
647 (let ((entry (cdr (assq article nndoc-dissection-alist)))
b092f83d
SZ
648 (from "<no address given>")
649 subject date)
eec82323
LMI
650 (save-excursion
651 (set-buffer nndoc-current-buffer)
652 (save-restriction
b092f83d
SZ
653 (narrow-to-region (car entry) (nth 1 entry))
654 (goto-char (point-min))
655 (when (looking-at "^Paper.*: \\([a-zA-Z-\\.]+/[0-9]+\\)")
656 (setq subject (concat " (" (match-string 1) ")"))
657 (when (re-search-forward "^From: \\(.*\\)" nil t)
658 (setq from (concat "<"
a1506d29 659 (cadr (funcall gnus-extract-address-components
b092f83d
SZ
660 (match-string 1))) ">")))
661 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
662 (setq date (match-string 1))
663 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
664 (setq date (match-string 1))))
665 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
666 nil t)
667 (setq subject (concat (match-string 1) subject))
668 (setq from (concat (match-string 2) " " from))))))
eec82323
LMI
669 (while (and from (string-match "(\[^)\]*)" from))
670 (setq from (replace-match "" t t from)))
671 (insert "From: " (or from "unknown")
b092f83d
SZ
672 "\nSubject: " (or subject "(no subject)") "\n")
673 (if date (insert "Date: " date))))
eec82323 674
16409b0b
GM
675(defun nndoc-nsmail-type-p ()
676 (when (looking-at "From - ")
677 t))
678
679(defun nndoc-outlook-article-begin ()
680 (prog1 (re-search-forward "From:\\|Received:" nil t)
681 (goto-char (match-beginning 0))))
682
683(defun nndoc-outlook-type-p ()
684 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
685 (looking-at "JMF"))
686
b092f83d
SZ
687(defun nndoc-oe-dbx-type-p ()
688 (looking-at (mm-string-as-multibyte "\317\255\022\376")))
689
690(defun nndoc-read-little-endian ()
691 (+ (prog1 (char-after) (forward-char 1))
692 (lsh (prog1 (char-after) (forward-char 1)) 8)
693 (lsh (prog1 (char-after) (forward-char 1)) 16)
694 (lsh (prog1 (char-after) (forward-char 1)) 24)))
695
696(defun nndoc-oe-dbx-decode-block ()
697 (list
698 (nndoc-read-little-endian) ;; this address
699 (nndoc-read-little-endian) ;; next address offset
700 (nndoc-read-little-endian) ;; blocksize
701 (nndoc-read-little-endian))) ;; next address
702
703(defun nndoc-oe-dbx-dissection ()
704 (let ((i 0) blk p tp)
705 (goto-char 60117) ;; 0x0000EAD4+1
706 (setq p (point))
707 (unless (eobp)
708 (setq blk (nndoc-oe-dbx-decode-block)))
709 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
710 (> (nth 3 blk) p)))
711 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
712 (while (and (> (car blk) 0) (> (nth 3 blk) p))
713 (goto-char (1+ (nth 3 blk)))
714 (setq blk (nndoc-oe-dbx-decode-block)))
715 (if (or (<= (car blk) p)
716 (<= (nth 1 blk) 0)
717 (not (zerop (nth 3 blk))))
718 (setq blk nil)
719 (setq tp (+ (car blk) (nth 1 blk) 17))
720 (if (or (<= tp p) (>= tp (point-max)))
721 (setq blk nil)
722 (goto-char tp)
723 (setq p tp
724 blk (nndoc-oe-dbx-decode-block)))))))
725
726(defun nndoc-oe-dbx-generate-article (article &optional head)
727 (let ((entry (cdr (assq article nndoc-dissection-alist)))
728 (cur (current-buffer))
729 (begin (point))
730 blk p)
731 (with-current-buffer nndoc-current-buffer
732 (setq p (car entry))
733 (while (> p (point-min))
734 (goto-char p)
735 (setq blk (nndoc-oe-dbx-decode-block))
736 (setq p (point))
737 (with-current-buffer cur
738 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
739 (setq p (1+ (nth 3 blk)))))
740 (goto-char begin)
741 (while (re-search-forward "\r$" nil t)
742 (delete-backward-char 1))
743 (when head
744 (goto-char begin)
745 (when (search-forward "\n\n" nil t)
746 (setcar (cddddr entry) (count-lines (point) (point-max)))
747 (delete-region (1- (point)) (point-max))))
748 t))
749
750(defun nndoc-oe-dbx-generate-head (article)
751 (nndoc-oe-dbx-generate-article article 'head))
752
753(defun nndoc-mail-in-mail-type-p ()
754 (let (found)
755 (save-excursion
756 (catch 'done
757 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
758 (setq found 0)
759 (forward-line)
760 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
761 (if (looking-at "[-A-Za-z0-9]+:")
762 (setq found (1+ found)))
763 (forward-line))
764 (if (and (> found 0) (looking-at "\n"))
765 (throw 'done 9999)))
766 nil))))
767
768(defun nndoc-mail-in-mail-article-begin ()
769 (let (point found)
770 (if (catch 'done
771 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
772 (setq found 0)
773 (setq point (match-beginning 1))
774 (forward-line)
775 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
776 (if (looking-at "[-A-Za-z0-9]+:")
777 (setq found (1+ found)))
778 (forward-line))
779 (if (and (> found 0) (looking-at "\n"))
780 (throw 'done t)))
781 nil)
782 (goto-char point))))
783
6748645f
LMI
784(deffoo nndoc-request-accept-article (group &optional server last)
785 nil)
786
eec82323
LMI
787;;;
788;;; Functions for dissecting the documents
789;;;
790
791(defun nndoc-search (regexp)
792 (prog1
793 (re-search-forward regexp nil t)
794 (beginning-of-line)))
795
796(defun nndoc-dissect-buffer ()
797 "Go through the document and partition it into heads/bodies/articles."
798 (let ((i 0)
799 (first t)
23f87bed 800 art-begin head-begin head-end body-begin body-end)
eec82323
LMI
801 (setq nndoc-dissection-alist nil)
802 (save-excursion
803 (set-buffer nndoc-current-buffer)
804 (goto-char (point-min))
16409b0b
GM
805 ;; Remove blank lines.
806 (while (eq (following-char) ?\n)
807 (delete-char 1))
b092f83d
SZ
808 (if nndoc-dissection-function
809 (funcall nndoc-dissection-function)
810 ;; Find the beginning of the file.
811 (when nndoc-file-begin
812 (nndoc-search nndoc-file-begin))
813 ;; Go through the file.
814 (while (if (and first nndoc-first-article)
815 (nndoc-search nndoc-first-article)
23f87bed
MB
816 (if art-begin
817 (goto-char art-begin)
818 (nndoc-article-begin)))
819 (setq first nil
820 art-begin nil)
b092f83d
SZ
821 (cond (nndoc-head-begin-function
822 (funcall nndoc-head-begin-function))
823 (nndoc-head-begin
824 (nndoc-search nndoc-head-begin)))
825 (if (or (eobp)
826 (and nndoc-file-end
827 (looking-at nndoc-file-end)))
828 (goto-char (point-max))
829 (setq head-begin (point))
830 (nndoc-search (or nndoc-head-end "^$"))
831 (setq head-end (point))
832 (if nndoc-body-begin-function
833 (funcall nndoc-body-begin-function)
834 (nndoc-search (or nndoc-body-begin "^\n")))
835 (setq body-begin (point))
836 (or (and nndoc-body-end-function
837 (funcall nndoc-body-end-function))
838 (and nndoc-body-end
839 (nndoc-search nndoc-body-end))
23f87bed
MB
840 (and (nndoc-article-begin)
841 (setq art-begin (point)))
b092f83d
SZ
842 (progn
843 (goto-char (point-max))
844 (when nndoc-file-end
845 (and (re-search-backward nndoc-file-end nil t)
846 (beginning-of-line)))))
847 (setq body-end (point))
848 (push (list (incf i) head-begin head-end body-begin body-end
849 (count-lines body-begin body-end))
850 nndoc-dissection-alist)))))))
eec82323
LMI
851
852(defun nndoc-article-begin ()
853 (if nndoc-article-begin-function
854 (funcall nndoc-article-begin-function)
855 (ignore-errors
856 (nndoc-search nndoc-article-begin))))
857
858(defun nndoc-unquote-dashes ()
859 "Unquote quoted non-separators in digests."
860 (while (re-search-forward "^- -"nil t)
861 (replace-match "-" t t)))
862
6748645f
LMI
863;; Against compiler warnings.
864(defvar nndoc-mime-split-ordinal)
865
866(defun nndoc-dissect-mime-parts ()
867 "Go through a MIME composite article and partition it into sub-articles.
868When a MIME entity contains sub-entities, dissection produces one article for
869the header of this entity, and one article per sub-entity."
870 (setq nndoc-dissection-alist nil
871 nndoc-mime-split-ordinal 0)
872 (save-excursion
873 (set-buffer nndoc-current-buffer)
16409b0b
GM
874 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
875
876(defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
877 position parent)
878 "Dissect an entity, within a composite MIME message.
879The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
880ARTICLE-INSERT should be added at beginning for generating a full article.
6748645f
LMI
881The string POSITION holds a dotted decimal representation of the article
882position in the hierarchical structure, it is nil for the outer entity.
16409b0b
GM
883PARENT is the message-ID of the parent summary line, or nil for none."
884 (let ((case-fold-search t)
885 (message-id (nnmail-message-id))
886 head-end body-begin summary-insert message-rfc822 multipart-any
887 subject content-type type subtype boundary-regexp)
888 ;; Gracefully handle a missing body.
889 (goto-char head-begin)
2e36fdce
DL
890 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
891 (search-forward "\n\n" body-end t))
16409b0b
GM
892 (setq head-end (1- (point))
893 body-begin (point))
894 (setq head-end body-end
895 body-begin body-end))
896 (narrow-to-region head-begin head-end)
897 ;; Save MIME attributes.
898 (goto-char head-begin)
899 (setq content-type (message-fetch-field "Content-Type"))
900 (when content-type
901 (when (string-match
902 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
903 (setq type (downcase (match-string 1 content-type))
904 subtype (downcase (match-string 2 content-type))
905 message-rfc822 (and (string= type "message")
906 (string= subtype "rfc822"))
907 multipart-any (string= type "multipart")))
908 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
909 (setq subject (match-string 1 content-type)))
910 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
911 (setq boundary-regexp (concat "^--"
912 (regexp-quote
913 (match-string 1 content-type))
914 "\\(--\\)?[ \t]*\n"))))
915 (unless subject
916 (when (or multipart-any (not article-insert))
917 (setq subject (message-fetch-field "Subject"))))
918 (unless type
919 (setq type "text"
920 subtype "plain"))
921 ;; Prepare the article and summary inserts.
922 (unless article-insert
23f87bed 923 (setq article-insert (buffer-string)
16409b0b 924 head-end head-begin))
b092f83d
SZ
925 ;; Fix MIME-Version
926 (unless (string-match "MIME-Version:" article-insert)
927 (setq article-insert
928 (concat article-insert "MIME-Version: 1.0\n")))
16409b0b
GM
929 (setq summary-insert article-insert)
930 ;; - summary Subject.
931 (setq summary-insert
932 (let ((line (concat "Subject: <" position
933 (and position multipart-any ".")
934 (and multipart-any "*")
935 (and (or position multipart-any) " ")
936 (cond ((string= subtype "plain") type)
937 ((string= subtype "basic") type)
938 (t subtype))
939 ">"
940 (and subject " ")
941 subject
942 "\n")))
943 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
944 (replace-match line t t summary-insert)
945 (concat summary-insert line))))
946 ;; - summary Message-ID.
947 (setq summary-insert
948 (let ((line (concat "Message-ID: " message-id "\n")))
949 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
950 (replace-match line t t summary-insert)
951 (concat summary-insert line))))
952 ;; - summary References.
953 (when parent
954 (setq summary-insert
955 (let ((line (concat "References: " parent "\n")))
956 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
957 summary-insert)
958 (replace-match line t t summary-insert)
959 (concat summary-insert line)))))
960 ;; Generate dissection information for this entity.
961 (push (list (incf nndoc-mime-split-ordinal)
962 head-begin head-end body-begin body-end
963 (count-lines body-begin body-end)
964 article-insert summary-insert)
965 nndoc-dissection-alist)
966 ;; Recurse for all sub-entities, if any.
967 (widen)
968 (cond
969 (message-rfc822
970 (save-excursion
971 (nndoc-dissect-mime-parts-sub body-begin body-end nil
972 position message-id)))
973 ((and multipart-any boundary-regexp)
974 (let ((part-counter 0)
975 part-begin part-end eof-flag)
976 (while (string-match "\
977^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
978 article-insert)
979 (setq article-insert (replace-match "" t t article-insert)))
980 (let ((case-fold-search nil))
981 (goto-char body-begin)
982 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
6748645f 983 (while (not eof-flag)
16409b0b
GM
984 (setq part-begin (point))
985 (cond ((re-search-forward boundary-regexp body-end t)
6748645f
LMI
986 (or (not (match-string 1))
987 (string= (match-string 1) "")
988 (setq eof-flag t))
989 (forward-line -1)
16409b0b 990 (setq part-end (point))
6748645f 991 (forward-line 1))
16409b0b 992 (t (setq part-end body-end
6748645f 993 eof-flag t)))
16409b0b
GM
994 (save-excursion
995 (nndoc-dissect-mime-parts-sub
996 part-begin part-end article-insert
997 (concat position
998 (and position ".")
999 (format "%d" (incf part-counter)))
1000 message-id)))))))))
6748645f 1001
eec82323
LMI
1002;;;###autoload
1003(defun nndoc-add-type (definition &optional position)
1004 "Add document DEFINITION to the list of nndoc document definitions.
1005If POSITION is nil or `last', the definition will be added
1006as the last checked definition, if t or `first', add as the
1007first definition, and if any other symbol, add after that
1008symbol in the alist."
1009 ;; First remove any old instances.
6748645f 1010 (gnus-pull (car definition) nndoc-type-alist)
eec82323
LMI
1011 ;; Then enter the new definition in the proper place.
1012 (cond
1013 ((or (null position) (eq position 'last))
1014 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1015 ((or (eq position t) (eq position 'first))
1016 (push definition nndoc-type-alist))
1017 (t
1018 (let ((list (memq (assq position nndoc-type-alist)
1019 nndoc-type-alist)))
1020 (unless list
1021 (error "No such position: %s" position))
1022 (setcdr list (cons definition (cdr list)))))))
1023
1024(provide 'nndoc)
1025
ab5796a9 1026;;; arch-tag: f5c2970e-0387-47ac-a0b3-6cc317dffabe
eec82323 1027;;; nndoc.el ends here