Add 2012 to FSF copyright years for Emacs files (do not merge to trunk)
[bpt/emacs.git] / lisp / gnus / nndoc.el
CommitLineData
eec82323 1;;; nndoc.el --- single file access for Gnus
e84b4b86
TTN
2
3;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
49f70d46 4;; 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
eec82323 5
6748645f 6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
b092f83d 7;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
eec82323
LMI
8;; Keywords: news
9
10;; This file is part of GNU Emacs.
11
5e809f55 12;; GNU Emacs is free software: you can redistribute it and/or modify
eec82323 13;; it under the terms of the GNU General Public License as published by
5e809f55
GM
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
eec82323
LMI
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
5e809f55 23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
eec82323
LMI
24
25;;; Commentary:
26
b092f83d
SZ
27;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
28
eec82323
LMI
29;;; Code:
30
31(require 'nnheader)
32(require 'message)
33(require 'nnmail)
34(require 'nnoo)
6748645f 35(require 'gnus-util)
16409b0b 36(require 'mm-util)
eec82323
LMI
37(eval-when-compile (require 'cl))
38
39(nnoo-declare nndoc)
40
41(defvoo nndoc-article-type 'guess
42 "*Type of the file.
43One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
16409b0b 44`rfc934', `rfc822-forward', `mime-parts', `standard-digest',
b092f83d
SZ
45`slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
46`mailman', `exim-bounce', or `guess'.")
eec82323
LMI
47
48(defvoo nndoc-post-type 'mail
49 "*Whether the nndoc group is `mail' or `post'.")
50
6748645f
LMI
51(defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
52 "Hook run after opening a document.
53The default function removes all trailing carriage returns
16409b0b 54from the document.")
6748645f 55
eec82323
LMI
56(defvar nndoc-type-alist
57 `((mmdf
58 (article-begin . "^\^A\^A\^A\^A\n")
59 (body-end . "^\^A\^A\^A\^A\n"))
23f87bed
MB
60 (mime-digest
61 (article-begin . "")
62 (head-begin . "^ ?\n")
63 (head-end . "^ ?$")
64 (body-end . "")
65 (file-end . "")
66 (subtype digest guess))
67 (mime-parts
68 (generate-head-function . nndoc-generate-mime-parts-head)
69 (article-transform-function . nndoc-transform-mime-parts))
16409b0b
GM
70 (nsmail
71 (article-begin . "^From - "))
eec82323
LMI
72 (news
73 (article-begin . "^Path:"))
74 (rnews
75 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
76 (body-end-function . nndoc-rnews-body-end))
77 (mbox
78 (article-begin-function . nndoc-mbox-article-begin)
79 (body-end-function . nndoc-mbox-body-end))
80 (babyl
81 (article-begin . "\^_\^L *\n")
82 (body-end . "\^_")
83 (body-begin-function . nndoc-babyl-body-begin)
84 (head-begin-function . nndoc-babyl-head-begin))
23f87bed
MB
85 (exim-bounce
86 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
87 (body-end-function . nndoc-exim-bounce-body-end-function))
eec82323
LMI
88 (rfc934
89 (article-begin . "^--.*\n+")
90 (body-end . "^--.*$")
91 (prepare-body-function . nndoc-unquote-dashes))
b092f83d
SZ
92 (mailman
93 (article-begin . "^--__--__--\n\nMessage:")
94 (body-end . "^--__--__--$")
95 (prepare-body-function . nndoc-unquote-dashes))
eec82323
LMI
96 (clari-briefs
97 (article-begin . "^ \\*")
98 (body-end . "^\t------*[ \t]^*\n^ \\*")
99 (body-begin . "^\t")
100 (head-end . "^\t")
101 (generate-head-function . nndoc-generate-clari-briefs-head)
102 (article-transform-function . nndoc-transform-clari-briefs))
23f87bed 103
eec82323 104 (standard-digest
6748645f
LMI
105 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
106 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
eec82323
LMI
107 (prepare-body-function . nndoc-unquote-dashes)
108 (body-end-function . nndoc-digest-body-end)
6748645f
LMI
109 (head-end . "^ *$")
110 (body-begin . "^ *\n")
eec82323
LMI
111 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
112 (subtype digest guess))
113 (slack-digest
114 (article-begin . "^------------------------------*[\n \t]+")
115 (head-end . "^ ?$")
116 (body-end-function . nndoc-digest-body-end)
117 (body-begin . "^ ?$")
118 (file-end . "^End of")
119 (prepare-body-function . nndoc-unquote-dashes)
120 (subtype digest guess))
121 (lanl-gov-announce
122 (article-begin . "^\\\\\\\\\n")
01c52d31 123 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
eec82323
LMI
124 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
125 (body-begin . "")
b092f83d
SZ
126 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
127 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
eec82323
LMI
128 (generate-head-function . nndoc-generate-lanl-gov-head)
129 (article-transform-function . nndoc-transform-lanl-gov-announce)
130 (subtype preprints guess))
131 (rfc822-forward
23f87bed
MB
132 (article-begin . "^\n+")
133 (body-end-function . nndoc-rfc822-forward-body-end-function)
134 (generate-head-function . nndoc-rfc822-forward-generate-head)
135 (generate-article-function . nndoc-rfc822-forward-generate-article))
16409b0b
GM
136 (outlook
137 (article-begin-function . nndoc-outlook-article-begin)
138 (body-end . "\0"))
b092f83d
SZ
139 (oe-dbx ;; Outlook Express DBX format
140 (dissection-function . nndoc-oe-dbx-dissection)
141 (generate-head-function . nndoc-oe-dbx-generate-head)
142 (generate-article-function . nndoc-oe-dbx-generate-article))
143 (forward
144 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
145 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
146 (prepare-body-function . nndoc-unquote-dashes))
147 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
148 (article-begin-function . nndoc-mail-in-mail-article-begin))
eec82323
LMI
149 (guess
150 (guess . t)
151 (subtype nil))
152 (digest
153 (guess . t)
154 (subtype nil))
155 (preprints
156 (guess . t)
157 (subtype nil))))
158
b092f83d
SZ
159(defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
160 "Regexp for binary nndoc file names.")
161
eec82323 162\f
eec82323
LMI
163(defvoo nndoc-file-begin nil)
164(defvoo nndoc-first-article nil)
eec82323
LMI
165(defvoo nndoc-article-begin nil)
166(defvoo nndoc-head-begin nil)
167(defvoo nndoc-head-end nil)
168(defvoo nndoc-file-end nil)
169(defvoo nndoc-body-begin nil)
170(defvoo nndoc-body-end-function nil)
171(defvoo nndoc-body-begin-function nil)
172(defvoo nndoc-head-begin-function nil)
173(defvoo nndoc-body-end nil)
6748645f 174;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
16409b0b
GM
175;; following items. ARTICLE acts as the association key and is an ordinal
176;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
177;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
178;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
179;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
180;; generation, respectively. Other headers usually follow directly from the
181;; buffer. Value `nil' means no insert.
eec82323
LMI
182(defvoo nndoc-dissection-alist nil)
183(defvoo nndoc-prepare-body-function nil)
184(defvoo nndoc-generate-head-function nil)
185(defvoo nndoc-article-transform-function nil)
186(defvoo nndoc-article-begin-function nil)
b092f83d
SZ
187(defvoo nndoc-generate-article-function nil)
188(defvoo nndoc-dissection-function nil)
eec82323
LMI
189
190(defvoo nndoc-status-string "")
191(defvoo nndoc-group-alist nil)
192(defvoo nndoc-current-buffer nil
193 "Current nndoc news buffer.")
194(defvoo nndoc-address nil)
195
196(defconst nndoc-version "nndoc 1.0"
197 "nndoc version.")
198
199\f
200
201;;; Interface functions
202
203(nnoo-define-basics nndoc)
204
205(deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
206 (when (nndoc-possibly-change-buffer newsgroup server)
207 (save-excursion
208 (set-buffer nntp-server-buffer)
209 (erase-buffer)
210 (let (article entry)
211 (if (stringp (car articles))
212 'headers
213 (while articles
214 (when (setq entry (cdr (assq (setq article (pop articles))
215 nndoc-dissection-alist)))
216 (insert (format "221 %d Article retrieved.\n" article))
217 (if nndoc-generate-head-function
218 (funcall nndoc-generate-head-function article)
219 (insert-buffer-substring
220 nndoc-current-buffer (car entry) (nth 1 entry)))
221 (goto-char (point-max))
16409b0b 222 (unless (eq (char-after (1- (point))) ?\n)
eec82323
LMI
223 (insert "\n"))
224 (insert (format "Lines: %d\n" (nth 4 entry)))
225 (insert ".\n")))
226
227 (nnheader-fold-continuation-lines)
228 'headers)))))
229
230(deffoo nndoc-request-article (article &optional newsgroup server buffer)
231 (nndoc-possibly-change-buffer newsgroup server)
232 (save-excursion
233 (let ((buffer (or buffer nntp-server-buffer))
234 (entry (cdr (assq article nndoc-dissection-alist)))
235 beg)
236 (set-buffer buffer)
237 (erase-buffer)
238 (when entry
b092f83d
SZ
239 (cond
240 ((stringp article) nil)
241 (nndoc-generate-article-function
242 (funcall nndoc-generate-article-function article))
243 (t
eec82323
LMI
244 (insert-buffer-substring
245 nndoc-current-buffer (car entry) (nth 1 entry))
246 (insert "\n")
247 (setq beg (point))
248 (insert-buffer-substring
249 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
250 (goto-char beg)
251 (when nndoc-prepare-body-function
252 (funcall nndoc-prepare-body-function))
253 (when nndoc-article-transform-function
254 (funcall nndoc-article-transform-function article))
b092f83d 255 t))))))
eec82323
LMI
256
257(deffoo nndoc-request-group (group &optional server dont-check)
258 "Select news GROUP."
259 (let (number)
260 (cond
261 ((not (nndoc-possibly-change-buffer group server))
262 (nnheader-report 'nndoc "No such file or buffer: %s"
263 nndoc-address))
264 (dont-check
265 (nnheader-report 'nndoc "Selected group %s" group)
266 t)
267 ((zerop (setq number (length nndoc-dissection-alist)))
268 (nndoc-close-group group)
269 (nnheader-report 'nndoc "No articles in group %s" group))
270 (t
271 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
272
273(deffoo nndoc-request-type (group &optional article)
274 (cond ((not article) 'unknown)
b092f83d
SZ
275 (nndoc-post-type nndoc-post-type)
276 (t 'unknown)))
eec82323
LMI
277
278(deffoo nndoc-close-group (group &optional server)
279 (nndoc-possibly-change-buffer group server)
280 (and nndoc-current-buffer
281 (buffer-name nndoc-current-buffer)
282 (kill-buffer nndoc-current-buffer))
283 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
284 nndoc-group-alist))
285 (setq nndoc-current-buffer nil)
286 (nnoo-close-server 'nndoc server)
287 (setq nndoc-dissection-alist nil)
288 t)
289
290(deffoo nndoc-request-list (&optional server)
291 nil)
292
293(deffoo nndoc-request-newgroups (date &optional server)
294 nil)
295
296(deffoo nndoc-request-list-newsgroups (&optional server)
297 nil)
298
299\f
300;;; Internal functions.
301
302(defun nndoc-possibly-change-buffer (group source)
303 (let (buf)
304 (cond
305 ;; The current buffer is this group's buffer.
306 ((and nndoc-current-buffer
307 (buffer-name nndoc-current-buffer)
308 (eq nndoc-current-buffer
309 (setq buf (cdr (assoc group nndoc-group-alist))))))
310 ;; We change buffers by taking an old from the group alist.
311 ;; `source' is either a string (a file name) or a buffer object.
312 (buf
313 (setq nndoc-current-buffer buf))
314 ;; It's a totally new group.
315 ((or (and (bufferp nndoc-address)
316 (buffer-name nndoc-address))
317 (and (stringp nndoc-address)
318 (file-exists-p nndoc-address)
319 (not (file-directory-p nndoc-address))))
320 (push (cons group (setq nndoc-current-buffer
321 (get-buffer-create
322 (concat " *nndoc " group "*"))))
323 nndoc-group-alist)
324 (setq nndoc-dissection-alist nil)
325 (save-excursion
326 (set-buffer nndoc-current-buffer)
eec82323 327 (erase-buffer)
b092f83d
SZ
328 (if (and (stringp nndoc-address)
329 (string-match nndoc-binary-file-names nndoc-address))
330 (let ((coding-system-for-read 'binary))
331 (mm-insert-file-contents nndoc-address))
332 (if (stringp nndoc-address)
333 (nnheader-insert-file-contents nndoc-address)
334 (insert-buffer-substring nndoc-address))
335 (run-hooks 'nndoc-open-document-hook)))))
eec82323
LMI
336 ;; Initialize the nndoc structures according to this new document.
337 (when (and nndoc-current-buffer
338 (not nndoc-dissection-alist))
339 (save-excursion
340 (set-buffer nndoc-current-buffer)
341 (nndoc-set-delims)
6748645f
LMI
342 (if (eq nndoc-article-type 'mime-parts)
343 (nndoc-dissect-mime-parts)
344 (nndoc-dissect-buffer))))
eec82323
LMI
345 (unless nndoc-current-buffer
346 (nndoc-close-server))
347 ;; Return whether we managed to select a file.
348 nndoc-current-buffer))
349
350;;;
351;;; Deciding what document type we have
352;;;
353
354(defun nndoc-set-delims ()
355 "Set the nndoc delimiter variables according to the type of the document."
356 (let ((vars '(nndoc-file-begin
357 nndoc-first-article
6748645f
LMI
358 nndoc-article-begin-function
359 nndoc-head-begin nndoc-head-end
eec82323
LMI
360 nndoc-file-end nndoc-article-begin
361 nndoc-body-begin nndoc-body-end-function nndoc-body-end
362 nndoc-prepare-body-function nndoc-article-transform-function
363 nndoc-generate-head-function nndoc-body-begin-function
b092f83d
SZ
364 nndoc-head-begin-function
365 nndoc-generate-article-function
366 nndoc-dissection-function)))
eec82323
LMI
367 (while vars
368 (set (pop vars) nil)))
369 (let (defs)
370 ;; Guess away until we find the real file type.
371 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
372 nndoc-type-alist))))
373 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
374 ;; Set the nndoc variables.
375 (while defs
376 (set (intern (format "nndoc-%s" (caar defs)))
377 (cdr (pop defs))))))
378
379(defun nndoc-guess-type (subtype)
380 (let ((alist nndoc-type-alist)
381 results result entry)
382 (while (and (not result)
383 (setq entry (pop alist)))
384 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
385 (goto-char (point-min))
16409b0b
GM
386 ;; Remove blank lines.
387 (while (eq (following-char) ?\n)
388 (delete-char 1))
eec82323
LMI
389 (when (numberp (setq result (funcall (intern
390 (format "nndoc-%s-type-p"
391 (car entry))))))
392 (push (cons result entry) results)
393 (setq result nil))))
394 (unless (or result results)
395 (error "Document is not of any recognized type"))
396 (if result
397 (car entry)
23f87bed 398 (cadar (last (sort results 'car-less-than-car))))))
eec82323
LMI
399
400;;;
401;;; Built-in type predicates and functions
402;;;
403
404(defun nndoc-mbox-type-p ()
405 (when (looking-at message-unix-mail-delimiter)
406 t))
407
408(defun nndoc-mbox-article-begin ()
409 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
410 (goto-char (match-beginning 0))))
411
412(defun nndoc-mbox-body-end ()
413 (let ((beg (point))
414 len end)
415 (when
416 (save-excursion
417 (and (re-search-backward
418 (concat "^" message-unix-mail-delimiter) nil t)
419 (setq end (point))
420 (search-forward "\n\n" beg t)
421 (re-search-backward
422 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
e9bd5782 423 (setq len (string-to-number (match-string 1)))
eec82323
LMI
424 (search-forward "\n\n" beg t)
425 (unless (= (setq len (+ (point) len)) (point-max))
426 (and (< len (point-max))
427 (goto-char len)
428 (looking-at message-unix-mail-delimiter)))))
429 (goto-char len))))
430
431(defun nndoc-mmdf-type-p ()
432 (when (looking-at "\^A\^A\^A\^A$")
433 t))
434
435(defun nndoc-news-type-p ()
436 (when (looking-at "^Path:.*\n")
437 t))
438
439(defun nndoc-rnews-type-p ()
440 (when (looking-at "#! *rnews")
441 t))
442
443(defun nndoc-rnews-body-end ()
444 (and (re-search-backward nndoc-article-begin nil t)
445 (forward-line 1)
e9bd5782 446 (goto-char (+ (point) (string-to-number (match-string 1))))))
eec82323
LMI
447
448(defun nndoc-babyl-type-p ()
449 (when (re-search-forward "\^_\^L *\n" nil t)
450 t))
451
452(defun nndoc-babyl-body-begin ()
453 (re-search-forward "^\n" nil t)
6748645f 454 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
455 (let ((next (or (save-excursion
456 (re-search-forward nndoc-article-begin nil t))
457 (point-max))))
458 (unless (re-search-forward "^\n" next t)
459 (goto-char next)
460 (forward-line -1)
461 (insert "\n")
462 (forward-line -1)))))
463
464(defun nndoc-babyl-head-begin ()
465 (when (re-search-forward "^[0-9].*\n" nil t)
6748645f 466 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
467 (forward-line 1))
468 t))
469
470(defun nndoc-forward-type-p ()
b092f83d 471 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
16409b0b 472 nil t)
23f87bed 473 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
eec82323
LMI
474 t))
475
476(defun nndoc-rfc934-type-p ()
477 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
478 (not (re-search-forward "^Subject:.*digest" nil t))
479 (not (re-search-backward "^From:" nil t 2))
480 (not (re-search-forward "^From:" nil t 2)))
481 t))
482
b092f83d
SZ
483(defun nndoc-mailman-type-p ()
484 (when (re-search-forward "^--__--__--\n+" nil t)
485 t))
486
eec82323
LMI
487(defun nndoc-rfc822-forward-type-p ()
488 (save-restriction
489 (message-narrow-to-head)
490 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
491 t)))
492
493(defun nndoc-rfc822-forward-body-end-function ()
494 (goto-char (point-max)))
495
23f87bed
MB
496(defun nndoc-rfc822-forward-generate-article (article &optional head)
497 (let ((entry (cdr (assq article nndoc-dissection-alist)))
498 (begin (point))
499 encoding)
500 (with-current-buffer nndoc-current-buffer
501 (save-restriction
502 (message-narrow-to-head)
503 (setq encoding (message-fetch-field "content-transfer-encoding"))))
504 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
505 (when encoding
506 (save-restriction
507 (narrow-to-region begin (point-max))
508 (mm-decode-content-transfer-encoding
509 (intern (downcase (mail-header-strip encoding))))))
510 (when head
511 (goto-char begin)
512 (when (search-forward "\n\n" nil t)
513 (delete-region (1- (point)) (point-max)))))
514 t)
515
516(defun nndoc-rfc822-forward-generate-head (article)
517 (nndoc-rfc822-forward-generate-article article 'head))
518
6748645f
LMI
519(defun nndoc-mime-parts-type-p ()
520 (let ((case-fold-search t)
521 (limit (search-forward "\n\n" nil t)))
522 (goto-char (point-min))
523 (when (and limit
16409b0b
GM
524 (re-search-forward
525 (concat "\
526^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
527 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
528 limit t))
6748645f
LMI
529 t)))
530
531(defun nndoc-transform-mime-parts (article)
16409b0b
GM
532 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
533 (headers (nth 5 entry)))
534 (when headers
6748645f 535 (goto-char (point-min))
16409b0b
GM
536 (insert headers))))
537
538(defun nndoc-generate-mime-parts-head (article)
539 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
540 (headers (nth 6 entry)))
541 (save-restriction
542 (narrow-to-region (point) (point))
543 (insert-buffer-substring
544 nndoc-current-buffer (car entry) (nth 1 entry))
545 (goto-char (point-max)))
546 (when headers
547 (insert headers))))
6748645f 548
eec82323
LMI
549(defun nndoc-clari-briefs-type-p ()
550 (when (let ((case-fold-search nil))
551 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
552 t))
553
554(defun nndoc-transform-clari-briefs (article)
555 (goto-char (point-min))
556 (when (looking-at " *\\*\\(.*\\)\n")
557 (replace-match "" t t))
558 (nndoc-generate-clari-briefs-head article))
559
560(defun nndoc-generate-clari-briefs-head (article)
561 (let ((entry (cdr (assq article nndoc-dissection-alist)))
562 subject from)
563 (save-excursion
564 (set-buffer nndoc-current-buffer)
565 (save-restriction
566 (narrow-to-region (car entry) (nth 3 entry))
567 (goto-char (point-min))
568 (when (looking-at " *\\*\\(.*\\)$")
569 (setq subject (match-string 1))
570 (when (string-match "[ \t]+$" subject)
571 (setq subject (substring subject 0 (match-beginning 0)))))
572 (when
573 (let ((case-fold-search nil))
574 (re-search-forward
575 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
576 (setq from (match-string 1)))))
577 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
578 "\nSubject: " (or subject "(no subject)") "\n")))
579
b092f83d
SZ
580(defun nndoc-exim-bounce-type-p ()
581 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
582 t))
583
584(defun nndoc-exim-bounce-body-end-function ()
585 (goto-char (point-max)))
586
16409b0b 587
eec82323
LMI
588(defun nndoc-mime-digest-type-p ()
589 (let ((case-fold-search t)
590 boundary-id b-delimiter entry)
591 (when (and
592 (re-search-forward
593 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
6748645f 594 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
eec82323
LMI
595 nil t)
596 (match-beginning 1))
597 (setq boundary-id (match-string 1)
16409b0b 598 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
eec82323
LMI
599 (setq entry (assq 'mime-digest nndoc-type-alist))
600 (setcdr entry
601 (list
16409b0b 602 (cons 'head-begin "^ ?\n")
eec82323
LMI
603 (cons 'head-end "^ ?$")
604 (cons 'body-begin "^ ?\n")
605 (cons 'article-begin b-delimiter)
606 (cons 'body-end-function 'nndoc-digest-body-end)
b092f83d 607 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
eec82323
LMI
608 t)))
609
610(defun nndoc-standard-digest-type-p ()
611 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
612 (re-search-forward
613 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
614 t))
615
616(defun nndoc-digest-body-end ()
617 (and (re-search-forward nndoc-article-begin nil t)
618 (goto-char (match-beginning 0))))
619
620(defun nndoc-slack-digest-type-p ()
621 0)
622
623(defun nndoc-lanl-gov-announce-type-p ()
624 (when (let ((case-fold-search nil))
01c52d31 625 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
eec82323
LMI
626 t))
627
628(defun nndoc-transform-lanl-gov-announce (article)
01c52d31
MB
629 (let ((case-fold-search nil))
630 (goto-char (point-max))
631 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
632 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
b092f83d 633 (goto-char (point-min))
01c52d31
MB
634 (while (re-search-forward "^\\\\\\\\$" nil t)
635 (replace-match "" t nil))
636 (goto-char (point-min))
637 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
638 (replace-match "Date: \\1 (revised) " t nil))
639 (goto-char (point-min))
640 (unless (re-search-forward "^From" nil t)
b092f83d 641 (goto-char (point-min))
01c52d31
MB
642 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
643 (goto-char (point-min))
644 (insert "From: " (match-string 1) "\n")))
645 (when (re-search-forward "^arXiv:" nil t)
646 (replace-match "Paper: arXiv:" t nil))))
eec82323
LMI
647
648(defun nndoc-generate-lanl-gov-head (article)
649 (let ((entry (cdr (assq article nndoc-dissection-alist)))
b092f83d
SZ
650 (from "<no address given>")
651 subject date)
eec82323
LMI
652 (save-excursion
653 (set-buffer nndoc-current-buffer)
654 (save-restriction
b092f83d
SZ
655 (narrow-to-region (car entry) (nth 1 entry))
656 (goto-char (point-min))
01c52d31
MB
657 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
658 (setq subject (concat " (" (match-string 2) ")"))
b092f83d
SZ
659 (when (re-search-forward "^From: \\(.*\\)" nil t)
660 (setq from (concat "<"
a1506d29 661 (cadr (funcall gnus-extract-address-components
b092f83d
SZ
662 (match-string 1))) ">")))
663 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
664 (setq date (match-string 1))
665 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
666 (setq date (match-string 1))))
667 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
668 nil t)
669 (setq subject (concat (match-string 1) subject))
670 (setq from (concat (match-string 2) " " from))))))
eec82323
LMI
671 (while (and from (string-match "(\[^)\]*)" from))
672 (setq from (replace-match "" t t from)))
673 (insert "From: " (or from "unknown")
b092f83d
SZ
674 "\nSubject: " (or subject "(no subject)") "\n")
675 (if date (insert "Date: " date))))
eec82323 676
16409b0b
GM
677(defun nndoc-nsmail-type-p ()
678 (when (looking-at "From - ")
679 t))
680
681(defun nndoc-outlook-article-begin ()
682 (prog1 (re-search-forward "From:\\|Received:" nil t)
683 (goto-char (match-beginning 0))))
684
685(defun nndoc-outlook-type-p ()
686 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
687 (looking-at "JMF"))
688
b092f83d 689(defun nndoc-oe-dbx-type-p ()
62dc3ac3 690 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
b092f83d
SZ
691
692(defun nndoc-read-little-endian ()
693 (+ (prog1 (char-after) (forward-char 1))
694 (lsh (prog1 (char-after) (forward-char 1)) 8)
695 (lsh (prog1 (char-after) (forward-char 1)) 16)
696 (lsh (prog1 (char-after) (forward-char 1)) 24)))
697
698(defun nndoc-oe-dbx-decode-block ()
699 (list
700 (nndoc-read-little-endian) ;; this address
701 (nndoc-read-little-endian) ;; next address offset
702 (nndoc-read-little-endian) ;; blocksize
703 (nndoc-read-little-endian))) ;; next address
704
705(defun nndoc-oe-dbx-dissection ()
706 (let ((i 0) blk p tp)
707 (goto-char 60117) ;; 0x0000EAD4+1
708 (setq p (point))
709 (unless (eobp)
710 (setq blk (nndoc-oe-dbx-decode-block)))
711 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
712 (> (nth 3 blk) p)))
713 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
714 (while (and (> (car blk) 0) (> (nth 3 blk) p))
715 (goto-char (1+ (nth 3 blk)))
716 (setq blk (nndoc-oe-dbx-decode-block)))
717 (if (or (<= (car blk) p)
718 (<= (nth 1 blk) 0)
719 (not (zerop (nth 3 blk))))
720 (setq blk nil)
721 (setq tp (+ (car blk) (nth 1 blk) 17))
722 (if (or (<= tp p) (>= tp (point-max)))
723 (setq blk nil)
724 (goto-char tp)
725 (setq p tp
726 blk (nndoc-oe-dbx-decode-block)))))))
727
728(defun nndoc-oe-dbx-generate-article (article &optional head)
729 (let ((entry (cdr (assq article nndoc-dissection-alist)))
730 (cur (current-buffer))
731 (begin (point))
732 blk p)
733 (with-current-buffer nndoc-current-buffer
734 (setq p (car entry))
735 (while (> p (point-min))
736 (goto-char p)
737 (setq blk (nndoc-oe-dbx-decode-block))
738 (setq p (point))
739 (with-current-buffer cur
740 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
741 (setq p (1+ (nth 3 blk)))))
742 (goto-char begin)
743 (while (re-search-forward "\r$" nil t)
744 (delete-backward-char 1))
745 (when head
746 (goto-char begin)
747 (when (search-forward "\n\n" nil t)
748 (setcar (cddddr entry) (count-lines (point) (point-max)))
749 (delete-region (1- (point)) (point-max))))
750 t))
751
752(defun nndoc-oe-dbx-generate-head (article)
753 (nndoc-oe-dbx-generate-article article 'head))
754
755(defun nndoc-mail-in-mail-type-p ()
756 (let (found)
757 (save-excursion
758 (catch 'done
759 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
760 (setq found 0)
761 (forward-line)
762 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
763 (if (looking-at "[-A-Za-z0-9]+:")
764 (setq found (1+ found)))
765 (forward-line))
766 (if (and (> found 0) (looking-at "\n"))
767 (throw 'done 9999)))
768 nil))))
769
770(defun nndoc-mail-in-mail-article-begin ()
771 (let (point found)
772 (if (catch 'done
773 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
774 (setq found 0)
775 (setq point (match-beginning 1))
776 (forward-line)
777 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
778 (if (looking-at "[-A-Za-z0-9]+:")
779 (setq found (1+ found)))
780 (forward-line))
781 (if (and (> found 0) (looking-at "\n"))
782 (throw 'done t)))
783 nil)
784 (goto-char point))))
785
6748645f
LMI
786(deffoo nndoc-request-accept-article (group &optional server last)
787 nil)
788
eec82323
LMI
789;;;
790;;; Functions for dissecting the documents
791;;;
792
793(defun nndoc-search (regexp)
794 (prog1
795 (re-search-forward regexp nil t)
796 (beginning-of-line)))
797
798(defun nndoc-dissect-buffer ()
799 "Go through the document and partition it into heads/bodies/articles."
800 (let ((i 0)
801 (first t)
23f87bed 802 art-begin head-begin head-end body-begin body-end)
eec82323
LMI
803 (setq nndoc-dissection-alist nil)
804 (save-excursion
805 (set-buffer nndoc-current-buffer)
806 (goto-char (point-min))
16409b0b
GM
807 ;; Remove blank lines.
808 (while (eq (following-char) ?\n)
809 (delete-char 1))
b092f83d
SZ
810 (if nndoc-dissection-function
811 (funcall nndoc-dissection-function)
812 ;; Find the beginning of the file.
813 (when nndoc-file-begin
814 (nndoc-search nndoc-file-begin))
815 ;; Go through the file.
816 (while (if (and first nndoc-first-article)
817 (nndoc-search nndoc-first-article)
23f87bed
MB
818 (if art-begin
819 (goto-char art-begin)
820 (nndoc-article-begin)))
821 (setq first nil
822 art-begin nil)
b092f83d
SZ
823 (cond (nndoc-head-begin-function
824 (funcall nndoc-head-begin-function))
825 (nndoc-head-begin
826 (nndoc-search nndoc-head-begin)))
827 (if (or (eobp)
828 (and nndoc-file-end
829 (looking-at nndoc-file-end)))
830 (goto-char (point-max))
831 (setq head-begin (point))
832 (nndoc-search (or nndoc-head-end "^$"))
833 (setq head-end (point))
834 (if nndoc-body-begin-function
835 (funcall nndoc-body-begin-function)
836 (nndoc-search (or nndoc-body-begin "^\n")))
837 (setq body-begin (point))
838 (or (and nndoc-body-end-function
839 (funcall nndoc-body-end-function))
840 (and nndoc-body-end
841 (nndoc-search nndoc-body-end))
23f87bed
MB
842 (and (nndoc-article-begin)
843 (setq art-begin (point)))
b092f83d
SZ
844 (progn
845 (goto-char (point-max))
846 (when nndoc-file-end
847 (and (re-search-backward nndoc-file-end nil t)
848 (beginning-of-line)))))
849 (setq body-end (point))
850 (push (list (incf i) head-begin head-end body-begin body-end
851 (count-lines body-begin body-end))
852 nndoc-dissection-alist)))))))
eec82323
LMI
853
854(defun nndoc-article-begin ()
855 (if nndoc-article-begin-function
856 (funcall nndoc-article-begin-function)
857 (ignore-errors
858 (nndoc-search nndoc-article-begin))))
859
860(defun nndoc-unquote-dashes ()
861 "Unquote quoted non-separators in digests."
862 (while (re-search-forward "^- -"nil t)
863 (replace-match "-" t t)))
864
6748645f
LMI
865;; Against compiler warnings.
866(defvar nndoc-mime-split-ordinal)
867
868(defun nndoc-dissect-mime-parts ()
869 "Go through a MIME composite article and partition it into sub-articles.
870When a MIME entity contains sub-entities, dissection produces one article for
871the header of this entity, and one article per sub-entity."
872 (setq nndoc-dissection-alist nil
873 nndoc-mime-split-ordinal 0)
874 (save-excursion
875 (set-buffer nndoc-current-buffer)
16409b0b
GM
876 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
877
878(defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
879 position parent)
880 "Dissect an entity, within a composite MIME message.
881The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
882ARTICLE-INSERT should be added at beginning for generating a full article.
6748645f
LMI
883The string POSITION holds a dotted decimal representation of the article
884position in the hierarchical structure, it is nil for the outer entity.
16409b0b
GM
885PARENT is the message-ID of the parent summary line, or nil for none."
886 (let ((case-fold-search t)
887 (message-id (nnmail-message-id))
888 head-end body-begin summary-insert message-rfc822 multipart-any
889 subject content-type type subtype boundary-regexp)
890 ;; Gracefully handle a missing body.
891 (goto-char head-begin)
2e36fdce
DL
892 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
893 (search-forward "\n\n" body-end t))
16409b0b
GM
894 (setq head-end (1- (point))
895 body-begin (point))
896 (setq head-end body-end
897 body-begin body-end))
898 (narrow-to-region head-begin head-end)
899 ;; Save MIME attributes.
900 (goto-char head-begin)
901 (setq content-type (message-fetch-field "Content-Type"))
902 (when content-type
903 (when (string-match
904 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
905 (setq type (downcase (match-string 1 content-type))
906 subtype (downcase (match-string 2 content-type))
907 message-rfc822 (and (string= type "message")
908 (string= subtype "rfc822"))
909 multipart-any (string= type "multipart")))
910 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
911 (setq subject (match-string 1 content-type)))
912 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
913 (setq boundary-regexp (concat "^--"
914 (regexp-quote
915 (match-string 1 content-type))
916 "\\(--\\)?[ \t]*\n"))))
917 (unless subject
918 (when (or multipart-any (not article-insert))
919 (setq subject (message-fetch-field "Subject"))))
920 (unless type
921 (setq type "text"
922 subtype "plain"))
923 ;; Prepare the article and summary inserts.
924 (unless article-insert
23f87bed 925 (setq article-insert (buffer-string)
16409b0b 926 head-end head-begin))
b092f83d
SZ
927 ;; Fix MIME-Version
928 (unless (string-match "MIME-Version:" article-insert)
929 (setq article-insert
930 (concat article-insert "MIME-Version: 1.0\n")))
16409b0b
GM
931 (setq summary-insert article-insert)
932 ;; - summary Subject.
933 (setq summary-insert
934 (let ((line (concat "Subject: <" position
935 (and position multipart-any ".")
936 (and multipart-any "*")
937 (and (or position multipart-any) " ")
938 (cond ((string= subtype "plain") type)
939 ((string= subtype "basic") type)
940 (t subtype))
941 ">"
942 (and subject " ")
943 subject
944 "\n")))
945 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
946 (replace-match line t t summary-insert)
947 (concat summary-insert line))))
948 ;; - summary Message-ID.
949 (setq summary-insert
950 (let ((line (concat "Message-ID: " message-id "\n")))
951 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
952 (replace-match line t t summary-insert)
953 (concat summary-insert line))))
954 ;; - summary References.
955 (when parent
956 (setq summary-insert
957 (let ((line (concat "References: " parent "\n")))
958 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
959 summary-insert)
960 (replace-match line t t summary-insert)
961 (concat summary-insert line)))))
962 ;; Generate dissection information for this entity.
963 (push (list (incf nndoc-mime-split-ordinal)
964 head-begin head-end body-begin body-end
965 (count-lines body-begin body-end)
966 article-insert summary-insert)
967 nndoc-dissection-alist)
968 ;; Recurse for all sub-entities, if any.
969 (widen)
970 (cond
971 (message-rfc822
972 (save-excursion
973 (nndoc-dissect-mime-parts-sub body-begin body-end nil
974 position message-id)))
975 ((and multipart-any boundary-regexp)
976 (let ((part-counter 0)
977 part-begin part-end eof-flag)
978 (while (string-match "\
979^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
980 article-insert)
981 (setq article-insert (replace-match "" t t article-insert)))
982 (let ((case-fold-search nil))
983 (goto-char body-begin)
984 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
6748645f 985 (while (not eof-flag)
16409b0b
GM
986 (setq part-begin (point))
987 (cond ((re-search-forward boundary-regexp body-end t)
6748645f
LMI
988 (or (not (match-string 1))
989 (string= (match-string 1) "")
990 (setq eof-flag t))
991 (forward-line -1)
16409b0b 992 (setq part-end (point))
6748645f 993 (forward-line 1))
16409b0b 994 (t (setq part-end body-end
6748645f 995 eof-flag t)))
16409b0b
GM
996 (save-excursion
997 (nndoc-dissect-mime-parts-sub
998 part-begin part-end article-insert
999 (concat position
1000 (and position ".")
1001 (format "%d" (incf part-counter)))
1002 message-id)))))))))
6748645f 1003
eec82323
LMI
1004;;;###autoload
1005(defun nndoc-add-type (definition &optional position)
1006 "Add document DEFINITION to the list of nndoc document definitions.
1007If POSITION is nil or `last', the definition will be added
1008as the last checked definition, if t or `first', add as the
1009first definition, and if any other symbol, add after that
1010symbol in the alist."
1011 ;; First remove any old instances.
6748645f 1012 (gnus-pull (car definition) nndoc-type-alist)
eec82323
LMI
1013 ;; Then enter the new definition in the proper place.
1014 (cond
1015 ((or (null position) (eq position 'last))
1016 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1017 ((or (eq position t) (eq position 'first))
1018 (push definition nndoc-type-alist))
1019 (t
1020 (let ((list (memq (assq position nndoc-type-alist)
1021 nndoc-type-alist)))
1022 (unless list
1023 (error "No such position: %s" position))
1024 (setcdr list (cons definition (cdr list)))))))
1025
1026(provide 'nndoc)
1027
62dc3ac3 1028;; arch-tag: f5c2970e-0387-47ac-a0b3-6cc317dffabe
eec82323 1029;;; nndoc.el ends here