Merge changes made in Gnus trunk.
[bpt/emacs.git] / lisp / gnus / nndoc.el
CommitLineData
eec82323 1;;; nndoc.el --- single file access for Gnus
e84b4b86
TTN
2
3;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
114f9c96 4;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
eec82323 5
6748645f 6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
b092f83d 7;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
eec82323
LMI
8;; Keywords: news
9
10;; This file is part of GNU Emacs.
11
5e809f55 12;; GNU Emacs is free software: you can redistribute it and/or modify
eec82323 13;; it under the terms of the GNU General Public License as published by
5e809f55
GM
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
eec82323
LMI
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
5e809f55 23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
eec82323
LMI
24
25;;; Commentary:
26
b092f83d
SZ
27;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
28
eec82323
LMI
29;;; Code:
30
31(require 'nnheader)
32(require 'message)
33(require 'nnmail)
34(require 'nnoo)
6748645f 35(require 'gnus-util)
16409b0b 36(require 'mm-util)
eec82323
LMI
37(eval-when-compile (require 'cl))
38
39(nnoo-declare nndoc)
40
41(defvoo nndoc-article-type 'guess
42 "*Type of the file.
43One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
16409b0b 44`rfc934', `rfc822-forward', `mime-parts', `standard-digest',
b092f83d
SZ
45`slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
46`mailman', `exim-bounce', or `guess'.")
eec82323
LMI
47
48(defvoo nndoc-post-type 'mail
49 "*Whether the nndoc group is `mail' or `post'.")
50
6748645f
LMI
51(defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
52 "Hook run after opening a document.
53The default function removes all trailing carriage returns
16409b0b 54from the document.")
6748645f 55
eec82323
LMI
56(defvar nndoc-type-alist
57 `((mmdf
58 (article-begin . "^\^A\^A\^A\^A\n")
59 (body-end . "^\^A\^A\^A\^A\n"))
23f87bed
MB
60 (mime-digest
61 (article-begin . "")
62 (head-begin . "^ ?\n")
63 (head-end . "^ ?$")
64 (body-end . "")
65 (file-end . "")
66 (subtype digest guess))
67 (mime-parts
68 (generate-head-function . nndoc-generate-mime-parts-head)
69 (article-transform-function . nndoc-transform-mime-parts))
16409b0b
GM
70 (nsmail
71 (article-begin . "^From - "))
eec82323
LMI
72 (news
73 (article-begin . "^Path:"))
74 (rnews
75 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
76 (body-end-function . nndoc-rnews-body-end))
77 (mbox
78 (article-begin-function . nndoc-mbox-article-begin)
79 (body-end-function . nndoc-mbox-body-end))
80 (babyl
81 (article-begin . "\^_\^L *\n")
82 (body-end . "\^_")
83 (body-begin-function . nndoc-babyl-body-begin)
84 (head-begin-function . nndoc-babyl-head-begin))
23f87bed
MB
85 (exim-bounce
86 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
87 (body-end-function . nndoc-exim-bounce-body-end-function))
eec82323
LMI
88 (rfc934
89 (article-begin . "^--.*\n+")
90 (body-end . "^--.*$")
91 (prepare-body-function . nndoc-unquote-dashes))
b092f83d
SZ
92 (mailman
93 (article-begin . "^--__--__--\n\nMessage:")
94 (body-end . "^--__--__--$")
95 (prepare-body-function . nndoc-unquote-dashes))
eec82323
LMI
96 (clari-briefs
97 (article-begin . "^ \\*")
98 (body-end . "^\t------*[ \t]^*\n^ \\*")
99 (body-begin . "^\t")
100 (head-end . "^\t")
101 (generate-head-function . nndoc-generate-clari-briefs-head)
102 (article-transform-function . nndoc-transform-clari-briefs))
c9fc72fa 103
eec82323 104 (standard-digest
6748645f
LMI
105 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
106 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
eec82323
LMI
107 (prepare-body-function . nndoc-unquote-dashes)
108 (body-end-function . nndoc-digest-body-end)
6748645f
LMI
109 (head-end . "^ *$")
110 (body-begin . "^ *\n")
eec82323
LMI
111 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
112 (subtype digest guess))
113 (slack-digest
114 (article-begin . "^------------------------------*[\n \t]+")
115 (head-end . "^ ?$")
116 (body-end-function . nndoc-digest-body-end)
117 (body-begin . "^ ?$")
118 (file-end . "^End of")
119 (prepare-body-function . nndoc-unquote-dashes)
120 (subtype digest guess))
846daf20
LMI
121 (google
122 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
123 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
124 (head-begin . "^Date:")
125 (head-end . "^$")
126 (body-end-function . nndoc-digest-body-end)
127 (body-begin . "^$")
128 (file-end . "^==============================================================================$")
129 (prepare-body-function . nndoc-unquote-dashes)
130 (subtype digest guess))
eec82323
LMI
131 (lanl-gov-announce
132 (article-begin . "^\\\\\\\\\n")
01c52d31 133 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
eec82323
LMI
134 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
135 (body-begin . "")
b092f83d
SZ
136 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
137 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
eec82323
LMI
138 (generate-head-function . nndoc-generate-lanl-gov-head)
139 (article-transform-function . nndoc-transform-lanl-gov-announce)
140 (subtype preprints guess))
141 (rfc822-forward
23f87bed
MB
142 (article-begin . "^\n+")
143 (body-end-function . nndoc-rfc822-forward-body-end-function)
144 (generate-head-function . nndoc-rfc822-forward-generate-head)
145 (generate-article-function . nndoc-rfc822-forward-generate-article))
16409b0b
GM
146 (outlook
147 (article-begin-function . nndoc-outlook-article-begin)
148 (body-end . "\0"))
b092f83d
SZ
149 (oe-dbx ;; Outlook Express DBX format
150 (dissection-function . nndoc-oe-dbx-dissection)
151 (generate-head-function . nndoc-oe-dbx-generate-head)
152 (generate-article-function . nndoc-oe-dbx-generate-article))
153 (forward
154 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
155 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
156 (prepare-body-function . nndoc-unquote-dashes))
157 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
158 (article-begin-function . nndoc-mail-in-mail-article-begin))
eec82323
LMI
159 (guess
160 (guess . t)
161 (subtype nil))
162 (digest
163 (guess . t)
164 (subtype nil))
165 (preprints
166 (guess . t)
167 (subtype nil))))
168
b092f83d
SZ
169(defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
170 "Regexp for binary nndoc file names.")
171
eec82323 172\f
eec82323
LMI
173(defvoo nndoc-file-begin nil)
174(defvoo nndoc-first-article nil)
eec82323
LMI
175(defvoo nndoc-article-begin nil)
176(defvoo nndoc-head-begin nil)
177(defvoo nndoc-head-end nil)
178(defvoo nndoc-file-end nil)
179(defvoo nndoc-body-begin nil)
180(defvoo nndoc-body-end-function nil)
181(defvoo nndoc-body-begin-function nil)
182(defvoo nndoc-head-begin-function nil)
183(defvoo nndoc-body-end nil)
6748645f 184;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
16409b0b
GM
185;; following items. ARTICLE acts as the association key and is an ordinal
186;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
187;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
188;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
189;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
190;; generation, respectively. Other headers usually follow directly from the
191;; buffer. Value `nil' means no insert.
eec82323
LMI
192(defvoo nndoc-dissection-alist nil)
193(defvoo nndoc-prepare-body-function nil)
194(defvoo nndoc-generate-head-function nil)
195(defvoo nndoc-article-transform-function nil)
196(defvoo nndoc-article-begin-function nil)
b092f83d
SZ
197(defvoo nndoc-generate-article-function nil)
198(defvoo nndoc-dissection-function nil)
846daf20 199(defvoo nndoc-pre-dissection-function nil)
eec82323
LMI
200
201(defvoo nndoc-status-string "")
202(defvoo nndoc-group-alist nil)
203(defvoo nndoc-current-buffer nil
204 "Current nndoc news buffer.")
205(defvoo nndoc-address nil)
206
207(defconst nndoc-version "nndoc 1.0"
208 "nndoc version.")
209
210\f
211
212;;; Interface functions
213
214(nnoo-define-basics nndoc)
215
216(deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
217 (when (nndoc-possibly-change-buffer newsgroup server)
20a673b2 218 (with-current-buffer nntp-server-buffer
eec82323
LMI
219 (erase-buffer)
220 (let (article entry)
221 (if (stringp (car articles))
222 'headers
223 (while articles
224 (when (setq entry (cdr (assq (setq article (pop articles))
225 nndoc-dissection-alist)))
226 (insert (format "221 %d Article retrieved.\n" article))
227 (if nndoc-generate-head-function
228 (funcall nndoc-generate-head-function article)
229 (insert-buffer-substring
230 nndoc-current-buffer (car entry) (nth 1 entry)))
231 (goto-char (point-max))
16409b0b 232 (unless (eq (char-after (1- (point))) ?\n)
eec82323
LMI
233 (insert "\n"))
234 (insert (format "Lines: %d\n" (nth 4 entry)))
235 (insert ".\n")))
236
237 (nnheader-fold-continuation-lines)
238 'headers)))))
239
240(deffoo nndoc-request-article (article &optional newsgroup server buffer)
241 (nndoc-possibly-change-buffer newsgroup server)
242 (save-excursion
243 (let ((buffer (or buffer nntp-server-buffer))
244 (entry (cdr (assq article nndoc-dissection-alist)))
245 beg)
246 (set-buffer buffer)
247 (erase-buffer)
248 (when entry
b092f83d
SZ
249 (cond
250 ((stringp article) nil)
251 (nndoc-generate-article-function
252 (funcall nndoc-generate-article-function article))
253 (t
eec82323
LMI
254 (insert-buffer-substring
255 nndoc-current-buffer (car entry) (nth 1 entry))
256 (insert "\n")
257 (setq beg (point))
258 (insert-buffer-substring
259 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
260 (goto-char beg)
261 (when nndoc-prepare-body-function
262 (funcall nndoc-prepare-body-function))
263 (when nndoc-article-transform-function
264 (funcall nndoc-article-transform-function article))
b092f83d 265 t))))))
eec82323
LMI
266
267(deffoo nndoc-request-group (group &optional server dont-check)
268 "Select news GROUP."
269 (let (number)
270 (cond
271 ((not (nndoc-possibly-change-buffer group server))
272 (nnheader-report 'nndoc "No such file or buffer: %s"
273 nndoc-address))
274 (dont-check
275 (nnheader-report 'nndoc "Selected group %s" group)
276 t)
277 ((zerop (setq number (length nndoc-dissection-alist)))
278 (nndoc-close-group group)
279 (nnheader-report 'nndoc "No articles in group %s" group))
280 (t
281 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
282
283(deffoo nndoc-request-type (group &optional article)
284 (cond ((not article) 'unknown)
b092f83d
SZ
285 (nndoc-post-type nndoc-post-type)
286 (t 'unknown)))
eec82323
LMI
287
288(deffoo nndoc-close-group (group &optional server)
289 (nndoc-possibly-change-buffer group server)
290 (and nndoc-current-buffer
291 (buffer-name nndoc-current-buffer)
292 (kill-buffer nndoc-current-buffer))
293 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
294 nndoc-group-alist))
295 (setq nndoc-current-buffer nil)
296 (nnoo-close-server 'nndoc server)
297 (setq nndoc-dissection-alist nil)
298 t)
299
300(deffoo nndoc-request-list (&optional server)
301 nil)
302
303(deffoo nndoc-request-newgroups (date &optional server)
304 nil)
305
306(deffoo nndoc-request-list-newsgroups (&optional server)
307 nil)
308
309\f
310;;; Internal functions.
311
312(defun nndoc-possibly-change-buffer (group source)
313 (let (buf)
314 (cond
315 ;; The current buffer is this group's buffer.
316 ((and nndoc-current-buffer
317 (buffer-name nndoc-current-buffer)
318 (eq nndoc-current-buffer
319 (setq buf (cdr (assoc group nndoc-group-alist))))))
320 ;; We change buffers by taking an old from the group alist.
321 ;; `source' is either a string (a file name) or a buffer object.
322 (buf
323 (setq nndoc-current-buffer buf))
324 ;; It's a totally new group.
325 ((or (and (bufferp nndoc-address)
326 (buffer-name nndoc-address))
327 (and (stringp nndoc-address)
328 (file-exists-p nndoc-address)
329 (not (file-directory-p nndoc-address))))
330 (push (cons group (setq nndoc-current-buffer
331 (get-buffer-create
332 (concat " *nndoc " group "*"))))
333 nndoc-group-alist)
334 (setq nndoc-dissection-alist nil)
20a673b2 335 (with-current-buffer nndoc-current-buffer
eec82323 336 (erase-buffer)
b092f83d
SZ
337 (if (and (stringp nndoc-address)
338 (string-match nndoc-binary-file-names nndoc-address))
339 (let ((coding-system-for-read 'binary))
340 (mm-insert-file-contents nndoc-address))
341 (if (stringp nndoc-address)
342 (nnheader-insert-file-contents nndoc-address)
343 (insert-buffer-substring nndoc-address))
344 (run-hooks 'nndoc-open-document-hook)))))
eec82323
LMI
345 ;; Initialize the nndoc structures according to this new document.
346 (when (and nndoc-current-buffer
347 (not nndoc-dissection-alist))
20a673b2 348 (with-current-buffer nndoc-current-buffer
eec82323 349 (nndoc-set-delims)
6748645f
LMI
350 (if (eq nndoc-article-type 'mime-parts)
351 (nndoc-dissect-mime-parts)
352 (nndoc-dissect-buffer))))
eec82323
LMI
353 (unless nndoc-current-buffer
354 (nndoc-close-server))
355 ;; Return whether we managed to select a file.
356 nndoc-current-buffer))
357
358;;;
359;;; Deciding what document type we have
360;;;
361
362(defun nndoc-set-delims ()
363 "Set the nndoc delimiter variables according to the type of the document."
364 (let ((vars '(nndoc-file-begin
365 nndoc-first-article
6748645f
LMI
366 nndoc-article-begin-function
367 nndoc-head-begin nndoc-head-end
eec82323
LMI
368 nndoc-file-end nndoc-article-begin
369 nndoc-body-begin nndoc-body-end-function nndoc-body-end
370 nndoc-prepare-body-function nndoc-article-transform-function
371 nndoc-generate-head-function nndoc-body-begin-function
b092f83d
SZ
372 nndoc-head-begin-function
373 nndoc-generate-article-function
846daf20
LMI
374 nndoc-dissection-function
375 nndoc-pre-dissection-function)))
eec82323
LMI
376 (while vars
377 (set (pop vars) nil)))
378 (let (defs)
379 ;; Guess away until we find the real file type.
380 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
381 nndoc-type-alist))))
382 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
383 ;; Set the nndoc variables.
384 (while defs
385 (set (intern (format "nndoc-%s" (caar defs)))
386 (cdr (pop defs))))))
387
388(defun nndoc-guess-type (subtype)
389 (let ((alist nndoc-type-alist)
390 results result entry)
391 (while (and (not result)
392 (setq entry (pop alist)))
393 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
394 (goto-char (point-min))
16409b0b
GM
395 ;; Remove blank lines.
396 (while (eq (following-char) ?\n)
397 (delete-char 1))
eec82323
LMI
398 (when (numberp (setq result (funcall (intern
399 (format "nndoc-%s-type-p"
400 (car entry))))))
401 (push (cons result entry) results)
402 (setq result nil))))
403 (unless (or result results)
404 (error "Document is not of any recognized type"))
405 (if result
406 (car entry)
23f87bed 407 (cadar (last (sort results 'car-less-than-car))))))
eec82323
LMI
408
409;;;
410;;; Built-in type predicates and functions
411;;;
412
413(defun nndoc-mbox-type-p ()
414 (when (looking-at message-unix-mail-delimiter)
415 t))
416
417(defun nndoc-mbox-article-begin ()
418 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
419 (goto-char (match-beginning 0))))
420
421(defun nndoc-mbox-body-end ()
422 (let ((beg (point))
423 len end)
424 (when
425 (save-excursion
426 (and (re-search-backward
427 (concat "^" message-unix-mail-delimiter) nil t)
428 (setq end (point))
429 (search-forward "\n\n" beg t)
430 (re-search-backward
431 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
e9bd5782 432 (setq len (string-to-number (match-string 1)))
eec82323
LMI
433 (search-forward "\n\n" beg t)
434 (unless (= (setq len (+ (point) len)) (point-max))
435 (and (< len (point-max))
436 (goto-char len)
437 (looking-at message-unix-mail-delimiter)))))
438 (goto-char len))))
439
440(defun nndoc-mmdf-type-p ()
441 (when (looking-at "\^A\^A\^A\^A$")
442 t))
443
444(defun nndoc-news-type-p ()
445 (when (looking-at "^Path:.*\n")
446 t))
447
448(defun nndoc-rnews-type-p ()
449 (when (looking-at "#! *rnews")
450 t))
451
452(defun nndoc-rnews-body-end ()
453 (and (re-search-backward nndoc-article-begin nil t)
454 (forward-line 1)
e9bd5782 455 (goto-char (+ (point) (string-to-number (match-string 1))))))
eec82323 456
846daf20
LMI
457(defun nndoc-google-type-p ()
458 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
459 t))
460
461(defun nndoc-decode-content-transfer-encoding ()
462 (let ((encoding
463 (save-restriction
464 (message-narrow-to-head)
465 (message-fetch-field "content-transfer-encoding"))))
466 (when (and encoding
467 (search-forward "\n\n" nil t))
468 (save-restriction
469 (narrow-to-region (point) (point-max))
470 (mm-decode-content-transfer-encoding
471 (intern (downcase (mail-header-strip encoding))))))))
472
eec82323
LMI
473(defun nndoc-babyl-type-p ()
474 (when (re-search-forward "\^_\^L *\n" nil t)
475 t))
476
477(defun nndoc-babyl-body-begin ()
478 (re-search-forward "^\n" nil t)
6748645f 479 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
480 (let ((next (or (save-excursion
481 (re-search-forward nndoc-article-begin nil t))
482 (point-max))))
483 (unless (re-search-forward "^\n" next t)
484 (goto-char next)
485 (forward-line -1)
486 (insert "\n")
487 (forward-line -1)))))
488
489(defun nndoc-babyl-head-begin ()
490 (when (re-search-forward "^[0-9].*\n" nil t)
6748645f 491 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
492 (forward-line 1))
493 t))
494
495(defun nndoc-forward-type-p ()
b092f83d 496 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
16409b0b 497 nil t)
23f87bed 498 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
eec82323
LMI
499 t))
500
501(defun nndoc-rfc934-type-p ()
502 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
503 (not (re-search-forward "^Subject:.*digest" nil t))
504 (not (re-search-backward "^From:" nil t 2))
505 (not (re-search-forward "^From:" nil t 2)))
506 t))
507
b092f83d
SZ
508(defun nndoc-mailman-type-p ()
509 (when (re-search-forward "^--__--__--\n+" nil t)
510 t))
511
eec82323
LMI
512(defun nndoc-rfc822-forward-type-p ()
513 (save-restriction
514 (message-narrow-to-head)
515 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
516 t)))
517
518(defun nndoc-rfc822-forward-body-end-function ()
519 (goto-char (point-max)))
520
23f87bed
MB
521(defun nndoc-rfc822-forward-generate-article (article &optional head)
522 (let ((entry (cdr (assq article nndoc-dissection-alist)))
523 (begin (point))
524 encoding)
525 (with-current-buffer nndoc-current-buffer
526 (save-restriction
527 (message-narrow-to-head)
528 (setq encoding (message-fetch-field "content-transfer-encoding"))))
529 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
530 (when encoding
531 (save-restriction
532 (narrow-to-region begin (point-max))
533 (mm-decode-content-transfer-encoding
534 (intern (downcase (mail-header-strip encoding))))))
535 (when head
536 (goto-char begin)
537 (when (search-forward "\n\n" nil t)
538 (delete-region (1- (point)) (point-max)))))
539 t)
540
541(defun nndoc-rfc822-forward-generate-head (article)
542 (nndoc-rfc822-forward-generate-article article 'head))
543
6748645f
LMI
544(defun nndoc-mime-parts-type-p ()
545 (let ((case-fold-search t)
546 (limit (search-forward "\n\n" nil t)))
547 (goto-char (point-min))
548 (when (and limit
16409b0b
GM
549 (re-search-forward
550 (concat "\
551^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
552 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
553 limit t))
6748645f
LMI
554 t)))
555
556(defun nndoc-transform-mime-parts (article)
16409b0b
GM
557 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
558 (headers (nth 5 entry)))
559 (when headers
6748645f 560 (goto-char (point-min))
16409b0b
GM
561 (insert headers))))
562
563(defun nndoc-generate-mime-parts-head (article)
564 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
565 (headers (nth 6 entry)))
566 (save-restriction
567 (narrow-to-region (point) (point))
568 (insert-buffer-substring
569 nndoc-current-buffer (car entry) (nth 1 entry))
570 (goto-char (point-max)))
571 (when headers
572 (insert headers))))
6748645f 573
eec82323
LMI
574(defun nndoc-clari-briefs-type-p ()
575 (when (let ((case-fold-search nil))
576 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
577 t))
578
579(defun nndoc-transform-clari-briefs (article)
580 (goto-char (point-min))
581 (when (looking-at " *\\*\\(.*\\)\n")
582 (replace-match "" t t))
583 (nndoc-generate-clari-briefs-head article))
584
585(defun nndoc-generate-clari-briefs-head (article)
586 (let ((entry (cdr (assq article nndoc-dissection-alist)))
587 subject from)
20a673b2 588 (with-current-buffer nndoc-current-buffer
eec82323
LMI
589 (save-restriction
590 (narrow-to-region (car entry) (nth 3 entry))
591 (goto-char (point-min))
592 (when (looking-at " *\\*\\(.*\\)$")
593 (setq subject (match-string 1))
594 (when (string-match "[ \t]+$" subject)
595 (setq subject (substring subject 0 (match-beginning 0)))))
596 (when
597 (let ((case-fold-search nil))
598 (re-search-forward
599 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
600 (setq from (match-string 1)))))
601 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
602 "\nSubject: " (or subject "(no subject)") "\n")))
603
b092f83d
SZ
604(defun nndoc-exim-bounce-type-p ()
605 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
606 t))
607
608(defun nndoc-exim-bounce-body-end-function ()
609 (goto-char (point-max)))
610
16409b0b 611
eec82323
LMI
612(defun nndoc-mime-digest-type-p ()
613 (let ((case-fold-search t)
614 boundary-id b-delimiter entry)
615 (when (and
616 (re-search-forward
617 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
6748645f 618 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
eec82323
LMI
619 nil t)
620 (match-beginning 1))
621 (setq boundary-id (match-string 1)
16409b0b 622 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
eec82323
LMI
623 (setq entry (assq 'mime-digest nndoc-type-alist))
624 (setcdr entry
625 (list
16409b0b 626 (cons 'head-begin "^ ?\n")
eec82323
LMI
627 (cons 'head-end "^ ?$")
628 (cons 'body-begin "^ ?\n")
629 (cons 'article-begin b-delimiter)
630 (cons 'body-end-function 'nndoc-digest-body-end)
b092f83d 631 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
eec82323
LMI
632 t)))
633
634(defun nndoc-standard-digest-type-p ()
635 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
636 (re-search-forward
637 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
638 t))
639
640(defun nndoc-digest-body-end ()
641 (and (re-search-forward nndoc-article-begin nil t)
642 (goto-char (match-beginning 0))))
643
644(defun nndoc-slack-digest-type-p ()
645 0)
646
647(defun nndoc-lanl-gov-announce-type-p ()
648 (when (let ((case-fold-search nil))
01c52d31 649 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
eec82323
LMI
650 t))
651
652(defun nndoc-transform-lanl-gov-announce (article)
01c52d31
MB
653 (let ((case-fold-search nil))
654 (goto-char (point-max))
655 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
656 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
b092f83d 657 (goto-char (point-min))
01c52d31
MB
658 (while (re-search-forward "^\\\\\\\\$" nil t)
659 (replace-match "" t nil))
660 (goto-char (point-min))
661 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
662 (replace-match "Date: \\1 (revised) " t nil))
663 (goto-char (point-min))
664 (unless (re-search-forward "^From" nil t)
b092f83d 665 (goto-char (point-min))
01c52d31
MB
666 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
667 (goto-char (point-min))
668 (insert "From: " (match-string 1) "\n")))
669 (when (re-search-forward "^arXiv:" nil t)
670 (replace-match "Paper: arXiv:" t nil))))
eec82323
LMI
671
672(defun nndoc-generate-lanl-gov-head (article)
673 (let ((entry (cdr (assq article nndoc-dissection-alist)))
b092f83d
SZ
674 (from "<no address given>")
675 subject date)
20a673b2 676 (with-current-buffer nndoc-current-buffer
eec82323 677 (save-restriction
b092f83d
SZ
678 (narrow-to-region (car entry) (nth 1 entry))
679 (goto-char (point-min))
01c52d31
MB
680 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
681 (setq subject (concat " (" (match-string 2) ")"))
b092f83d
SZ
682 (when (re-search-forward "^From: \\(.*\\)" nil t)
683 (setq from (concat "<"
a1506d29 684 (cadr (funcall gnus-extract-address-components
b092f83d
SZ
685 (match-string 1))) ">")))
686 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
687 (setq date (match-string 1))
688 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
689 (setq date (match-string 1))))
690 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
691 nil t)
692 (setq subject (concat (match-string 1) subject))
693 (setq from (concat (match-string 2) " " from))))))
eec82323
LMI
694 (while (and from (string-match "(\[^)\]*)" from))
695 (setq from (replace-match "" t t from)))
696 (insert "From: " (or from "unknown")
b092f83d
SZ
697 "\nSubject: " (or subject "(no subject)") "\n")
698 (if date (insert "Date: " date))))
eec82323 699
16409b0b
GM
700(defun nndoc-nsmail-type-p ()
701 (when (looking-at "From - ")
702 t))
703
704(defun nndoc-outlook-article-begin ()
705 (prog1 (re-search-forward "From:\\|Received:" nil t)
706 (goto-char (match-beginning 0))))
707
708(defun nndoc-outlook-type-p ()
709 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
710 (looking-at "JMF"))
711
b092f83d 712(defun nndoc-oe-dbx-type-p ()
62dc3ac3 713 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
b092f83d
SZ
714
715(defun nndoc-read-little-endian ()
716 (+ (prog1 (char-after) (forward-char 1))
717 (lsh (prog1 (char-after) (forward-char 1)) 8)
718 (lsh (prog1 (char-after) (forward-char 1)) 16)
719 (lsh (prog1 (char-after) (forward-char 1)) 24)))
720
721(defun nndoc-oe-dbx-decode-block ()
722 (list
723 (nndoc-read-little-endian) ;; this address
724 (nndoc-read-little-endian) ;; next address offset
725 (nndoc-read-little-endian) ;; blocksize
726 (nndoc-read-little-endian))) ;; next address
727
728(defun nndoc-oe-dbx-dissection ()
729 (let ((i 0) blk p tp)
730 (goto-char 60117) ;; 0x0000EAD4+1
731 (setq p (point))
732 (unless (eobp)
733 (setq blk (nndoc-oe-dbx-decode-block)))
734 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
735 (> (nth 3 blk) p)))
736 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
737 (while (and (> (car blk) 0) (> (nth 3 blk) p))
738 (goto-char (1+ (nth 3 blk)))
739 (setq blk (nndoc-oe-dbx-decode-block)))
740 (if (or (<= (car blk) p)
741 (<= (nth 1 blk) 0)
742 (not (zerop (nth 3 blk))))
743 (setq blk nil)
744 (setq tp (+ (car blk) (nth 1 blk) 17))
745 (if (or (<= tp p) (>= tp (point-max)))
746 (setq blk nil)
747 (goto-char tp)
748 (setq p tp
749 blk (nndoc-oe-dbx-decode-block)))))))
750
751(defun nndoc-oe-dbx-generate-article (article &optional head)
752 (let ((entry (cdr (assq article nndoc-dissection-alist)))
753 (cur (current-buffer))
754 (begin (point))
755 blk p)
756 (with-current-buffer nndoc-current-buffer
757 (setq p (car entry))
758 (while (> p (point-min))
759 (goto-char p)
760 (setq blk (nndoc-oe-dbx-decode-block))
761 (setq p (point))
762 (with-current-buffer cur
763 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
764 (setq p (1+ (nth 3 blk)))))
765 (goto-char begin)
766 (while (re-search-forward "\r$" nil t)
d355a0b7 767 (delete-char -1))
b092f83d
SZ
768 (when head
769 (goto-char begin)
770 (when (search-forward "\n\n" nil t)
771 (setcar (cddddr entry) (count-lines (point) (point-max)))
772 (delete-region (1- (point)) (point-max))))
773 t))
774
775(defun nndoc-oe-dbx-generate-head (article)
776 (nndoc-oe-dbx-generate-article article 'head))
777
778(defun nndoc-mail-in-mail-type-p ()
779 (let (found)
780 (save-excursion
781 (catch 'done
782 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
783 (setq found 0)
784 (forward-line)
785 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
786 (if (looking-at "[-A-Za-z0-9]+:")
787 (setq found (1+ found)))
788 (forward-line))
789 (if (and (> found 0) (looking-at "\n"))
790 (throw 'done 9999)))
791 nil))))
792
793(defun nndoc-mail-in-mail-article-begin ()
794 (let (point found)
795 (if (catch 'done
796 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
797 (setq found 0)
798 (setq point (match-beginning 1))
799 (forward-line)
800 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
801 (if (looking-at "[-A-Za-z0-9]+:")
802 (setq found (1+ found)))
803 (forward-line))
804 (if (and (> found 0) (looking-at "\n"))
805 (throw 'done t)))
806 nil)
807 (goto-char point))))
808
6748645f
LMI
809(deffoo nndoc-request-accept-article (group &optional server last)
810 nil)
811
eec82323
LMI
812;;;
813;;; Functions for dissecting the documents
814;;;
815
816(defun nndoc-search (regexp)
817 (prog1
818 (re-search-forward regexp nil t)
819 (beginning-of-line)))
820
821(defun nndoc-dissect-buffer ()
822 "Go through the document and partition it into heads/bodies/articles."
823 (let ((i 0)
824 (first t)
23f87bed 825 art-begin head-begin head-end body-begin body-end)
eec82323 826 (setq nndoc-dissection-alist nil)
20a673b2 827 (with-current-buffer nndoc-current-buffer
eec82323 828 (goto-char (point-min))
16409b0b
GM
829 ;; Remove blank lines.
830 (while (eq (following-char) ?\n)
831 (delete-char 1))
846daf20
LMI
832 (when nndoc-pre-dissection-function
833 (save-excursion
834 (funcall nndoc-pre-dissection-function)))
b092f83d
SZ
835 (if nndoc-dissection-function
836 (funcall nndoc-dissection-function)
837 ;; Find the beginning of the file.
838 (when nndoc-file-begin
839 (nndoc-search nndoc-file-begin))
840 ;; Go through the file.
841 (while (if (and first nndoc-first-article)
842 (nndoc-search nndoc-first-article)
23f87bed
MB
843 (if art-begin
844 (goto-char art-begin)
845 (nndoc-article-begin)))
846 (setq first nil
847 art-begin nil)
b092f83d
SZ
848 (cond (nndoc-head-begin-function
849 (funcall nndoc-head-begin-function))
850 (nndoc-head-begin
851 (nndoc-search nndoc-head-begin)))
852 (if (or (eobp)
853 (and nndoc-file-end
854 (looking-at nndoc-file-end)))
855 (goto-char (point-max))
856 (setq head-begin (point))
857 (nndoc-search (or nndoc-head-end "^$"))
858 (setq head-end (point))
859 (if nndoc-body-begin-function
860 (funcall nndoc-body-begin-function)
861 (nndoc-search (or nndoc-body-begin "^\n")))
862 (setq body-begin (point))
863 (or (and nndoc-body-end-function
864 (funcall nndoc-body-end-function))
865 (and nndoc-body-end
866 (nndoc-search nndoc-body-end))
23f87bed
MB
867 (and (nndoc-article-begin)
868 (setq art-begin (point)))
b092f83d
SZ
869 (progn
870 (goto-char (point-max))
871 (when nndoc-file-end
872 (and (re-search-backward nndoc-file-end nil t)
873 (beginning-of-line)))))
874 (setq body-end (point))
875 (push (list (incf i) head-begin head-end body-begin body-end
876 (count-lines body-begin body-end))
877 nndoc-dissection-alist)))))))
eec82323
LMI
878
879(defun nndoc-article-begin ()
880 (if nndoc-article-begin-function
881 (funcall nndoc-article-begin-function)
882 (ignore-errors
883 (nndoc-search nndoc-article-begin))))
884
885(defun nndoc-unquote-dashes ()
886 "Unquote quoted non-separators in digests."
887 (while (re-search-forward "^- -"nil t)
888 (replace-match "-" t t)))
889
6748645f
LMI
890;; Against compiler warnings.
891(defvar nndoc-mime-split-ordinal)
892
893(defun nndoc-dissect-mime-parts ()
894 "Go through a MIME composite article and partition it into sub-articles.
895When a MIME entity contains sub-entities, dissection produces one article for
896the header of this entity, and one article per sub-entity."
897 (setq nndoc-dissection-alist nil
898 nndoc-mime-split-ordinal 0)
20a673b2 899 (with-current-buffer nndoc-current-buffer
16409b0b
GM
900 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
901
902(defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
903 position parent)
904 "Dissect an entity, within a composite MIME message.
905The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
906ARTICLE-INSERT should be added at beginning for generating a full article.
6748645f
LMI
907The string POSITION holds a dotted decimal representation of the article
908position in the hierarchical structure, it is nil for the outer entity.
16409b0b
GM
909PARENT is the message-ID of the parent summary line, or nil for none."
910 (let ((case-fold-search t)
911 (message-id (nnmail-message-id))
912 head-end body-begin summary-insert message-rfc822 multipart-any
913 subject content-type type subtype boundary-regexp)
914 ;; Gracefully handle a missing body.
915 (goto-char head-begin)
2e36fdce
DL
916 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
917 (search-forward "\n\n" body-end t))
16409b0b
GM
918 (setq head-end (1- (point))
919 body-begin (point))
920 (setq head-end body-end
921 body-begin body-end))
922 (narrow-to-region head-begin head-end)
923 ;; Save MIME attributes.
924 (goto-char head-begin)
925 (setq content-type (message-fetch-field "Content-Type"))
926 (when content-type
927 (when (string-match
928 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
929 (setq type (downcase (match-string 1 content-type))
930 subtype (downcase (match-string 2 content-type))
931 message-rfc822 (and (string= type "message")
932 (string= subtype "rfc822"))
933 multipart-any (string= type "multipart")))
934 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
935 (setq subject (match-string 1 content-type)))
936 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
937 (setq boundary-regexp (concat "^--"
938 (regexp-quote
939 (match-string 1 content-type))
940 "\\(--\\)?[ \t]*\n"))))
941 (unless subject
942 (when (or multipart-any (not article-insert))
943 (setq subject (message-fetch-field "Subject"))))
944 (unless type
945 (setq type "text"
946 subtype "plain"))
947 ;; Prepare the article and summary inserts.
948 (unless article-insert
23f87bed 949 (setq article-insert (buffer-string)
16409b0b 950 head-end head-begin))
b092f83d
SZ
951 ;; Fix MIME-Version
952 (unless (string-match "MIME-Version:" article-insert)
953 (setq article-insert
954 (concat article-insert "MIME-Version: 1.0\n")))
16409b0b
GM
955 (setq summary-insert article-insert)
956 ;; - summary Subject.
957 (setq summary-insert
958 (let ((line (concat "Subject: <" position
959 (and position multipart-any ".")
960 (and multipart-any "*")
961 (and (or position multipart-any) " ")
962 (cond ((string= subtype "plain") type)
963 ((string= subtype "basic") type)
964 (t subtype))
965 ">"
966 (and subject " ")
967 subject
968 "\n")))
969 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
970 (replace-match line t t summary-insert)
971 (concat summary-insert line))))
972 ;; - summary Message-ID.
973 (setq summary-insert
974 (let ((line (concat "Message-ID: " message-id "\n")))
975 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
976 (replace-match line t t summary-insert)
977 (concat summary-insert line))))
978 ;; - summary References.
979 (when parent
980 (setq summary-insert
981 (let ((line (concat "References: " parent "\n")))
982 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
983 summary-insert)
984 (replace-match line t t summary-insert)
985 (concat summary-insert line)))))
986 ;; Generate dissection information for this entity.
987 (push (list (incf nndoc-mime-split-ordinal)
988 head-begin head-end body-begin body-end
989 (count-lines body-begin body-end)
990 article-insert summary-insert)
991 nndoc-dissection-alist)
992 ;; Recurse for all sub-entities, if any.
993 (widen)
994 (cond
995 (message-rfc822
996 (save-excursion
997 (nndoc-dissect-mime-parts-sub body-begin body-end nil
998 position message-id)))
999 ((and multipart-any boundary-regexp)
1000 (let ((part-counter 0)
1001 part-begin part-end eof-flag)
1002 (while (string-match "\
1003^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1004 article-insert)
1005 (setq article-insert (replace-match "" t t article-insert)))
1006 (let ((case-fold-search nil))
1007 (goto-char body-begin)
1008 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
6748645f 1009 (while (not eof-flag)
16409b0b
GM
1010 (setq part-begin (point))
1011 (cond ((re-search-forward boundary-regexp body-end t)
6748645f
LMI
1012 (or (not (match-string 1))
1013 (string= (match-string 1) "")
1014 (setq eof-flag t))
1015 (forward-line -1)
16409b0b 1016 (setq part-end (point))
6748645f 1017 (forward-line 1))
16409b0b 1018 (t (setq part-end body-end
6748645f 1019 eof-flag t)))
16409b0b
GM
1020 (save-excursion
1021 (nndoc-dissect-mime-parts-sub
1022 part-begin part-end article-insert
1023 (concat position
1024 (and position ".")
1025 (format "%d" (incf part-counter)))
1026 message-id)))))))))
6748645f 1027
eec82323
LMI
1028;;;###autoload
1029(defun nndoc-add-type (definition &optional position)
1030 "Add document DEFINITION to the list of nndoc document definitions.
1031If POSITION is nil or `last', the definition will be added
1032as the last checked definition, if t or `first', add as the
1033first definition, and if any other symbol, add after that
1034symbol in the alist."
1035 ;; First remove any old instances.
6748645f 1036 (gnus-pull (car definition) nndoc-type-alist)
eec82323
LMI
1037 ;; Then enter the new definition in the proper place.
1038 (cond
1039 ((or (null position) (eq position 'last))
1040 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1041 ((or (eq position t) (eq position 'first))
1042 (push definition nndoc-type-alist))
1043 (t
1044 (let ((list (memq (assq position nndoc-type-alist)
1045 nndoc-type-alist)))
1046 (unless list
1047 (error "No such position: %s" position))
1048 (setcdr list (cons definition (cdr list)))))))
1049
1050(provide 'nndoc)
1051
1052;;; nndoc.el ends here