Merge changes from emacs-23 branch
[bpt/emacs.git] / lisp / gnus / nndoc.el
CommitLineData
eec82323 1;;; nndoc.el --- single file access for Gnus
e84b4b86 2
73b0cd50 3;; Copyright (C) 1995-2011 Free Software Foundation, Inc.
eec82323 4
6748645f 5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
b092f83d 6;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
eec82323
LMI
7;; Keywords: news
8
9;; This file is part of GNU Emacs.
10
5e809f55 11;; GNU Emacs is free software: you can redistribute it and/or modify
eec82323 12;; it under the terms of the GNU General Public License as published by
5e809f55
GM
13;; the Free Software Foundation, either version 3 of the License, or
14;; (at your option) any later version.
eec82323
LMI
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
5e809f55 22;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
eec82323
LMI
23
24;;; Commentary:
25
b092f83d
SZ
26;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
27
eec82323
LMI
28;;; Code:
29
30(require 'nnheader)
31(require 'message)
32(require 'nnmail)
33(require 'nnoo)
6748645f 34(require 'gnus-util)
16409b0b 35(require 'mm-util)
eec82323
LMI
36(eval-when-compile (require 'cl))
37
38(nnoo-declare nndoc)
39
40(defvoo nndoc-article-type 'guess
41 "*Type of the file.
42One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
16409b0b 43`rfc934', `rfc822-forward', `mime-parts', `standard-digest',
b092f83d
SZ
44`slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
45`mailman', `exim-bounce', or `guess'.")
eec82323
LMI
46
47(defvoo nndoc-post-type 'mail
48 "*Whether the nndoc group is `mail' or `post'.")
49
6748645f
LMI
50(defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
51 "Hook run after opening a document.
52The default function removes all trailing carriage returns
16409b0b 53from the document.")
6748645f 54
eec82323
LMI
55(defvar nndoc-type-alist
56 `((mmdf
57 (article-begin . "^\^A\^A\^A\^A\n")
58 (body-end . "^\^A\^A\^A\^A\n"))
23f87bed
MB
59 (mime-digest
60 (article-begin . "")
61 (head-begin . "^ ?\n")
62 (head-end . "^ ?$")
63 (body-end . "")
64 (file-end . "")
65 (subtype digest guess))
16409b0b
GM
66 (nsmail
67 (article-begin . "^From - "))
eec82323
LMI
68 (news
69 (article-begin . "^Path:"))
70 (rnews
71 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
72 (body-end-function . nndoc-rnews-body-end))
73 (mbox
74 (article-begin-function . nndoc-mbox-article-begin)
75 (body-end-function . nndoc-mbox-body-end))
76 (babyl
77 (article-begin . "\^_\^L *\n")
78 (body-end . "\^_")
79 (body-begin-function . nndoc-babyl-body-begin)
80 (head-begin-function . nndoc-babyl-head-begin))
a3f57c41
G
81 (mime-parts
82 (generate-head-function . nndoc-generate-mime-parts-head)
83 (article-transform-function . nndoc-transform-mime-parts))
23f87bed
MB
84 (exim-bounce
85 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
86 (body-end-function . nndoc-exim-bounce-body-end-function))
eec82323
LMI
87 (rfc934
88 (article-begin . "^--.*\n+")
89 (body-end . "^--.*$")
90 (prepare-body-function . nndoc-unquote-dashes))
b092f83d
SZ
91 (mailman
92 (article-begin . "^--__--__--\n\nMessage:")
93 (body-end . "^--__--__--$")
94 (prepare-body-function . nndoc-unquote-dashes))
eec82323
LMI
95 (clari-briefs
96 (article-begin . "^ \\*")
97 (body-end . "^\t------*[ \t]^*\n^ \\*")
98 (body-begin . "^\t")
99 (head-end . "^\t")
100 (generate-head-function . nndoc-generate-clari-briefs-head)
101 (article-transform-function . nndoc-transform-clari-briefs))
c9fc72fa 102
eec82323 103 (standard-digest
6748645f
LMI
104 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
105 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
eec82323
LMI
106 (prepare-body-function . nndoc-unquote-dashes)
107 (body-end-function . nndoc-digest-body-end)
6748645f
LMI
108 (head-end . "^ *$")
109 (body-begin . "^ *\n")
eec82323
LMI
110 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
111 (subtype digest guess))
112 (slack-digest
113 (article-begin . "^------------------------------*[\n \t]+")
114 (head-end . "^ ?$")
115 (body-end-function . nndoc-digest-body-end)
116 (body-begin . "^ ?$")
117 (file-end . "^End of")
118 (prepare-body-function . nndoc-unquote-dashes)
119 (subtype digest guess))
846daf20
LMI
120 (google
121 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
122 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
123 (head-begin . "^Date:")
124 (head-end . "^$")
125 (body-end-function . nndoc-digest-body-end)
126 (body-begin . "^$")
127 (file-end . "^==============================================================================$")
128 (prepare-body-function . nndoc-unquote-dashes)
129 (subtype digest guess))
eec82323
LMI
130 (lanl-gov-announce
131 (article-begin . "^\\\\\\\\\n")
01c52d31 132 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
eec82323
LMI
133 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
134 (body-begin . "")
b092f83d
SZ
135 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
136 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
eec82323
LMI
137 (generate-head-function . nndoc-generate-lanl-gov-head)
138 (article-transform-function . nndoc-transform-lanl-gov-announce)
139 (subtype preprints guess))
2e76c12c
LMI
140 (git
141 (file-begin . "\n- Log ---.*")
142 (article-begin . "^commit ")
143 (head-begin . "^Author: ")
144 (body-begin . "^$")
145 (file-end . "\n-----------------------------------------------------------------------")
146 (article-transform-function . nndoc-transform-git-article)
147 (header-transform-function . nndoc-transform-git-headers))
eec82323 148 (rfc822-forward
23f87bed
MB
149 (article-begin . "^\n+")
150 (body-end-function . nndoc-rfc822-forward-body-end-function)
151 (generate-head-function . nndoc-rfc822-forward-generate-head)
152 (generate-article-function . nndoc-rfc822-forward-generate-article))
16409b0b
GM
153 (outlook
154 (article-begin-function . nndoc-outlook-article-begin)
155 (body-end . "\0"))
b092f83d
SZ
156 (oe-dbx ;; Outlook Express DBX format
157 (dissection-function . nndoc-oe-dbx-dissection)
158 (generate-head-function . nndoc-oe-dbx-generate-head)
159 (generate-article-function . nndoc-oe-dbx-generate-article))
160 (forward
161 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
162 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
163 (prepare-body-function . nndoc-unquote-dashes))
164 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
165 (article-begin-function . nndoc-mail-in-mail-article-begin))
eec82323
LMI
166 (guess
167 (guess . t)
168 (subtype nil))
169 (digest
170 (guess . t)
171 (subtype nil))
172 (preprints
173 (guess . t)
174 (subtype nil))))
175
b092f83d
SZ
176(defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
177 "Regexp for binary nndoc file names.")
178
eec82323 179\f
eec82323
LMI
180(defvoo nndoc-file-begin nil)
181(defvoo nndoc-first-article nil)
eec82323
LMI
182(defvoo nndoc-article-begin nil)
183(defvoo nndoc-head-begin nil)
184(defvoo nndoc-head-end nil)
185(defvoo nndoc-file-end nil)
186(defvoo nndoc-body-begin nil)
187(defvoo nndoc-body-end-function nil)
188(defvoo nndoc-body-begin-function nil)
189(defvoo nndoc-head-begin-function nil)
190(defvoo nndoc-body-end nil)
6748645f 191;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
16409b0b
GM
192;; following items. ARTICLE acts as the association key and is an ordinal
193;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
194;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
195;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
196;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
197;; generation, respectively. Other headers usually follow directly from the
198;; buffer. Value `nil' means no insert.
eec82323
LMI
199(defvoo nndoc-dissection-alist nil)
200(defvoo nndoc-prepare-body-function nil)
201(defvoo nndoc-generate-head-function nil)
202(defvoo nndoc-article-transform-function nil)
2e76c12c 203(defvoo nndoc-header-transform-function nil)
eec82323 204(defvoo nndoc-article-begin-function nil)
b092f83d
SZ
205(defvoo nndoc-generate-article-function nil)
206(defvoo nndoc-dissection-function nil)
846daf20 207(defvoo nndoc-pre-dissection-function nil)
eec82323
LMI
208
209(defvoo nndoc-status-string "")
210(defvoo nndoc-group-alist nil)
211(defvoo nndoc-current-buffer nil
212 "Current nndoc news buffer.")
213(defvoo nndoc-address nil)
214
215(defconst nndoc-version "nndoc 1.0"
216 "nndoc version.")
217
218\f
219
220;;; Interface functions
221
222(nnoo-define-basics nndoc)
223
224(deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
225 (when (nndoc-possibly-change-buffer newsgroup server)
20a673b2 226 (with-current-buffer nntp-server-buffer
eec82323
LMI
227 (erase-buffer)
228 (let (article entry)
229 (if (stringp (car articles))
230 'headers
231 (while articles
232 (when (setq entry (cdr (assq (setq article (pop articles))
233 nndoc-dissection-alist)))
c872595d
LMI
234 (let ((start (point)))
235 (insert (format "221 %d Article retrieved.\n" article))
236 (if nndoc-generate-head-function
237 (funcall nndoc-generate-head-function article)
238 (insert-buffer-substring
239 nndoc-current-buffer (car entry) (nth 1 entry)))
240 (goto-char (point-max))
241 (unless (eq (char-after (1- (point))) ?\n)
242 (insert "\n"))
243 (insert (format "Lines: %d\n" (nth 4 entry)))
244 (insert ".\n")
245 (when nndoc-header-transform-function
246 (save-excursion
247 (save-restriction
248 (narrow-to-region start (point))
249 (funcall nndoc-header-transform-function entry)))))))
eec82323
LMI
250 (nnheader-fold-continuation-lines)
251 'headers)))))
252
253(deffoo nndoc-request-article (article &optional newsgroup server buffer)
254 (nndoc-possibly-change-buffer newsgroup server)
255 (save-excursion
256 (let ((buffer (or buffer nntp-server-buffer))
257 (entry (cdr (assq article nndoc-dissection-alist)))
258 beg)
259 (set-buffer buffer)
260 (erase-buffer)
261 (when entry
b092f83d
SZ
262 (cond
263 ((stringp article) nil)
264 (nndoc-generate-article-function
265 (funcall nndoc-generate-article-function article))
266 (t
eec82323
LMI
267 (insert-buffer-substring
268 nndoc-current-buffer (car entry) (nth 1 entry))
269 (insert "\n")
270 (setq beg (point))
271 (insert-buffer-substring
272 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
273 (goto-char beg)
274 (when nndoc-prepare-body-function
275 (funcall nndoc-prepare-body-function))
276 (when nndoc-article-transform-function
277 (funcall nndoc-article-transform-function article))
b092f83d 278 t))))))
eec82323 279
286c4fc2 280(deffoo nndoc-request-group (group &optional server dont-check info)
eec82323
LMI
281 "Select news GROUP."
282 (let (number)
283 (cond
284 ((not (nndoc-possibly-change-buffer group server))
285 (nnheader-report 'nndoc "No such file or buffer: %s"
286 nndoc-address))
287 (dont-check
288 (nnheader-report 'nndoc "Selected group %s" group)
289 t)
290 ((zerop (setq number (length nndoc-dissection-alist)))
291 (nndoc-close-group group)
292 (nnheader-report 'nndoc "No articles in group %s" group))
293 (t
294 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
295
229b59da
G
296(deffoo nndoc-retrieve-groups (groups &optional server)
297 (dolist (group groups)
298 (nndoc-request-group group server))
299 t)
300
eec82323
LMI
301(deffoo nndoc-request-type (group &optional article)
302 (cond ((not article) 'unknown)
b092f83d
SZ
303 (nndoc-post-type nndoc-post-type)
304 (t 'unknown)))
eec82323
LMI
305
306(deffoo nndoc-close-group (group &optional server)
307 (nndoc-possibly-change-buffer group server)
308 (and nndoc-current-buffer
309 (buffer-name nndoc-current-buffer)
310 (kill-buffer nndoc-current-buffer))
311 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
312 nndoc-group-alist))
313 (setq nndoc-current-buffer nil)
314 (nnoo-close-server 'nndoc server)
315 (setq nndoc-dissection-alist nil)
316 t)
317
318(deffoo nndoc-request-list (&optional server)
4478e074 319 t)
eec82323
LMI
320
321(deffoo nndoc-request-newgroups (date &optional server)
322 nil)
323
324(deffoo nndoc-request-list-newsgroups (&optional server)
325 nil)
326
327\f
328;;; Internal functions.
329
330(defun nndoc-possibly-change-buffer (group source)
331 (let (buf)
332 (cond
333 ;; The current buffer is this group's buffer.
334 ((and nndoc-current-buffer
335 (buffer-name nndoc-current-buffer)
336 (eq nndoc-current-buffer
337 (setq buf (cdr (assoc group nndoc-group-alist))))))
338 ;; We change buffers by taking an old from the group alist.
339 ;; `source' is either a string (a file name) or a buffer object.
340 (buf
341 (setq nndoc-current-buffer buf))
342 ;; It's a totally new group.
343 ((or (and (bufferp nndoc-address)
344 (buffer-name nndoc-address))
345 (and (stringp nndoc-address)
346 (file-exists-p nndoc-address)
347 (not (file-directory-p nndoc-address))))
348 (push (cons group (setq nndoc-current-buffer
349 (get-buffer-create
350 (concat " *nndoc " group "*"))))
351 nndoc-group-alist)
352 (setq nndoc-dissection-alist nil)
20a673b2 353 (with-current-buffer nndoc-current-buffer
eec82323 354 (erase-buffer)
b092f83d
SZ
355 (if (and (stringp nndoc-address)
356 (string-match nndoc-binary-file-names nndoc-address))
357 (let ((coding-system-for-read 'binary))
358 (mm-insert-file-contents nndoc-address))
359 (if (stringp nndoc-address)
360 (nnheader-insert-file-contents nndoc-address)
361 (insert-buffer-substring nndoc-address))
362 (run-hooks 'nndoc-open-document-hook)))))
eec82323
LMI
363 ;; Initialize the nndoc structures according to this new document.
364 (when (and nndoc-current-buffer
365 (not nndoc-dissection-alist))
20a673b2 366 (with-current-buffer nndoc-current-buffer
eec82323 367 (nndoc-set-delims)
6748645f
LMI
368 (if (eq nndoc-article-type 'mime-parts)
369 (nndoc-dissect-mime-parts)
370 (nndoc-dissect-buffer))))
eec82323
LMI
371 (unless nndoc-current-buffer
372 (nndoc-close-server))
373 ;; Return whether we managed to select a file.
374 nndoc-current-buffer))
375
376;;;
377;;; Deciding what document type we have
378;;;
379
380(defun nndoc-set-delims ()
381 "Set the nndoc delimiter variables according to the type of the document."
382 (let ((vars '(nndoc-file-begin
383 nndoc-first-article
6748645f
LMI
384 nndoc-article-begin-function
385 nndoc-head-begin nndoc-head-end
eec82323
LMI
386 nndoc-file-end nndoc-article-begin
387 nndoc-body-begin nndoc-body-end-function nndoc-body-end
388 nndoc-prepare-body-function nndoc-article-transform-function
2e76c12c 389 nndoc-header-transform-function
eec82323 390 nndoc-generate-head-function nndoc-body-begin-function
b092f83d
SZ
391 nndoc-head-begin-function
392 nndoc-generate-article-function
846daf20
LMI
393 nndoc-dissection-function
394 nndoc-pre-dissection-function)))
eec82323
LMI
395 (while vars
396 (set (pop vars) nil)))
397 (let (defs)
398 ;; Guess away until we find the real file type.
399 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
400 nndoc-type-alist))))
401 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
402 ;; Set the nndoc variables.
403 (while defs
404 (set (intern (format "nndoc-%s" (caar defs)))
405 (cdr (pop defs))))))
406
407(defun nndoc-guess-type (subtype)
408 (let ((alist nndoc-type-alist)
409 results result entry)
410 (while (and (not result)
411 (setq entry (pop alist)))
412 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
413 (goto-char (point-min))
16409b0b
GM
414 ;; Remove blank lines.
415 (while (eq (following-char) ?\n)
416 (delete-char 1))
eec82323
LMI
417 (when (numberp (setq result (funcall (intern
418 (format "nndoc-%s-type-p"
419 (car entry))))))
420 (push (cons result entry) results)
421 (setq result nil))))
422 (unless (or result results)
423 (error "Document is not of any recognized type"))
424 (if result
425 (car entry)
23f87bed 426 (cadar (last (sort results 'car-less-than-car))))))
eec82323
LMI
427
428;;;
429;;; Built-in type predicates and functions
430;;;
431
432(defun nndoc-mbox-type-p ()
433 (when (looking-at message-unix-mail-delimiter)
434 t))
435
436(defun nndoc-mbox-article-begin ()
437 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
438 (goto-char (match-beginning 0))))
439
440(defun nndoc-mbox-body-end ()
441 (let ((beg (point))
442 len end)
443 (when
444 (save-excursion
445 (and (re-search-backward
446 (concat "^" message-unix-mail-delimiter) nil t)
447 (setq end (point))
448 (search-forward "\n\n" beg t)
449 (re-search-backward
450 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
e9bd5782 451 (setq len (string-to-number (match-string 1)))
eec82323
LMI
452 (search-forward "\n\n" beg t)
453 (unless (= (setq len (+ (point) len)) (point-max))
454 (and (< len (point-max))
455 (goto-char len)
456 (looking-at message-unix-mail-delimiter)))))
457 (goto-char len))))
458
459(defun nndoc-mmdf-type-p ()
460 (when (looking-at "\^A\^A\^A\^A$")
461 t))
462
463(defun nndoc-news-type-p ()
464 (when (looking-at "^Path:.*\n")
465 t))
466
467(defun nndoc-rnews-type-p ()
468 (when (looking-at "#! *rnews")
469 t))
470
471(defun nndoc-rnews-body-end ()
472 (and (re-search-backward nndoc-article-begin nil t)
473 (forward-line 1)
e9bd5782 474 (goto-char (+ (point) (string-to-number (match-string 1))))))
eec82323 475
846daf20
LMI
476(defun nndoc-google-type-p ()
477 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
478 t))
479
480(defun nndoc-decode-content-transfer-encoding ()
481 (let ((encoding
482 (save-restriction
483 (message-narrow-to-head)
484 (message-fetch-field "content-transfer-encoding"))))
485 (when (and encoding
486 (search-forward "\n\n" nil t))
487 (save-restriction
488 (narrow-to-region (point) (point-max))
489 (mm-decode-content-transfer-encoding
490 (intern (downcase (mail-header-strip encoding))))))))
491
eec82323
LMI
492(defun nndoc-babyl-type-p ()
493 (when (re-search-forward "\^_\^L *\n" nil t)
494 t))
495
496(defun nndoc-babyl-body-begin ()
497 (re-search-forward "^\n" nil t)
6748645f 498 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
499 (let ((next (or (save-excursion
500 (re-search-forward nndoc-article-begin nil t))
501 (point-max))))
502 (unless (re-search-forward "^\n" next t)
503 (goto-char next)
504 (forward-line -1)
505 (insert "\n")
506 (forward-line -1)))))
507
508(defun nndoc-babyl-head-begin ()
509 (when (re-search-forward "^[0-9].*\n" nil t)
6748645f 510 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
511 (forward-line 1))
512 t))
513
514(defun nndoc-forward-type-p ()
b092f83d 515 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
16409b0b 516 nil t)
23f87bed 517 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
eec82323
LMI
518 t))
519
520(defun nndoc-rfc934-type-p ()
521 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
522 (not (re-search-forward "^Subject:.*digest" nil t))
523 (not (re-search-backward "^From:" nil t 2))
524 (not (re-search-forward "^From:" nil t 2)))
525 t))
526
b092f83d
SZ
527(defun nndoc-mailman-type-p ()
528 (when (re-search-forward "^--__--__--\n+" nil t)
529 t))
530
eec82323
LMI
531(defun nndoc-rfc822-forward-type-p ()
532 (save-restriction
533 (message-narrow-to-head)
534 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
535 t)))
536
537(defun nndoc-rfc822-forward-body-end-function ()
538 (goto-char (point-max)))
539
23f87bed
MB
540(defun nndoc-rfc822-forward-generate-article (article &optional head)
541 (let ((entry (cdr (assq article nndoc-dissection-alist)))
542 (begin (point))
543 encoding)
544 (with-current-buffer nndoc-current-buffer
545 (save-restriction
546 (message-narrow-to-head)
547 (setq encoding (message-fetch-field "content-transfer-encoding"))))
548 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
549 (when encoding
550 (save-restriction
551 (narrow-to-region begin (point-max))
552 (mm-decode-content-transfer-encoding
553 (intern (downcase (mail-header-strip encoding))))))
554 (when head
555 (goto-char begin)
556 (when (search-forward "\n\n" nil t)
557 (delete-region (1- (point)) (point-max)))))
558 t)
559
560(defun nndoc-rfc822-forward-generate-head (article)
561 (nndoc-rfc822-forward-generate-article article 'head))
562
6748645f
LMI
563(defun nndoc-mime-parts-type-p ()
564 (let ((case-fold-search t)
565 (limit (search-forward "\n\n" nil t)))
566 (goto-char (point-min))
567 (when (and limit
16409b0b
GM
568 (re-search-forward
569 (concat "\
570^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
571 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
572 limit t))
6748645f
LMI
573 t)))
574
575(defun nndoc-transform-mime-parts (article)
16409b0b
GM
576 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
577 (headers (nth 5 entry)))
578 (when headers
6748645f 579 (goto-char (point-min))
16409b0b
GM
580 (insert headers))))
581
582(defun nndoc-generate-mime-parts-head (article)
583 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
584 (headers (nth 6 entry)))
585 (save-restriction
586 (narrow-to-region (point) (point))
587 (insert-buffer-substring
588 nndoc-current-buffer (car entry) (nth 1 entry))
589 (goto-char (point-max)))
590 (when headers
591 (insert headers))))
6748645f 592
eec82323
LMI
593(defun nndoc-clari-briefs-type-p ()
594 (when (let ((case-fold-search nil))
595 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
596 t))
597
598(defun nndoc-transform-clari-briefs (article)
599 (goto-char (point-min))
600 (when (looking-at " *\\*\\(.*\\)\n")
601 (replace-match "" t t))
602 (nndoc-generate-clari-briefs-head article))
603
604(defun nndoc-generate-clari-briefs-head (article)
605 (let ((entry (cdr (assq article nndoc-dissection-alist)))
606 subject from)
20a673b2 607 (with-current-buffer nndoc-current-buffer
eec82323
LMI
608 (save-restriction
609 (narrow-to-region (car entry) (nth 3 entry))
610 (goto-char (point-min))
611 (when (looking-at " *\\*\\(.*\\)$")
612 (setq subject (match-string 1))
613 (when (string-match "[ \t]+$" subject)
614 (setq subject (substring subject 0 (match-beginning 0)))))
615 (when
616 (let ((case-fold-search nil))
617 (re-search-forward
618 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
619 (setq from (match-string 1)))))
620 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
621 "\nSubject: " (or subject "(no subject)") "\n")))
622
b092f83d
SZ
623(defun nndoc-exim-bounce-type-p ()
624 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
625 t))
626
627(defun nndoc-exim-bounce-body-end-function ()
628 (goto-char (point-max)))
629
16409b0b 630
eec82323
LMI
631(defun nndoc-mime-digest-type-p ()
632 (let ((case-fold-search t)
633 boundary-id b-delimiter entry)
634 (when (and
635 (re-search-forward
636 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
6748645f 637 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
eec82323
LMI
638 nil t)
639 (match-beginning 1))
640 (setq boundary-id (match-string 1)
16409b0b 641 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
eec82323
LMI
642 (setq entry (assq 'mime-digest nndoc-type-alist))
643 (setcdr entry
644 (list
16409b0b 645 (cons 'head-begin "^ ?\n")
eec82323
LMI
646 (cons 'head-end "^ ?$")
647 (cons 'body-begin "^ ?\n")
648 (cons 'article-begin b-delimiter)
649 (cons 'body-end-function 'nndoc-digest-body-end)
b092f83d 650 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
eec82323
LMI
651 t)))
652
653(defun nndoc-standard-digest-type-p ()
654 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
655 (re-search-forward
656 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
657 t))
658
659(defun nndoc-digest-body-end ()
660 (and (re-search-forward nndoc-article-begin nil t)
661 (goto-char (match-beginning 0))))
662
663(defun nndoc-slack-digest-type-p ()
664 0)
665
2e76c12c
LMI
666(defun nndoc-git-type-p ()
667 (and (search-forward "\n- Log ---" nil t)
668 (search-forward "\ncommit " nil t)
669 (search-forward "\nAuthor: " nil t)))
670
671(defun nndoc-transform-git-article (article)
672 (goto-char (point-min))
673 (when (re-search-forward "^Author: " nil t)
674 (replace-match "From: " t t)))
675
c872595d 676(defun nndoc-transform-git-headers (entry)
2e76c12c 677 (goto-char (point-min))
c872595d
LMI
678 (when (re-search-forward "^Author: " nil t)
679 (replace-match "From: " t t))
680 (let (subject)
681 (with-current-buffer nndoc-current-buffer
682 (goto-char (car entry))
683 (when (search-forward "\n\n" nil t)
684 (setq subject (buffer-substring (point) (line-end-position)))))
685 (when subject
686 (goto-char (point-min))
687 (forward-line 1)
688 (insert (format "Subject: %s\n" subject)))))
2e76c12c 689
eec82323
LMI
690(defun nndoc-lanl-gov-announce-type-p ()
691 (when (let ((case-fold-search nil))
01c52d31 692 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
eec82323
LMI
693 t))
694
695(defun nndoc-transform-lanl-gov-announce (article)
01c52d31
MB
696 (let ((case-fold-search nil))
697 (goto-char (point-max))
698 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
699 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
b092f83d 700 (goto-char (point-min))
01c52d31
MB
701 (while (re-search-forward "^\\\\\\\\$" nil t)
702 (replace-match "" t nil))
703 (goto-char (point-min))
704 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
705 (replace-match "Date: \\1 (revised) " t nil))
706 (goto-char (point-min))
707 (unless (re-search-forward "^From" nil t)
b092f83d 708 (goto-char (point-min))
01c52d31
MB
709 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
710 (goto-char (point-min))
711 (insert "From: " (match-string 1) "\n")))
712 (when (re-search-forward "^arXiv:" nil t)
713 (replace-match "Paper: arXiv:" t nil))))
eec82323
LMI
714
715(defun nndoc-generate-lanl-gov-head (article)
716 (let ((entry (cdr (assq article nndoc-dissection-alist)))
b092f83d
SZ
717 (from "<no address given>")
718 subject date)
20a673b2 719 (with-current-buffer nndoc-current-buffer
eec82323 720 (save-restriction
b092f83d
SZ
721 (narrow-to-region (car entry) (nth 1 entry))
722 (goto-char (point-min))
01c52d31
MB
723 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
724 (setq subject (concat " (" (match-string 2) ")"))
b092f83d
SZ
725 (when (re-search-forward "^From: \\(.*\\)" nil t)
726 (setq from (concat "<"
a1506d29 727 (cadr (funcall gnus-extract-address-components
b092f83d
SZ
728 (match-string 1))) ">")))
729 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
730 (setq date (match-string 1))
731 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
732 (setq date (match-string 1))))
733 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
734 nil t)
735 (setq subject (concat (match-string 1) subject))
736 (setq from (concat (match-string 2) " " from))))))
eec82323
LMI
737 (while (and from (string-match "(\[^)\]*)" from))
738 (setq from (replace-match "" t t from)))
739 (insert "From: " (or from "unknown")
b092f83d
SZ
740 "\nSubject: " (or subject "(no subject)") "\n")
741 (if date (insert "Date: " date))))
eec82323 742
16409b0b
GM
743(defun nndoc-nsmail-type-p ()
744 (when (looking-at "From - ")
745 t))
746
747(defun nndoc-outlook-article-begin ()
748 (prog1 (re-search-forward "From:\\|Received:" nil t)
749 (goto-char (match-beginning 0))))
750
751(defun nndoc-outlook-type-p ()
752 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
753 (looking-at "JMF"))
754
b092f83d 755(defun nndoc-oe-dbx-type-p ()
62dc3ac3 756 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
b092f83d
SZ
757
758(defun nndoc-read-little-endian ()
759 (+ (prog1 (char-after) (forward-char 1))
760 (lsh (prog1 (char-after) (forward-char 1)) 8)
761 (lsh (prog1 (char-after) (forward-char 1)) 16)
762 (lsh (prog1 (char-after) (forward-char 1)) 24)))
763
764(defun nndoc-oe-dbx-decode-block ()
765 (list
766 (nndoc-read-little-endian) ;; this address
767 (nndoc-read-little-endian) ;; next address offset
768 (nndoc-read-little-endian) ;; blocksize
769 (nndoc-read-little-endian))) ;; next address
770
771(defun nndoc-oe-dbx-dissection ()
772 (let ((i 0) blk p tp)
773 (goto-char 60117) ;; 0x0000EAD4+1
774 (setq p (point))
775 (unless (eobp)
776 (setq blk (nndoc-oe-dbx-decode-block)))
777 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
778 (> (nth 3 blk) p)))
779 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
780 (while (and (> (car blk) 0) (> (nth 3 blk) p))
781 (goto-char (1+ (nth 3 blk)))
782 (setq blk (nndoc-oe-dbx-decode-block)))
783 (if (or (<= (car blk) p)
784 (<= (nth 1 blk) 0)
785 (not (zerop (nth 3 blk))))
786 (setq blk nil)
787 (setq tp (+ (car blk) (nth 1 blk) 17))
788 (if (or (<= tp p) (>= tp (point-max)))
789 (setq blk nil)
790 (goto-char tp)
791 (setq p tp
792 blk (nndoc-oe-dbx-decode-block)))))))
793
794(defun nndoc-oe-dbx-generate-article (article &optional head)
795 (let ((entry (cdr (assq article nndoc-dissection-alist)))
796 (cur (current-buffer))
797 (begin (point))
798 blk p)
799 (with-current-buffer nndoc-current-buffer
800 (setq p (car entry))
801 (while (> p (point-min))
802 (goto-char p)
803 (setq blk (nndoc-oe-dbx-decode-block))
804 (setq p (point))
805 (with-current-buffer cur
806 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
807 (setq p (1+ (nth 3 blk)))))
808 (goto-char begin)
809 (while (re-search-forward "\r$" nil t)
d355a0b7 810 (delete-char -1))
b092f83d
SZ
811 (when head
812 (goto-char begin)
813 (when (search-forward "\n\n" nil t)
814 (setcar (cddddr entry) (count-lines (point) (point-max)))
815 (delete-region (1- (point)) (point-max))))
816 t))
817
818(defun nndoc-oe-dbx-generate-head (article)
819 (nndoc-oe-dbx-generate-article article 'head))
820
821(defun nndoc-mail-in-mail-type-p ()
822 (let (found)
823 (save-excursion
824 (catch 'done
825 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
826 (setq found 0)
827 (forward-line)
828 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
829 (if (looking-at "[-A-Za-z0-9]+:")
830 (setq found (1+ found)))
831 (forward-line))
832 (if (and (> found 0) (looking-at "\n"))
833 (throw 'done 9999)))
834 nil))))
835
836(defun nndoc-mail-in-mail-article-begin ()
837 (let (point found)
838 (if (catch 'done
839 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
840 (setq found 0)
841 (setq point (match-beginning 1))
842 (forward-line)
843 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
844 (if (looking-at "[-A-Za-z0-9]+:")
845 (setq found (1+ found)))
846 (forward-line))
847 (if (and (> found 0) (looking-at "\n"))
848 (throw 'done t)))
849 nil)
850 (goto-char point))))
851
6748645f
LMI
852(deffoo nndoc-request-accept-article (group &optional server last)
853 nil)
854
eec82323
LMI
855;;;
856;;; Functions for dissecting the documents
857;;;
858
859(defun nndoc-search (regexp)
860 (prog1
861 (re-search-forward regexp nil t)
862 (beginning-of-line)))
863
864(defun nndoc-dissect-buffer ()
865 "Go through the document and partition it into heads/bodies/articles."
866 (let ((i 0)
867 (first t)
23f87bed 868 art-begin head-begin head-end body-begin body-end)
eec82323 869 (setq nndoc-dissection-alist nil)
20a673b2 870 (with-current-buffer nndoc-current-buffer
eec82323 871 (goto-char (point-min))
16409b0b
GM
872 ;; Remove blank lines.
873 (while (eq (following-char) ?\n)
874 (delete-char 1))
846daf20
LMI
875 (when nndoc-pre-dissection-function
876 (save-excursion
877 (funcall nndoc-pre-dissection-function)))
b092f83d
SZ
878 (if nndoc-dissection-function
879 (funcall nndoc-dissection-function)
880 ;; Find the beginning of the file.
881 (when nndoc-file-begin
882 (nndoc-search nndoc-file-begin))
883 ;; Go through the file.
884 (while (if (and first nndoc-first-article)
885 (nndoc-search nndoc-first-article)
23f87bed
MB
886 (if art-begin
887 (goto-char art-begin)
888 (nndoc-article-begin)))
889 (setq first nil
890 art-begin nil)
b092f83d
SZ
891 (cond (nndoc-head-begin-function
892 (funcall nndoc-head-begin-function))
893 (nndoc-head-begin
894 (nndoc-search nndoc-head-begin)))
895 (if (or (eobp)
896 (and nndoc-file-end
897 (looking-at nndoc-file-end)))
898 (goto-char (point-max))
899 (setq head-begin (point))
900 (nndoc-search (or nndoc-head-end "^$"))
901 (setq head-end (point))
902 (if nndoc-body-begin-function
903 (funcall nndoc-body-begin-function)
904 (nndoc-search (or nndoc-body-begin "^\n")))
905 (setq body-begin (point))
906 (or (and nndoc-body-end-function
907 (funcall nndoc-body-end-function))
908 (and nndoc-body-end
909 (nndoc-search nndoc-body-end))
23f87bed
MB
910 (and (nndoc-article-begin)
911 (setq art-begin (point)))
b092f83d
SZ
912 (progn
913 (goto-char (point-max))
914 (when nndoc-file-end
915 (and (re-search-backward nndoc-file-end nil t)
916 (beginning-of-line)))))
917 (setq body-end (point))
918 (push (list (incf i) head-begin head-end body-begin body-end
919 (count-lines body-begin body-end))
389b76fa
G
920 nndoc-dissection-alist)))))
921 (setq nndoc-dissection-alist (nreverse nndoc-dissection-alist))))
eec82323
LMI
922
923(defun nndoc-article-begin ()
924 (if nndoc-article-begin-function
925 (funcall nndoc-article-begin-function)
926 (ignore-errors
927 (nndoc-search nndoc-article-begin))))
928
929(defun nndoc-unquote-dashes ()
930 "Unquote quoted non-separators in digests."
931 (while (re-search-forward "^- -"nil t)
932 (replace-match "-" t t)))
933
6748645f
LMI
934;; Against compiler warnings.
935(defvar nndoc-mime-split-ordinal)
936
937(defun nndoc-dissect-mime-parts ()
938 "Go through a MIME composite article and partition it into sub-articles.
939When a MIME entity contains sub-entities, dissection produces one article for
940the header of this entity, and one article per sub-entity."
941 (setq nndoc-dissection-alist nil
942 nndoc-mime-split-ordinal 0)
20a673b2 943 (with-current-buffer nndoc-current-buffer
16409b0b
GM
944 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
945
946(defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
947 position parent)
948 "Dissect an entity, within a composite MIME message.
949The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
950ARTICLE-INSERT should be added at beginning for generating a full article.
6748645f
LMI
951The string POSITION holds a dotted decimal representation of the article
952position in the hierarchical structure, it is nil for the outer entity.
16409b0b
GM
953PARENT is the message-ID of the parent summary line, or nil for none."
954 (let ((case-fold-search t)
955 (message-id (nnmail-message-id))
956 head-end body-begin summary-insert message-rfc822 multipart-any
957 subject content-type type subtype boundary-regexp)
958 ;; Gracefully handle a missing body.
959 (goto-char head-begin)
2e36fdce
DL
960 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
961 (search-forward "\n\n" body-end t))
16409b0b
GM
962 (setq head-end (1- (point))
963 body-begin (point))
964 (setq head-end body-end
965 body-begin body-end))
966 (narrow-to-region head-begin head-end)
967 ;; Save MIME attributes.
968 (goto-char head-begin)
969 (setq content-type (message-fetch-field "Content-Type"))
970 (when content-type
971 (when (string-match
972 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
973 (setq type (downcase (match-string 1 content-type))
974 subtype (downcase (match-string 2 content-type))
975 message-rfc822 (and (string= type "message")
976 (string= subtype "rfc822"))
977 multipart-any (string= type "multipart")))
978 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
979 (setq subject (match-string 1 content-type)))
980 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
981 (setq boundary-regexp (concat "^--"
982 (regexp-quote
983 (match-string 1 content-type))
984 "\\(--\\)?[ \t]*\n"))))
985 (unless subject
986 (when (or multipart-any (not article-insert))
987 (setq subject (message-fetch-field "Subject"))))
988 (unless type
989 (setq type "text"
990 subtype "plain"))
991 ;; Prepare the article and summary inserts.
992 (unless article-insert
23f87bed 993 (setq article-insert (buffer-string)
16409b0b 994 head-end head-begin))
b092f83d
SZ
995 ;; Fix MIME-Version
996 (unless (string-match "MIME-Version:" article-insert)
997 (setq article-insert
998 (concat article-insert "MIME-Version: 1.0\n")))
16409b0b
GM
999 (setq summary-insert article-insert)
1000 ;; - summary Subject.
1001 (setq summary-insert
1002 (let ((line (concat "Subject: <" position
1003 (and position multipart-any ".")
1004 (and multipart-any "*")
1005 (and (or position multipart-any) " ")
1006 (cond ((string= subtype "plain") type)
1007 ((string= subtype "basic") type)
1008 (t subtype))
1009 ">"
1010 (and subject " ")
1011 subject
1012 "\n")))
1013 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
1014 (replace-match line t t summary-insert)
1015 (concat summary-insert line))))
1016 ;; - summary Message-ID.
1017 (setq summary-insert
1018 (let ((line (concat "Message-ID: " message-id "\n")))
1019 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
1020 (replace-match line t t summary-insert)
1021 (concat summary-insert line))))
1022 ;; - summary References.
1023 (when parent
1024 (setq summary-insert
1025 (let ((line (concat "References: " parent "\n")))
1026 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
1027 summary-insert)
1028 (replace-match line t t summary-insert)
1029 (concat summary-insert line)))))
1030 ;; Generate dissection information for this entity.
1031 (push (list (incf nndoc-mime-split-ordinal)
1032 head-begin head-end body-begin body-end
1033 (count-lines body-begin body-end)
1034 article-insert summary-insert)
1035 nndoc-dissection-alist)
1036 ;; Recurse for all sub-entities, if any.
1037 (widen)
1038 (cond
1039 (message-rfc822
1040 (save-excursion
1041 (nndoc-dissect-mime-parts-sub body-begin body-end nil
1042 position message-id)))
1043 ((and multipart-any boundary-regexp)
1044 (let ((part-counter 0)
1045 part-begin part-end eof-flag)
1046 (while (string-match "\
1047^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1048 article-insert)
1049 (setq article-insert (replace-match "" t t article-insert)))
1050 (let ((case-fold-search nil))
1051 (goto-char body-begin)
1052 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
6748645f 1053 (while (not eof-flag)
16409b0b
GM
1054 (setq part-begin (point))
1055 (cond ((re-search-forward boundary-regexp body-end t)
6748645f
LMI
1056 (or (not (match-string 1))
1057 (string= (match-string 1) "")
1058 (setq eof-flag t))
1059 (forward-line -1)
16409b0b 1060 (setq part-end (point))
6748645f 1061 (forward-line 1))
16409b0b 1062 (t (setq part-end body-end
6748645f 1063 eof-flag t)))
16409b0b
GM
1064 (save-excursion
1065 (nndoc-dissect-mime-parts-sub
1066 part-begin part-end article-insert
1067 (concat position
1068 (and position ".")
1069 (format "%d" (incf part-counter)))
1070 message-id)))))))))
6748645f 1071
eec82323
LMI
1072;;;###autoload
1073(defun nndoc-add-type (definition &optional position)
1074 "Add document DEFINITION to the list of nndoc document definitions.
1075If POSITION is nil or `last', the definition will be added
1076as the last checked definition, if t or `first', add as the
1077first definition, and if any other symbol, add after that
1078symbol in the alist."
1079 ;; First remove any old instances.
36d3245f 1080 (gnus-alist-pull (car definition) nndoc-type-alist)
eec82323
LMI
1081 ;; Then enter the new definition in the proper place.
1082 (cond
1083 ((or (null position) (eq position 'last))
1084 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1085 ((or (eq position t) (eq position 'first))
1086 (push definition nndoc-type-alist))
1087 (t
1088 (let ((list (memq (assq position nndoc-type-alist)
1089 nndoc-type-alist)))
1090 (unless list
1091 (error "No such position: %s" position))
1092 (setcdr list (cons definition (cdr list)))))))
1093
1094(provide 'nndoc)
1095
1096;;; nndoc.el ends here