Replace some eshell functions that duplicate standard functions.
[bpt/emacs.git] / lisp / gnus / nndoc.el
CommitLineData
eec82323 1;;; nndoc.el --- single file access for Gnus
e84b4b86
TTN
2
3;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
114f9c96 4;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
eec82323 5
6748645f 6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
b092f83d 7;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
eec82323
LMI
8;; Keywords: news
9
10;; This file is part of GNU Emacs.
11
5e809f55 12;; GNU Emacs is free software: you can redistribute it and/or modify
eec82323 13;; it under the terms of the GNU General Public License as published by
5e809f55
GM
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
eec82323
LMI
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
5e809f55 23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
eec82323
LMI
24
25;;; Commentary:
26
b092f83d
SZ
27;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
28
eec82323
LMI
29;;; Code:
30
31(require 'nnheader)
32(require 'message)
33(require 'nnmail)
34(require 'nnoo)
6748645f 35(require 'gnus-util)
16409b0b 36(require 'mm-util)
eec82323
LMI
37(eval-when-compile (require 'cl))
38
39(nnoo-declare nndoc)
40
41(defvoo nndoc-article-type 'guess
42 "*Type of the file.
43One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
16409b0b 44`rfc934', `rfc822-forward', `mime-parts', `standard-digest',
b092f83d
SZ
45`slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
46`mailman', `exim-bounce', or `guess'.")
eec82323
LMI
47
48(defvoo nndoc-post-type 'mail
49 "*Whether the nndoc group is `mail' or `post'.")
50
6748645f
LMI
51(defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
52 "Hook run after opening a document.
53The default function removes all trailing carriage returns
16409b0b 54from the document.")
6748645f 55
eec82323
LMI
56(defvar nndoc-type-alist
57 `((mmdf
58 (article-begin . "^\^A\^A\^A\^A\n")
59 (body-end . "^\^A\^A\^A\^A\n"))
23f87bed
MB
60 (mime-digest
61 (article-begin . "")
62 (head-begin . "^ ?\n")
63 (head-end . "^ ?$")
64 (body-end . "")
65 (file-end . "")
66 (subtype digest guess))
16409b0b
GM
67 (nsmail
68 (article-begin . "^From - "))
eec82323
LMI
69 (news
70 (article-begin . "^Path:"))
71 (rnews
72 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
73 (body-end-function . nndoc-rnews-body-end))
74 (mbox
75 (article-begin-function . nndoc-mbox-article-begin)
76 (body-end-function . nndoc-mbox-body-end))
77 (babyl
78 (article-begin . "\^_\^L *\n")
79 (body-end . "\^_")
80 (body-begin-function . nndoc-babyl-body-begin)
81 (head-begin-function . nndoc-babyl-head-begin))
a3f57c41
G
82 (mime-parts
83 (generate-head-function . nndoc-generate-mime-parts-head)
84 (article-transform-function . nndoc-transform-mime-parts))
23f87bed
MB
85 (exim-bounce
86 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
87 (body-end-function . nndoc-exim-bounce-body-end-function))
eec82323
LMI
88 (rfc934
89 (article-begin . "^--.*\n+")
90 (body-end . "^--.*$")
91 (prepare-body-function . nndoc-unquote-dashes))
b092f83d
SZ
92 (mailman
93 (article-begin . "^--__--__--\n\nMessage:")
94 (body-end . "^--__--__--$")
95 (prepare-body-function . nndoc-unquote-dashes))
eec82323
LMI
96 (clari-briefs
97 (article-begin . "^ \\*")
98 (body-end . "^\t------*[ \t]^*\n^ \\*")
99 (body-begin . "^\t")
100 (head-end . "^\t")
101 (generate-head-function . nndoc-generate-clari-briefs-head)
102 (article-transform-function . nndoc-transform-clari-briefs))
c9fc72fa 103
eec82323 104 (standard-digest
6748645f
LMI
105 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
106 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
eec82323
LMI
107 (prepare-body-function . nndoc-unquote-dashes)
108 (body-end-function . nndoc-digest-body-end)
6748645f
LMI
109 (head-end . "^ *$")
110 (body-begin . "^ *\n")
eec82323
LMI
111 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
112 (subtype digest guess))
113 (slack-digest
114 (article-begin . "^------------------------------*[\n \t]+")
115 (head-end . "^ ?$")
116 (body-end-function . nndoc-digest-body-end)
117 (body-begin . "^ ?$")
118 (file-end . "^End of")
119 (prepare-body-function . nndoc-unquote-dashes)
120 (subtype digest guess))
846daf20
LMI
121 (google
122 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
123 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
124 (head-begin . "^Date:")
125 (head-end . "^$")
126 (body-end-function . nndoc-digest-body-end)
127 (body-begin . "^$")
128 (file-end . "^==============================================================================$")
129 (prepare-body-function . nndoc-unquote-dashes)
130 (subtype digest guess))
eec82323
LMI
131 (lanl-gov-announce
132 (article-begin . "^\\\\\\\\\n")
01c52d31 133 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
eec82323
LMI
134 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
135 (body-begin . "")
b092f83d
SZ
136 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
137 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
eec82323
LMI
138 (generate-head-function . nndoc-generate-lanl-gov-head)
139 (article-transform-function . nndoc-transform-lanl-gov-announce)
140 (subtype preprints guess))
2e76c12c
LMI
141 (git
142 (file-begin . "\n- Log ---.*")
143 (article-begin . "^commit ")
144 (head-begin . "^Author: ")
145 (body-begin . "^$")
146 (file-end . "\n-----------------------------------------------------------------------")
147 (article-transform-function . nndoc-transform-git-article)
148 (header-transform-function . nndoc-transform-git-headers))
eec82323 149 (rfc822-forward
23f87bed
MB
150 (article-begin . "^\n+")
151 (body-end-function . nndoc-rfc822-forward-body-end-function)
152 (generate-head-function . nndoc-rfc822-forward-generate-head)
153 (generate-article-function . nndoc-rfc822-forward-generate-article))
16409b0b
GM
154 (outlook
155 (article-begin-function . nndoc-outlook-article-begin)
156 (body-end . "\0"))
b092f83d
SZ
157 (oe-dbx ;; Outlook Express DBX format
158 (dissection-function . nndoc-oe-dbx-dissection)
159 (generate-head-function . nndoc-oe-dbx-generate-head)
160 (generate-article-function . nndoc-oe-dbx-generate-article))
161 (forward
162 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
163 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
164 (prepare-body-function . nndoc-unquote-dashes))
165 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
166 (article-begin-function . nndoc-mail-in-mail-article-begin))
eec82323
LMI
167 (guess
168 (guess . t)
169 (subtype nil))
170 (digest
171 (guess . t)
172 (subtype nil))
173 (preprints
174 (guess . t)
175 (subtype nil))))
176
b092f83d
SZ
177(defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
178 "Regexp for binary nndoc file names.")
179
eec82323 180\f
eec82323
LMI
181(defvoo nndoc-file-begin nil)
182(defvoo nndoc-first-article nil)
eec82323
LMI
183(defvoo nndoc-article-begin nil)
184(defvoo nndoc-head-begin nil)
185(defvoo nndoc-head-end nil)
186(defvoo nndoc-file-end nil)
187(defvoo nndoc-body-begin nil)
188(defvoo nndoc-body-end-function nil)
189(defvoo nndoc-body-begin-function nil)
190(defvoo nndoc-head-begin-function nil)
191(defvoo nndoc-body-end nil)
6748645f 192;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
16409b0b
GM
193;; following items. ARTICLE acts as the association key and is an ordinal
194;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
195;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
196;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
197;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
198;; generation, respectively. Other headers usually follow directly from the
199;; buffer. Value `nil' means no insert.
eec82323
LMI
200(defvoo nndoc-dissection-alist nil)
201(defvoo nndoc-prepare-body-function nil)
202(defvoo nndoc-generate-head-function nil)
203(defvoo nndoc-article-transform-function nil)
2e76c12c 204(defvoo nndoc-header-transform-function nil)
eec82323 205(defvoo nndoc-article-begin-function nil)
b092f83d
SZ
206(defvoo nndoc-generate-article-function nil)
207(defvoo nndoc-dissection-function nil)
846daf20 208(defvoo nndoc-pre-dissection-function nil)
eec82323
LMI
209
210(defvoo nndoc-status-string "")
211(defvoo nndoc-group-alist nil)
212(defvoo nndoc-current-buffer nil
213 "Current nndoc news buffer.")
214(defvoo nndoc-address nil)
215
216(defconst nndoc-version "nndoc 1.0"
217 "nndoc version.")
218
219\f
220
221;;; Interface functions
222
223(nnoo-define-basics nndoc)
224
225(deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
226 (when (nndoc-possibly-change-buffer newsgroup server)
20a673b2 227 (with-current-buffer nntp-server-buffer
eec82323
LMI
228 (erase-buffer)
229 (let (article entry)
230 (if (stringp (car articles))
231 'headers
232 (while articles
233 (when (setq entry (cdr (assq (setq article (pop articles))
234 nndoc-dissection-alist)))
c872595d
LMI
235 (let ((start (point)))
236 (insert (format "221 %d Article retrieved.\n" article))
237 (if nndoc-generate-head-function
238 (funcall nndoc-generate-head-function article)
239 (insert-buffer-substring
240 nndoc-current-buffer (car entry) (nth 1 entry)))
241 (goto-char (point-max))
242 (unless (eq (char-after (1- (point))) ?\n)
243 (insert "\n"))
244 (insert (format "Lines: %d\n" (nth 4 entry)))
245 (insert ".\n")
246 (when nndoc-header-transform-function
247 (save-excursion
248 (save-restriction
249 (narrow-to-region start (point))
250 (funcall nndoc-header-transform-function entry)))))))
eec82323
LMI
251 (nnheader-fold-continuation-lines)
252 'headers)))))
253
254(deffoo nndoc-request-article (article &optional newsgroup server buffer)
255 (nndoc-possibly-change-buffer newsgroup server)
256 (save-excursion
257 (let ((buffer (or buffer nntp-server-buffer))
258 (entry (cdr (assq article nndoc-dissection-alist)))
259 beg)
260 (set-buffer buffer)
261 (erase-buffer)
262 (when entry
b092f83d
SZ
263 (cond
264 ((stringp article) nil)
265 (nndoc-generate-article-function
266 (funcall nndoc-generate-article-function article))
267 (t
eec82323
LMI
268 (insert-buffer-substring
269 nndoc-current-buffer (car entry) (nth 1 entry))
270 (insert "\n")
271 (setq beg (point))
272 (insert-buffer-substring
273 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
274 (goto-char beg)
275 (when nndoc-prepare-body-function
276 (funcall nndoc-prepare-body-function))
277 (when nndoc-article-transform-function
278 (funcall nndoc-article-transform-function article))
b092f83d 279 t))))))
eec82323 280
286c4fc2 281(deffoo nndoc-request-group (group &optional server dont-check info)
eec82323
LMI
282 "Select news GROUP."
283 (let (number)
284 (cond
285 ((not (nndoc-possibly-change-buffer group server))
286 (nnheader-report 'nndoc "No such file or buffer: %s"
287 nndoc-address))
288 (dont-check
289 (nnheader-report 'nndoc "Selected group %s" group)
290 t)
291 ((zerop (setq number (length nndoc-dissection-alist)))
292 (nndoc-close-group group)
293 (nnheader-report 'nndoc "No articles in group %s" group))
294 (t
295 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
296
229b59da
G
297(deffoo nndoc-retrieve-groups (groups &optional server)
298 (dolist (group groups)
299 (nndoc-request-group group server))
300 t)
301
eec82323
LMI
302(deffoo nndoc-request-type (group &optional article)
303 (cond ((not article) 'unknown)
b092f83d
SZ
304 (nndoc-post-type nndoc-post-type)
305 (t 'unknown)))
eec82323
LMI
306
307(deffoo nndoc-close-group (group &optional server)
308 (nndoc-possibly-change-buffer group server)
309 (and nndoc-current-buffer
310 (buffer-name nndoc-current-buffer)
311 (kill-buffer nndoc-current-buffer))
312 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
313 nndoc-group-alist))
314 (setq nndoc-current-buffer nil)
315 (nnoo-close-server 'nndoc server)
316 (setq nndoc-dissection-alist nil)
317 t)
318
319(deffoo nndoc-request-list (&optional server)
4478e074 320 t)
eec82323
LMI
321
322(deffoo nndoc-request-newgroups (date &optional server)
323 nil)
324
325(deffoo nndoc-request-list-newsgroups (&optional server)
326 nil)
327
328\f
329;;; Internal functions.
330
331(defun nndoc-possibly-change-buffer (group source)
332 (let (buf)
333 (cond
334 ;; The current buffer is this group's buffer.
335 ((and nndoc-current-buffer
336 (buffer-name nndoc-current-buffer)
337 (eq nndoc-current-buffer
338 (setq buf (cdr (assoc group nndoc-group-alist))))))
339 ;; We change buffers by taking an old from the group alist.
340 ;; `source' is either a string (a file name) or a buffer object.
341 (buf
342 (setq nndoc-current-buffer buf))
343 ;; It's a totally new group.
344 ((or (and (bufferp nndoc-address)
345 (buffer-name nndoc-address))
346 (and (stringp nndoc-address)
347 (file-exists-p nndoc-address)
348 (not (file-directory-p nndoc-address))))
349 (push (cons group (setq nndoc-current-buffer
350 (get-buffer-create
351 (concat " *nndoc " group "*"))))
352 nndoc-group-alist)
353 (setq nndoc-dissection-alist nil)
20a673b2 354 (with-current-buffer nndoc-current-buffer
eec82323 355 (erase-buffer)
b092f83d
SZ
356 (if (and (stringp nndoc-address)
357 (string-match nndoc-binary-file-names nndoc-address))
358 (let ((coding-system-for-read 'binary))
359 (mm-insert-file-contents nndoc-address))
360 (if (stringp nndoc-address)
361 (nnheader-insert-file-contents nndoc-address)
362 (insert-buffer-substring nndoc-address))
363 (run-hooks 'nndoc-open-document-hook)))))
eec82323
LMI
364 ;; Initialize the nndoc structures according to this new document.
365 (when (and nndoc-current-buffer
366 (not nndoc-dissection-alist))
20a673b2 367 (with-current-buffer nndoc-current-buffer
eec82323 368 (nndoc-set-delims)
6748645f
LMI
369 (if (eq nndoc-article-type 'mime-parts)
370 (nndoc-dissect-mime-parts)
371 (nndoc-dissect-buffer))))
eec82323
LMI
372 (unless nndoc-current-buffer
373 (nndoc-close-server))
374 ;; Return whether we managed to select a file.
375 nndoc-current-buffer))
376
377;;;
378;;; Deciding what document type we have
379;;;
380
381(defun nndoc-set-delims ()
382 "Set the nndoc delimiter variables according to the type of the document."
383 (let ((vars '(nndoc-file-begin
384 nndoc-first-article
6748645f
LMI
385 nndoc-article-begin-function
386 nndoc-head-begin nndoc-head-end
eec82323
LMI
387 nndoc-file-end nndoc-article-begin
388 nndoc-body-begin nndoc-body-end-function nndoc-body-end
389 nndoc-prepare-body-function nndoc-article-transform-function
2e76c12c 390 nndoc-header-transform-function
eec82323 391 nndoc-generate-head-function nndoc-body-begin-function
b092f83d
SZ
392 nndoc-head-begin-function
393 nndoc-generate-article-function
846daf20
LMI
394 nndoc-dissection-function
395 nndoc-pre-dissection-function)))
eec82323
LMI
396 (while vars
397 (set (pop vars) nil)))
398 (let (defs)
399 ;; Guess away until we find the real file type.
400 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
401 nndoc-type-alist))))
402 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
403 ;; Set the nndoc variables.
404 (while defs
405 (set (intern (format "nndoc-%s" (caar defs)))
406 (cdr (pop defs))))))
407
408(defun nndoc-guess-type (subtype)
409 (let ((alist nndoc-type-alist)
410 results result entry)
411 (while (and (not result)
412 (setq entry (pop alist)))
413 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
414 (goto-char (point-min))
16409b0b
GM
415 ;; Remove blank lines.
416 (while (eq (following-char) ?\n)
417 (delete-char 1))
eec82323
LMI
418 (when (numberp (setq result (funcall (intern
419 (format "nndoc-%s-type-p"
420 (car entry))))))
421 (push (cons result entry) results)
422 (setq result nil))))
423 (unless (or result results)
424 (error "Document is not of any recognized type"))
425 (if result
426 (car entry)
23f87bed 427 (cadar (last (sort results 'car-less-than-car))))))
eec82323
LMI
428
429;;;
430;;; Built-in type predicates and functions
431;;;
432
433(defun nndoc-mbox-type-p ()
434 (when (looking-at message-unix-mail-delimiter)
435 t))
436
437(defun nndoc-mbox-article-begin ()
438 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
439 (goto-char (match-beginning 0))))
440
441(defun nndoc-mbox-body-end ()
442 (let ((beg (point))
443 len end)
444 (when
445 (save-excursion
446 (and (re-search-backward
447 (concat "^" message-unix-mail-delimiter) nil t)
448 (setq end (point))
449 (search-forward "\n\n" beg t)
450 (re-search-backward
451 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
e9bd5782 452 (setq len (string-to-number (match-string 1)))
eec82323
LMI
453 (search-forward "\n\n" beg t)
454 (unless (= (setq len (+ (point) len)) (point-max))
455 (and (< len (point-max))
456 (goto-char len)
457 (looking-at message-unix-mail-delimiter)))))
458 (goto-char len))))
459
460(defun nndoc-mmdf-type-p ()
461 (when (looking-at "\^A\^A\^A\^A$")
462 t))
463
464(defun nndoc-news-type-p ()
465 (when (looking-at "^Path:.*\n")
466 t))
467
468(defun nndoc-rnews-type-p ()
469 (when (looking-at "#! *rnews")
470 t))
471
472(defun nndoc-rnews-body-end ()
473 (and (re-search-backward nndoc-article-begin nil t)
474 (forward-line 1)
e9bd5782 475 (goto-char (+ (point) (string-to-number (match-string 1))))))
eec82323 476
846daf20
LMI
477(defun nndoc-google-type-p ()
478 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
479 t))
480
481(defun nndoc-decode-content-transfer-encoding ()
482 (let ((encoding
483 (save-restriction
484 (message-narrow-to-head)
485 (message-fetch-field "content-transfer-encoding"))))
486 (when (and encoding
487 (search-forward "\n\n" nil t))
488 (save-restriction
489 (narrow-to-region (point) (point-max))
490 (mm-decode-content-transfer-encoding
491 (intern (downcase (mail-header-strip encoding))))))))
492
eec82323
LMI
493(defun nndoc-babyl-type-p ()
494 (when (re-search-forward "\^_\^L *\n" nil t)
495 t))
496
497(defun nndoc-babyl-body-begin ()
498 (re-search-forward "^\n" nil t)
6748645f 499 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
500 (let ((next (or (save-excursion
501 (re-search-forward nndoc-article-begin nil t))
502 (point-max))))
503 (unless (re-search-forward "^\n" next t)
504 (goto-char next)
505 (forward-line -1)
506 (insert "\n")
507 (forward-line -1)))))
508
509(defun nndoc-babyl-head-begin ()
510 (when (re-search-forward "^[0-9].*\n" nil t)
6748645f 511 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
eec82323
LMI
512 (forward-line 1))
513 t))
514
515(defun nndoc-forward-type-p ()
b092f83d 516 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
16409b0b 517 nil t)
23f87bed 518 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
eec82323
LMI
519 t))
520
521(defun nndoc-rfc934-type-p ()
522 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
523 (not (re-search-forward "^Subject:.*digest" nil t))
524 (not (re-search-backward "^From:" nil t 2))
525 (not (re-search-forward "^From:" nil t 2)))
526 t))
527
b092f83d
SZ
528(defun nndoc-mailman-type-p ()
529 (when (re-search-forward "^--__--__--\n+" nil t)
530 t))
531
eec82323
LMI
532(defun nndoc-rfc822-forward-type-p ()
533 (save-restriction
534 (message-narrow-to-head)
535 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
536 t)))
537
538(defun nndoc-rfc822-forward-body-end-function ()
539 (goto-char (point-max)))
540
23f87bed
MB
541(defun nndoc-rfc822-forward-generate-article (article &optional head)
542 (let ((entry (cdr (assq article nndoc-dissection-alist)))
543 (begin (point))
544 encoding)
545 (with-current-buffer nndoc-current-buffer
546 (save-restriction
547 (message-narrow-to-head)
548 (setq encoding (message-fetch-field "content-transfer-encoding"))))
549 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
550 (when encoding
551 (save-restriction
552 (narrow-to-region begin (point-max))
553 (mm-decode-content-transfer-encoding
554 (intern (downcase (mail-header-strip encoding))))))
555 (when head
556 (goto-char begin)
557 (when (search-forward "\n\n" nil t)
558 (delete-region (1- (point)) (point-max)))))
559 t)
560
561(defun nndoc-rfc822-forward-generate-head (article)
562 (nndoc-rfc822-forward-generate-article article 'head))
563
6748645f
LMI
564(defun nndoc-mime-parts-type-p ()
565 (let ((case-fold-search t)
566 (limit (search-forward "\n\n" nil t)))
567 (goto-char (point-min))
568 (when (and limit
16409b0b
GM
569 (re-search-forward
570 (concat "\
571^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
572 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
573 limit t))
6748645f
LMI
574 t)))
575
576(defun nndoc-transform-mime-parts (article)
16409b0b
GM
577 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
578 (headers (nth 5 entry)))
579 (when headers
6748645f 580 (goto-char (point-min))
16409b0b
GM
581 (insert headers))))
582
583(defun nndoc-generate-mime-parts-head (article)
584 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
585 (headers (nth 6 entry)))
586 (save-restriction
587 (narrow-to-region (point) (point))
588 (insert-buffer-substring
589 nndoc-current-buffer (car entry) (nth 1 entry))
590 (goto-char (point-max)))
591 (when headers
592 (insert headers))))
6748645f 593
eec82323
LMI
594(defun nndoc-clari-briefs-type-p ()
595 (when (let ((case-fold-search nil))
596 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
597 t))
598
599(defun nndoc-transform-clari-briefs (article)
600 (goto-char (point-min))
601 (when (looking-at " *\\*\\(.*\\)\n")
602 (replace-match "" t t))
603 (nndoc-generate-clari-briefs-head article))
604
605(defun nndoc-generate-clari-briefs-head (article)
606 (let ((entry (cdr (assq article nndoc-dissection-alist)))
607 subject from)
20a673b2 608 (with-current-buffer nndoc-current-buffer
eec82323
LMI
609 (save-restriction
610 (narrow-to-region (car entry) (nth 3 entry))
611 (goto-char (point-min))
612 (when (looking-at " *\\*\\(.*\\)$")
613 (setq subject (match-string 1))
614 (when (string-match "[ \t]+$" subject)
615 (setq subject (substring subject 0 (match-beginning 0)))))
616 (when
617 (let ((case-fold-search nil))
618 (re-search-forward
619 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
620 (setq from (match-string 1)))))
621 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
622 "\nSubject: " (or subject "(no subject)") "\n")))
623
b092f83d
SZ
624(defun nndoc-exim-bounce-type-p ()
625 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
626 t))
627
628(defun nndoc-exim-bounce-body-end-function ()
629 (goto-char (point-max)))
630
16409b0b 631
eec82323
LMI
632(defun nndoc-mime-digest-type-p ()
633 (let ((case-fold-search t)
634 boundary-id b-delimiter entry)
635 (when (and
636 (re-search-forward
637 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
6748645f 638 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
eec82323
LMI
639 nil t)
640 (match-beginning 1))
641 (setq boundary-id (match-string 1)
16409b0b 642 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
eec82323
LMI
643 (setq entry (assq 'mime-digest nndoc-type-alist))
644 (setcdr entry
645 (list
16409b0b 646 (cons 'head-begin "^ ?\n")
eec82323
LMI
647 (cons 'head-end "^ ?$")
648 (cons 'body-begin "^ ?\n")
649 (cons 'article-begin b-delimiter)
650 (cons 'body-end-function 'nndoc-digest-body-end)
b092f83d 651 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
eec82323
LMI
652 t)))
653
654(defun nndoc-standard-digest-type-p ()
655 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
656 (re-search-forward
657 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
658 t))
659
660(defun nndoc-digest-body-end ()
661 (and (re-search-forward nndoc-article-begin nil t)
662 (goto-char (match-beginning 0))))
663
664(defun nndoc-slack-digest-type-p ()
665 0)
666
2e76c12c
LMI
667(defun nndoc-git-type-p ()
668 (and (search-forward "\n- Log ---" nil t)
669 (search-forward "\ncommit " nil t)
670 (search-forward "\nAuthor: " nil t)))
671
672(defun nndoc-transform-git-article (article)
673 (goto-char (point-min))
674 (when (re-search-forward "^Author: " nil t)
675 (replace-match "From: " t t)))
676
c872595d 677(defun nndoc-transform-git-headers (entry)
2e76c12c 678 (goto-char (point-min))
c872595d
LMI
679 (when (re-search-forward "^Author: " nil t)
680 (replace-match "From: " t t))
681 (let (subject)
682 (with-current-buffer nndoc-current-buffer
683 (goto-char (car entry))
684 (when (search-forward "\n\n" nil t)
685 (setq subject (buffer-substring (point) (line-end-position)))))
686 (when subject
687 (goto-char (point-min))
688 (forward-line 1)
689 (insert (format "Subject: %s\n" subject)))))
2e76c12c 690
eec82323
LMI
691(defun nndoc-lanl-gov-announce-type-p ()
692 (when (let ((case-fold-search nil))
01c52d31 693 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
eec82323
LMI
694 t))
695
696(defun nndoc-transform-lanl-gov-announce (article)
01c52d31
MB
697 (let ((case-fold-search nil))
698 (goto-char (point-max))
699 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
700 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
b092f83d 701 (goto-char (point-min))
01c52d31
MB
702 (while (re-search-forward "^\\\\\\\\$" nil t)
703 (replace-match "" t nil))
704 (goto-char (point-min))
705 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
706 (replace-match "Date: \\1 (revised) " t nil))
707 (goto-char (point-min))
708 (unless (re-search-forward "^From" nil t)
b092f83d 709 (goto-char (point-min))
01c52d31
MB
710 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
711 (goto-char (point-min))
712 (insert "From: " (match-string 1) "\n")))
713 (when (re-search-forward "^arXiv:" nil t)
714 (replace-match "Paper: arXiv:" t nil))))
eec82323
LMI
715
716(defun nndoc-generate-lanl-gov-head (article)
717 (let ((entry (cdr (assq article nndoc-dissection-alist)))
b092f83d
SZ
718 (from "<no address given>")
719 subject date)
20a673b2 720 (with-current-buffer nndoc-current-buffer
eec82323 721 (save-restriction
b092f83d
SZ
722 (narrow-to-region (car entry) (nth 1 entry))
723 (goto-char (point-min))
01c52d31
MB
724 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
725 (setq subject (concat " (" (match-string 2) ")"))
b092f83d
SZ
726 (when (re-search-forward "^From: \\(.*\\)" nil t)
727 (setq from (concat "<"
a1506d29 728 (cadr (funcall gnus-extract-address-components
b092f83d
SZ
729 (match-string 1))) ">")))
730 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
731 (setq date (match-string 1))
732 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
733 (setq date (match-string 1))))
734 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
735 nil t)
736 (setq subject (concat (match-string 1) subject))
737 (setq from (concat (match-string 2) " " from))))))
eec82323
LMI
738 (while (and from (string-match "(\[^)\]*)" from))
739 (setq from (replace-match "" t t from)))
740 (insert "From: " (or from "unknown")
b092f83d
SZ
741 "\nSubject: " (or subject "(no subject)") "\n")
742 (if date (insert "Date: " date))))
eec82323 743
16409b0b
GM
744(defun nndoc-nsmail-type-p ()
745 (when (looking-at "From - ")
746 t))
747
748(defun nndoc-outlook-article-begin ()
749 (prog1 (re-search-forward "From:\\|Received:" nil t)
750 (goto-char (match-beginning 0))))
751
752(defun nndoc-outlook-type-p ()
753 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
754 (looking-at "JMF"))
755
b092f83d 756(defun nndoc-oe-dbx-type-p ()
62dc3ac3 757 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
b092f83d
SZ
758
759(defun nndoc-read-little-endian ()
760 (+ (prog1 (char-after) (forward-char 1))
761 (lsh (prog1 (char-after) (forward-char 1)) 8)
762 (lsh (prog1 (char-after) (forward-char 1)) 16)
763 (lsh (prog1 (char-after) (forward-char 1)) 24)))
764
765(defun nndoc-oe-dbx-decode-block ()
766 (list
767 (nndoc-read-little-endian) ;; this address
768 (nndoc-read-little-endian) ;; next address offset
769 (nndoc-read-little-endian) ;; blocksize
770 (nndoc-read-little-endian))) ;; next address
771
772(defun nndoc-oe-dbx-dissection ()
773 (let ((i 0) blk p tp)
774 (goto-char 60117) ;; 0x0000EAD4+1
775 (setq p (point))
776 (unless (eobp)
777 (setq blk (nndoc-oe-dbx-decode-block)))
778 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
779 (> (nth 3 blk) p)))
780 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
781 (while (and (> (car blk) 0) (> (nth 3 blk) p))
782 (goto-char (1+ (nth 3 blk)))
783 (setq blk (nndoc-oe-dbx-decode-block)))
784 (if (or (<= (car blk) p)
785 (<= (nth 1 blk) 0)
786 (not (zerop (nth 3 blk))))
787 (setq blk nil)
788 (setq tp (+ (car blk) (nth 1 blk) 17))
789 (if (or (<= tp p) (>= tp (point-max)))
790 (setq blk nil)
791 (goto-char tp)
792 (setq p tp
793 blk (nndoc-oe-dbx-decode-block)))))))
794
795(defun nndoc-oe-dbx-generate-article (article &optional head)
796 (let ((entry (cdr (assq article nndoc-dissection-alist)))
797 (cur (current-buffer))
798 (begin (point))
799 blk p)
800 (with-current-buffer nndoc-current-buffer
801 (setq p (car entry))
802 (while (> p (point-min))
803 (goto-char p)
804 (setq blk (nndoc-oe-dbx-decode-block))
805 (setq p (point))
806 (with-current-buffer cur
807 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
808 (setq p (1+ (nth 3 blk)))))
809 (goto-char begin)
810 (while (re-search-forward "\r$" nil t)
d355a0b7 811 (delete-char -1))
b092f83d
SZ
812 (when head
813 (goto-char begin)
814 (when (search-forward "\n\n" nil t)
815 (setcar (cddddr entry) (count-lines (point) (point-max)))
816 (delete-region (1- (point)) (point-max))))
817 t))
818
819(defun nndoc-oe-dbx-generate-head (article)
820 (nndoc-oe-dbx-generate-article article 'head))
821
822(defun nndoc-mail-in-mail-type-p ()
823 (let (found)
824 (save-excursion
825 (catch 'done
826 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
827 (setq found 0)
828 (forward-line)
829 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
830 (if (looking-at "[-A-Za-z0-9]+:")
831 (setq found (1+ found)))
832 (forward-line))
833 (if (and (> found 0) (looking-at "\n"))
834 (throw 'done 9999)))
835 nil))))
836
837(defun nndoc-mail-in-mail-article-begin ()
838 (let (point found)
839 (if (catch 'done
840 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
841 (setq found 0)
842 (setq point (match-beginning 1))
843 (forward-line)
844 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
845 (if (looking-at "[-A-Za-z0-9]+:")
846 (setq found (1+ found)))
847 (forward-line))
848 (if (and (> found 0) (looking-at "\n"))
849 (throw 'done t)))
850 nil)
851 (goto-char point))))
852
6748645f
LMI
853(deffoo nndoc-request-accept-article (group &optional server last)
854 nil)
855
eec82323
LMI
856;;;
857;;; Functions for dissecting the documents
858;;;
859
860(defun nndoc-search (regexp)
861 (prog1
862 (re-search-forward regexp nil t)
863 (beginning-of-line)))
864
865(defun nndoc-dissect-buffer ()
866 "Go through the document and partition it into heads/bodies/articles."
867 (let ((i 0)
868 (first t)
23f87bed 869 art-begin head-begin head-end body-begin body-end)
eec82323 870 (setq nndoc-dissection-alist nil)
20a673b2 871 (with-current-buffer nndoc-current-buffer
eec82323 872 (goto-char (point-min))
16409b0b
GM
873 ;; Remove blank lines.
874 (while (eq (following-char) ?\n)
875 (delete-char 1))
846daf20
LMI
876 (when nndoc-pre-dissection-function
877 (save-excursion
878 (funcall nndoc-pre-dissection-function)))
b092f83d
SZ
879 (if nndoc-dissection-function
880 (funcall nndoc-dissection-function)
881 ;; Find the beginning of the file.
882 (when nndoc-file-begin
883 (nndoc-search nndoc-file-begin))
884 ;; Go through the file.
885 (while (if (and first nndoc-first-article)
886 (nndoc-search nndoc-first-article)
23f87bed
MB
887 (if art-begin
888 (goto-char art-begin)
889 (nndoc-article-begin)))
890 (setq first nil
891 art-begin nil)
b092f83d
SZ
892 (cond (nndoc-head-begin-function
893 (funcall nndoc-head-begin-function))
894 (nndoc-head-begin
895 (nndoc-search nndoc-head-begin)))
896 (if (or (eobp)
897 (and nndoc-file-end
898 (looking-at nndoc-file-end)))
899 (goto-char (point-max))
900 (setq head-begin (point))
901 (nndoc-search (or nndoc-head-end "^$"))
902 (setq head-end (point))
903 (if nndoc-body-begin-function
904 (funcall nndoc-body-begin-function)
905 (nndoc-search (or nndoc-body-begin "^\n")))
906 (setq body-begin (point))
907 (or (and nndoc-body-end-function
908 (funcall nndoc-body-end-function))
909 (and nndoc-body-end
910 (nndoc-search nndoc-body-end))
23f87bed
MB
911 (and (nndoc-article-begin)
912 (setq art-begin (point)))
b092f83d
SZ
913 (progn
914 (goto-char (point-max))
915 (when nndoc-file-end
916 (and (re-search-backward nndoc-file-end nil t)
917 (beginning-of-line)))))
918 (setq body-end (point))
919 (push (list (incf i) head-begin head-end body-begin body-end
920 (count-lines body-begin body-end))
389b76fa
G
921 nndoc-dissection-alist)))))
922 (setq nndoc-dissection-alist (nreverse nndoc-dissection-alist))))
eec82323
LMI
923
924(defun nndoc-article-begin ()
925 (if nndoc-article-begin-function
926 (funcall nndoc-article-begin-function)
927 (ignore-errors
928 (nndoc-search nndoc-article-begin))))
929
930(defun nndoc-unquote-dashes ()
931 "Unquote quoted non-separators in digests."
932 (while (re-search-forward "^- -"nil t)
933 (replace-match "-" t t)))
934
6748645f
LMI
935;; Against compiler warnings.
936(defvar nndoc-mime-split-ordinal)
937
938(defun nndoc-dissect-mime-parts ()
939 "Go through a MIME composite article and partition it into sub-articles.
940When a MIME entity contains sub-entities, dissection produces one article for
941the header of this entity, and one article per sub-entity."
942 (setq nndoc-dissection-alist nil
943 nndoc-mime-split-ordinal 0)
20a673b2 944 (with-current-buffer nndoc-current-buffer
16409b0b
GM
945 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
946
947(defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
948 position parent)
949 "Dissect an entity, within a composite MIME message.
950The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
951ARTICLE-INSERT should be added at beginning for generating a full article.
6748645f
LMI
952The string POSITION holds a dotted decimal representation of the article
953position in the hierarchical structure, it is nil for the outer entity.
16409b0b
GM
954PARENT is the message-ID of the parent summary line, or nil for none."
955 (let ((case-fold-search t)
956 (message-id (nnmail-message-id))
957 head-end body-begin summary-insert message-rfc822 multipart-any
958 subject content-type type subtype boundary-regexp)
959 ;; Gracefully handle a missing body.
960 (goto-char head-begin)
2e36fdce
DL
961 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
962 (search-forward "\n\n" body-end t))
16409b0b
GM
963 (setq head-end (1- (point))
964 body-begin (point))
965 (setq head-end body-end
966 body-begin body-end))
967 (narrow-to-region head-begin head-end)
968 ;; Save MIME attributes.
969 (goto-char head-begin)
970 (setq content-type (message-fetch-field "Content-Type"))
971 (when content-type
972 (when (string-match
973 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
974 (setq type (downcase (match-string 1 content-type))
975 subtype (downcase (match-string 2 content-type))
976 message-rfc822 (and (string= type "message")
977 (string= subtype "rfc822"))
978 multipart-any (string= type "multipart")))
979 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
980 (setq subject (match-string 1 content-type)))
981 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
982 (setq boundary-regexp (concat "^--"
983 (regexp-quote
984 (match-string 1 content-type))
985 "\\(--\\)?[ \t]*\n"))))
986 (unless subject
987 (when (or multipart-any (not article-insert))
988 (setq subject (message-fetch-field "Subject"))))
989 (unless type
990 (setq type "text"
991 subtype "plain"))
992 ;; Prepare the article and summary inserts.
993 (unless article-insert
23f87bed 994 (setq article-insert (buffer-string)
16409b0b 995 head-end head-begin))
b092f83d
SZ
996 ;; Fix MIME-Version
997 (unless (string-match "MIME-Version:" article-insert)
998 (setq article-insert
999 (concat article-insert "MIME-Version: 1.0\n")))
16409b0b
GM
1000 (setq summary-insert article-insert)
1001 ;; - summary Subject.
1002 (setq summary-insert
1003 (let ((line (concat "Subject: <" position
1004 (and position multipart-any ".")
1005 (and multipart-any "*")
1006 (and (or position multipart-any) " ")
1007 (cond ((string= subtype "plain") type)
1008 ((string= subtype "basic") type)
1009 (t subtype))
1010 ">"
1011 (and subject " ")
1012 subject
1013 "\n")))
1014 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
1015 (replace-match line t t summary-insert)
1016 (concat summary-insert line))))
1017 ;; - summary Message-ID.
1018 (setq summary-insert
1019 (let ((line (concat "Message-ID: " message-id "\n")))
1020 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
1021 (replace-match line t t summary-insert)
1022 (concat summary-insert line))))
1023 ;; - summary References.
1024 (when parent
1025 (setq summary-insert
1026 (let ((line (concat "References: " parent "\n")))
1027 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
1028 summary-insert)
1029 (replace-match line t t summary-insert)
1030 (concat summary-insert line)))))
1031 ;; Generate dissection information for this entity.
1032 (push (list (incf nndoc-mime-split-ordinal)
1033 head-begin head-end body-begin body-end
1034 (count-lines body-begin body-end)
1035 article-insert summary-insert)
1036 nndoc-dissection-alist)
1037 ;; Recurse for all sub-entities, if any.
1038 (widen)
1039 (cond
1040 (message-rfc822
1041 (save-excursion
1042 (nndoc-dissect-mime-parts-sub body-begin body-end nil
1043 position message-id)))
1044 ((and multipart-any boundary-regexp)
1045 (let ((part-counter 0)
1046 part-begin part-end eof-flag)
1047 (while (string-match "\
1048^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1049 article-insert)
1050 (setq article-insert (replace-match "" t t article-insert)))
1051 (let ((case-fold-search nil))
1052 (goto-char body-begin)
1053 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
6748645f 1054 (while (not eof-flag)
16409b0b
GM
1055 (setq part-begin (point))
1056 (cond ((re-search-forward boundary-regexp body-end t)
6748645f
LMI
1057 (or (not (match-string 1))
1058 (string= (match-string 1) "")
1059 (setq eof-flag t))
1060 (forward-line -1)
16409b0b 1061 (setq part-end (point))
6748645f 1062 (forward-line 1))
16409b0b 1063 (t (setq part-end body-end
6748645f 1064 eof-flag t)))
16409b0b
GM
1065 (save-excursion
1066 (nndoc-dissect-mime-parts-sub
1067 part-begin part-end article-insert
1068 (concat position
1069 (and position ".")
1070 (format "%d" (incf part-counter)))
1071 message-id)))))))))
6748645f 1072
eec82323
LMI
1073;;;###autoload
1074(defun nndoc-add-type (definition &optional position)
1075 "Add document DEFINITION to the list of nndoc document definitions.
1076If POSITION is nil or `last', the definition will be added
1077as the last checked definition, if t or `first', add as the
1078first definition, and if any other symbol, add after that
1079symbol in the alist."
1080 ;; First remove any old instances.
36d3245f 1081 (gnus-alist-pull (car definition) nndoc-type-alist)
eec82323
LMI
1082 ;; Then enter the new definition in the proper place.
1083 (cond
1084 ((or (null position) (eq position 'last))
1085 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1086 ((or (eq position t) (eq position 'first))
1087 (push definition nndoc-type-alist))
1088 (t
1089 (let ((list (memq (assq position nndoc-type-alist)
1090 nndoc-type-alist)))
1091 (unless list
1092 (error "No such position: %s" position))
1093 (setcdr list (cons definition (cdr list)))))))
1094
1095(provide 'nndoc)
1096
1097;;; nndoc.el ends here