f900e02eb0894d4d780ab8a5f1a93d49e73bfb0c
[bpt/emacs.git] / lisp / gnus / nndoc.el
1 ;;; nndoc.el --- single file access for Gnus
2
3 ;; Copyright (C) 1995-2011 Free Software Foundation, Inc.
4
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
7 ;; Keywords: news
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23
24 ;;; Commentary:
25
26 ;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
27
28 ;;; Code:
29
30 (require 'nnheader)
31 (require 'message)
32 (require 'nnmail)
33 (require 'nnoo)
34 (require 'gnus-util)
35 (require 'mm-util)
36 (eval-when-compile (require 'cl))
37
38 (nnoo-declare nndoc)
39
40 (defvoo nndoc-article-type 'guess
41 "*Type of the file.
42 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
43 `rfc934', `rfc822-forward', `mime-parts', `standard-digest',
44 `slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
45 `mailman', `exim-bounce', or `guess'.")
46
47 (defvoo nndoc-post-type 'mail
48 "*Whether the nndoc group is `mail' or `post'.")
49
50 (defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
51 "Hook run after opening a document.
52 The default function removes all trailing carriage returns
53 from the document.")
54
55 (defvar nndoc-type-alist
56 `((mmdf
57 (article-begin . "^\^A\^A\^A\^A\n")
58 (body-end . "^\^A\^A\^A\^A\n"))
59 (mime-digest
60 (article-begin . "")
61 (head-begin . "^ ?\n")
62 (head-end . "^ ?$")
63 (body-end . "")
64 (file-end . "")
65 (subtype digest guess))
66 (nsmail
67 (article-begin . "^From - "))
68 (news
69 (article-begin . "^Path:"))
70 (rnews
71 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
72 (body-end-function . nndoc-rnews-body-end))
73 (mbox
74 (article-begin-function . nndoc-mbox-article-begin)
75 (body-end-function . nndoc-mbox-body-end))
76 (babyl
77 (article-begin . "\^_\^L *\n")
78 (body-end . "\^_")
79 (body-begin-function . nndoc-babyl-body-begin)
80 (head-begin-function . nndoc-babyl-head-begin))
81 (mime-parts
82 (generate-head-function . nndoc-generate-mime-parts-head)
83 (article-transform-function . nndoc-transform-mime-parts))
84 (exim-bounce
85 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
86 (body-end-function . nndoc-exim-bounce-body-end-function))
87 (rfc934
88 (article-begin . "^--.*\n+")
89 (body-end . "^--.*$")
90 (prepare-body-function . nndoc-unquote-dashes))
91 (mailman
92 (article-begin . "^--__--__--\n\nMessage:")
93 (body-end . "^--__--__--$")
94 (prepare-body-function . nndoc-unquote-dashes))
95 (clari-briefs
96 (article-begin . "^ \\*")
97 (body-end . "^\t------*[ \t]^*\n^ \\*")
98 (body-begin . "^\t")
99 (head-end . "^\t")
100 (generate-head-function . nndoc-generate-clari-briefs-head)
101 (article-transform-function . nndoc-transform-clari-briefs))
102
103 (standard-digest
104 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
105 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
106 (prepare-body-function . nndoc-unquote-dashes)
107 (body-end-function . nndoc-digest-body-end)
108 (head-end . "^ *$")
109 (body-begin . "^ *\n")
110 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
111 (subtype digest guess))
112 (slack-digest
113 (article-begin . "^------------------------------*[\n \t]+")
114 (head-end . "^ ?$")
115 (body-end-function . nndoc-digest-body-end)
116 (body-begin . "^ ?$")
117 (file-end . "^End of")
118 (prepare-body-function . nndoc-unquote-dashes)
119 (subtype digest guess))
120 (google
121 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
122 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
123 (head-begin . "^Date:")
124 (head-end . "^$")
125 (body-end-function . nndoc-digest-body-end)
126 (body-begin . "^$")
127 (file-end . "^==============================================================================$")
128 (prepare-body-function . nndoc-unquote-dashes)
129 (subtype digest guess))
130 (lanl-gov-announce
131 (article-begin . "^\\\\\\\\\n")
132 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
133 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
134 (body-begin . "")
135 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
136 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
137 (generate-head-function . nndoc-generate-lanl-gov-head)
138 (article-transform-function . nndoc-transform-lanl-gov-announce)
139 (subtype preprints guess))
140 (git
141 (file-begin . "\n- Log ---.*")
142 (article-begin . "^commit ")
143 (head-begin . "^Author: ")
144 (body-begin . "^$")
145 (file-end . "\n-----------------------------------------------------------------------")
146 (article-transform-function . nndoc-transform-git-article)
147 (header-transform-function . nndoc-transform-git-headers))
148 (rfc822-forward
149 (article-begin . "^\n+")
150 (body-end-function . nndoc-rfc822-forward-body-end-function)
151 (generate-head-function . nndoc-rfc822-forward-generate-head)
152 (generate-article-function . nndoc-rfc822-forward-generate-article))
153 (outlook
154 (article-begin-function . nndoc-outlook-article-begin)
155 (body-end . "\0"))
156 (oe-dbx ;; Outlook Express DBX format
157 (dissection-function . nndoc-oe-dbx-dissection)
158 (generate-head-function . nndoc-oe-dbx-generate-head)
159 (generate-article-function . nndoc-oe-dbx-generate-article))
160 (forward
161 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
162 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
163 (prepare-body-function . nndoc-unquote-dashes))
164 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
165 (article-begin-function . nndoc-mail-in-mail-article-begin))
166 (guess
167 (guess . t)
168 (subtype nil))
169 (digest
170 (guess . t)
171 (subtype nil))
172 (preprints
173 (guess . t)
174 (subtype nil))))
175
176 (defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
177 "Regexp for binary nndoc file names.")
178
179 \f
180 (defvoo nndoc-file-begin nil)
181 (defvoo nndoc-first-article nil)
182 (defvoo nndoc-article-begin nil)
183 (defvoo nndoc-head-begin nil)
184 (defvoo nndoc-head-end nil)
185 (defvoo nndoc-file-end nil)
186 (defvoo nndoc-body-begin nil)
187 (defvoo nndoc-body-end-function nil)
188 (defvoo nndoc-body-begin-function nil)
189 (defvoo nndoc-head-begin-function nil)
190 (defvoo nndoc-body-end nil)
191 ;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
192 ;; following items. ARTICLE acts as the association key and is an ordinal
193 ;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
194 ;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
195 ;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
196 ;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
197 ;; generation, respectively. Other headers usually follow directly from the
198 ;; buffer. Value `nil' means no insert.
199 (defvoo nndoc-dissection-alist nil)
200 (defvoo nndoc-prepare-body-function nil)
201 (defvoo nndoc-generate-head-function nil)
202 (defvoo nndoc-article-transform-function nil)
203 (defvoo nndoc-header-transform-function nil)
204 (defvoo nndoc-article-begin-function nil)
205 (defvoo nndoc-generate-article-function nil)
206 (defvoo nndoc-dissection-function nil)
207 (defvoo nndoc-pre-dissection-function nil)
208
209 (defvoo nndoc-status-string "")
210 (defvoo nndoc-group-alist nil)
211 (defvoo nndoc-current-buffer nil
212 "Current nndoc news buffer.")
213 (defvoo nndoc-address nil)
214
215 (defconst nndoc-version "nndoc 1.0"
216 "nndoc version.")
217
218 \f
219
220 ;;; Interface functions
221
222 (nnoo-define-basics nndoc)
223
224 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
225 (when (nndoc-possibly-change-buffer newsgroup server)
226 (with-current-buffer nntp-server-buffer
227 (erase-buffer)
228 (let (article entry)
229 (if (stringp (car articles))
230 'headers
231 (while articles
232 (when (setq entry (cdr (assq (setq article (pop articles))
233 nndoc-dissection-alist)))
234 (let ((start (point)))
235 (insert (format "221 %d Article retrieved.\n" article))
236 (if nndoc-generate-head-function
237 (funcall nndoc-generate-head-function article)
238 (insert-buffer-substring
239 nndoc-current-buffer (car entry) (nth 1 entry)))
240 (goto-char (point-max))
241 (unless (eq (char-after (1- (point))) ?\n)
242 (insert "\n"))
243 (insert (format "Lines: %d\n" (nth 4 entry)))
244 (insert ".\n")
245 (when nndoc-header-transform-function
246 (save-excursion
247 (save-restriction
248 (narrow-to-region start (point))
249 (funcall nndoc-header-transform-function entry)))))))
250 (nnheader-fold-continuation-lines)
251 'headers)))))
252
253 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
254 (nndoc-possibly-change-buffer newsgroup server)
255 (save-excursion
256 (let ((buffer (or buffer nntp-server-buffer))
257 (entry (cdr (assq article nndoc-dissection-alist)))
258 beg)
259 (set-buffer buffer)
260 (erase-buffer)
261 (when entry
262 (cond
263 ((stringp article) nil)
264 (nndoc-generate-article-function
265 (funcall nndoc-generate-article-function article))
266 (t
267 (insert-buffer-substring
268 nndoc-current-buffer (car entry) (nth 1 entry))
269 (insert "\n")
270 (setq beg (point))
271 (insert-buffer-substring
272 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
273 (goto-char beg)
274 (when nndoc-prepare-body-function
275 (funcall nndoc-prepare-body-function))
276 (when nndoc-article-transform-function
277 (funcall nndoc-article-transform-function article))
278 t))))))
279
280 (deffoo nndoc-request-group (group &optional server dont-check info)
281 "Select news GROUP."
282 (let (number)
283 (cond
284 ((not (nndoc-possibly-change-buffer group server))
285 (nnheader-report 'nndoc "No such file or buffer: %s"
286 nndoc-address))
287 (dont-check
288 (nnheader-report 'nndoc "Selected group %s" group)
289 t)
290 ((zerop (setq number (length nndoc-dissection-alist)))
291 (nndoc-close-group group)
292 (nnheader-report 'nndoc "No articles in group %s" group))
293 (t
294 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
295
296 (deffoo nndoc-retrieve-groups (groups &optional server)
297 (dolist (group groups)
298 (nndoc-request-group group server))
299 t)
300
301 (deffoo nndoc-request-type (group &optional article)
302 (cond ((not article) 'unknown)
303 (nndoc-post-type nndoc-post-type)
304 (t 'unknown)))
305
306 (deffoo nndoc-close-group (group &optional server)
307 (nndoc-possibly-change-buffer group server)
308 (and nndoc-current-buffer
309 (buffer-name nndoc-current-buffer)
310 (kill-buffer nndoc-current-buffer))
311 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
312 nndoc-group-alist))
313 (setq nndoc-current-buffer nil)
314 (nnoo-close-server 'nndoc server)
315 (setq nndoc-dissection-alist nil)
316 t)
317
318 (deffoo nndoc-request-list (&optional server)
319 t)
320
321 (deffoo nndoc-request-newgroups (date &optional server)
322 nil)
323
324 (deffoo nndoc-request-list-newsgroups (&optional server)
325 nil)
326
327 \f
328 ;;; Internal functions.
329
330 (defun nndoc-possibly-change-buffer (group source)
331 (let (buf)
332 (cond
333 ;; The current buffer is this group's buffer.
334 ((and nndoc-current-buffer
335 (buffer-name nndoc-current-buffer)
336 (eq nndoc-current-buffer
337 (setq buf (cdr (assoc group nndoc-group-alist))))))
338 ;; We change buffers by taking an old from the group alist.
339 ;; `source' is either a string (a file name) or a buffer object.
340 (buf
341 (setq nndoc-current-buffer buf))
342 ;; It's a totally new group.
343 ((or (and (bufferp nndoc-address)
344 (buffer-name nndoc-address))
345 (and (stringp nndoc-address)
346 (file-exists-p nndoc-address)
347 (not (file-directory-p nndoc-address))))
348 (push (cons group (setq nndoc-current-buffer
349 (get-buffer-create
350 (concat " *nndoc " group "*"))))
351 nndoc-group-alist)
352 (setq nndoc-dissection-alist nil)
353 (with-current-buffer nndoc-current-buffer
354 (erase-buffer)
355 (if (and (stringp nndoc-address)
356 (string-match nndoc-binary-file-names nndoc-address))
357 (let ((coding-system-for-read 'binary))
358 (mm-insert-file-contents nndoc-address))
359 (if (stringp nndoc-address)
360 (nnheader-insert-file-contents nndoc-address)
361 (insert-buffer-substring nndoc-address))
362 (run-hooks 'nndoc-open-document-hook)))))
363 ;; Initialize the nndoc structures according to this new document.
364 (when (and nndoc-current-buffer
365 (not nndoc-dissection-alist))
366 (with-current-buffer nndoc-current-buffer
367 (nndoc-set-delims)
368 (if (eq nndoc-article-type 'mime-parts)
369 (nndoc-dissect-mime-parts)
370 (nndoc-dissect-buffer))))
371 (unless nndoc-current-buffer
372 (nndoc-close-server))
373 ;; Return whether we managed to select a file.
374 nndoc-current-buffer))
375
376 ;;;
377 ;;; Deciding what document type we have
378 ;;;
379
380 (defun nndoc-set-delims ()
381 "Set the nndoc delimiter variables according to the type of the document."
382 (let ((vars '(nndoc-file-begin
383 nndoc-first-article
384 nndoc-article-begin-function
385 nndoc-head-begin nndoc-head-end
386 nndoc-file-end nndoc-article-begin
387 nndoc-body-begin nndoc-body-end-function nndoc-body-end
388 nndoc-prepare-body-function nndoc-article-transform-function
389 nndoc-header-transform-function
390 nndoc-generate-head-function nndoc-body-begin-function
391 nndoc-head-begin-function
392 nndoc-generate-article-function
393 nndoc-dissection-function
394 nndoc-pre-dissection-function)))
395 (while vars
396 (set (pop vars) nil)))
397 (let (defs)
398 ;; Guess away until we find the real file type.
399 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
400 nndoc-type-alist))))
401 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
402 ;; Set the nndoc variables.
403 (while defs
404 (set (intern (format "nndoc-%s" (caar defs)))
405 (cdr (pop defs))))))
406
407 (defun nndoc-guess-type (subtype)
408 (let ((alist nndoc-type-alist)
409 results result entry)
410 (while (and (not result)
411 (setq entry (pop alist)))
412 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
413 (goto-char (point-min))
414 ;; Remove blank lines.
415 (while (eq (following-char) ?\n)
416 (delete-char 1))
417 (when (numberp (setq result (funcall (intern
418 (format "nndoc-%s-type-p"
419 (car entry))))))
420 (push (cons result entry) results)
421 (setq result nil))))
422 (unless (or result results)
423 (error "Document is not of any recognized type"))
424 (if result
425 (car entry)
426 (cadar (last (sort results 'car-less-than-car))))))
427
428 ;;;
429 ;;; Built-in type predicates and functions
430 ;;;
431
432 (defun nndoc-mbox-type-p ()
433 (when (looking-at message-unix-mail-delimiter)
434 t))
435
436 (defun nndoc-mbox-article-begin ()
437 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
438 (goto-char (match-beginning 0))))
439
440 (defun nndoc-mbox-body-end ()
441 (let ((beg (point))
442 len end)
443 (when
444 (save-excursion
445 (and (re-search-backward
446 (concat "^" message-unix-mail-delimiter) nil t)
447 (setq end (point))
448 (search-forward "\n\n" beg t)
449 (re-search-backward
450 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
451 (setq len (string-to-number (match-string 1)))
452 (search-forward "\n\n" beg t)
453 (unless (= (setq len (+ (point) len)) (point-max))
454 (and (< len (point-max))
455 (goto-char len)
456 (looking-at message-unix-mail-delimiter)))))
457 (goto-char len))))
458
459 (defun nndoc-mmdf-type-p ()
460 (when (looking-at "\^A\^A\^A\^A$")
461 t))
462
463 (defun nndoc-news-type-p ()
464 (when (looking-at "^Path:.*\n")
465 t))
466
467 (defun nndoc-rnews-type-p ()
468 (when (looking-at "#! *rnews")
469 t))
470
471 (defun nndoc-rnews-body-end ()
472 (and (re-search-backward nndoc-article-begin nil t)
473 (forward-line 1)
474 (goto-char (+ (point) (string-to-number (match-string 1))))))
475
476 (defun nndoc-google-type-p ()
477 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
478 t))
479
480 (defun nndoc-decode-content-transfer-encoding ()
481 (let ((encoding
482 (save-restriction
483 (message-narrow-to-head)
484 (message-fetch-field "content-transfer-encoding"))))
485 (when (and encoding
486 (search-forward "\n\n" nil t))
487 (save-restriction
488 (narrow-to-region (point) (point-max))
489 (mm-decode-content-transfer-encoding
490 (intern (downcase (mail-header-strip encoding))))))))
491
492 (defun nndoc-babyl-type-p ()
493 (when (re-search-forward "\^_\^L *\n" nil t)
494 t))
495
496 (defun nndoc-babyl-body-begin ()
497 (re-search-forward "^\n" nil t)
498 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
499 (let ((next (or (save-excursion
500 (re-search-forward nndoc-article-begin nil t))
501 (point-max))))
502 (unless (re-search-forward "^\n" next t)
503 (goto-char next)
504 (forward-line -1)
505 (insert "\n")
506 (forward-line -1)))))
507
508 (defun nndoc-babyl-head-begin ()
509 (when (re-search-forward "^[0-9].*\n" nil t)
510 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
511 (forward-line 1))
512 t))
513
514 (defun nndoc-forward-type-p ()
515 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
516 nil t)
517 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
518 t))
519
520 (defun nndoc-rfc934-type-p ()
521 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
522 (not (re-search-forward "^Subject:.*digest" nil t))
523 (not (re-search-backward "^From:" nil t 2))
524 (not (re-search-forward "^From:" nil t 2)))
525 t))
526
527 (defun nndoc-mailman-type-p ()
528 (when (re-search-forward "^--__--__--\n+" nil t)
529 t))
530
531 (defun nndoc-rfc822-forward-type-p ()
532 (save-restriction
533 (message-narrow-to-head)
534 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
535 t)))
536
537 (defun nndoc-rfc822-forward-body-end-function ()
538 (goto-char (point-max)))
539
540 (defun nndoc-rfc822-forward-generate-article (article &optional head)
541 (let ((entry (cdr (assq article nndoc-dissection-alist)))
542 (begin (point))
543 encoding)
544 (with-current-buffer nndoc-current-buffer
545 (save-restriction
546 (message-narrow-to-head)
547 (setq encoding (message-fetch-field "content-transfer-encoding"))))
548 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
549 (when encoding
550 (save-restriction
551 (narrow-to-region begin (point-max))
552 (mm-decode-content-transfer-encoding
553 (intern (downcase (mail-header-strip encoding))))))
554 (when head
555 (goto-char begin)
556 (when (search-forward "\n\n" nil t)
557 (delete-region (1- (point)) (point-max)))))
558 t)
559
560 (defun nndoc-rfc822-forward-generate-head (article)
561 (nndoc-rfc822-forward-generate-article article 'head))
562
563 (defun nndoc-mime-parts-type-p ()
564 (let ((case-fold-search t)
565 (limit (search-forward "\n\n" nil t)))
566 (goto-char (point-min))
567 (when (and limit
568 (re-search-forward
569 (concat "\
570 ^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
571 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
572 limit t))
573 t)))
574
575 (defun nndoc-transform-mime-parts (article)
576 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
577 (headers (nth 5 entry)))
578 (when headers
579 (goto-char (point-min))
580 (insert headers))))
581
582 (defun nndoc-generate-mime-parts-head (article)
583 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
584 (headers (nth 6 entry)))
585 (save-restriction
586 (narrow-to-region (point) (point))
587 (insert-buffer-substring
588 nndoc-current-buffer (car entry) (nth 1 entry))
589 (goto-char (point-max)))
590 (when headers
591 (insert headers))))
592
593 (defun nndoc-clari-briefs-type-p ()
594 (when (let ((case-fold-search nil))
595 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
596 t))
597
598 (defun nndoc-transform-clari-briefs (article)
599 (goto-char (point-min))
600 (when (looking-at " *\\*\\(.*\\)\n")
601 (replace-match "" t t))
602 (nndoc-generate-clari-briefs-head article))
603
604 (defun nndoc-generate-clari-briefs-head (article)
605 (let ((entry (cdr (assq article nndoc-dissection-alist)))
606 subject from)
607 (with-current-buffer nndoc-current-buffer
608 (save-restriction
609 (narrow-to-region (car entry) (nth 3 entry))
610 (goto-char (point-min))
611 (when (looking-at " *\\*\\(.*\\)$")
612 (setq subject (match-string 1))
613 (when (string-match "[ \t]+$" subject)
614 (setq subject (substring subject 0 (match-beginning 0)))))
615 (when
616 (let ((case-fold-search nil))
617 (re-search-forward
618 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
619 (setq from (match-string 1)))))
620 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
621 "\nSubject: " (or subject "(no subject)") "\n")))
622
623 (defun nndoc-exim-bounce-type-p ()
624 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
625 t))
626
627 (defun nndoc-exim-bounce-body-end-function ()
628 (goto-char (point-max)))
629
630
631 (defun nndoc-mime-digest-type-p ()
632 (let ((case-fold-search t)
633 boundary-id b-delimiter entry)
634 (when (and
635 (re-search-forward
636 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
637 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
638 nil t)
639 (match-beginning 1))
640 (setq boundary-id (match-string 1)
641 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
642 (setq entry (assq 'mime-digest nndoc-type-alist))
643 (setcdr entry
644 (list
645 (cons 'head-begin "^ ?\n")
646 (cons 'head-end "^ ?$")
647 (cons 'body-begin "^ ?\n")
648 (cons 'article-begin b-delimiter)
649 (cons 'body-end-function 'nndoc-digest-body-end)
650 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
651 t)))
652
653 (defun nndoc-standard-digest-type-p ()
654 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
655 (re-search-forward
656 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
657 t))
658
659 (defun nndoc-digest-body-end ()
660 (and (re-search-forward nndoc-article-begin nil t)
661 (goto-char (match-beginning 0))))
662
663 (defun nndoc-slack-digest-type-p ()
664 0)
665
666 (defun nndoc-git-type-p ()
667 (and (search-forward "\n- Log ---" nil t)
668 (search-forward "\ncommit " nil t)
669 (search-forward "\nAuthor: " nil t)))
670
671 (defun nndoc-transform-git-article (article)
672 (goto-char (point-min))
673 (when (re-search-forward "^Author: " nil t)
674 (replace-match "From: " t t)))
675
676 (defun nndoc-transform-git-headers (entry)
677 (goto-char (point-min))
678 (when (re-search-forward "^Author: " nil t)
679 (replace-match "From: " t t))
680 (let (subject)
681 (with-current-buffer nndoc-current-buffer
682 (goto-char (car entry))
683 (when (search-forward "\n\n" nil t)
684 (setq subject (buffer-substring (point) (line-end-position)))))
685 (when subject
686 (goto-char (point-min))
687 (forward-line 1)
688 (insert (format "Subject: %s\n" subject)))))
689
690 (defun nndoc-lanl-gov-announce-type-p ()
691 (when (let ((case-fold-search nil))
692 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
693 t))
694
695 (defun nndoc-transform-lanl-gov-announce (article)
696 (let ((case-fold-search nil))
697 (goto-char (point-max))
698 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
699 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
700 (goto-char (point-min))
701 (while (re-search-forward "^\\\\\\\\$" nil t)
702 (replace-match "" t nil))
703 (goto-char (point-min))
704 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
705 (replace-match "Date: \\1 (revised) " t nil))
706 (goto-char (point-min))
707 (unless (re-search-forward "^From" nil t)
708 (goto-char (point-min))
709 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
710 (goto-char (point-min))
711 (insert "From: " (match-string 1) "\n")))
712 (when (re-search-forward "^arXiv:" nil t)
713 (replace-match "Paper: arXiv:" t nil))))
714
715 (defun nndoc-generate-lanl-gov-head (article)
716 (let ((entry (cdr (assq article nndoc-dissection-alist)))
717 (from "<no address given>")
718 subject date)
719 (with-current-buffer nndoc-current-buffer
720 (save-restriction
721 (narrow-to-region (car entry) (nth 1 entry))
722 (goto-char (point-min))
723 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
724 (setq subject (concat " (" (match-string 2) ")"))
725 (when (re-search-forward "^From: \\(.*\\)" nil t)
726 (setq from (concat "<"
727 (cadr (funcall gnus-extract-address-components
728 (match-string 1))) ">")))
729 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
730 (setq date (match-string 1))
731 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
732 (setq date (match-string 1))))
733 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
734 nil t)
735 (setq subject (concat (match-string 1) subject))
736 (setq from (concat (match-string 2) " " from))))))
737 (while (and from (string-match "(\[^)\]*)" from))
738 (setq from (replace-match "" t t from)))
739 (insert "From: " (or from "unknown")
740 "\nSubject: " (or subject "(no subject)") "\n")
741 (if date (insert "Date: " date))))
742
743 (defun nndoc-nsmail-type-p ()
744 (when (looking-at "From - ")
745 t))
746
747 (defun nndoc-outlook-article-begin ()
748 (prog1 (re-search-forward "From:\\|Received:" nil t)
749 (goto-char (match-beginning 0))))
750
751 (defun nndoc-outlook-type-p ()
752 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
753 (looking-at "JMF"))
754
755 (defun nndoc-oe-dbx-type-p ()
756 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
757
758 (defun nndoc-read-little-endian ()
759 (+ (prog1 (char-after) (forward-char 1))
760 (lsh (prog1 (char-after) (forward-char 1)) 8)
761 (lsh (prog1 (char-after) (forward-char 1)) 16)
762 (lsh (prog1 (char-after) (forward-char 1)) 24)))
763
764 (defun nndoc-oe-dbx-decode-block ()
765 (list
766 (nndoc-read-little-endian) ;; this address
767 (nndoc-read-little-endian) ;; next address offset
768 (nndoc-read-little-endian) ;; blocksize
769 (nndoc-read-little-endian))) ;; next address
770
771 (defun nndoc-oe-dbx-dissection ()
772 (let ((i 0) blk p tp)
773 (goto-char 60117) ;; 0x0000EAD4+1
774 (setq p (point))
775 (unless (eobp)
776 (setq blk (nndoc-oe-dbx-decode-block)))
777 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
778 (> (nth 3 blk) p)))
779 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
780 (while (and (> (car blk) 0) (> (nth 3 blk) p))
781 (goto-char (1+ (nth 3 blk)))
782 (setq blk (nndoc-oe-dbx-decode-block)))
783 (if (or (<= (car blk) p)
784 (<= (nth 1 blk) 0)
785 (not (zerop (nth 3 blk))))
786 (setq blk nil)
787 (setq tp (+ (car blk) (nth 1 blk) 17))
788 (if (or (<= tp p) (>= tp (point-max)))
789 (setq blk nil)
790 (goto-char tp)
791 (setq p tp
792 blk (nndoc-oe-dbx-decode-block)))))))
793
794 (defun nndoc-oe-dbx-generate-article (article &optional head)
795 (let ((entry (cdr (assq article nndoc-dissection-alist)))
796 (cur (current-buffer))
797 (begin (point))
798 blk p)
799 (with-current-buffer nndoc-current-buffer
800 (setq p (car entry))
801 (while (> p (point-min))
802 (goto-char p)
803 (setq blk (nndoc-oe-dbx-decode-block))
804 (setq p (point))
805 (with-current-buffer cur
806 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
807 (setq p (1+ (nth 3 blk)))))
808 (goto-char begin)
809 (while (re-search-forward "\r$" nil t)
810 (delete-char -1))
811 (when head
812 (goto-char begin)
813 (when (search-forward "\n\n" nil t)
814 (setcar (cddddr entry) (count-lines (point) (point-max)))
815 (delete-region (1- (point)) (point-max))))
816 t))
817
818 (defun nndoc-oe-dbx-generate-head (article)
819 (nndoc-oe-dbx-generate-article article 'head))
820
821 (defun nndoc-mail-in-mail-type-p ()
822 (let (found)
823 (save-excursion
824 (catch 'done
825 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
826 (setq found 0)
827 (forward-line)
828 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
829 (if (looking-at "[-A-Za-z0-9]+:")
830 (setq found (1+ found)))
831 (forward-line))
832 (if (and (> found 0) (looking-at "\n"))
833 (throw 'done 9999)))
834 nil))))
835
836 (defun nndoc-mail-in-mail-article-begin ()
837 (let (point found)
838 (if (catch 'done
839 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
840 (setq found 0)
841 (setq point (match-beginning 1))
842 (forward-line)
843 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
844 (if (looking-at "[-A-Za-z0-9]+:")
845 (setq found (1+ found)))
846 (forward-line))
847 (if (and (> found 0) (looking-at "\n"))
848 (throw 'done t)))
849 nil)
850 (goto-char point))))
851
852 (deffoo nndoc-request-accept-article (group &optional server last)
853 nil)
854
855 ;;;
856 ;;; Functions for dissecting the documents
857 ;;;
858
859 (defun nndoc-search (regexp)
860 (prog1
861 (re-search-forward regexp nil t)
862 (beginning-of-line)))
863
864 (defun nndoc-dissect-buffer ()
865 "Go through the document and partition it into heads/bodies/articles."
866 (let ((i 0)
867 (first t)
868 art-begin head-begin head-end body-begin body-end)
869 (setq nndoc-dissection-alist nil)
870 (with-current-buffer nndoc-current-buffer
871 (goto-char (point-min))
872 ;; Remove blank lines.
873 (while (eq (following-char) ?\n)
874 (delete-char 1))
875 (when nndoc-pre-dissection-function
876 (save-excursion
877 (funcall nndoc-pre-dissection-function)))
878 (if nndoc-dissection-function
879 (funcall nndoc-dissection-function)
880 ;; Find the beginning of the file.
881 (when nndoc-file-begin
882 (nndoc-search nndoc-file-begin))
883 ;; Go through the file.
884 (while (if (and first nndoc-first-article)
885 (nndoc-search nndoc-first-article)
886 (if art-begin
887 (goto-char art-begin)
888 (nndoc-article-begin)))
889 (setq first nil
890 art-begin nil)
891 (cond (nndoc-head-begin-function
892 (funcall nndoc-head-begin-function))
893 (nndoc-head-begin
894 (nndoc-search nndoc-head-begin)))
895 (if (or (eobp)
896 (and nndoc-file-end
897 (looking-at nndoc-file-end)))
898 (goto-char (point-max))
899 (setq head-begin (point))
900 (nndoc-search (or nndoc-head-end "^$"))
901 (setq head-end (point))
902 (if nndoc-body-begin-function
903 (funcall nndoc-body-begin-function)
904 (nndoc-search (or nndoc-body-begin "^\n")))
905 (setq body-begin (point))
906 (or (and nndoc-body-end-function
907 (funcall nndoc-body-end-function))
908 (and nndoc-body-end
909 (nndoc-search nndoc-body-end))
910 (and (nndoc-article-begin)
911 (setq art-begin (point)))
912 (progn
913 (goto-char (point-max))
914 (when nndoc-file-end
915 (and (re-search-backward nndoc-file-end nil t)
916 (beginning-of-line)))))
917 (setq body-end (point))
918 (push (list (incf i) head-begin head-end body-begin body-end
919 (count-lines body-begin body-end))
920 nndoc-dissection-alist)))))
921 (setq nndoc-dissection-alist (nreverse nndoc-dissection-alist))))
922
923 (defun nndoc-article-begin ()
924 (if nndoc-article-begin-function
925 (funcall nndoc-article-begin-function)
926 (ignore-errors
927 (nndoc-search nndoc-article-begin))))
928
929 (defun nndoc-unquote-dashes ()
930 "Unquote quoted non-separators in digests."
931 (while (re-search-forward "^- -"nil t)
932 (replace-match "-" t t)))
933
934 ;; Against compiler warnings.
935 (defvar nndoc-mime-split-ordinal)
936
937 (defun nndoc-dissect-mime-parts ()
938 "Go through a MIME composite article and partition it into sub-articles.
939 When a MIME entity contains sub-entities, dissection produces one article for
940 the header of this entity, and one article per sub-entity."
941 (setq nndoc-dissection-alist nil
942 nndoc-mime-split-ordinal 0)
943 (with-current-buffer nndoc-current-buffer
944 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
945
946 (defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
947 position parent)
948 "Dissect an entity, within a composite MIME message.
949 The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
950 ARTICLE-INSERT should be added at beginning for generating a full article.
951 The string POSITION holds a dotted decimal representation of the article
952 position in the hierarchical structure, it is nil for the outer entity.
953 PARENT is the message-ID of the parent summary line, or nil for none."
954 (let ((case-fold-search t)
955 (message-id (nnmail-message-id))
956 head-end body-begin summary-insert message-rfc822 multipart-any
957 subject content-type type subtype boundary-regexp)
958 ;; Gracefully handle a missing body.
959 (goto-char head-begin)
960 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
961 (search-forward "\n\n" body-end t))
962 (setq head-end (1- (point))
963 body-begin (point))
964 (setq head-end body-end
965 body-begin body-end))
966 (narrow-to-region head-begin head-end)
967 ;; Save MIME attributes.
968 (goto-char head-begin)
969 (setq content-type (message-fetch-field "Content-Type"))
970 (when content-type
971 (when (string-match
972 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
973 (setq type (downcase (match-string 1 content-type))
974 subtype (downcase (match-string 2 content-type))
975 message-rfc822 (and (string= type "message")
976 (string= subtype "rfc822"))
977 multipart-any (string= type "multipart")))
978 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
979 (setq subject (match-string 1 content-type)))
980 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
981 (setq boundary-regexp (concat "^--"
982 (regexp-quote
983 (match-string 1 content-type))
984 "\\(--\\)?[ \t]*\n"))))
985 (unless subject
986 (when (or multipart-any (not article-insert))
987 (setq subject (message-fetch-field "Subject"))))
988 (unless type
989 (setq type "text"
990 subtype "plain"))
991 ;; Prepare the article and summary inserts.
992 (unless article-insert
993 (setq article-insert (buffer-string)
994 head-end head-begin))
995 ;; Fix MIME-Version
996 (unless (string-match "MIME-Version:" article-insert)
997 (setq article-insert
998 (concat article-insert "MIME-Version: 1.0\n")))
999 (setq summary-insert article-insert)
1000 ;; - summary Subject.
1001 (setq summary-insert
1002 (let ((line (concat "Subject: <" position
1003 (and position multipart-any ".")
1004 (and multipart-any "*")
1005 (and (or position multipart-any) " ")
1006 (cond ((string= subtype "plain") type)
1007 ((string= subtype "basic") type)
1008 (t subtype))
1009 ">"
1010 (and subject " ")
1011 subject
1012 "\n")))
1013 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
1014 (replace-match line t t summary-insert)
1015 (concat summary-insert line))))
1016 ;; - summary Message-ID.
1017 (setq summary-insert
1018 (let ((line (concat "Message-ID: " message-id "\n")))
1019 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
1020 (replace-match line t t summary-insert)
1021 (concat summary-insert line))))
1022 ;; - summary References.
1023 (when parent
1024 (setq summary-insert
1025 (let ((line (concat "References: " parent "\n")))
1026 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
1027 summary-insert)
1028 (replace-match line t t summary-insert)
1029 (concat summary-insert line)))))
1030 ;; Generate dissection information for this entity.
1031 (push (list (incf nndoc-mime-split-ordinal)
1032 head-begin head-end body-begin body-end
1033 (count-lines body-begin body-end)
1034 article-insert summary-insert)
1035 nndoc-dissection-alist)
1036 ;; Recurse for all sub-entities, if any.
1037 (widen)
1038 (cond
1039 (message-rfc822
1040 (save-excursion
1041 (nndoc-dissect-mime-parts-sub body-begin body-end nil
1042 position message-id)))
1043 ((and multipart-any boundary-regexp)
1044 (let ((part-counter 0)
1045 part-begin part-end eof-flag)
1046 (while (string-match "\
1047 ^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1048 article-insert)
1049 (setq article-insert (replace-match "" t t article-insert)))
1050 (let ((case-fold-search nil))
1051 (goto-char body-begin)
1052 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
1053 (while (not eof-flag)
1054 (setq part-begin (point))
1055 (cond ((re-search-forward boundary-regexp body-end t)
1056 (or (not (match-string 1))
1057 (string= (match-string 1) "")
1058 (setq eof-flag t))
1059 (forward-line -1)
1060 (setq part-end (point))
1061 (forward-line 1))
1062 (t (setq part-end body-end
1063 eof-flag t)))
1064 (save-excursion
1065 (nndoc-dissect-mime-parts-sub
1066 part-begin part-end article-insert
1067 (concat position
1068 (and position ".")
1069 (format "%d" (incf part-counter)))
1070 message-id)))))))))
1071
1072 ;;;###autoload
1073 (defun nndoc-add-type (definition &optional position)
1074 "Add document DEFINITION to the list of nndoc document definitions.
1075 If POSITION is nil or `last', the definition will be added
1076 as the last checked definition, if t or `first', add as the
1077 first definition, and if any other symbol, add after that
1078 symbol in the alist."
1079 ;; First remove any old instances.
1080 (gnus-alist-pull (car definition) nndoc-type-alist)
1081 ;; Then enter the new definition in the proper place.
1082 (cond
1083 ((or (null position) (eq position 'last))
1084 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1085 ((or (eq position t) (eq position 'first))
1086 (push definition nndoc-type-alist))
1087 (t
1088 (let ((list (memq (assq position nndoc-type-alist)
1089 nndoc-type-alist)))
1090 (unless list
1091 (error "No such position: %s" position))
1092 (setcdr list (cons definition (cdr list)))))))
1093
1094 (provide 'nndoc)
1095
1096 ;;; nndoc.el ends here