Convert consecutive FSF copyright years to ranges.
[bpt/emacs.git] / lisp / gnus / nnrss.el
CommitLineData
23f87bed 1;;; nnrss.el --- interfacing with RSS
e84b4b86 2
73b0cd50 3;; Copyright (C) 2001-2011 Free Software Foundation, Inc.
23f87bed
MB
4
5;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
6;; Keywords: RSS
7
8;; This file is part of GNU Emacs.
9
5e809f55
GM
10;; GNU Emacs is free software: you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation, either version 3 of the License, or
13;; (at your option) any later version.
23f87bed 14
5e809f55
GM
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
23f87bed
MB
19
20;; You should have received a copy of the GNU General Public License
5e809f55 21;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23f87bed
MB
22
23;;; Commentary:
24
25;;; Code:
26
f0b7f5a8 27;; For Emacs <22.2 and XEmacs.
0f7cbeb9
GM
28(eval-and-compile
29 (unless (fboundp 'declare-function) (defmacro declare-function (&rest r))))
30
23f87bed
MB
31(eval-when-compile (require 'cl))
32
33(require 'gnus)
34(require 'nnoo)
35(require 'nnmail)
36(require 'message)
37(require 'mm-util)
38(require 'gnus-util)
39(require 'time-date)
40(require 'rfc2231)
41(require 'mm-url)
91472578
MB
42(require 'rfc2047)
43(require 'mml)
23f87bed
MB
44(eval-when-compile
45 (ignore-errors
91472578 46 (require 'xml)))
23f87bed
MB
47(eval '(require 'xml))
48
49(nnoo-declare nnrss)
50
51(defvoo nnrss-directory (nnheader-concat gnus-directory "rss/")
52 "Where nnrss will save its files.")
53
01c52d31
MB
54(defvoo nnrss-ignore-article-fields '(slash:comments)
55 "*List of fields that should be ignored when comparing RSS articles.
56Some RSS feeds update article fields during their lives, e.g. to
57indicate the number of comments or the number of times the
58articles have been seen. However, if there is a difference
59between the local article and the distant one, the latter is
60considered to be new. To avoid this and discard some fields, set
61this variable to the list of fields to be ignored.")
62
23f87bed
MB
63;; (group max rss-url)
64(defvoo nnrss-server-data nil)
65
66;; (num timestamp url subject author date extra)
67(defvoo nnrss-group-data nil)
68(defvoo nnrss-group-max 0)
69(defvoo nnrss-group-min 1)
70(defvoo nnrss-group nil)
01c52d31 71(defvoo nnrss-group-hashtb (make-hash-table :test 'equal))
23f87bed
MB
72(defvoo nnrss-status-string "")
73
74(defconst nnrss-version "nnrss 1.0")
75
76(defvar nnrss-group-alist '()
77 "List of RSS addresses.")
78
6b958814
G
79(defvar nnrss-use-local nil
80 "If non-nil nnrss will read the feeds from local files in nnrss-directory.")
23f87bed
MB
81
82(defvar nnrss-description-field 'X-Gnus-Description
83 "Field name used for DESCRIPTION.
84To use the description in headers, put this name into `nnmail-extra-headers'.")
85
86(defvar nnrss-url-field 'X-Gnus-Url
87 "Field name used for URL.
88To use the description in headers, put this name into `nnmail-extra-headers'.")
89
90(defvar nnrss-content-function nil
91 "A function which is called in `nnrss-request-article'.
92The arguments are (ENTRY GROUP ARTICLE).
91472578 93ENTRY is the record of the current headline. GROUP is the group name.
23f87bed
MB
94ARTICLE is the article number of the current headline.")
95
91472578 96(defvar nnrss-file-coding-system mm-universal-coding-system
01c52d31
MB
97 "*Coding system used when reading and writing files.
98If you run Gnus with various versions of Emacsen, the value of this
99variable should be the coding system that all those Emacsen support.
100Note that you have to regenerate all the nnrss groups if you change
101the value. Moreover, you should be patient even if you are made to
102read the same articles twice, that arises for the difference of the
103versions of xml.el.")
91472578 104
82fe1aed
MB
105(defvar nnrss-compatible-encoding-alist
106 (delq nil (mapcar (lambda (elem)
107 (if (and (mm-coding-system-p (car elem))
108 (mm-coding-system-p (cdr elem)))
109 elem))
110 mm-charset-override-alist))
91472578
MB
111 "Alist of encodings and those supersets.
112The cdr of each element is used to decode data if it is available when
7dafe00b 113the car is what the data specify as the encoding. Or, the car is used
91472578
MB
114for decoding when the cdr that the data specify is not available.")
115
23f87bed
MB
116(nnoo-define-basics nnrss)
117
118;;; Interface functions
119
91472578
MB
120(defsubst nnrss-format-string (string)
121 (gnus-replace-in-string string " *\n *" " "))
122
123(defun nnrss-decode-group-name (group)
124 (if (and group (mm-coding-system-p 'utf-8))
125 (setq group (mm-decode-coding-string group 'utf-8))
126 group))
ad136a7c 127
23f87bed 128(deffoo nnrss-retrieve-headers (articles &optional group server fetch-old)
91472578 129 (setq group (nnrss-decode-group-name group))
23f87bed
MB
130 (nnrss-possibly-change-group group server)
131 (let (e)
63556fc6 132 (with-current-buffer nntp-server-buffer
23f87bed
MB
133 (erase-buffer)
134 (dolist (article articles)
135 (if (setq e (assq article nnrss-group-data))
136 (insert (number-to-string (car e)) "\t" ;; number
91472578
MB
137 ;; subject
138 (or (nth 3 e) "")
139 "\t"
140 ;; from
141 (or (nth 4 e) "(nobody)")
142 "\t"
143 ;; date
23f87bed 144 (or (nth 5 e) "")
91472578
MB
145 "\t"
146 ;; id
23f87bed 147 (format "<%d@%s.nnrss>" (car e) group)
91472578
MB
148 "\t"
149 ;; refs
150 "\t"
151 ;; chars
152 "-1" "\t"
153 ;; lines
154 "-1" "\t"
155 ;; Xref
156 "" "\t"
23f87bed
MB
157 (if (and (nth 6 e)
158 (memq nnrss-description-field
159 nnmail-extra-headers))
160 (concat (symbol-name nnrss-description-field)
161 ": "
162 (nnrss-format-string (nth 6 e))
163 "\t")
164 "")
165 (if (and (nth 2 e)
166 (memq nnrss-url-field
167 nnmail-extra-headers))
168 (concat (symbol-name nnrss-url-field)
169 ": "
170 (nnrss-format-string (nth 2 e))
171 "\t")
172 "")
173 "\n")))))
174 'nov)
175
286c4fc2 176(deffoo nnrss-request-group (group &optional server dont-check info)
91472578
MB
177 (setq group (nnrss-decode-group-name group))
178 (nnheader-message 6 "nnrss: Requesting %s..." group)
23f87bed 179 (nnrss-possibly-change-group group server)
91472578
MB
180 (prog1
181 (if dont-check
182 t
183 (nnrss-check-group group server)
184 (nnheader-report 'nnrss "Opened group %s" group)
185 (nnheader-insert
186 "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
187 (prin1-to-string group)
188 t))
189 (nnheader-message 6 "nnrss: Requesting %s...done" group)))
23f87bed
MB
190
191(deffoo nnrss-close-group (group &optional server)
192 t)
193
194(deffoo nnrss-request-article (article &optional group server buffer)
91472578
MB
195 (setq group (nnrss-decode-group-name group))
196 (when (stringp article)
197 (setq article (if (string-match "\\`<\\([0-9]+\\)@" article)
198 (string-to-number (match-string 1 article))
199 0)))
23f87bed
MB
200 (nnrss-possibly-change-group group server)
201 (let ((e (assq article nnrss-group-data))
23f87bed
MB
202 (nntp-server-buffer (or buffer nntp-server-buffer))
203 post err)
204 (when e
91472578
MB
205 (with-current-buffer nntp-server-buffer
206 (erase-buffer)
207 (if group
208 (insert "Newsgroups: " group "\n"))
209 (if (nth 3 e)
210 (insert "Subject: " (nth 3 e) "\n"))
211 (if (nth 4 e)
212 (insert "From: " (nth 4 e) "\n"))
213 (if (nth 5 e)
214 (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
215 (let ((header (buffer-string))
7dafe00b 216 (text (nth 6 e))
91472578 217 (link (nth 2 e))
31640842 218 (enclosure (nth 7 e))
d3a597b7 219 (comments (nth 8 e))
91472578
MB
220 (rfc2047-header-encoding-alist
221 (if (mm-coding-system-p 'utf-8)
222 (cons '("Newsgroups" . utf-8)
223 rfc2047-header-encoding-alist)
224 rfc2047-header-encoding-alist))
7dafe00b 225 rfc2047-encode-encoded-words body fn)
d3a597b7 226 (when (or text link enclosure comments)
91472578
MB
227 (insert "\n")
228 (insert "<#multipart type=alternative>\n"
229 "<#part type=\"text/plain\">\n")
230 (setq body (point))
31640842 231 (when text
7dafe00b
MB
232 (insert text)
233 (goto-char body)
ab388ec4
KY
234 (while (re-search-forward "\n+" nil t)
235 (replace-match " "))
236 (goto-char body)
237 ;; See `nnrss-check-group', which inserts "<br /><br />".
238 (when (search-forward "<br /><br />" nil t)
239 (if (eobp)
240 (replace-match "\n")
241 (replace-match "\n\n")))
242 (unless (eobp)
243 (let ((fill-column (default-value 'fill-column))
244 (window (get-buffer-window nntp-server-buffer)))
245 (when window
246 (setq fill-column
247 (max 1 (/ (* (window-width window) 7) 8))))
248 (fill-region (point) (point-max))
249 (goto-char (point-max))
250 ;; XEmacs version of `fill-region' inserts newline.
251 (unless (bolp)
252 (insert "\n"))))
31640842
MB
253 (when (or link enclosure)
254 (insert "\n")))
255 (when link
256 (insert link "\n"))
257 (when enclosure
258 (insert (car enclosure) " "
259 (nth 2 enclosure) " "
260 (nth 3 enclosure) "\n"))
d3a597b7
MB
261 (when comments
262 (insert comments "\n"))
91472578
MB
263 (setq body (buffer-substring body (point)))
264 (insert "<#/part>\n"
265 "<#part type=\"text/html\">\n"
266 "<html><head></head><body>\n")
267 (when text
268 (insert text "\n"))
269 (when link
270 (insert "<p><a href=\"" link "\">link</a></p>\n"))
31640842
MB
271 (when enclosure
272 (insert "<p><a href=\"" (car enclosure) "\">"
273 (cadr enclosure) "</a> " (nth 2 enclosure)
274 " " (nth 3 enclosure) "</p>\n"))
d3a597b7
MB
275 (when comments
276 (insert "<p><a href=\"" comments "\">comments</a></p>\n"))
91472578
MB
277 (insert "</body></html>\n"
278 "<#/part>\n"
279 "<#/multipart>\n"))
280 (condition-case nil
554a69b8
KY
281 ;; Allow `mml-to-mime' to generate MIME article without
282 ;; making inquiry to a user for unknown encoding.
283 (let ((mml-confirmation-set
284 (cons 'unknown-encoding mml-confirmation-set)))
285 (mml-to-mime))
91472578
MB
286 (error
287 (erase-buffer)
288 (insert header
289 "Content-Type: text/plain; charset=gnus-decoded\n"
290 "Content-Transfer-Encoding: 8bit\n\n"
291 body)
292 (nnheader-message
293 3 "Warning - there might be invalid characters"))))
294 (goto-char (point-min))
295 (search-forward "\n\n")
296 (forward-line -1)
297 (insert (format "Message-ID: <%d@%s.nnrss>\n"
298 (car e)
299 (let ((rfc2047-encoding-type 'mime)
300 rfc2047-encode-max-chars)
301 (rfc2047-encode-string
302 (gnus-replace-in-string group "[\t\n ]+" "_")))))
303 (when nnrss-content-function
304 (funcall nnrss-content-function e group article))))
23f87bed
MB
305 (cond
306 (err
307 (nnheader-report 'nnrss err))
308 ((not e)
309 (nnheader-report 'nnrss "no such id: %d" article))
310 (t
311 (nnheader-report 'nnrss "article %s retrieved" (car e))
312 ;; we return the article number.
313 (cons nnrss-group (car e))))))
314
23f87bed
MB
315(deffoo nnrss-open-server (server &optional defs connectionless)
316 (nnrss-read-server-data server)
317 (nnoo-change-server 'nnrss server defs)
318 t)
319
320(deffoo nnrss-request-expire-articles
321 (articles group &optional server force)
91472578 322 (setq group (nnrss-decode-group-name group))
23f87bed
MB
323 (nnrss-possibly-change-group group server)
324 (let (e days not-expirable changed)
325 (dolist (art articles)
326 (if (and (setq e (assq art nnrss-group-data))
327 (nnmail-expired-article-p
328 group
329 (if (listp (setq days (nth 1 e))) days
330 (days-to-time (- days (time-to-days '(0 0)))))
331 force))
332 (setq nnrss-group-data (delq e nnrss-group-data)
333 changed t)
334 (push art not-expirable)))
335 (if changed
336 (nnrss-save-group-data group server))
337 not-expirable))
338
339(deffoo nnrss-request-delete-group (group &optional force server)
91472578 340 (setq group (nnrss-decode-group-name group))
23f87bed 341 (nnrss-possibly-change-group group server)
91472578
MB
342 (let (elem)
343 ;; There may be two or more entries in `nnrss-group-alist' since
344 ;; this function didn't delete them formerly.
345 (while (setq elem (assoc group nnrss-group-alist))
346 (setq nnrss-group-alist (delq elem nnrss-group-alist))))
23f87bed
MB
347 (setq nnrss-server-data
348 (delq (assoc group nnrss-server-data) nnrss-server-data))
349 (nnrss-save-server-data server)
91472578 350 (ignore-errors
01c52d31
MB
351 (let ((file-name-coding-system nnmail-pathname-coding-system))
352 (delete-file (nnrss-make-filename group server))))
23f87bed
MB
353 t)
354
355(deffoo nnrss-request-list-newsgroups (&optional server)
356 (nnrss-possibly-change-group nil server)
63556fc6 357 (with-current-buffer nntp-server-buffer
23f87bed
MB
358 (erase-buffer)
359 (dolist (elem nnrss-group-alist)
360 (if (third elem)
361 (insert (car elem) "\t" (third elem) "\n"))))
362 t)
363
74769e8b 364(deffoo nnrss-retrieve-groups (groups &optional server)
74769e8b 365 (dolist (group groups)
61b1af82 366 (nnrss-possibly-change-group group server)
74769e8b 367 (nnrss-check-group group server))
63556fc6 368 (with-current-buffer nntp-server-buffer
74769e8b
LMI
369 (erase-buffer)
370 (dolist (group groups)
371 (let ((elem (assoc group nnrss-server-data)))
372 (insert (format "%S %s 1 y\n" group (or (cadr elem) 0)))))
373 'active))
374
23f87bed
MB
375(nnoo-define-skeleton nnrss)
376
377;;; Internal functions
378(eval-when-compile (defun xml-rpc-method-call (&rest args)))
91472578
MB
379
380(defun nnrss-get-encoding ()
381 "Return an encoding attribute specified in the current xml contents.
382If `nnrss-compatible-encoding-alist' specifies the compatible encoding,
383it is used instead. If the xml contents doesn't specify the encoding,
384return `utf-8' which is the default encoding for xml if it is available,
385otherwise return nil."
386 (goto-char (point-min))
387 (if (re-search-forward
01c52d31 388 "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
91472578 389 nil t)
01c52d31
MB
390 (let ((encoding (intern (downcase (or (match-string 1)
391 (match-string 2))))))
91472578
MB
392 (or
393 (mm-coding-system-p (cdr (assq encoding
394 nnrss-compatible-encoding-alist)))
395 (mm-coding-system-p encoding)
396 (mm-coding-system-p (car (rassq encoding
397 nnrss-compatible-encoding-alist)))))
398 (mm-coding-system-p 'utf-8)))
399
0f7cbeb9
GM
400(declare-function w3-parse-buffer "ext:w3-parse" (&optional buff))
401
23f87bed 402(defun nnrss-fetch (url &optional local)
91472578
MB
403 "Fetch URL and put it in a the expected Lisp structure."
404 (mm-with-unibyte-buffer
bff3818b 405 ;;some versions of url.el need this to close the connection quickly
91472578 406 (let (cs xmlform htmlform)
23f87bed
MB
407 ;; bit o' work necessary for w3 pre-cvs and post-cvs
408 (if local
409 (let ((coding-system-for-read 'binary))
410 (insert-file-contents url))
91472578
MB
411 ;; FIXME: shouldn't binding `coding-system-for-read' be moved
412 ;; to `mm-url-insert'?
413 (let ((coding-system-for-read 'binary))
7dafe00b
MB
414 (condition-case err
415 (mm-url-insert url)
416 (error (if (or debug-on-quit debug-on-error)
417 (signal (car err) (cdr err))
418 (message "nnrss: Failed to fetch %s" url))))))
91472578
MB
419 (nnheader-remove-cr-followed-by-lf)
420 ;; Decode text according to the encoding attribute.
421 (when (setq cs (nnrss-get-encoding))
11e95b02
MB
422 (insert (prog1
423 (mm-decode-coding-string (buffer-string) cs)
424 (erase-buffer)
425 (mm-enable-multibyte))))
91472578
MB
426 (goto-char (point-min))
427
428 ;; Because xml-parse-region can't deal with anything that isn't
429 ;; xml and w3-parse-buffer can't deal with some xml, we have to
430 ;; parse with xml-parse-region first and, if that fails, parse
431 ;; with w3-parse-buffer. Yuck. Eventually, someone should find out
432 ;; why w3-parse-buffer fails to parse some well-formed xml and
433 ;; fix it.
434
435 (condition-case err1
436 (setq xmlform (xml-parse-region (point-min) (point-max)))
437 (error
438 (condition-case err2
439 (setq htmlform (caddar (w3-parse-buffer
440 (current-buffer))))
441 (error
442 (message "\
443nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
444 url err1 err2)))))
445 (if htmlform
446 htmlform
447 xmlform))))
23f87bed
MB
448
449(defun nnrss-possibly-change-group (&optional group server)
450 (when (and server
451 (not (nnrss-server-opened server)))
452 (nnrss-open-server server))
453 (when (and group (not (equal group nnrss-group)))
454 (nnrss-read-group-data group server)
455 (setq nnrss-group group)))
456
8abf1b22 457(autoload 'timezone-parse-date "timezone")
7dafe00b
MB
458
459(defun nnrss-normalize-date (date)
460 "Return a date string of DATE in the RFC822 style.
461This function handles the ISO 8601 date format described in
0b10e437 462URL `http://www.w3.org/TR/NOTE-datetime', and also the RFC822 style
7dafe00b 463which RSS 2.0 allows."
4079589f
MB
464 (let (case-fold-search vector year month day time zone cts given)
465 (cond ((null date)) ; do nothing for this case
466 ;; if the date is just digits (unix time stamp):
467 ((string-match "^[0-9]+$" date)
468 (setq given (seconds-to-time (string-to-number date))))
7dafe00b
MB
469 ;; RFC822
470 ((string-match " [0-9]+ " date)
471 (setq vector (timezone-parse-date date)
472 year (string-to-number (aref vector 0)))
473 (when (>= year 1969)
474 (setq month (string-to-number (aref vector 1))
475 day (string-to-number (aref vector 2)))
476 (unless (>= (length (setq time (aref vector 3))) 3)
477 (setq time "00:00:00"))
478 (when (and (setq zone (aref vector 4))
479 (not (string-match "\\`[A-Z+-]" zone)))
480 (setq zone nil))))
481 ;; ISO 8601
482 ((string-match
483 (eval-when-compile
484 (concat
485 ;; 1. year
486 "\\(199[0-9]\\|20[0-9][0-9]\\)"
01c52d31
MB
487 "\\(?:-"
488 ;; 2. month
7dafe00b 489 "\\([01][0-9]\\)"
01c52d31
MB
490 "\\(?:-"
491 ;; 3. day
7dafe00b 492 "\\([0-3][0-9]\\)"
01c52d31
MB
493 "\\)?\\)?\\(?:T"
494 ;; 4. hh:mm
7dafe00b 495 "\\([012][0-9]:[0-5][0-9]\\)"
01c52d31
MB
496 "\\(?:"
497 ;; 5. :ss
7dafe00b 498 "\\(:[0-5][0-9]\\)"
01c52d31
MB
499 "\\(?:\\.[0-9]+\\)?\\)?\\)?"
500 ;; 6+7,8,9. zone
501 "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)"
7dafe00b
MB
502 "\\|\\([+-][012][0-9][0-5][0-9]\\)"
503 "\\|\\(Z\\)\\)?"))
504 date)
505 (setq year (string-to-number (match-string 1 date))
01c52d31
MB
506 month (string-to-number (or (match-string 2 date) "1"))
507 day (string-to-number (or (match-string 3 date) "1"))
508 time (if (match-beginning 5)
509 (substring date (match-beginning 4) (match-end 5))
510 (concat (or (match-string 4 date) "00:00") ":00"))
511 zone (cond ((match-beginning 6)
512 (concat (match-string 6 date)
513 (match-string 7 date)))
514 ((match-beginning 9) ;; Z
7dafe00b
MB
515 "+0000")
516 (t ;; nil if zone is not provided.
01c52d31 517 (match-string 8 date))))))
7dafe00b
MB
518 (if month
519 (progn
520 (setq cts (current-time-string (encode-time 0 0 0 day month year)))
521 (format "%s, %02d %s %04d %s%s"
522 (substring cts 0 3) day (substring cts 4 7) year time
523 (if zone
524 (concat " " zone)
525 "")))
4079589f 526 (message-make-date given))))
7dafe00b 527
23f87bed
MB
528;;; data functions
529
530(defun nnrss-read-server-data (server)
531 (setq nnrss-server-data nil)
01c52d31
MB
532 (let ((file (nnrss-make-filename "nnrss" server))
533 (file-name-coding-system nnmail-pathname-coding-system))
23f87bed 534 (when (file-exists-p file)
0c43b6f8 535 (load file nil t t))))
23f87bed
MB
536
537(defun nnrss-save-server-data (server)
538 (gnus-make-directory nnrss-directory)
91472578
MB
539 (let ((coding-system-for-write nnrss-file-coding-system)
540 (file-name-coding-system nnmail-pathname-coding-system))
541 (with-temp-file (nnrss-make-filename "nnrss" server)
542 (insert (format ";; -*- coding: %s; -*-\n"
543 nnrss-file-coding-system))
544 (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist))
545 (insert "\n")
546 (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data)))))
23f87bed
MB
547
548(defun nnrss-read-group-data (group server)
549 (setq nnrss-group-data nil)
01c52d31
MB
550 (if (hash-table-p nnrss-group-hashtb)
551 (clrhash nnrss-group-hashtb)
552 (setq nnrss-group-hashtb (make-hash-table :test 'equal)))
23f87bed
MB
553 (let ((pair (assoc group nnrss-server-data)))
554 (setq nnrss-group-max (or (cadr pair) 0))
555 (setq nnrss-group-min (+ nnrss-group-max 1)))
01c52d31
MB
556 (let ((file (nnrss-make-filename group server))
557 (file-name-coding-system nnmail-pathname-coding-system))
23f87bed 558 (when (file-exists-p file)
0c43b6f8 559 (load file nil t t)
23f87bed 560 (dolist (e nnrss-group-data)
01c52d31 561 (puthash (nth 9 e) t nnrss-group-hashtb)
91472578
MB
562 (when (and (car e) (> nnrss-group-min (car e)))
563 (setq nnrss-group-min (car e)))
564 (when (and (car e) (< nnrss-group-max (car e)))
565 (setq nnrss-group-max (car e)))))))
23f87bed
MB
566
567(defun nnrss-save-group-data (group server)
568 (gnus-make-directory nnrss-directory)
91472578
MB
569 (let ((coding-system-for-write nnrss-file-coding-system)
570 (file-name-coding-system nnmail-pathname-coding-system))
571 (with-temp-file (nnrss-make-filename group server)
572 (insert (format ";; -*- coding: %s; -*-\n"
573 nnrss-file-coding-system))
574 (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data)))))
575
576(defun nnrss-make-filename (name server)
577 (expand-file-name
578 (nnrss-translate-file-chars
579 (concat name
580 (and server
581 (not (equal server ""))
582 "-")
583 server
584 ".el"))
585 nnrss-directory))
586
587(gnus-add-shutdown 'nnrss-close 'gnus)
588
589(defun nnrss-close ()
590 "Clear internal nnrss variables."
591 (setq nnrss-group-data nil
592 nnrss-server-data nil
593 nnrss-group-hashtb nil
594 nnrss-group-alist nil))
23f87bed
MB
595
596;;; URL interface
597
598(defun nnrss-no-cache (url)
599 "")
600
601(defun nnrss-insert-w3 (url)
602 (mm-with-unibyte-current-buffer
7dafe00b
MB
603 (condition-case err
604 (mm-url-insert url)
605 (error (if (or debug-on-quit debug-on-error)
606 (signal (car err) (cdr err))
607 (message "nnrss: Failed to fetch %s" url))))))
23f87bed 608
91472578 609(defun nnrss-decode-entities-string (string)
23f87bed 610 (if string
91472578 611 (mm-with-multibyte-buffer
23f87bed
MB
612 (insert string)
613 (mm-url-decode-entities-nbsp)
614 (buffer-string))))
615
616(defalias 'nnrss-insert 'nnrss-insert-w3)
617
91472578
MB
618(defun nnrss-mime-encode-string (string)
619 (mm-with-multibyte-buffer
620 (insert string)
621 (mm-url-decode-entities-nbsp)
622 (goto-char (point-min))
623 (while (re-search-forward "[\t\n ]+" nil t)
624 (replace-match " "))
625 (goto-char (point-min))
626 (skip-chars-forward " ")
627 (delete-region (point-min) (point))
628 (goto-char (point-max))
629 (skip-chars-forward " ")
630 (delete-region (point) (point-max))
631 (let ((rfc2047-encoding-type 'mime)
632 rfc2047-encode-max-chars)
633 (rfc2047-encode-region (point-min) (point-max)))
634 (goto-char (point-min))
635 (while (search-forward "\n" nil t)
d355a0b7 636 (delete-char -1))
91472578
MB
637 (buffer-string)))
638
23f87bed 639;;; Snarf functions
01c52d31 640(defun nnrss-make-hash-index (item)
5b51650c 641 (gnus-message 9 "nnrss: Making hash index of %s" (gnus-prin1-to-string item))
01c52d31
MB
642 (setq item (gnus-remove-if
643 (lambda (field)
644 (when (listp field)
645 (memq (car field) nnrss-ignore-article-fields)))
646 item))
647 (md5 (gnus-prin1-to-string item)
648 nil nil
649 nnrss-file-coding-system))
650
23f87bed 651(defun nnrss-check-group (group server)
7dafe00b 652 (let (file xml subject url extra changed author date feed-subject
01c52d31
MB
653 enclosure comments rss-ns rdf-ns content-ns dc-ns
654 hash-index)
23f87bed
MB
655 (if (and nnrss-use-local
656 (file-exists-p (setq file (expand-file-name
657 (nnrss-translate-file-chars
658 (concat group ".xml"))
659 nnrss-directory))))
660 (setq xml (nnrss-fetch file t))
661 (setq url (or (nth 2 (assoc group nnrss-server-data))
662 (second (assoc group nnrss-group-alist))))
663 (unless url
664 (setq url
91472578
MB
665 (cdr
666 (assoc 'href
667 (nnrss-discover-feed
668 (read-string
669 (format "URL to search for %s: " group) "http://")))))
23f87bed
MB
670 (let ((pair (assoc group nnrss-server-data)))
671 (if pair
672 (setcdr (cdr pair) (list url))
673 (push (list group nnrss-group-max url) nnrss-server-data)))
674 (setq changed t))
675 (setq xml (nnrss-fetch url)))
23f87bed
MB
676 (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/")
677 rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
678 rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
679 content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
680 (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
681 (when (and (listp item)
91472578 682 (string= (concat rss-ns "item") (car item))
01c52d31
MB
683 (progn (setq hash-index (nnrss-make-hash-index item))
684 (not (gethash hash-index nnrss-group-hashtb))))
23f87bed 685 (setq subject (nnrss-node-text rss-ns 'title item))
01c52d31
MB
686 (setq url (nnrss-decode-entities-string
687 (nnrss-node-text rss-ns 'link (cddr item))))
688 (setq extra (or (nnrss-node-text content-ns 'encoded item)
23f87bed 689 (nnrss-node-text rss-ns 'description item)))
7dafe00b
MB
690 (if (setq feed-subject (nnrss-node-text dc-ns 'subject item))
691 (setq extra (concat feed-subject "<br /><br />" extra)))
23f87bed
MB
692 (setq author (or (nnrss-node-text rss-ns 'author item)
693 (nnrss-node-text dc-ns 'creator item)
694 (nnrss-node-text dc-ns 'contributor item)))
7dafe00b
MB
695 (setq date (nnrss-normalize-date
696 (or (nnrss-node-text dc-ns 'date item)
697 (nnrss-node-text rss-ns 'pubDate item))))
d3a597b7 698 (setq comments (nnrss-node-text rss-ns 'comments item))
31640842
MB
699 (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item)))
700 (let ((url (cdr (assq 'url enclosure)))
701 (len (cdr (assq 'length enclosure)))
702 (type (cdr (assq 'type enclosure)))
703 (name))
704 (setq len
705 (if (and len (integerp (setq len (string-to-number len))))
706 ;; actually already in `ls-lisp-format-file-size' but
707 ;; probably not worth to require it for one function
708 (do ((size (/ len 1.0) (/ size 1024.0))
709 (post-fixes (list "" "k" "M" "G" "T" "P" "E")
710 (cdr post-fixes)))
711 ((< size 1024)
712 (format "%.1f%s" size (car post-fixes))))
713 "0"))
714 (setq url (or url ""))
715 (setq name (if (string-match "/\\([^/]*\\)$" url)
716 (match-string 1 url)
717 "file"))
718 (setq type (or type ""))
719 (setq enclosure (list url name len type))))
23f87bed
MB
720 (push
721 (list
722 (incf nnrss-group-max)
723 (current-time)
724 url
91472578
MB
725 (and subject (nnrss-mime-encode-string subject))
726 (and author (nnrss-mime-encode-string author))
23f87bed 727 date
31640842 728 (and extra (nnrss-decode-entities-string extra))
d3a597b7 729 enclosure
01c52d31
MB
730 comments
731 hash-index)
23f87bed 732 nnrss-group-data)
01c52d31 733 (puthash hash-index t nnrss-group-hashtb)
91472578
MB
734 (setq changed t))
735 (setq extra nil))
23f87bed
MB
736 (when changed
737 (nnrss-save-group-data group server)
738 (let ((pair (assoc group nnrss-server-data)))
739 (if pair
740 (setcar (cdr pair) nnrss-group-max)
741 (push (list group nnrss-group-max) nnrss-server-data)))
742 (nnrss-save-server-data server))))
743
0f7cbeb9
GM
744(declare-function gnus-group-make-rss-group "gnus-group" (&optional url))
745
91472578
MB
746(defun nnrss-opml-import (opml-file)
747 "OPML subscriptions import.
748Read the file and attempt to subscribe to each Feed in the file."
749 (interactive "fImport file: ")
c4288669
MB
750 (mapc
751 (lambda (node)
752 (let ((xmlurl (cdr (assq 'xmlUrl (cadr node)))))
753 (when (and xmlurl
754 (not (string-match "\\`[\t ]*\\'" xmlurl))
755 (prog1
756 (y-or-n-p (format "Subscribe to %s " xmlurl))
757 (message "")))
758 (condition-case err
759 (progn
760 (gnus-group-make-rss-group xmlurl)
761 (forward-line 1))
762 (error
763 (message
764 "Failed to subscribe to %s (%s); type any key to continue: "
765 xmlurl
766 (error-message-string err))
767 (let ((echo-keystrokes 0))
768 (read-char)))))))
91472578 769 (nnrss-find-el 'outline
c4288669
MB
770 (mm-with-multibyte-buffer
771 (insert-file-contents opml-file)
772 (xml-parse-region (point-min) (point-max))))))
91472578
MB
773
774(defun nnrss-opml-export ()
775 "OPML subscription export.
776Export subscriptions to a buffer in OPML Format."
777 (interactive)
778 (with-current-buffer (get-buffer-create "*OPML Export*")
779 (mm-set-buffer-file-coding-system 'utf-8)
780 (insert "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
781 "<!-- OPML generated by Emacs Gnus' nnrss.el -->\n"
782 "<opml version=\"1.1\">\n"
783 " <head>\n"
784 " <title>mySubscriptions</title>\n"
785 " <dateCreated>" (format-time-string "%a, %d %b %Y %T %z")
786 "</dateCreated>\n"
787 " <ownerEmail>" user-mail-address "</ownerEmail>\n"
788 " <ownerName>" (user-full-name) "</ownerName>\n"
789 " </head>\n"
790 " <body>\n")
791 (dolist (sub nnrss-group-alist)
792 (insert " <outline text=\"" (car sub)
793 "\" xmlUrl=\"" (cadr sub) "\"/>\n"))
794 (insert " </body>\n"
795 "</opml>\n"))
796 (pop-to-buffer "*OPML Export*")
797 (when (fboundp 'sgml-mode)
798 (sgml-mode)))
799
23f87bed
MB
800(defun nnrss-generate-download-script ()
801 "Generate a download script in the current buffer.
802It is useful when `(setq nnrss-use-local t)'."
803 (interactive)
804 (insert "#!/bin/sh\n")
805 (insert "WGET=wget\n")
806 (insert "RSSDIR='" (expand-file-name nnrss-directory) "'\n")
807 (dolist (elem nnrss-server-data)
808 (let ((url (or (nth 2 elem)
809 (second (assoc (car elem) nnrss-group-alist)))))
810 (insert "$WGET -q -O \"$RSSDIR\"/'"
811 (nnrss-translate-file-chars (concat (car elem) ".xml"))
812 "' '" url "'\n"))))
813
814(defun nnrss-translate-file-chars (name)
815 (let ((nnheader-file-name-translation-alist
816 (append nnheader-file-name-translation-alist '((?' . ?_)))))
817 (nnheader-translate-file-chars name)))
818
23f87bed
MB
819(defun nnrss-node-text (namespace local-name element)
820 (let* ((node (assq (intern (concat namespace (symbol-name local-name)))
821 element))
822 (text (if (and node (listp node))
823 (nnrss-node-just-text node)
824 node))
ff5e68bf
MB
825 (cleaned-text (if text
826 (gnus-replace-in-string
827 (gnus-replace-in-string
828 text "^[\000-\037\177]+\\|^ +\\| +$" "")
829 "\r\n" "\n"))))
23f87bed
MB
830 (if (string-equal "" cleaned-text)
831 nil
832 cleaned-text)))
833
834(defun nnrss-node-just-text (node)
835 (if (and node (listp node))
836 (mapconcat 'nnrss-node-just-text (cddr node) " ")
837 node))
838
839(defun nnrss-find-el (tag data &optional found-list)
91472578
MB
840 "Find the all matching elements in the data.
841Careful with this on large documents!"
842 (when (consp data)
843 (dolist (bit data)
844 (when (car-safe bit)
845 (when (equal tag (car bit))
846 ;; Old xml.el may return a list of string.
847 (when (and (consp (caddr bit))
848 (stringp (caaddr bit)))
849 (setcar (cddr bit) (caaddr bit)))
850 (setq found-list
851 (append found-list
852 (list bit))))
853 (if (and (consp (car-safe (caddr bit)))
854 (not (stringp (caddr bit))))
855 (setq found-list
856 (append found-list
857 (nnrss-find-el
858 tag (caddr bit))))
859 (setq found-list
860 (append found-list
861 (nnrss-find-el
862 tag (cddr bit))))))))
23f87bed
MB
863 found-list)
864
865(defun nnrss-rsslink-p (el)
866 "Test if the element we are handed is an RSS autodiscovery link."
867 (and (eq (car-safe el) 'link)
868 (string-equal (cdr (assoc 'rel (cadr el))) "alternate")
91472578 869 (or (string-equal (cdr (assoc 'type (cadr el)))
23f87bed
MB
870 "application/rss+xml")
871 (string-equal (cdr (assoc 'type (cadr el))) "text/xml"))))
872
873(defun nnrss-get-rsslinks (data)
874 "Extract the <link> elements that are links to RSS from the parsed data."
91472578 875 (delq nil (mapcar
23f87bed
MB
876 (lambda (el)
877 (if (nnrss-rsslink-p el) el))
878 (nnrss-find-el 'link data))))
879
880(defun nnrss-extract-hrefs (data)
91472578
MB
881 "Recursively extract hrefs from a page's source.
882DATA should be the output of `xml-parse-region' or
883`w3-parse-buffer'."
23f87bed
MB
884 (mapcar (lambda (ahref)
885 (cdr (assoc 'href (cadr ahref))))
886 (nnrss-find-el 'a data)))
887
91472578 888(defmacro nnrss-match-macro (base-uri item onsite-list offsite-list)
23f87bed 889 `(cond ((or (string-match (concat "^" ,base-uri) ,item)
91472578
MB
890 (not (string-match "://" ,item)))
891 (setq ,onsite-list (append ,onsite-list (list ,item))))
892 (t (setq ,offsite-list (append ,offsite-list (list ,item))))))
23f87bed
MB
893
894(defun nnrss-order-hrefs (base-uri hrefs)
895 "Given a list of hrefs, sort them using the following priorities:
896 1. links ending in .rss
897 2. links ending in .rdf
898 3. links ending in .xml
899 4. links containing the above
900 5. offsite links
901
902BASE-URI is used to determine the location of the links and
903whether they are `offsite' or `onsite'."
904 (let (rss-onsite-end rdf-onsite-end xml-onsite-end
905 rss-onsite-in rdf-onsite-in xml-onsite-in
906 rss-offsite-end rdf-offsite-end xml-offsite-end
01c52d31 907 rss-offsite-in rdf-offsite-in xml-offsite-in)
91472578
MB
908 (dolist (href hrefs)
909 (cond ((null href))
910 ((string-match "\\.rss$" href)
911 (nnrss-match-macro
912 base-uri href rss-onsite-end rss-offsite-end))
913 ((string-match "\\.rdf$" href)
914 (nnrss-match-macro
915 base-uri href rdf-onsite-end rdf-offsite-end))
916 ((string-match "\\.xml$" href)
917 (nnrss-match-macro
918 base-uri href xml-onsite-end xml-offsite-end))
919 ((string-match "rss" href)
920 (nnrss-match-macro
921 base-uri href rss-onsite-in rss-offsite-in))
922 ((string-match "rdf" href)
923 (nnrss-match-macro
924 base-uri href rdf-onsite-in rdf-offsite-in))
925 ((string-match "xml" href)
926 (nnrss-match-macro
927 base-uri href xml-onsite-in xml-offsite-in))))
928 (append
23f87bed
MB
929 rss-onsite-end rdf-onsite-end xml-onsite-end
930 rss-onsite-in rdf-onsite-in xml-onsite-in
931 rss-offsite-end rdf-offsite-end xml-offsite-end
932 rss-offsite-in rdf-offsite-in xml-offsite-in)))
933
934(defun nnrss-discover-feed (url)
935 "Given a page, find an RSS feed using Mark Pilgrim's
66627fa9 936`ultra-liberal rss locator'."
23f87bed
MB
937
938 (let ((parsed-page (nnrss-fetch url)))
939
940;; 1. if this url is the rss, use it.
941 (if (nnrss-rss-p parsed-page)
942 (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/")))
943 (nnrss-rss-title-description rss-ns parsed-page url))
944
945;; 2. look for the <link rel="alternate"
946;; type="application/rss+xml" and use that if it is there.
947 (let ((links (nnrss-get-rsslinks parsed-page)))
948 (if links
949 (let* ((xml (nnrss-fetch
950 (cdr (assoc 'href (cadar links)))))
951 (rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")))
952 (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links)))))
953
954;; 3. look for links on the site in the following order:
955;; - onsite links ending in .rss, .rdf, or .xml
956;; - onsite links containing any of the above
957;; - offsite links ending in .rss, .rdf, or .xml
958;; - offsite links containing any of the above
959 (let* ((base-uri (progn (string-match ".*://[^/]+/?" url)
960 (match-string 0 url)))
91472578 961 (hrefs (nnrss-order-hrefs
23f87bed
MB
962 base-uri (nnrss-extract-hrefs parsed-page)))
963 (rss-link nil))
91472578
MB
964 (while (and (eq rss-link nil) (not (eq hrefs nil)))
965 (let ((href-data (nnrss-fetch (car hrefs))))
966 (if (nnrss-rss-p href-data)
967 (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
968 (setq rss-link (nnrss-rss-title-description
969 rss-ns href-data (car hrefs))))
970 (setq hrefs (cdr hrefs)))))
971 (if rss-link rss-link
23f87bed
MB
972
973;; 4. check syndic8
91472578 974 (nnrss-find-rss-via-syndic8 url))))))))
23f87bed
MB
975
976(defun nnrss-find-rss-via-syndic8 (url)
91472578 977 "Query syndic8 for the rss feeds it has for URL."
23f87bed
MB
978 (if (not (locate-library "xml-rpc"))
979 (progn
980 (message "XML-RPC is not available... not checking Syndic8.")
981 nil)
982 (require 'xml-rpc)
983 (let ((feedid (xml-rpc-method-call
984 "http://www.syndic8.com/xmlrpc.php"
985 'syndic8.FindSites
986 url)))
987 (when feedid
91472578 988 (let* ((feedinfo (xml-rpc-method-call
23f87bed
MB
989 "http://www.syndic8.com/xmlrpc.php"
990 'syndic8.GetFeedInfo
991 feedid))
992 (urllist
91472578 993 (delq nil
23f87bed
MB
994 (mapcar
995 (lambda (listinfo)
91472578 996 (if (string-equal
23f87bed
MB
997 (cdr (assoc "status" listinfo))
998 "Syndicated")
999 (cons
1000 (cdr (assoc "sitename" listinfo))
1001 (list
1002 (cons 'title
91472578 1003 (cdr (assoc
23f87bed
MB
1004 "sitename" listinfo)))
1005 (cons 'href
1006 (cdr (assoc
1007 "dataurl" listinfo)))))))
1008 feedinfo))))
1009 (if (not (> (length urllist) 1))
1010 (cdar urllist)
1011 (let ((completion-ignore-case t)
91472578 1012 (selection
23f87bed 1013 (mapcar (lambda (listinfo)
91472578 1014 (cons (cdr (assoc "sitename" listinfo))
e9bd5782 1015 (string-to-number
23f87bed
MB
1016 (cdr (assoc "feedid" listinfo)))))
1017 feedinfo)))
91472578 1018 (cdr (assoc
229b59da
G
1019 (gnus-completing-read
1020 "Multiple feeds found. Select one"
1021 selection t) urllist)))))))))
23f87bed
MB
1022
1023(defun nnrss-rss-p (data)
91472578
MB
1024 "Test if DATA is an RSS feed.
1025Simply ensures that the first element is rss or rdf."
23f87bed
MB
1026 (or (eq (caar data) 'rss)
1027 (eq (caar data) 'rdf:RDF)))
1028
1029(defun nnrss-rss-title-description (rss-namespace data url)
1030 "Return the title of an RSS feed."
1031 (if (nnrss-rss-p data)
1032 (let ((description (intern (concat rss-namespace "description")))
1033 (title (intern (concat rss-namespace "title")))
1034 (channel (nnrss-find-el (intern (concat rss-namespace "channel"))
1035 data)))
1036 (list
1037 (cons 'description (caddr (nth 0 (nnrss-find-el description channel))))
1038 (cons 'title (caddr (nth 0 (nnrss-find-el title channel))))
1039 (cons 'href url)))))
1040
1041(defun nnrss-get-namespace-prefix (el uri)
1042 "Given EL (containing a parsed element) and URI (containing a string
1043that gives the URI for which you want to retrieve the namespace
1044prefix), return the prefix."
1045 (let* ((prefix (car (rassoc uri (cadar el))))
91472578 1046 (nslist (if prefix
23f87bed
MB
1047 (split-string (symbol-name prefix) ":")))
1048 (ns (cond ((eq (length nslist) 1) ; no prefix given
1049 "")
1050 ((eq (length nslist) 2) ; extract prefix
1051 (cadr nslist)))))
91472578 1052 (if (and ns (not (string= ns "")))
23f87bed
MB
1053 (concat ns ":")
1054 ns)))
1055
1056(provide 'nnrss)
1057
23f87bed 1058;;; nnrss.el ends here