Merge from emacs-24; up to 2014-05-29T17:16:00Z!dmantipov@yandex.ru
[bpt/emacs.git] / lisp / gnus / nnrss.el
CommitLineData
23f87bed 1;;; nnrss.el --- interfacing with RSS
e84b4b86 2
ba318903 3;; Copyright (C) 2001-2014 Free Software Foundation, Inc.
23f87bed
MB
4
5;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
6;; Keywords: RSS
7
8;; This file is part of GNU Emacs.
9
5e809f55
GM
10;; GNU Emacs is free software: you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation, either version 3 of the License, or
13;; (at your option) any later version.
23f87bed 14
5e809f55
GM
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
23f87bed
MB
19
20;; You should have received a copy of the GNU General Public License
5e809f55 21;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23f87bed
MB
22
23;;; Commentary:
24
25;;; Code:
26
27(eval-when-compile (require 'cl))
28
29(require 'gnus)
30(require 'nnoo)
31(require 'nnmail)
32(require 'message)
33(require 'mm-util)
34(require 'gnus-util)
35(require 'time-date)
36(require 'rfc2231)
37(require 'mm-url)
91472578
MB
38(require 'rfc2047)
39(require 'mml)
23f87bed
MB
40(eval-when-compile
41 (ignore-errors
91472578 42 (require 'xml)))
23f87bed
MB
43(eval '(require 'xml))
44
45(nnoo-declare nnrss)
46
47(defvoo nnrss-directory (nnheader-concat gnus-directory "rss/")
48 "Where nnrss will save its files.")
49
01c52d31
MB
50(defvoo nnrss-ignore-article-fields '(slash:comments)
51 "*List of fields that should be ignored when comparing RSS articles.
52Some RSS feeds update article fields during their lives, e.g. to
53indicate the number of comments or the number of times the
54articles have been seen. However, if there is a difference
55between the local article and the distant one, the latter is
56considered to be new. To avoid this and discard some fields, set
57this variable to the list of fields to be ignored.")
58
23f87bed
MB
59;; (group max rss-url)
60(defvoo nnrss-server-data nil)
61
62;; (num timestamp url subject author date extra)
63(defvoo nnrss-group-data nil)
64(defvoo nnrss-group-max 0)
65(defvoo nnrss-group-min 1)
66(defvoo nnrss-group nil)
01c52d31 67(defvoo nnrss-group-hashtb (make-hash-table :test 'equal))
23f87bed
MB
68(defvoo nnrss-status-string "")
69
70(defconst nnrss-version "nnrss 1.0")
71
72(defvar nnrss-group-alist '()
73 "List of RSS addresses.")
74
6b958814
G
75(defvar nnrss-use-local nil
76 "If non-nil nnrss will read the feeds from local files in nnrss-directory.")
23f87bed
MB
77
78(defvar nnrss-description-field 'X-Gnus-Description
79 "Field name used for DESCRIPTION.
80To use the description in headers, put this name into `nnmail-extra-headers'.")
81
82(defvar nnrss-url-field 'X-Gnus-Url
83 "Field name used for URL.
84To use the description in headers, put this name into `nnmail-extra-headers'.")
85
86(defvar nnrss-content-function nil
87 "A function which is called in `nnrss-request-article'.
88The arguments are (ENTRY GROUP ARTICLE).
91472578 89ENTRY is the record of the current headline. GROUP is the group name.
23f87bed
MB
90ARTICLE is the article number of the current headline.")
91
91472578 92(defvar nnrss-file-coding-system mm-universal-coding-system
01c52d31
MB
93 "*Coding system used when reading and writing files.
94If you run Gnus with various versions of Emacsen, the value of this
95variable should be the coding system that all those Emacsen support.
96Note that you have to regenerate all the nnrss groups if you change
97the value. Moreover, you should be patient even if you are made to
98read the same articles twice, that arises for the difference of the
99versions of xml.el.")
91472578 100
82fe1aed
MB
101(defvar nnrss-compatible-encoding-alist
102 (delq nil (mapcar (lambda (elem)
103 (if (and (mm-coding-system-p (car elem))
104 (mm-coding-system-p (cdr elem)))
105 elem))
106 mm-charset-override-alist))
91472578
MB
107 "Alist of encodings and those supersets.
108The cdr of each element is used to decode data if it is available when
7dafe00b 109the car is what the data specify as the encoding. Or, the car is used
91472578
MB
110for decoding when the cdr that the data specify is not available.")
111
23f87bed
MB
112(nnoo-define-basics nnrss)
113
114;;; Interface functions
115
91472578
MB
116(defsubst nnrss-format-string (string)
117 (gnus-replace-in-string string " *\n *" " "))
118
119(defun nnrss-decode-group-name (group)
120 (if (and group (mm-coding-system-p 'utf-8))
121 (setq group (mm-decode-coding-string group 'utf-8))
122 group))
ad136a7c 123
23f87bed 124(deffoo nnrss-retrieve-headers (articles &optional group server fetch-old)
91472578 125 (setq group (nnrss-decode-group-name group))
23f87bed
MB
126 (nnrss-possibly-change-group group server)
127 (let (e)
63556fc6 128 (with-current-buffer nntp-server-buffer
23f87bed
MB
129 (erase-buffer)
130 (dolist (article articles)
131 (if (setq e (assq article nnrss-group-data))
132 (insert (number-to-string (car e)) "\t" ;; number
91472578
MB
133 ;; subject
134 (or (nth 3 e) "")
135 "\t"
136 ;; from
137 (or (nth 4 e) "(nobody)")
138 "\t"
139 ;; date
23f87bed 140 (or (nth 5 e) "")
91472578
MB
141 "\t"
142 ;; id
23f87bed 143 (format "<%d@%s.nnrss>" (car e) group)
91472578
MB
144 "\t"
145 ;; refs
146 "\t"
147 ;; chars
148 "-1" "\t"
149 ;; lines
150 "-1" "\t"
151 ;; Xref
152 "" "\t"
23f87bed
MB
153 (if (and (nth 6 e)
154 (memq nnrss-description-field
155 nnmail-extra-headers))
156 (concat (symbol-name nnrss-description-field)
157 ": "
158 (nnrss-format-string (nth 6 e))
159 "\t")
160 "")
161 (if (and (nth 2 e)
162 (memq nnrss-url-field
163 nnmail-extra-headers))
164 (concat (symbol-name nnrss-url-field)
165 ": "
166 (nnrss-format-string (nth 2 e))
167 "\t")
168 "")
169 "\n")))))
170 'nov)
171
286c4fc2 172(deffoo nnrss-request-group (group &optional server dont-check info)
91472578
MB
173 (setq group (nnrss-decode-group-name group))
174 (nnheader-message 6 "nnrss: Requesting %s..." group)
23f87bed 175 (nnrss-possibly-change-group group server)
91472578
MB
176 (prog1
177 (if dont-check
178 t
179 (nnrss-check-group group server)
180 (nnheader-report 'nnrss "Opened group %s" group)
181 (nnheader-insert
182 "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
183 (prin1-to-string group)
184 t))
185 (nnheader-message 6 "nnrss: Requesting %s...done" group)))
23f87bed
MB
186
187(deffoo nnrss-close-group (group &optional server)
188 t)
189
190(deffoo nnrss-request-article (article &optional group server buffer)
91472578
MB
191 (setq group (nnrss-decode-group-name group))
192 (when (stringp article)
193 (setq article (if (string-match "\\`<\\([0-9]+\\)@" article)
194 (string-to-number (match-string 1 article))
195 0)))
23f87bed
MB
196 (nnrss-possibly-change-group group server)
197 (let ((e (assq article nnrss-group-data))
23f87bed
MB
198 (nntp-server-buffer (or buffer nntp-server-buffer))
199 post err)
200 (when e
91472578
MB
201 (with-current-buffer nntp-server-buffer
202 (erase-buffer)
203 (if group
204 (insert "Newsgroups: " group "\n"))
205 (if (nth 3 e)
206 (insert "Subject: " (nth 3 e) "\n"))
207 (if (nth 4 e)
208 (insert "From: " (nth 4 e) "\n"))
209 (if (nth 5 e)
210 (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
211 (let ((header (buffer-string))
7dafe00b 212 (text (nth 6 e))
91472578 213 (link (nth 2 e))
31640842 214 (enclosure (nth 7 e))
d3a597b7 215 (comments (nth 8 e))
91472578
MB
216 (rfc2047-header-encoding-alist
217 (if (mm-coding-system-p 'utf-8)
218 (cons '("Newsgroups" . utf-8)
219 rfc2047-header-encoding-alist)
220 rfc2047-header-encoding-alist))
7dafe00b 221 rfc2047-encode-encoded-words body fn)
d3a597b7 222 (when (or text link enclosure comments)
91472578
MB
223 (insert "\n")
224 (insert "<#multipart type=alternative>\n"
225 "<#part type=\"text/plain\">\n")
226 (setq body (point))
31640842 227 (when text
7dafe00b
MB
228 (insert text)
229 (goto-char body)
ab388ec4
KY
230 (while (re-search-forward "\n+" nil t)
231 (replace-match " "))
232 (goto-char body)
233 ;; See `nnrss-check-group', which inserts "<br /><br />".
234 (when (search-forward "<br /><br />" nil t)
235 (if (eobp)
236 (replace-match "\n")
237 (replace-match "\n\n")))
238 (unless (eobp)
239 (let ((fill-column (default-value 'fill-column))
240 (window (get-buffer-window nntp-server-buffer)))
241 (when window
242 (setq fill-column
243 (max 1 (/ (* (window-width window) 7) 8))))
244 (fill-region (point) (point-max))
245 (goto-char (point-max))
246 ;; XEmacs version of `fill-region' inserts newline.
247 (unless (bolp)
248 (insert "\n"))))
31640842
MB
249 (when (or link enclosure)
250 (insert "\n")))
251 (when link
252 (insert link "\n"))
253 (when enclosure
254 (insert (car enclosure) " "
255 (nth 2 enclosure) " "
256 (nth 3 enclosure) "\n"))
d3a597b7
MB
257 (when comments
258 (insert comments "\n"))
91472578
MB
259 (setq body (buffer-substring body (point)))
260 (insert "<#/part>\n"
261 "<#part type=\"text/html\">\n"
262 "<html><head></head><body>\n")
263 (when text
264 (insert text "\n"))
265 (when link
266 (insert "<p><a href=\"" link "\">link</a></p>\n"))
31640842
MB
267 (when enclosure
268 (insert "<p><a href=\"" (car enclosure) "\">"
269 (cadr enclosure) "</a> " (nth 2 enclosure)
270 " " (nth 3 enclosure) "</p>\n"))
d3a597b7
MB
271 (when comments
272 (insert "<p><a href=\"" comments "\">comments</a></p>\n"))
91472578
MB
273 (insert "</body></html>\n"
274 "<#/part>\n"
275 "<#/multipart>\n"))
276 (condition-case nil
554a69b8
KY
277 ;; Allow `mml-to-mime' to generate MIME article without
278 ;; making inquiry to a user for unknown encoding.
279 (let ((mml-confirmation-set
280 (cons 'unknown-encoding mml-confirmation-set)))
281 (mml-to-mime))
91472578
MB
282 (error
283 (erase-buffer)
284 (insert header
285 "Content-Type: text/plain; charset=gnus-decoded\n"
286 "Content-Transfer-Encoding: 8bit\n\n"
287 body)
288 (nnheader-message
289 3 "Warning - there might be invalid characters"))))
290 (goto-char (point-min))
291 (search-forward "\n\n")
292 (forward-line -1)
293 (insert (format "Message-ID: <%d@%s.nnrss>\n"
294 (car e)
295 (let ((rfc2047-encoding-type 'mime)
296 rfc2047-encode-max-chars)
297 (rfc2047-encode-string
298 (gnus-replace-in-string group "[\t\n ]+" "_")))))
299 (when nnrss-content-function
300 (funcall nnrss-content-function e group article))))
23f87bed
MB
301 (cond
302 (err
303 (nnheader-report 'nnrss err))
304 ((not e)
305 (nnheader-report 'nnrss "no such id: %d" article))
306 (t
307 (nnheader-report 'nnrss "article %s retrieved" (car e))
308 ;; we return the article number.
309 (cons nnrss-group (car e))))))
310
23f87bed
MB
311(deffoo nnrss-open-server (server &optional defs connectionless)
312 (nnrss-read-server-data server)
313 (nnoo-change-server 'nnrss server defs)
314 t)
315
316(deffoo nnrss-request-expire-articles
317 (articles group &optional server force)
91472578 318 (setq group (nnrss-decode-group-name group))
23f87bed
MB
319 (nnrss-possibly-change-group group server)
320 (let (e days not-expirable changed)
321 (dolist (art articles)
322 (if (and (setq e (assq art nnrss-group-data))
323 (nnmail-expired-article-p
324 group
325 (if (listp (setq days (nth 1 e))) days
326 (days-to-time (- days (time-to-days '(0 0)))))
327 force))
328 (setq nnrss-group-data (delq e nnrss-group-data)
329 changed t)
330 (push art not-expirable)))
331 (if changed
332 (nnrss-save-group-data group server))
333 not-expirable))
334
335(deffoo nnrss-request-delete-group (group &optional force server)
91472578 336 (setq group (nnrss-decode-group-name group))
23f87bed 337 (nnrss-possibly-change-group group server)
91472578
MB
338 (let (elem)
339 ;; There may be two or more entries in `nnrss-group-alist' since
340 ;; this function didn't delete them formerly.
341 (while (setq elem (assoc group nnrss-group-alist))
342 (setq nnrss-group-alist (delq elem nnrss-group-alist))))
23f87bed
MB
343 (setq nnrss-server-data
344 (delq (assoc group nnrss-server-data) nnrss-server-data))
345 (nnrss-save-server-data server)
91472578 346 (ignore-errors
01c52d31
MB
347 (let ((file-name-coding-system nnmail-pathname-coding-system))
348 (delete-file (nnrss-make-filename group server))))
23f87bed
MB
349 t)
350
351(deffoo nnrss-request-list-newsgroups (&optional server)
352 (nnrss-possibly-change-group nil server)
63556fc6 353 (with-current-buffer nntp-server-buffer
23f87bed
MB
354 (erase-buffer)
355 (dolist (elem nnrss-group-alist)
356 (if (third elem)
357 (insert (car elem) "\t" (third elem) "\n"))))
358 t)
359
74769e8b 360(deffoo nnrss-retrieve-groups (groups &optional server)
74769e8b 361 (dolist (group groups)
c36da500 362 (setq group (nnrss-decode-group-name group))
61b1af82 363 (nnrss-possibly-change-group group server)
74769e8b 364 (nnrss-check-group group server))
63556fc6 365 (with-current-buffer nntp-server-buffer
74769e8b
LMI
366 (erase-buffer)
367 (dolist (group groups)
c36da500 368 (let ((elem (assoc (gnus-group-decoded-name group) nnrss-server-data)))
74769e8b
LMI
369 (insert (format "%S %s 1 y\n" group (or (cadr elem) 0)))))
370 'active))
371
23f87bed
MB
372(nnoo-define-skeleton nnrss)
373
374;;; Internal functions
375(eval-when-compile (defun xml-rpc-method-call (&rest args)))
91472578
MB
376
377(defun nnrss-get-encoding ()
378 "Return an encoding attribute specified in the current xml contents.
379If `nnrss-compatible-encoding-alist' specifies the compatible encoding,
380it is used instead. If the xml contents doesn't specify the encoding,
381return `utf-8' which is the default encoding for xml if it is available,
382otherwise return nil."
383 (goto-char (point-min))
384 (if (re-search-forward
01c52d31 385 "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
91472578 386 nil t)
01c52d31
MB
387 (let ((encoding (intern (downcase (or (match-string 1)
388 (match-string 2))))))
91472578
MB
389 (or
390 (mm-coding-system-p (cdr (assq encoding
391 nnrss-compatible-encoding-alist)))
392 (mm-coding-system-p encoding)
393 (mm-coding-system-p (car (rassq encoding
394 nnrss-compatible-encoding-alist)))))
395 (mm-coding-system-p 'utf-8)))
396
4d2226bf
G
397(declare-function libxml-parse-html-region "xml.c"
398 (start end &optional base-url))
23f87bed 399(defun nnrss-fetch (url &optional local)
91472578
MB
400 "Fetch URL and put it in a the expected Lisp structure."
401 (mm-with-unibyte-buffer
bff3818b 402 ;;some versions of url.el need this to close the connection quickly
91472578 403 (let (cs xmlform htmlform)
23f87bed
MB
404 ;; bit o' work necessary for w3 pre-cvs and post-cvs
405 (if local
406 (let ((coding-system-for-read 'binary))
407 (insert-file-contents url))
91472578
MB
408 ;; FIXME: shouldn't binding `coding-system-for-read' be moved
409 ;; to `mm-url-insert'?
410 (let ((coding-system-for-read 'binary))
7dafe00b
MB
411 (condition-case err
412 (mm-url-insert url)
413 (error (if (or debug-on-quit debug-on-error)
414 (signal (car err) (cdr err))
415 (message "nnrss: Failed to fetch %s" url))))))
91472578
MB
416 (nnheader-remove-cr-followed-by-lf)
417 ;; Decode text according to the encoding attribute.
418 (when (setq cs (nnrss-get-encoding))
11e95b02
MB
419 (insert (prog1
420 (mm-decode-coding-string (buffer-string) cs)
421 (erase-buffer)
422 (mm-enable-multibyte))))
91472578
MB
423 (goto-char (point-min))
424
91472578
MB
425 (condition-case err1
426 (setq xmlform (xml-parse-region (point-min) (point-max)))
427 (error
428 (condition-case err2
4d2226bf 429 (setq htmlform (libxml-parse-html-region (point-min) (point-max)))
91472578
MB
430 (error
431 (message "\
4d2226bf 432nnrss: %s: Not valid XML %s and libxml-parse-html-region doesn't work %s"
91472578
MB
433 url err1 err2)))))
434 (if htmlform
435 htmlform
436 xmlform))))
23f87bed
MB
437
438(defun nnrss-possibly-change-group (&optional group server)
439 (when (and server
440 (not (nnrss-server-opened server)))
441 (nnrss-open-server server))
442 (when (and group (not (equal group nnrss-group)))
443 (nnrss-read-group-data group server)
444 (setq nnrss-group group)))
445
8abf1b22 446(autoload 'timezone-parse-date "timezone")
7dafe00b
MB
447
448(defun nnrss-normalize-date (date)
449 "Return a date string of DATE in the RFC822 style.
450This function handles the ISO 8601 date format described in
0b10e437 451URL `http://www.w3.org/TR/NOTE-datetime', and also the RFC822 style
7dafe00b 452which RSS 2.0 allows."
4079589f
MB
453 (let (case-fold-search vector year month day time zone cts given)
454 (cond ((null date)) ; do nothing for this case
455 ;; if the date is just digits (unix time stamp):
456 ((string-match "^[0-9]+$" date)
457 (setq given (seconds-to-time (string-to-number date))))
7dafe00b
MB
458 ;; RFC822
459 ((string-match " [0-9]+ " date)
460 (setq vector (timezone-parse-date date)
461 year (string-to-number (aref vector 0)))
462 (when (>= year 1969)
463 (setq month (string-to-number (aref vector 1))
464 day (string-to-number (aref vector 2)))
465 (unless (>= (length (setq time (aref vector 3))) 3)
466 (setq time "00:00:00"))
467 (when (and (setq zone (aref vector 4))
468 (not (string-match "\\`[A-Z+-]" zone)))
469 (setq zone nil))))
470 ;; ISO 8601
471 ((string-match
472 (eval-when-compile
473 (concat
474 ;; 1. year
475 "\\(199[0-9]\\|20[0-9][0-9]\\)"
01c52d31
MB
476 "\\(?:-"
477 ;; 2. month
7dafe00b 478 "\\([01][0-9]\\)"
01c52d31
MB
479 "\\(?:-"
480 ;; 3. day
7dafe00b 481 "\\([0-3][0-9]\\)"
01c52d31
MB
482 "\\)?\\)?\\(?:T"
483 ;; 4. hh:mm
7dafe00b 484 "\\([012][0-9]:[0-5][0-9]\\)"
01c52d31
MB
485 "\\(?:"
486 ;; 5. :ss
7dafe00b 487 "\\(:[0-5][0-9]\\)"
01c52d31
MB
488 "\\(?:\\.[0-9]+\\)?\\)?\\)?"
489 ;; 6+7,8,9. zone
490 "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)"
7dafe00b
MB
491 "\\|\\([+-][012][0-9][0-5][0-9]\\)"
492 "\\|\\(Z\\)\\)?"))
493 date)
494 (setq year (string-to-number (match-string 1 date))
01c52d31
MB
495 month (string-to-number (or (match-string 2 date) "1"))
496 day (string-to-number (or (match-string 3 date) "1"))
497 time (if (match-beginning 5)
498 (substring date (match-beginning 4) (match-end 5))
499 (concat (or (match-string 4 date) "00:00") ":00"))
500 zone (cond ((match-beginning 6)
501 (concat (match-string 6 date)
502 (match-string 7 date)))
503 ((match-beginning 9) ;; Z
7dafe00b
MB
504 "+0000")
505 (t ;; nil if zone is not provided.
01c52d31 506 (match-string 8 date))))))
7dafe00b
MB
507 (if month
508 (progn
509 (setq cts (current-time-string (encode-time 0 0 0 day month year)))
510 (format "%s, %02d %s %04d %s%s"
511 (substring cts 0 3) day (substring cts 4 7) year time
512 (if zone
513 (concat " " zone)
514 "")))
4079589f 515 (message-make-date given))))
7dafe00b 516
23f87bed
MB
517;;; data functions
518
519(defun nnrss-read-server-data (server)
520 (setq nnrss-server-data nil)
01c52d31
MB
521 (let ((file (nnrss-make-filename "nnrss" server))
522 (file-name-coding-system nnmail-pathname-coding-system))
23f87bed 523 (when (file-exists-p file)
0c43b6f8 524 (load file nil t t))))
23f87bed
MB
525
526(defun nnrss-save-server-data (server)
527 (gnus-make-directory nnrss-directory)
91472578
MB
528 (let ((coding-system-for-write nnrss-file-coding-system)
529 (file-name-coding-system nnmail-pathname-coding-system))
530 (with-temp-file (nnrss-make-filename "nnrss" server)
531 (insert (format ";; -*- coding: %s; -*-\n"
532 nnrss-file-coding-system))
533 (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist))
534 (insert "\n")
535 (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data)))))
23f87bed
MB
536
537(defun nnrss-read-group-data (group server)
538 (setq nnrss-group-data nil)
01c52d31
MB
539 (if (hash-table-p nnrss-group-hashtb)
540 (clrhash nnrss-group-hashtb)
541 (setq nnrss-group-hashtb (make-hash-table :test 'equal)))
23f87bed
MB
542 (let ((pair (assoc group nnrss-server-data)))
543 (setq nnrss-group-max (or (cadr pair) 0))
544 (setq nnrss-group-min (+ nnrss-group-max 1)))
01c52d31
MB
545 (let ((file (nnrss-make-filename group server))
546 (file-name-coding-system nnmail-pathname-coding-system))
23f87bed 547 (when (file-exists-p file)
0c43b6f8 548 (load file nil t t)
23f87bed 549 (dolist (e nnrss-group-data)
01c52d31 550 (puthash (nth 9 e) t nnrss-group-hashtb)
91472578
MB
551 (when (and (car e) (> nnrss-group-min (car e)))
552 (setq nnrss-group-min (car e)))
553 (when (and (car e) (< nnrss-group-max (car e)))
554 (setq nnrss-group-max (car e)))))))
23f87bed
MB
555
556(defun nnrss-save-group-data (group server)
557 (gnus-make-directory nnrss-directory)
91472578
MB
558 (let ((coding-system-for-write nnrss-file-coding-system)
559 (file-name-coding-system nnmail-pathname-coding-system))
560 (with-temp-file (nnrss-make-filename group server)
561 (insert (format ";; -*- coding: %s; -*-\n"
562 nnrss-file-coding-system))
563 (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data)))))
564
565(defun nnrss-make-filename (name server)
566 (expand-file-name
567 (nnrss-translate-file-chars
568 (concat name
569 (and server
570 (not (equal server ""))
571 "-")
572 server
573 ".el"))
574 nnrss-directory))
575
576(gnus-add-shutdown 'nnrss-close 'gnus)
577
578(defun nnrss-close ()
579 "Clear internal nnrss variables."
580 (setq nnrss-group-data nil
581 nnrss-server-data nil
582 nnrss-group-hashtb nil
583 nnrss-group-alist nil))
23f87bed
MB
584
585;;; URL interface
586
587(defun nnrss-no-cache (url)
588 "")
589
4d2226bf 590(defun nnrss-insert (url)
23f87bed 591 (mm-with-unibyte-current-buffer
7dafe00b
MB
592 (condition-case err
593 (mm-url-insert url)
594 (error (if (or debug-on-quit debug-on-error)
595 (signal (car err) (cdr err))
596 (message "nnrss: Failed to fetch %s" url))))))
23f87bed 597
91472578 598(defun nnrss-decode-entities-string (string)
23f87bed 599 (if string
91472578 600 (mm-with-multibyte-buffer
23f87bed
MB
601 (insert string)
602 (mm-url-decode-entities-nbsp)
603 (buffer-string))))
604
91472578
MB
605(defun nnrss-mime-encode-string (string)
606 (mm-with-multibyte-buffer
607 (insert string)
608 (mm-url-decode-entities-nbsp)
609 (goto-char (point-min))
610 (while (re-search-forward "[\t\n ]+" nil t)
611 (replace-match " "))
612 (goto-char (point-min))
613 (skip-chars-forward " ")
614 (delete-region (point-min) (point))
615 (goto-char (point-max))
616 (skip-chars-forward " ")
617 (delete-region (point) (point-max))
618 (let ((rfc2047-encoding-type 'mime)
619 rfc2047-encode-max-chars)
620 (rfc2047-encode-region (point-min) (point-max)))
621 (goto-char (point-min))
622 (while (search-forward "\n" nil t)
d355a0b7 623 (delete-char -1))
91472578
MB
624 (buffer-string)))
625
23f87bed 626;;; Snarf functions
01c52d31 627(defun nnrss-make-hash-index (item)
5b51650c 628 (gnus-message 9 "nnrss: Making hash index of %s" (gnus-prin1-to-string item))
01c52d31
MB
629 (setq item (gnus-remove-if
630 (lambda (field)
631 (when (listp field)
632 (memq (car field) nnrss-ignore-article-fields)))
633 item))
634 (md5 (gnus-prin1-to-string item)
635 nil nil
636 nnrss-file-coding-system))
637
23f87bed 638(defun nnrss-check-group (group server)
7dafe00b 639 (let (file xml subject url extra changed author date feed-subject
01c52d31
MB
640 enclosure comments rss-ns rdf-ns content-ns dc-ns
641 hash-index)
23f87bed
MB
642 (if (and nnrss-use-local
643 (file-exists-p (setq file (expand-file-name
644 (nnrss-translate-file-chars
645 (concat group ".xml"))
646 nnrss-directory))))
647 (setq xml (nnrss-fetch file t))
648 (setq url (or (nth 2 (assoc group nnrss-server-data))
649 (second (assoc group nnrss-group-alist))))
650 (unless url
651 (setq url
91472578
MB
652 (cdr
653 (assoc 'href
654 (nnrss-discover-feed
655 (read-string
656 (format "URL to search for %s: " group) "http://")))))
23f87bed
MB
657 (let ((pair (assoc group nnrss-server-data)))
658 (if pair
659 (setcdr (cdr pair) (list url))
660 (push (list group nnrss-group-max url) nnrss-server-data)))
661 (setq changed t))
662 (setq xml (nnrss-fetch url)))
23f87bed
MB
663 (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/")
664 rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
665 rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
666 content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
667 (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
668 (when (and (listp item)
91472578 669 (string= (concat rss-ns "item") (car item))
01c52d31
MB
670 (progn (setq hash-index (nnrss-make-hash-index item))
671 (not (gethash hash-index nnrss-group-hashtb))))
23f87bed 672 (setq subject (nnrss-node-text rss-ns 'title item))
01c52d31
MB
673 (setq url (nnrss-decode-entities-string
674 (nnrss-node-text rss-ns 'link (cddr item))))
675 (setq extra (or (nnrss-node-text content-ns 'encoded item)
23f87bed 676 (nnrss-node-text rss-ns 'description item)))
7dafe00b
MB
677 (if (setq feed-subject (nnrss-node-text dc-ns 'subject item))
678 (setq extra (concat feed-subject "<br /><br />" extra)))
23f87bed
MB
679 (setq author (or (nnrss-node-text rss-ns 'author item)
680 (nnrss-node-text dc-ns 'creator item)
681 (nnrss-node-text dc-ns 'contributor item)))
7dafe00b
MB
682 (setq date (nnrss-normalize-date
683 (or (nnrss-node-text dc-ns 'date item)
684 (nnrss-node-text rss-ns 'pubDate item))))
d3a597b7 685 (setq comments (nnrss-node-text rss-ns 'comments item))
31640842
MB
686 (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item)))
687 (let ((url (cdr (assq 'url enclosure)))
688 (len (cdr (assq 'length enclosure)))
689 (type (cdr (assq 'type enclosure)))
690 (name))
691 (setq len
692 (if (and len (integerp (setq len (string-to-number len))))
693 ;; actually already in `ls-lisp-format-file-size' but
694 ;; probably not worth to require it for one function
695 (do ((size (/ len 1.0) (/ size 1024.0))
696 (post-fixes (list "" "k" "M" "G" "T" "P" "E")
697 (cdr post-fixes)))
698 ((< size 1024)
699 (format "%.1f%s" size (car post-fixes))))
700 "0"))
701 (setq url (or url ""))
702 (setq name (if (string-match "/\\([^/]*\\)$" url)
703 (match-string 1 url)
704 "file"))
705 (setq type (or type ""))
706 (setq enclosure (list url name len type))))
23f87bed
MB
707 (push
708 (list
709 (incf nnrss-group-max)
710 (current-time)
711 url
91472578
MB
712 (and subject (nnrss-mime-encode-string subject))
713 (and author (nnrss-mime-encode-string author))
23f87bed 714 date
31640842 715 (and extra (nnrss-decode-entities-string extra))
d3a597b7 716 enclosure
01c52d31
MB
717 comments
718 hash-index)
23f87bed 719 nnrss-group-data)
01c52d31 720 (puthash hash-index t nnrss-group-hashtb)
91472578
MB
721 (setq changed t))
722 (setq extra nil))
23f87bed
MB
723 (when changed
724 (nnrss-save-group-data group server)
725 (let ((pair (assoc group nnrss-server-data)))
726 (if pair
727 (setcar (cdr pair) nnrss-group-max)
728 (push (list group nnrss-group-max) nnrss-server-data)))
729 (nnrss-save-server-data server))))
730
0f7cbeb9
GM
731(declare-function gnus-group-make-rss-group "gnus-group" (&optional url))
732
91472578
MB
733(defun nnrss-opml-import (opml-file)
734 "OPML subscriptions import.
735Read the file and attempt to subscribe to each Feed in the file."
736 (interactive "fImport file: ")
c4288669
MB
737 (mapc
738 (lambda (node)
739 (let ((xmlurl (cdr (assq 'xmlUrl (cadr node)))))
740 (when (and xmlurl
741 (not (string-match "\\`[\t ]*\\'" xmlurl))
742 (prog1
743 (y-or-n-p (format "Subscribe to %s " xmlurl))
744 (message "")))
745 (condition-case err
746 (progn
747 (gnus-group-make-rss-group xmlurl)
748 (forward-line 1))
749 (error
750 (message
751 "Failed to subscribe to %s (%s); type any key to continue: "
752 xmlurl
753 (error-message-string err))
754 (let ((echo-keystrokes 0))
755 (read-char)))))))
91472578 756 (nnrss-find-el 'outline
c4288669
MB
757 (mm-with-multibyte-buffer
758 (insert-file-contents opml-file)
759 (xml-parse-region (point-min) (point-max))))))
91472578
MB
760
761(defun nnrss-opml-export ()
762 "OPML subscription export.
763Export subscriptions to a buffer in OPML Format."
764 (interactive)
765 (with-current-buffer (get-buffer-create "*OPML Export*")
766 (mm-set-buffer-file-coding-system 'utf-8)
767 (insert "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
768 "<!-- OPML generated by Emacs Gnus' nnrss.el -->\n"
769 "<opml version=\"1.1\">\n"
770 " <head>\n"
771 " <title>mySubscriptions</title>\n"
772 " <dateCreated>" (format-time-string "%a, %d %b %Y %T %z")
773 "</dateCreated>\n"
774 " <ownerEmail>" user-mail-address "</ownerEmail>\n"
775 " <ownerName>" (user-full-name) "</ownerName>\n"
776 " </head>\n"
777 " <body>\n")
778 (dolist (sub nnrss-group-alist)
779 (insert " <outline text=\"" (car sub)
780 "\" xmlUrl=\"" (cadr sub) "\"/>\n"))
781 (insert " </body>\n"
782 "</opml>\n"))
783 (pop-to-buffer "*OPML Export*")
784 (when (fboundp 'sgml-mode)
785 (sgml-mode)))
786
23f87bed
MB
787(defun nnrss-generate-download-script ()
788 "Generate a download script in the current buffer.
789It is useful when `(setq nnrss-use-local t)'."
790 (interactive)
791 (insert "#!/bin/sh\n")
792 (insert "WGET=wget\n")
793 (insert "RSSDIR='" (expand-file-name nnrss-directory) "'\n")
794 (dolist (elem nnrss-server-data)
795 (let ((url (or (nth 2 elem)
796 (second (assoc (car elem) nnrss-group-alist)))))
797 (insert "$WGET -q -O \"$RSSDIR\"/'"
798 (nnrss-translate-file-chars (concat (car elem) ".xml"))
799 "' '" url "'\n"))))
800
801(defun nnrss-translate-file-chars (name)
802 (let ((nnheader-file-name-translation-alist
803 (append nnheader-file-name-translation-alist '((?' . ?_)))))
804 (nnheader-translate-file-chars name)))
805
23f87bed
MB
806(defun nnrss-node-text (namespace local-name element)
807 (let* ((node (assq (intern (concat namespace (symbol-name local-name)))
808 element))
809 (text (if (and node (listp node))
810 (nnrss-node-just-text node)
811 node))
ff5e68bf
MB
812 (cleaned-text (if text
813 (gnus-replace-in-string
814 (gnus-replace-in-string
815 text "^[\000-\037\177]+\\|^ +\\| +$" "")
816 "\r\n" "\n"))))
23f87bed
MB
817 (if (string-equal "" cleaned-text)
818 nil
819 cleaned-text)))
820
821(defun nnrss-node-just-text (node)
822 (if (and node (listp node))
823 (mapconcat 'nnrss-node-just-text (cddr node) " ")
824 node))
825
826(defun nnrss-find-el (tag data &optional found-list)
91472578
MB
827 "Find the all matching elements in the data.
828Careful with this on large documents!"
829 (when (consp data)
830 (dolist (bit data)
831 (when (car-safe bit)
832 (when (equal tag (car bit))
833 ;; Old xml.el may return a list of string.
834 (when (and (consp (caddr bit))
835 (stringp (caaddr bit)))
836 (setcar (cddr bit) (caaddr bit)))
837 (setq found-list
838 (append found-list
839 (list bit))))
840 (if (and (consp (car-safe (caddr bit)))
841 (not (stringp (caddr bit))))
842 (setq found-list
843 (append found-list
844 (nnrss-find-el
845 tag (caddr bit))))
846 (setq found-list
847 (append found-list
848 (nnrss-find-el
849 tag (cddr bit))))))))
23f87bed
MB
850 found-list)
851
852(defun nnrss-rsslink-p (el)
853 "Test if the element we are handed is an RSS autodiscovery link."
854 (and (eq (car-safe el) 'link)
855 (string-equal (cdr (assoc 'rel (cadr el))) "alternate")
91472578 856 (or (string-equal (cdr (assoc 'type (cadr el)))
23f87bed
MB
857 "application/rss+xml")
858 (string-equal (cdr (assoc 'type (cadr el))) "text/xml"))))
859
860(defun nnrss-get-rsslinks (data)
861 "Extract the <link> elements that are links to RSS from the parsed data."
91472578 862 (delq nil (mapcar
23f87bed
MB
863 (lambda (el)
864 (if (nnrss-rsslink-p el) el))
865 (nnrss-find-el 'link data))))
866
867(defun nnrss-extract-hrefs (data)
91472578 868 "Recursively extract hrefs from a page's source.
4d2226bf 869DATA should be the output of `xml-parse-region'."
23f87bed
MB
870 (mapcar (lambda (ahref)
871 (cdr (assoc 'href (cadr ahref))))
872 (nnrss-find-el 'a data)))
873
91472578 874(defmacro nnrss-match-macro (base-uri item onsite-list offsite-list)
23f87bed 875 `(cond ((or (string-match (concat "^" ,base-uri) ,item)
91472578
MB
876 (not (string-match "://" ,item)))
877 (setq ,onsite-list (append ,onsite-list (list ,item))))
878 (t (setq ,offsite-list (append ,offsite-list (list ,item))))))
23f87bed
MB
879
880(defun nnrss-order-hrefs (base-uri hrefs)
881 "Given a list of hrefs, sort them using the following priorities:
882 1. links ending in .rss
883 2. links ending in .rdf
884 3. links ending in .xml
885 4. links containing the above
886 5. offsite links
887
888BASE-URI is used to determine the location of the links and
889whether they are `offsite' or `onsite'."
890 (let (rss-onsite-end rdf-onsite-end xml-onsite-end
891 rss-onsite-in rdf-onsite-in xml-onsite-in
892 rss-offsite-end rdf-offsite-end xml-offsite-end
01c52d31 893 rss-offsite-in rdf-offsite-in xml-offsite-in)
91472578
MB
894 (dolist (href hrefs)
895 (cond ((null href))
896 ((string-match "\\.rss$" href)
897 (nnrss-match-macro
898 base-uri href rss-onsite-end rss-offsite-end))
899 ((string-match "\\.rdf$" href)
900 (nnrss-match-macro
901 base-uri href rdf-onsite-end rdf-offsite-end))
902 ((string-match "\\.xml$" href)
903 (nnrss-match-macro
904 base-uri href xml-onsite-end xml-offsite-end))
905 ((string-match "rss" href)
906 (nnrss-match-macro
907 base-uri href rss-onsite-in rss-offsite-in))
908 ((string-match "rdf" href)
909 (nnrss-match-macro
910 base-uri href rdf-onsite-in rdf-offsite-in))
911 ((string-match "xml" href)
912 (nnrss-match-macro
913 base-uri href xml-onsite-in xml-offsite-in))))
914 (append
23f87bed
MB
915 rss-onsite-end rdf-onsite-end xml-onsite-end
916 rss-onsite-in rdf-onsite-in xml-onsite-in
917 rss-offsite-end rdf-offsite-end xml-offsite-end
918 rss-offsite-in rdf-offsite-in xml-offsite-in)))
919
920(defun nnrss-discover-feed (url)
51457e00
LMI
921 "Given a page, find an RSS feed.
922Use Mark Pilgrim's `ultra-liberal rss locator'."
23f87bed 923 (let ((parsed-page (nnrss-fetch url)))
51457e00 924 ;; 1. if this url is the rss, use it.
23f87bed
MB
925 (if (nnrss-rss-p parsed-page)
926 (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/")))
927 (nnrss-rss-title-description rss-ns parsed-page url))
928
51457e00
LMI
929 ;; 2. look for the <link rel="alternate"
930 ;; type="application/rss+xml" and use that if it is there.
23f87bed
MB
931 (let ((links (nnrss-get-rsslinks parsed-page)))
932 (if links
933 (let* ((xml (nnrss-fetch
934 (cdr (assoc 'href (cadar links)))))
51457e00
LMI
935 (rss-ns (nnrss-get-namespace-prefix
936 xml "http://purl.org/rss/1.0/")))
937 (nnrss-rss-title-description
938 rss-ns xml (cdr (assoc 'href (cadar links)))))
939
940 ;; 3. look for links on the site in the following order:
941 ;; - onsite links ending in .rss, .rdf, or .xml
942 ;; - onsite links containing any of the above
943 ;; - offsite links ending in .rss, .rdf, or .xml
944 ;; - offsite links containing any of the above
23f87bed
MB
945 (let* ((base-uri (progn (string-match ".*://[^/]+/?" url)
946 (match-string 0 url)))
91472578 947 (hrefs (nnrss-order-hrefs
23f87bed
MB
948 base-uri (nnrss-extract-hrefs parsed-page)))
949 (rss-link nil))
91472578
MB
950 (while (and (eq rss-link nil) (not (eq hrefs nil)))
951 (let ((href-data (nnrss-fetch (car hrefs))))
952 (if (nnrss-rss-p href-data)
953 (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
954 (setq rss-link (nnrss-rss-title-description
955 rss-ns href-data (car hrefs))))
956 (setq hrefs (cdr hrefs)))))
51457e00
LMI
957 (if rss-link
958 rss-link
959 ;; 4. check syndic8
91472578 960 (nnrss-find-rss-via-syndic8 url))))))))
23f87bed
MB
961
962(defun nnrss-find-rss-via-syndic8 (url)
91472578 963 "Query syndic8 for the rss feeds it has for URL."
23f87bed
MB
964 (if (not (locate-library "xml-rpc"))
965 (progn
966 (message "XML-RPC is not available... not checking Syndic8.")
967 nil)
968 (require 'xml-rpc)
969 (let ((feedid (xml-rpc-method-call
970 "http://www.syndic8.com/xmlrpc.php"
971 'syndic8.FindSites
972 url)))
973 (when feedid
91472578 974 (let* ((feedinfo (xml-rpc-method-call
23f87bed
MB
975 "http://www.syndic8.com/xmlrpc.php"
976 'syndic8.GetFeedInfo
977 feedid))
978 (urllist
91472578 979 (delq nil
23f87bed
MB
980 (mapcar
981 (lambda (listinfo)
91472578 982 (if (string-equal
23f87bed
MB
983 (cdr (assoc "status" listinfo))
984 "Syndicated")
985 (cons
986 (cdr (assoc "sitename" listinfo))
987 (list
988 (cons 'title
91472578 989 (cdr (assoc
23f87bed
MB
990 "sitename" listinfo)))
991 (cons 'href
992 (cdr (assoc
993 "dataurl" listinfo)))))))
994 feedinfo))))
995 (if (not (> (length urllist) 1))
996 (cdar urllist)
997 (let ((completion-ignore-case t)
91472578 998 (selection
23f87bed 999 (mapcar (lambda (listinfo)
91472578 1000 (cons (cdr (assoc "sitename" listinfo))
e9bd5782 1001 (string-to-number
23f87bed
MB
1002 (cdr (assoc "feedid" listinfo)))))
1003 feedinfo)))
91472578 1004 (cdr (assoc
229b59da
G
1005 (gnus-completing-read
1006 "Multiple feeds found. Select one"
1007 selection t) urllist)))))))))
23f87bed
MB
1008
1009(defun nnrss-rss-p (data)
91472578
MB
1010 "Test if DATA is an RSS feed.
1011Simply ensures that the first element is rss or rdf."
23f87bed
MB
1012 (or (eq (caar data) 'rss)
1013 (eq (caar data) 'rdf:RDF)))
1014
1015(defun nnrss-rss-title-description (rss-namespace data url)
1016 "Return the title of an RSS feed."
1017 (if (nnrss-rss-p data)
1018 (let ((description (intern (concat rss-namespace "description")))
1019 (title (intern (concat rss-namespace "title")))
1020 (channel (nnrss-find-el (intern (concat rss-namespace "channel"))
1021 data)))
1022 (list
1023 (cons 'description (caddr (nth 0 (nnrss-find-el description channel))))
1024 (cons 'title (caddr (nth 0 (nnrss-find-el title channel))))
1025 (cons 'href url)))))
1026
1027(defun nnrss-get-namespace-prefix (el uri)
1028 "Given EL (containing a parsed element) and URI (containing a string
1029that gives the URI for which you want to retrieve the namespace
1030prefix), return the prefix."
1031 (let* ((prefix (car (rassoc uri (cadar el))))
91472578 1032 (nslist (if prefix
23f87bed
MB
1033 (split-string (symbol-name prefix) ":")))
1034 (ns (cond ((eq (length nslist) 1) ; no prefix given
1035 "")
1036 ((eq (length nslist) 2) ; extract prefix
1037 (cadr nslist)))))
91472578 1038 (if (and ns (not (string= ns "")))
23f87bed
MB
1039 (concat ns ":")
1040 ns)))
1041
1042(provide 'nnrss)
1043
23f87bed 1044;;; nnrss.el ends here