Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-716
[bpt/emacs.git] / lisp / gnus / nnrss.el
1 ;;; nnrss.el --- interfacing with RSS
2 ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
3
4 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
5 ;; Keywords: RSS
6
7 ;; This file is part of GNU Emacs.
8
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published
11 ;; by the Free Software Foundation; either version 2, or (at your
12 ;; option) any later version.
13
14 ;; GNU Emacs is distributed in the hope that it will be useful, but
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; General Public License for more details.
18
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
23
24 ;;; Commentary:
25
26 ;;; Code:
27
28 (eval-when-compile (require 'cl))
29
30 (require 'gnus)
31 (require 'nnoo)
32 (require 'nnmail)
33 (require 'message)
34 (require 'mm-util)
35 (require 'gnus-util)
36 (require 'time-date)
37 (require 'rfc2231)
38 (require 'mm-url)
39 (eval-when-compile
40 (ignore-errors
41 (require 'xml)))
42 (eval '(require 'xml))
43
44 (nnoo-declare nnrss)
45
46 (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/")
47 "Where nnrss will save its files.")
48
49 ;; (group max rss-url)
50 (defvoo nnrss-server-data nil)
51
52 ;; (num timestamp url subject author date extra)
53 (defvoo nnrss-group-data nil)
54 (defvoo nnrss-group-max 0)
55 (defvoo nnrss-group-min 1)
56 (defvoo nnrss-group nil)
57 (defvoo nnrss-group-hashtb nil)
58 (defvoo nnrss-status-string "")
59
60 (defconst nnrss-version "nnrss 1.0")
61
62 (defvar nnrss-group-alist '()
63 "List of RSS addresses.")
64
65 (defvar nnrss-use-local nil)
66
67 (defvar nnrss-description-field 'X-Gnus-Description
68 "Field name used for DESCRIPTION.
69 To use the description in headers, put this name into `nnmail-extra-headers'.")
70
71 (defvar nnrss-url-field 'X-Gnus-Url
72 "Field name used for URL.
73 To use the description in headers, put this name into `nnmail-extra-headers'.")
74
75 (defvar nnrss-content-function nil
76 "A function which is called in `nnrss-request-article'.
77 The arguments are (ENTRY GROUP ARTICLE).
78 ENTRY is the record of the current headline. GROUP is the group name.
79 ARTICLE is the article number of the current headline.")
80
81 (nnoo-define-basics nnrss)
82
83 ;;; Interface functions
84
85 (eval-when-compile
86 (defmacro nnrss-string-as-multibyte (string)
87 (if (featurep 'xemacs)
88 string
89 `(string-as-multibyte ,string))))
90
91 (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old)
92 (nnrss-possibly-change-group group server)
93 (let (e)
94 (save-excursion
95 (set-buffer nntp-server-buffer)
96 (erase-buffer)
97 (dolist (article articles)
98 (if (setq e (assq article nnrss-group-data))
99 (insert (number-to-string (car e)) "\t" ;; number
100 (if (nth 3 e)
101 (nnrss-format-string (nth 3 e)) "")
102 "\t" ;; subject
103 (if (nth 4 e)
104 (nnrss-format-string (nth 4 e))
105 "(nobody)")
106 "\t" ;;from
107 (or (nth 5 e) "")
108 "\t" ;; date
109 (format "<%d@%s.nnrss>" (car e) group)
110 "\t" ;; id
111 "\t" ;; refs
112 "-1" "\t" ;; chars
113 "-1" "\t" ;; lines
114 "" "\t" ;; Xref
115 (if (and (nth 6 e)
116 (memq nnrss-description-field
117 nnmail-extra-headers))
118 (concat (symbol-name nnrss-description-field)
119 ": "
120 (nnrss-format-string (nth 6 e))
121 "\t")
122 "")
123 (if (and (nth 2 e)
124 (memq nnrss-url-field
125 nnmail-extra-headers))
126 (concat (symbol-name nnrss-url-field)
127 ": "
128 (nnrss-format-string (nth 2 e))
129 "\t")
130 "")
131 "\n")))))
132 'nov)
133
134 (deffoo nnrss-request-group (group &optional server dont-check)
135 (nnrss-possibly-change-group group server)
136 (if dont-check
137 t
138 (nnrss-check-group group server)
139 (nnheader-report 'nnrss "Opened group %s" group)
140 (nnheader-insert
141 "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
142 (prin1-to-string group)
143 t)))
144
145 (deffoo nnrss-close-group (group &optional server)
146 t)
147
148 (deffoo nnrss-request-article (article &optional group server buffer)
149 (nnrss-possibly-change-group group server)
150 (let ((e (assq article nnrss-group-data))
151 (boundary "=-=-=-=-=-=-=-=-=-")
152 (nntp-server-buffer (or buffer nntp-server-buffer))
153 post err)
154 (when e
155 (catch 'error
156 (with-current-buffer nntp-server-buffer
157 (erase-buffer)
158 (goto-char (point-min))
159 (insert "Mime-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"" boundary "\"\n")
160 (if group
161 (insert "Newsgroups: " group "\n"))
162 (if (nth 3 e)
163 (insert "Subject: " (nnrss-format-string (nth 3 e)) "\n"))
164 (if (nth 4 e)
165 (insert "From: " (nnrss-format-string (nth 4 e)) "\n"))
166 (if (nth 5 e)
167 (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
168 (insert "Message-ID: " (format "<%d@%s.nnrss>" (car e) group) "\n")
169 (insert "\n")
170 (let ((text (if (nth 6 e)
171 (nnrss-string-as-multibyte (nth 6 e))))
172 (link (if (nth 2 e)
173 (nth 2 e))))
174 (insert "\n\n--" boundary "\nContent-Type: text/plain\n\n")
175 (let ((point (point)))
176 (if text
177 (progn (insert text)
178 (goto-char point)
179 (while (re-search-forward "\n" nil t)
180 (replace-match " "))
181 (goto-char (point-max))
182 (insert "\n\n")))
183 (if link
184 (insert link)))
185 (insert "\n\n--" boundary "\nContent-Type: text/html\n\n")
186 (let ((point (point)))
187 (if text
188 (progn (insert "<html><head></head><body>\n" text "\n</body></html>")
189 (goto-char point)
190 (while (re-search-forward "\n" nil t)
191 (replace-match " "))
192 (goto-char (point-max))
193 (insert "\n\n")))
194 (if link
195 (insert "<p><a href=\"" link "\">link</a></p>\n"))))
196 (if nnrss-content-function
197 (funcall nnrss-content-function e group article)))))
198 (cond
199 (err
200 (nnheader-report 'nnrss err))
201 ((not e)
202 (nnheader-report 'nnrss "no such id: %d" article))
203 (t
204 (nnheader-report 'nnrss "article %s retrieved" (car e))
205 ;; we return the article number.
206 (cons nnrss-group (car e))))))
207
208 (deffoo nnrss-request-list (&optional server)
209 (nnrss-possibly-change-group nil server)
210 (nnrss-generate-active)
211 t)
212
213 (deffoo nnrss-open-server (server &optional defs connectionless)
214 (nnrss-read-server-data server)
215 (nnoo-change-server 'nnrss server defs)
216 t)
217
218 (deffoo nnrss-request-expire-articles
219 (articles group &optional server force)
220 (nnrss-possibly-change-group group server)
221 (let (e days not-expirable changed)
222 (dolist (art articles)
223 (if (and (setq e (assq art nnrss-group-data))
224 (nnmail-expired-article-p
225 group
226 (if (listp (setq days (nth 1 e))) days
227 (days-to-time (- days (time-to-days '(0 0)))))
228 force))
229 (setq nnrss-group-data (delq e nnrss-group-data)
230 changed t)
231 (push art not-expirable)))
232 (if changed
233 (nnrss-save-group-data group server))
234 not-expirable))
235
236 (deffoo nnrss-request-delete-group (group &optional force server)
237 (nnrss-possibly-change-group group server)
238 (setq nnrss-server-data
239 (delq (assoc group nnrss-server-data) nnrss-server-data))
240 (nnrss-save-server-data server)
241 (let ((file (expand-file-name
242 (nnrss-translate-file-chars
243 (concat group (and server
244 (not (equal server ""))
245 "-")
246 server ".el")) nnrss-directory)))
247 (ignore-errors
248 (delete-file file)))
249 t)
250
251 (deffoo nnrss-request-list-newsgroups (&optional server)
252 (nnrss-possibly-change-group nil server)
253 (save-excursion
254 (set-buffer nntp-server-buffer)
255 (erase-buffer)
256 (dolist (elem nnrss-group-alist)
257 (if (third elem)
258 (insert (car elem) "\t" (third elem) "\n"))))
259 t)
260
261 (nnoo-define-skeleton nnrss)
262
263 ;;; Internal functions
264 (eval-when-compile (defun xml-rpc-method-call (&rest args)))
265 (defun nnrss-fetch (url &optional local)
266 "Fetch the url and put it in a the expected lisp structure."
267 (with-temp-buffer
268 ;some CVS versions of url.el need this to close the connection quickly
269 (let* (xmlform htmlform)
270 ;; bit o' work necessary for w3 pre-cvs and post-cvs
271 (if local
272 (let ((coding-system-for-read 'binary))
273 (insert-file-contents url))
274 (mm-url-insert url))
275
276 ;; Because xml-parse-region can't deal with anything that isn't
277 ;; xml and w3-parse-buffer can't deal with some xml, we have to
278 ;; parse with xml-parse-region first and, if that fails, parse
279 ;; with w3-parse-buffer. Yuck. Eventually, someone should find out
280 ;; why w3-parse-buffer fails to parse some well-formed xml and
281 ;; fix it.
282
283 (condition-case err
284 (setq xmlform (xml-parse-region (point-min) (point-max)))
285 (error (if (fboundp 'w3-parse-buffer)
286 (setq htmlform (caddar (w3-parse-buffer
287 (current-buffer))))
288 (message "nnrss: Not valid XML and w3 parse not available (%s)"
289 url))))
290 (if htmlform
291 htmlform
292 xmlform))))
293
294 (defun nnrss-possibly-change-group (&optional group server)
295 (when (and server
296 (not (nnrss-server-opened server)))
297 (nnrss-open-server server))
298 (when (and group (not (equal group nnrss-group)))
299 (nnrss-read-group-data group server)
300 (setq nnrss-group group)))
301
302 (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories))
303
304 (defun nnrss-generate-active ()
305 (if (y-or-n-p "fetch extra categories? ")
306 (dolist (func nnrss-extra-categories)
307 (funcall func)))
308 (save-excursion
309 (set-buffer nntp-server-buffer)
310 (erase-buffer)
311 (dolist (elem nnrss-group-alist)
312 (insert (prin1-to-string (car elem)) " 0 1 y\n"))
313 (dolist (elem nnrss-server-data)
314 (unless (assoc (car elem) nnrss-group-alist)
315 (insert (prin1-to-string (car elem)) " 0 1 y\n")))))
316
317 ;;; data functions
318
319 (defun nnrss-read-server-data (server)
320 (setq nnrss-server-data nil)
321 (let ((file (expand-file-name
322 (nnrss-translate-file-chars
323 (concat "nnrss" (and server
324 (not (equal server ""))
325 "-")
326 server
327 ".el"))
328 nnrss-directory)))
329 (when (file-exists-p file)
330 (with-temp-buffer
331 (let ((coding-system-for-read 'binary)
332 emacs-lisp-mode-hook)
333 (insert-file-contents file)
334 (emacs-lisp-mode)
335 (goto-char (point-min))
336 (eval-buffer))))))
337
338 (defun nnrss-save-server-data (server)
339 (gnus-make-directory nnrss-directory)
340 (let ((file (expand-file-name
341 (nnrss-translate-file-chars
342 (concat "nnrss" (and server
343 (not (equal server ""))
344 "-")
345 server ".el"))
346 nnrss-directory)))
347 (let ((coding-system-for-write 'binary)
348 print-level print-length)
349 (with-temp-file file
350 (insert "(setq nnrss-group-alist '"
351 (prin1-to-string nnrss-group-alist)
352 ")\n")
353 (insert "(setq nnrss-server-data '"
354 (prin1-to-string nnrss-server-data)
355 ")\n")))))
356
357 (defun nnrss-read-group-data (group server)
358 (setq nnrss-group-data nil)
359 (setq nnrss-group-hashtb (gnus-make-hashtable))
360 (let ((pair (assoc group nnrss-server-data)))
361 (setq nnrss-group-max (or (cadr pair) 0))
362 (setq nnrss-group-min (+ nnrss-group-max 1)))
363 (let ((file (expand-file-name
364 (nnrss-translate-file-chars
365 (concat group (and server
366 (not (equal server ""))
367 "-")
368 server ".el"))
369 nnrss-directory)))
370 (when (file-exists-p file)
371 (with-temp-buffer
372 (let ((coding-system-for-read 'binary)
373 emacs-lisp-mode-hook)
374 (insert-file-contents file)
375 (emacs-lisp-mode)
376 (goto-char (point-min))
377 (eval-buffer)))
378 (dolist (e nnrss-group-data)
379 (gnus-sethash (nth 2 e) e nnrss-group-hashtb)
380 (if (and (car e) (> nnrss-group-min (car e)))
381 (setq nnrss-group-min (car e)))
382 (if (and (car e) (< nnrss-group-max (car e)))
383 (setq nnrss-group-max (car e)))))))
384
385 (defun nnrss-save-group-data (group server)
386 (gnus-make-directory nnrss-directory)
387 (let ((file (expand-file-name
388 (nnrss-translate-file-chars
389 (concat group (and server
390 (not (equal server ""))
391 "-")
392 server ".el"))
393 nnrss-directory)))
394 (let ((coding-system-for-write 'binary)
395 print-level print-length)
396 (with-temp-file file
397 (insert "(setq nnrss-group-data '"
398 (prin1-to-string nnrss-group-data)
399 ")\n")))))
400
401 ;;; URL interface
402
403 (defun nnrss-no-cache (url)
404 "")
405
406 (defun nnrss-insert-w3 (url)
407 (mm-with-unibyte-current-buffer
408 (mm-url-insert url)))
409
410 (defun nnrss-decode-entities-unibyte-string (string)
411 (if string
412 (mm-with-unibyte-buffer
413 (insert string)
414 (mm-url-decode-entities-nbsp)
415 (buffer-string))))
416
417 (defalias 'nnrss-insert 'nnrss-insert-w3)
418
419 ;;; Snarf functions
420
421 (defun nnrss-check-group (group server)
422 (let (file xml subject url extra changed author
423 date rss-ns rdf-ns content-ns dc-ns)
424 (if (and nnrss-use-local
425 (file-exists-p (setq file (expand-file-name
426 (nnrss-translate-file-chars
427 (concat group ".xml"))
428 nnrss-directory))))
429 (setq xml (nnrss-fetch file t))
430 (setq url (or (nth 2 (assoc group nnrss-server-data))
431 (second (assoc group nnrss-group-alist))))
432 (unless url
433 (setq url
434 (cdr
435 (assoc 'href
436 (nnrss-discover-feed
437 (read-string
438 (format "URL to search for %s: " group) "http://")))))
439 (let ((pair (assoc group nnrss-server-data)))
440 (if pair
441 (setcdr (cdr pair) (list url))
442 (push (list group nnrss-group-max url) nnrss-server-data)))
443 (setq changed t))
444 (setq xml (nnrss-fetch url)))
445 ;; See
446 ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html
447 ;; for more RSS namespaces.
448 (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/")
449 rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
450 rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
451 content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
452 (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
453 (when (and (listp item)
454 (eq (intern (concat rss-ns "item")) (car item))
455 (setq url (nnrss-decode-entities-unibyte-string
456 (nnrss-node-text rss-ns 'link (cddr item))))
457 (not (gnus-gethash url nnrss-group-hashtb)))
458 (setq subject (nnrss-node-text rss-ns 'title item))
459 (setq extra (or (nnrss-node-text content-ns 'encoded item)
460 (nnrss-node-text rss-ns 'description item)))
461 (setq author (or (nnrss-node-text rss-ns 'author item)
462 (nnrss-node-text dc-ns 'creator item)
463 (nnrss-node-text dc-ns 'contributor item)))
464 (setq date (or (nnrss-node-text dc-ns 'date item)
465 (nnrss-node-text rss-ns 'pubDate item)
466 (message-make-date)))
467 (push
468 (list
469 (incf nnrss-group-max)
470 (current-time)
471 url
472 (and subject (nnrss-decode-entities-unibyte-string subject))
473 (and author (nnrss-decode-entities-unibyte-string author))
474 date
475 (and extra (nnrss-decode-entities-unibyte-string extra)))
476 nnrss-group-data)
477 (gnus-sethash url (car nnrss-group-data) nnrss-group-hashtb)
478 (setq changed t)))
479 (when changed
480 (nnrss-save-group-data group server)
481 (let ((pair (assoc group nnrss-server-data)))
482 (if pair
483 (setcar (cdr pair) nnrss-group-max)
484 (push (list group nnrss-group-max) nnrss-server-data)))
485 (nnrss-save-server-data server))))
486
487 (defun nnrss-generate-download-script ()
488 "Generate a download script in the current buffer.
489 It is useful when `(setq nnrss-use-local t)'."
490 (interactive)
491 (insert "#!/bin/sh\n")
492 (insert "WGET=wget\n")
493 (insert "RSSDIR='" (expand-file-name nnrss-directory) "'\n")
494 (dolist (elem nnrss-server-data)
495 (let ((url (or (nth 2 elem)
496 (second (assoc (car elem) nnrss-group-alist)))))
497 (insert "$WGET -q -O \"$RSSDIR\"/'"
498 (nnrss-translate-file-chars (concat (car elem) ".xml"))
499 "' '" url "'\n"))))
500
501 (defun nnrss-translate-file-chars (name)
502 (let ((nnheader-file-name-translation-alist
503 (append nnheader-file-name-translation-alist '((?' . ?_)))))
504 (nnheader-translate-file-chars name)))
505
506 (defvar nnrss-moreover-url
507 "http://w.moreover.com/categories/category_list_rss.html"
508 "The url of moreover.com categories.")
509
510 (defun nnrss-snarf-moreover-categories ()
511 "Snarf RSS links from moreover.com."
512 (interactive)
513 (let (category name url changed)
514 (with-temp-buffer
515 (nnrss-insert nnrss-moreover-url)
516 (goto-char (point-min))
517 (while (re-search-forward
518 "<a name=\"\\([^\"]+\\)\">\\|<a href=\"\\(http://[^\"]*moreover\\.com[^\"]+page\\?c=\\([^\"&]+\\)&o=rss\\)" nil t)
519 (if (match-string 1)
520 (setq category (match-string 1))
521 (setq url (match-string 2)
522 name (mm-url-decode-entities-string
523 (rfc2231-decode-encoded-string
524 (match-string 3))))
525 (if category
526 (setq name (concat category "." name)))
527 (unless (assoc name nnrss-server-data)
528 (setq changed t)
529 (push (list name 0 url) nnrss-server-data)))))
530 (if changed
531 (nnrss-save-server-data ""))))
532
533 (defun nnrss-format-string (string)
534 (gnus-replace-in-string (nnrss-string-as-multibyte string) " *\n *" " "))
535
536 (defun nnrss-node-text (namespace local-name element)
537 (let* ((node (assq (intern (concat namespace (symbol-name local-name)))
538 element))
539 (text (if (and node (listp node))
540 (nnrss-node-just-text node)
541 node))
542 (cleaned-text (if text (gnus-replace-in-string
543 text "^[\000-\037\177]+\\|^ +\\| +$" ""))))
544 (if (string-equal "" cleaned-text)
545 nil
546 cleaned-text)))
547
548 (defun nnrss-node-just-text (node)
549 (if (and node (listp node))
550 (mapconcat 'nnrss-node-just-text (cddr node) " ")
551 node))
552
553 (defun nnrss-find-el (tag data &optional found-list)
554 "Find the all matching elements in the data. Careful with this on
555 large documents!"
556 (if (listp data)
557 (mapcar (lambda (bit)
558 (if (car-safe bit)
559 (progn (if (equal tag (car bit))
560 (setq found-list
561 (append found-list
562 (list bit))))
563 (if (and (listp (car-safe (caddr bit)))
564 (not (stringp (caddr bit))))
565 (setq found-list
566 (append found-list
567 (nnrss-find-el
568 tag (caddr bit))))
569 (setq found-list
570 (append found-list
571 (nnrss-find-el
572 tag (cddr bit))))))))
573 data))
574 found-list)
575
576 (defun nnrss-rsslink-p (el)
577 "Test if the element we are handed is an RSS autodiscovery link."
578 (and (eq (car-safe el) 'link)
579 (string-equal (cdr (assoc 'rel (cadr el))) "alternate")
580 (or (string-equal (cdr (assoc 'type (cadr el)))
581 "application/rss+xml")
582 (string-equal (cdr (assoc 'type (cadr el))) "text/xml"))))
583
584 (defun nnrss-get-rsslinks (data)
585 "Extract the <link> elements that are links to RSS from the parsed data."
586 (delq nil (mapcar
587 (lambda (el)
588 (if (nnrss-rsslink-p el) el))
589 (nnrss-find-el 'link data))))
590
591 (defun nnrss-extract-hrefs (data)
592 "Recursively extract hrefs from a page's source. DATA should be
593 the output of xml-parse-region or w3-parse-buffer."
594 (mapcar (lambda (ahref)
595 (cdr (assoc 'href (cadr ahref))))
596 (nnrss-find-el 'a data)))
597
598 (defmacro nnrss-match-macro (base-uri item
599 onsite-list offsite-list)
600 `(cond ((or (string-match (concat "^" ,base-uri) ,item)
601 (not (string-match "://" ,item)))
602 (setq ,onsite-list (append ,onsite-list (list ,item))))
603 (t (setq ,offsite-list (append ,offsite-list (list ,item))))))
604
605 (defun nnrss-order-hrefs (base-uri hrefs)
606 "Given a list of hrefs, sort them using the following priorities:
607 1. links ending in .rss
608 2. links ending in .rdf
609 3. links ending in .xml
610 4. links containing the above
611 5. offsite links
612
613 BASE-URI is used to determine the location of the links and
614 whether they are `offsite' or `onsite'."
615 (let (rss-onsite-end rdf-onsite-end xml-onsite-end
616 rss-onsite-in rdf-onsite-in xml-onsite-in
617 rss-offsite-end rdf-offsite-end xml-offsite-end
618 rss-offsite-in rdf-offsite-in xml-offsite-in)
619 (mapcar (lambda (href)
620 (if (not (null href))
621 (cond ((string-match "\\.rss$" href)
622 (nnrss-match-macro
623 base-uri href rss-onsite-end rss-offsite-end))
624 ((string-match "\\.rdf$" href)
625 (nnrss-match-macro
626 base-uri href rdf-onsite-end rdf-offsite-end))
627 ((string-match "\\.xml$" href)
628 (nnrss-match-macro
629 base-uri href xml-onsite-end xml-offsite-end))
630 ((string-match "rss" href)
631 (nnrss-match-macro
632 base-uri href rss-onsite-in rss-offsite-in))
633 ((string-match "rdf" href)
634 (nnrss-match-macro
635 base-uri href rdf-onsite-in rdf-offsite-in))
636 ((string-match "xml" href)
637 (nnrss-match-macro
638 base-uri href xml-onsite-in xml-offsite-in)))))
639 hrefs)
640 (append
641 rss-onsite-end rdf-onsite-end xml-onsite-end
642 rss-onsite-in rdf-onsite-in xml-onsite-in
643 rss-offsite-end rdf-offsite-end xml-offsite-end
644 rss-offsite-in rdf-offsite-in xml-offsite-in)))
645
646 (defun nnrss-discover-feed (url)
647 "Given a page, find an RSS feed using Mark Pilgrim's
648 `ultra-liberal rss locator' (http://diveintomark.org/2002/08/15.html)."
649
650 (let ((parsed-page (nnrss-fetch url)))
651
652 ;; 1. if this url is the rss, use it.
653 (if (nnrss-rss-p parsed-page)
654 (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/")))
655 (nnrss-rss-title-description rss-ns parsed-page url))
656
657 ;; 2. look for the <link rel="alternate"
658 ;; type="application/rss+xml" and use that if it is there.
659 (let ((links (nnrss-get-rsslinks parsed-page)))
660 (if links
661 (let* ((xml (nnrss-fetch
662 (cdr (assoc 'href (cadar links)))))
663 (rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")))
664 (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links)))))
665
666 ;; 3. look for links on the site in the following order:
667 ;; - onsite links ending in .rss, .rdf, or .xml
668 ;; - onsite links containing any of the above
669 ;; - offsite links ending in .rss, .rdf, or .xml
670 ;; - offsite links containing any of the above
671 (let* ((base-uri (progn (string-match ".*://[^/]+/?" url)
672 (match-string 0 url)))
673 (hrefs (nnrss-order-hrefs
674 base-uri (nnrss-extract-hrefs parsed-page)))
675 (rss-link nil))
676 (while (and (eq rss-link nil) (not (eq hrefs nil)))
677 (let ((href-data (nnrss-fetch (car hrefs))))
678 (if (nnrss-rss-p href-data)
679 (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
680 (setq rss-link (nnrss-rss-title-description
681 rss-ns href-data (car hrefs))))
682 (setq hrefs (cdr hrefs)))))
683 (if rss-link rss-link
684
685 ;; 4. check syndic8
686 (nnrss-find-rss-via-syndic8 url))))))))
687
688 (defun nnrss-find-rss-via-syndic8 (url)
689 "query syndic8 for the rss feeds it has for the url."
690 (if (not (locate-library "xml-rpc"))
691 (progn
692 (message "XML-RPC is not available... not checking Syndic8.")
693 nil)
694 (require 'xml-rpc)
695 (let ((feedid (xml-rpc-method-call
696 "http://www.syndic8.com/xmlrpc.php"
697 'syndic8.FindSites
698 url)))
699 (when feedid
700 (let* ((feedinfo (xml-rpc-method-call
701 "http://www.syndic8.com/xmlrpc.php"
702 'syndic8.GetFeedInfo
703 feedid))
704 (urllist
705 (delq nil
706 (mapcar
707 (lambda (listinfo)
708 (if (string-equal
709 (cdr (assoc "status" listinfo))
710 "Syndicated")
711 (cons
712 (cdr (assoc "sitename" listinfo))
713 (list
714 (cons 'title
715 (cdr (assoc
716 "sitename" listinfo)))
717 (cons 'href
718 (cdr (assoc
719 "dataurl" listinfo)))))))
720 feedinfo))))
721 (if (not (> (length urllist) 1))
722 (cdar urllist)
723 (let ((completion-ignore-case t)
724 (selection
725 (mapcar (lambda (listinfo)
726 (cons (cdr (assoc "sitename" listinfo))
727 (string-to-int
728 (cdr (assoc "feedid" listinfo)))))
729 feedinfo)))
730 (cdr (assoc
731 (completing-read
732 "Multiple feeds found. Select one: "
733 selection nil t) urllist)))))))))
734
735 (defun nnrss-rss-p (data)
736 "Test if data is an RSS feed. Simply ensures that the first
737 element is rss or rdf."
738 (or (eq (caar data) 'rss)
739 (eq (caar data) 'rdf:RDF)))
740
741 (defun nnrss-rss-title-description (rss-namespace data url)
742 "Return the title of an RSS feed."
743 (if (nnrss-rss-p data)
744 (let ((description (intern (concat rss-namespace "description")))
745 (title (intern (concat rss-namespace "title")))
746 (channel (nnrss-find-el (intern (concat rss-namespace "channel"))
747 data)))
748 (list
749 (cons 'description (caddr (nth 0 (nnrss-find-el description channel))))
750 (cons 'title (caddr (nth 0 (nnrss-find-el title channel))))
751 (cons 'href url)))))
752
753 (defun nnrss-get-namespace-prefix (el uri)
754 "Given EL (containing a parsed element) and URI (containing a string
755 that gives the URI for which you want to retrieve the namespace
756 prefix), return the prefix."
757 (let* ((prefix (car (rassoc uri (cadar el))))
758 (nslist (if prefix
759 (split-string (symbol-name prefix) ":")))
760 (ns (cond ((eq (length nslist) 1) ; no prefix given
761 "")
762 ((eq (length nslist) 2) ; extract prefix
763 (cadr nslist)))))
764 (if (and ns (not (eq ns "")))
765 (concat ns ":")
766 ns)))
767
768 (provide 'nnrss)
769
770
771 ;;; nnrss.el ends here
772
773 ;;; arch-tag: 12910c07-0cdf-44fb-8d2c-416ded64c267