Commit | Line | Data |
---|---|---|
23f87bed | 1 | ;;; nnrss.el --- interfacing with RSS |
e84b4b86 | 2 | |
c4288669 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, |
e3fe4da0 | 4 | ;; 2006, 2007, 2008 Free Software Foundation, Inc. |
23f87bed MB |
5 | |
6 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
7 | ;; Keywords: RSS | |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published | |
5a9dffec | 13 | ;; by the Free Software Foundation; either version 3, or (at your |
23f87bed MB |
14 | ;; option) any later version. |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, but | |
17 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | ;; General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
23 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
24 | ;; Boston, MA 02110-1301, USA. | |
23f87bed MB |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;;; Code: | |
29 | ||
0f7cbeb9 GM |
30 | ;; For Emacs < 22.2. |
31 | (eval-and-compile | |
32 | (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) | |
33 | ||
23f87bed MB |
34 | (eval-when-compile (require 'cl)) |
35 | ||
36 | (require 'gnus) | |
37 | (require 'nnoo) | |
38 | (require 'nnmail) | |
39 | (require 'message) | |
40 | (require 'mm-util) | |
41 | (require 'gnus-util) | |
42 | (require 'time-date) | |
43 | (require 'rfc2231) | |
44 | (require 'mm-url) | |
91472578 MB |
45 | (require 'rfc2047) |
46 | (require 'mml) | |
23f87bed MB |
47 | (eval-when-compile |
48 | (ignore-errors | |
91472578 | 49 | (require 'xml))) |
23f87bed MB |
50 | (eval '(require 'xml)) |
51 | ||
52 | (nnoo-declare nnrss) | |
53 | ||
54 | (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/") | |
55 | "Where nnrss will save its files.") | |
56 | ||
01c52d31 MB |
57 | (defvoo nnrss-ignore-article-fields '(slash:comments) |
58 | "*List of fields that should be ignored when comparing RSS articles. | |
59 | Some RSS feeds update article fields during their lives, e.g. to | |
60 | indicate the number of comments or the number of times the | |
61 | articles have been seen. However, if there is a difference | |
62 | between the local article and the distant one, the latter is | |
63 | considered to be new. To avoid this and discard some fields, set | |
64 | this variable to the list of fields to be ignored.") | |
65 | ||
23f87bed MB |
66 | ;; (group max rss-url) |
67 | (defvoo nnrss-server-data nil) | |
68 | ||
69 | ;; (num timestamp url subject author date extra) | |
70 | (defvoo nnrss-group-data nil) | |
71 | (defvoo nnrss-group-max 0) | |
72 | (defvoo nnrss-group-min 1) | |
73 | (defvoo nnrss-group nil) | |
01c52d31 | 74 | (defvoo nnrss-group-hashtb (make-hash-table :test 'equal)) |
23f87bed MB |
75 | (defvoo nnrss-status-string "") |
76 | ||
77 | (defconst nnrss-version "nnrss 1.0") | |
78 | ||
79 | (defvar nnrss-group-alist '() | |
80 | "List of RSS addresses.") | |
81 | ||
82 | (defvar nnrss-use-local nil) | |
83 | ||
84 | (defvar nnrss-description-field 'X-Gnus-Description | |
85 | "Field name used for DESCRIPTION. | |
86 | To use the description in headers, put this name into `nnmail-extra-headers'.") | |
87 | ||
88 | (defvar nnrss-url-field 'X-Gnus-Url | |
89 | "Field name used for URL. | |
90 | To use the description in headers, put this name into `nnmail-extra-headers'.") | |
91 | ||
92 | (defvar nnrss-content-function nil | |
93 | "A function which is called in `nnrss-request-article'. | |
94 | The arguments are (ENTRY GROUP ARTICLE). | |
91472578 | 95 | ENTRY is the record of the current headline. GROUP is the group name. |
23f87bed MB |
96 | ARTICLE is the article number of the current headline.") |
97 | ||
91472578 | 98 | (defvar nnrss-file-coding-system mm-universal-coding-system |
01c52d31 MB |
99 | "*Coding system used when reading and writing files. |
100 | If you run Gnus with various versions of Emacsen, the value of this | |
101 | variable should be the coding system that all those Emacsen support. | |
102 | Note that you have to regenerate all the nnrss groups if you change | |
103 | the value. Moreover, you should be patient even if you are made to | |
104 | read the same articles twice, that arises for the difference of the | |
105 | versions of xml.el.") | |
91472578 | 106 | |
82fe1aed MB |
107 | (defvar nnrss-compatible-encoding-alist |
108 | (delq nil (mapcar (lambda (elem) | |
109 | (if (and (mm-coding-system-p (car elem)) | |
110 | (mm-coding-system-p (cdr elem))) | |
111 | elem)) | |
112 | mm-charset-override-alist)) | |
91472578 MB |
113 | "Alist of encodings and those supersets. |
114 | The cdr of each element is used to decode data if it is available when | |
7dafe00b | 115 | the car is what the data specify as the encoding. Or, the car is used |
91472578 MB |
116 | for decoding when the cdr that the data specify is not available.") |
117 | ||
7dafe00b MB |
118 | (defvar nnrss-wash-html-in-text-plain-parts nil |
119 | "*Non-nil means render text in text/plain parts as HTML. | |
120 | The function specified by the `mm-text-html-renderer' variable will be | |
121 | used to render text. If it is nil, text will simply be folded.") | |
122 | ||
23f87bed MB |
123 | (nnoo-define-basics nnrss) |
124 | ||
125 | ;;; Interface functions | |
126 | ||
91472578 MB |
127 | (defsubst nnrss-format-string (string) |
128 | (gnus-replace-in-string string " *\n *" " ")) | |
129 | ||
130 | (defun nnrss-decode-group-name (group) | |
131 | (if (and group (mm-coding-system-p 'utf-8)) | |
132 | (setq group (mm-decode-coding-string group 'utf-8)) | |
133 | group)) | |
ad136a7c | 134 | |
23f87bed | 135 | (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old) |
91472578 | 136 | (setq group (nnrss-decode-group-name group)) |
23f87bed MB |
137 | (nnrss-possibly-change-group group server) |
138 | (let (e) | |
139 | (save-excursion | |
140 | (set-buffer nntp-server-buffer) | |
141 | (erase-buffer) | |
142 | (dolist (article articles) | |
143 | (if (setq e (assq article nnrss-group-data)) | |
144 | (insert (number-to-string (car e)) "\t" ;; number | |
91472578 MB |
145 | ;; subject |
146 | (or (nth 3 e) "") | |
147 | "\t" | |
148 | ;; from | |
149 | (or (nth 4 e) "(nobody)") | |
150 | "\t" | |
151 | ;; date | |
23f87bed | 152 | (or (nth 5 e) "") |
91472578 MB |
153 | "\t" |
154 | ;; id | |
23f87bed | 155 | (format "<%d@%s.nnrss>" (car e) group) |
91472578 MB |
156 | "\t" |
157 | ;; refs | |
158 | "\t" | |
159 | ;; chars | |
160 | "-1" "\t" | |
161 | ;; lines | |
162 | "-1" "\t" | |
163 | ;; Xref | |
164 | "" "\t" | |
23f87bed MB |
165 | (if (and (nth 6 e) |
166 | (memq nnrss-description-field | |
167 | nnmail-extra-headers)) | |
168 | (concat (symbol-name nnrss-description-field) | |
169 | ": " | |
170 | (nnrss-format-string (nth 6 e)) | |
171 | "\t") | |
172 | "") | |
173 | (if (and (nth 2 e) | |
174 | (memq nnrss-url-field | |
175 | nnmail-extra-headers)) | |
176 | (concat (symbol-name nnrss-url-field) | |
177 | ": " | |
178 | (nnrss-format-string (nth 2 e)) | |
179 | "\t") | |
180 | "") | |
181 | "\n"))))) | |
182 | 'nov) | |
183 | ||
184 | (deffoo nnrss-request-group (group &optional server dont-check) | |
91472578 MB |
185 | (setq group (nnrss-decode-group-name group)) |
186 | (nnheader-message 6 "nnrss: Requesting %s..." group) | |
23f87bed | 187 | (nnrss-possibly-change-group group server) |
91472578 MB |
188 | (prog1 |
189 | (if dont-check | |
190 | t | |
191 | (nnrss-check-group group server) | |
192 | (nnheader-report 'nnrss "Opened group %s" group) | |
193 | (nnheader-insert | |
194 | "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max | |
195 | (prin1-to-string group) | |
196 | t)) | |
197 | (nnheader-message 6 "nnrss: Requesting %s...done" group))) | |
23f87bed MB |
198 | |
199 | (deffoo nnrss-close-group (group &optional server) | |
200 | t) | |
201 | ||
9efa445f DN |
202 | (defvar mm-text-html-renderer) |
203 | (defvar mm-text-html-washer-alist) | |
7dafe00b | 204 | |
23f87bed | 205 | (deffoo nnrss-request-article (article &optional group server buffer) |
91472578 MB |
206 | (setq group (nnrss-decode-group-name group)) |
207 | (when (stringp article) | |
208 | (setq article (if (string-match "\\`<\\([0-9]+\\)@" article) | |
209 | (string-to-number (match-string 1 article)) | |
210 | 0))) | |
23f87bed MB |
211 | (nnrss-possibly-change-group group server) |
212 | (let ((e (assq article nnrss-group-data)) | |
23f87bed MB |
213 | (nntp-server-buffer (or buffer nntp-server-buffer)) |
214 | post err) | |
215 | (when e | |
91472578 MB |
216 | (with-current-buffer nntp-server-buffer |
217 | (erase-buffer) | |
218 | (if group | |
219 | (insert "Newsgroups: " group "\n")) | |
220 | (if (nth 3 e) | |
221 | (insert "Subject: " (nth 3 e) "\n")) | |
222 | (if (nth 4 e) | |
223 | (insert "From: " (nth 4 e) "\n")) | |
224 | (if (nth 5 e) | |
225 | (insert "Date: " (nnrss-format-string (nth 5 e)) "\n")) | |
226 | (let ((header (buffer-string)) | |
7dafe00b | 227 | (text (nth 6 e)) |
91472578 | 228 | (link (nth 2 e)) |
31640842 | 229 | (enclosure (nth 7 e)) |
d3a597b7 | 230 | (comments (nth 8 e)) |
91472578 MB |
231 | ;; Enable encoding of Newsgroups header in XEmacs. |
232 | (default-enable-multibyte-characters t) | |
233 | (rfc2047-header-encoding-alist | |
234 | (if (mm-coding-system-p 'utf-8) | |
235 | (cons '("Newsgroups" . utf-8) | |
236 | rfc2047-header-encoding-alist) | |
237 | rfc2047-header-encoding-alist)) | |
7dafe00b | 238 | rfc2047-encode-encoded-words body fn) |
d3a597b7 | 239 | (when (or text link enclosure comments) |
91472578 MB |
240 | (insert "\n") |
241 | (insert "<#multipart type=alternative>\n" | |
242 | "<#part type=\"text/plain\">\n") | |
243 | (setq body (point)) | |
31640842 | 244 | (when text |
7dafe00b MB |
245 | (insert text) |
246 | (goto-char body) | |
247 | (if (and nnrss-wash-html-in-text-plain-parts | |
248 | (progn | |
249 | (require 'mm-view) | |
250 | (setq fn (or (cdr (assq mm-text-html-renderer | |
251 | mm-text-html-washer-alist)) | |
252 | mm-text-html-renderer)))) | |
253 | (progn | |
254 | (narrow-to-region body (point-max)) | |
255 | (if (functionp fn) | |
256 | (funcall fn) | |
257 | (apply (car fn) (cdr fn))) | |
258 | (widen) | |
259 | (goto-char body) | |
260 | (re-search-forward "[^\t\n ]" nil t) | |
261 | (beginning-of-line) | |
262 | (delete-region body (point)) | |
263 | (goto-char (point-max)) | |
264 | (skip-chars-backward "\t\n ") | |
265 | (end-of-line) | |
266 | (delete-region (point) (point-max)) | |
267 | (insert "\n")) | |
268 | (while (re-search-forward "\n+" nil t) | |
269 | (replace-match " ")) | |
270 | (goto-char body) | |
271 | ;; See `nnrss-check-group', which inserts "<br /><br />". | |
272 | (when (search-forward "<br /><br />" nil t) | |
273 | (if (eobp) | |
274 | (replace-match "\n") | |
275 | (replace-match "\n\n"))) | |
276 | (unless (eobp) | |
277 | (let ((fill-column default-fill-column) | |
278 | (window (get-buffer-window nntp-server-buffer))) | |
279 | (when window | |
280 | (setq fill-column | |
281 | (max 1 (/ (* (window-width window) 7) 8)))) | |
282 | (fill-region (point) (point-max)) | |
283 | (goto-char (point-max)) | |
284 | ;; XEmacs version of `fill-region' inserts newline. | |
285 | (unless (bolp) | |
286 | (insert "\n"))))) | |
31640842 MB |
287 | (when (or link enclosure) |
288 | (insert "\n"))) | |
289 | (when link | |
290 | (insert link "\n")) | |
291 | (when enclosure | |
292 | (insert (car enclosure) " " | |
293 | (nth 2 enclosure) " " | |
294 | (nth 3 enclosure) "\n")) | |
d3a597b7 MB |
295 | (when comments |
296 | (insert comments "\n")) | |
91472578 MB |
297 | (setq body (buffer-substring body (point))) |
298 | (insert "<#/part>\n" | |
299 | "<#part type=\"text/html\">\n" | |
300 | "<html><head></head><body>\n") | |
301 | (when text | |
302 | (insert text "\n")) | |
303 | (when link | |
304 | (insert "<p><a href=\"" link "\">link</a></p>\n")) | |
31640842 MB |
305 | (when enclosure |
306 | (insert "<p><a href=\"" (car enclosure) "\">" | |
307 | (cadr enclosure) "</a> " (nth 2 enclosure) | |
308 | " " (nth 3 enclosure) "</p>\n")) | |
d3a597b7 MB |
309 | (when comments |
310 | (insert "<p><a href=\"" comments "\">comments</a></p>\n")) | |
91472578 MB |
311 | (insert "</body></html>\n" |
312 | "<#/part>\n" | |
313 | "<#/multipart>\n")) | |
314 | (condition-case nil | |
315 | (mml-to-mime) | |
316 | (error | |
317 | (erase-buffer) | |
318 | (insert header | |
319 | "Content-Type: text/plain; charset=gnus-decoded\n" | |
320 | "Content-Transfer-Encoding: 8bit\n\n" | |
321 | body) | |
322 | (nnheader-message | |
323 | 3 "Warning - there might be invalid characters")))) | |
324 | (goto-char (point-min)) | |
325 | (search-forward "\n\n") | |
326 | (forward-line -1) | |
327 | (insert (format "Message-ID: <%d@%s.nnrss>\n" | |
328 | (car e) | |
329 | (let ((rfc2047-encoding-type 'mime) | |
330 | rfc2047-encode-max-chars) | |
331 | (rfc2047-encode-string | |
332 | (gnus-replace-in-string group "[\t\n ]+" "_"))))) | |
333 | (when nnrss-content-function | |
334 | (funcall nnrss-content-function e group article)))) | |
23f87bed MB |
335 | (cond |
336 | (err | |
337 | (nnheader-report 'nnrss err)) | |
338 | ((not e) | |
339 | (nnheader-report 'nnrss "no such id: %d" article)) | |
340 | (t | |
341 | (nnheader-report 'nnrss "article %s retrieved" (car e)) | |
342 | ;; we return the article number. | |
343 | (cons nnrss-group (car e)))))) | |
344 | ||
345 | (deffoo nnrss-request-list (&optional server) | |
346 | (nnrss-possibly-change-group nil server) | |
347 | (nnrss-generate-active) | |
348 | t) | |
349 | ||
350 | (deffoo nnrss-open-server (server &optional defs connectionless) | |
351 | (nnrss-read-server-data server) | |
352 | (nnoo-change-server 'nnrss server defs) | |
353 | t) | |
354 | ||
355 | (deffoo nnrss-request-expire-articles | |
356 | (articles group &optional server force) | |
91472578 | 357 | (setq group (nnrss-decode-group-name group)) |
23f87bed MB |
358 | (nnrss-possibly-change-group group server) |
359 | (let (e days not-expirable changed) | |
360 | (dolist (art articles) | |
361 | (if (and (setq e (assq art nnrss-group-data)) | |
362 | (nnmail-expired-article-p | |
363 | group | |
364 | (if (listp (setq days (nth 1 e))) days | |
365 | (days-to-time (- days (time-to-days '(0 0))))) | |
366 | force)) | |
367 | (setq nnrss-group-data (delq e nnrss-group-data) | |
368 | changed t) | |
369 | (push art not-expirable))) | |
370 | (if changed | |
371 | (nnrss-save-group-data group server)) | |
372 | not-expirable)) | |
373 | ||
374 | (deffoo nnrss-request-delete-group (group &optional force server) | |
91472578 | 375 | (setq group (nnrss-decode-group-name group)) |
23f87bed | 376 | (nnrss-possibly-change-group group server) |
91472578 MB |
377 | (let (elem) |
378 | ;; There may be two or more entries in `nnrss-group-alist' since | |
379 | ;; this function didn't delete them formerly. | |
380 | (while (setq elem (assoc group nnrss-group-alist)) | |
381 | (setq nnrss-group-alist (delq elem nnrss-group-alist)))) | |
23f87bed MB |
382 | (setq nnrss-server-data |
383 | (delq (assoc group nnrss-server-data) nnrss-server-data)) | |
384 | (nnrss-save-server-data server) | |
91472578 | 385 | (ignore-errors |
01c52d31 MB |
386 | (let ((file-name-coding-system nnmail-pathname-coding-system)) |
387 | (delete-file (nnrss-make-filename group server)))) | |
23f87bed MB |
388 | t) |
389 | ||
390 | (deffoo nnrss-request-list-newsgroups (&optional server) | |
391 | (nnrss-possibly-change-group nil server) | |
392 | (save-excursion | |
393 | (set-buffer nntp-server-buffer) | |
394 | (erase-buffer) | |
395 | (dolist (elem nnrss-group-alist) | |
396 | (if (third elem) | |
397 | (insert (car elem) "\t" (third elem) "\n")))) | |
398 | t) | |
399 | ||
400 | (nnoo-define-skeleton nnrss) | |
401 | ||
402 | ;;; Internal functions | |
403 | (eval-when-compile (defun xml-rpc-method-call (&rest args))) | |
91472578 MB |
404 | |
405 | (defun nnrss-get-encoding () | |
406 | "Return an encoding attribute specified in the current xml contents. | |
407 | If `nnrss-compatible-encoding-alist' specifies the compatible encoding, | |
408 | it is used instead. If the xml contents doesn't specify the encoding, | |
409 | return `utf-8' which is the default encoding for xml if it is available, | |
410 | otherwise return nil." | |
411 | (goto-char (point-min)) | |
412 | (if (re-search-forward | |
01c52d31 | 413 | "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)" |
91472578 | 414 | nil t) |
01c52d31 MB |
415 | (let ((encoding (intern (downcase (or (match-string 1) |
416 | (match-string 2)))))) | |
91472578 MB |
417 | (or |
418 | (mm-coding-system-p (cdr (assq encoding | |
419 | nnrss-compatible-encoding-alist))) | |
420 | (mm-coding-system-p encoding) | |
421 | (mm-coding-system-p (car (rassq encoding | |
422 | nnrss-compatible-encoding-alist))))) | |
423 | (mm-coding-system-p 'utf-8))) | |
424 | ||
0f7cbeb9 GM |
425 | (declare-function w3-parse-buffer "ext:w3-parse" (&optional buff)) |
426 | ||
23f87bed | 427 | (defun nnrss-fetch (url &optional local) |
91472578 MB |
428 | "Fetch URL and put it in a the expected Lisp structure." |
429 | (mm-with-unibyte-buffer | |
430 | ;;some CVS versions of url.el need this to close the connection quickly | |
431 | (let (cs xmlform htmlform) | |
23f87bed MB |
432 | ;; bit o' work necessary for w3 pre-cvs and post-cvs |
433 | (if local | |
434 | (let ((coding-system-for-read 'binary)) | |
435 | (insert-file-contents url)) | |
91472578 MB |
436 | ;; FIXME: shouldn't binding `coding-system-for-read' be moved |
437 | ;; to `mm-url-insert'? | |
438 | (let ((coding-system-for-read 'binary)) | |
7dafe00b MB |
439 | (condition-case err |
440 | (mm-url-insert url) | |
441 | (error (if (or debug-on-quit debug-on-error) | |
442 | (signal (car err) (cdr err)) | |
443 | (message "nnrss: Failed to fetch %s" url)))))) | |
91472578 MB |
444 | (nnheader-remove-cr-followed-by-lf) |
445 | ;; Decode text according to the encoding attribute. | |
446 | (when (setq cs (nnrss-get-encoding)) | |
11e95b02 MB |
447 | (insert (prog1 |
448 | (mm-decode-coding-string (buffer-string) cs) | |
449 | (erase-buffer) | |
450 | (mm-enable-multibyte)))) | |
91472578 MB |
451 | (goto-char (point-min)) |
452 | ||
453 | ;; Because xml-parse-region can't deal with anything that isn't | |
454 | ;; xml and w3-parse-buffer can't deal with some xml, we have to | |
455 | ;; parse with xml-parse-region first and, if that fails, parse | |
456 | ;; with w3-parse-buffer. Yuck. Eventually, someone should find out | |
457 | ;; why w3-parse-buffer fails to parse some well-formed xml and | |
458 | ;; fix it. | |
459 | ||
460 | (condition-case err1 | |
461 | (setq xmlform (xml-parse-region (point-min) (point-max))) | |
462 | (error | |
463 | (condition-case err2 | |
464 | (setq htmlform (caddar (w3-parse-buffer | |
465 | (current-buffer)))) | |
466 | (error | |
467 | (message "\ | |
468 | nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" | |
469 | url err1 err2))))) | |
470 | (if htmlform | |
471 | htmlform | |
472 | xmlform)))) | |
23f87bed MB |
473 | |
474 | (defun nnrss-possibly-change-group (&optional group server) | |
475 | (when (and server | |
476 | (not (nnrss-server-opened server))) | |
477 | (nnrss-open-server server)) | |
478 | (when (and group (not (equal group nnrss-group))) | |
479 | (nnrss-read-group-data group server) | |
480 | (setq nnrss-group group))) | |
481 | ||
482 | (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories)) | |
483 | ||
484 | (defun nnrss-generate-active () | |
91472578 | 485 | (when (y-or-n-p "Fetch extra categories? ") |
01c52d31 | 486 | (mapc 'funcall nnrss-extra-categories)) |
23f87bed MB |
487 | (save-excursion |
488 | (set-buffer nntp-server-buffer) | |
489 | (erase-buffer) | |
490 | (dolist (elem nnrss-group-alist) | |
491 | (insert (prin1-to-string (car elem)) " 0 1 y\n")) | |
492 | (dolist (elem nnrss-server-data) | |
493 | (unless (assoc (car elem) nnrss-group-alist) | |
494 | (insert (prin1-to-string (car elem)) " 0 1 y\n"))))) | |
495 | ||
7dafe00b MB |
496 | (eval-and-compile (autoload 'timezone-parse-date "timezone")) |
497 | ||
498 | (defun nnrss-normalize-date (date) | |
499 | "Return a date string of DATE in the RFC822 style. | |
500 | This function handles the ISO 8601 date format described in | |
501 | <URL:http://www.w3.org/TR/NOTE-datetime>, and also the RFC822 style | |
502 | which RSS 2.0 allows." | |
503 | (let (case-fold-search vector year month day time zone cts) | |
504 | (cond ((null date)) | |
505 | ;; RFC822 | |
506 | ((string-match " [0-9]+ " date) | |
507 | (setq vector (timezone-parse-date date) | |
508 | year (string-to-number (aref vector 0))) | |
509 | (when (>= year 1969) | |
510 | (setq month (string-to-number (aref vector 1)) | |
511 | day (string-to-number (aref vector 2))) | |
512 | (unless (>= (length (setq time (aref vector 3))) 3) | |
513 | (setq time "00:00:00")) | |
514 | (when (and (setq zone (aref vector 4)) | |
515 | (not (string-match "\\`[A-Z+-]" zone))) | |
516 | (setq zone nil)))) | |
517 | ;; ISO 8601 | |
518 | ((string-match | |
519 | (eval-when-compile | |
520 | (concat | |
521 | ;; 1. year | |
522 | "\\(199[0-9]\\|20[0-9][0-9]\\)" | |
01c52d31 MB |
523 | "\\(?:-" |
524 | ;; 2. month | |
7dafe00b | 525 | "\\([01][0-9]\\)" |
01c52d31 MB |
526 | "\\(?:-" |
527 | ;; 3. day | |
7dafe00b | 528 | "\\([0-3][0-9]\\)" |
01c52d31 MB |
529 | "\\)?\\)?\\(?:T" |
530 | ;; 4. hh:mm | |
7dafe00b | 531 | "\\([012][0-9]:[0-5][0-9]\\)" |
01c52d31 MB |
532 | "\\(?:" |
533 | ;; 5. :ss | |
7dafe00b | 534 | "\\(:[0-5][0-9]\\)" |
01c52d31 MB |
535 | "\\(?:\\.[0-9]+\\)?\\)?\\)?" |
536 | ;; 6+7,8,9. zone | |
537 | "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)" | |
7dafe00b MB |
538 | "\\|\\([+-][012][0-9][0-5][0-9]\\)" |
539 | "\\|\\(Z\\)\\)?")) | |
540 | date) | |
541 | (setq year (string-to-number (match-string 1 date)) | |
01c52d31 MB |
542 | month (string-to-number (or (match-string 2 date) "1")) |
543 | day (string-to-number (or (match-string 3 date) "1")) | |
544 | time (if (match-beginning 5) | |
545 | (substring date (match-beginning 4) (match-end 5)) | |
546 | (concat (or (match-string 4 date) "00:00") ":00")) | |
547 | zone (cond ((match-beginning 6) | |
548 | (concat (match-string 6 date) | |
549 | (match-string 7 date))) | |
550 | ((match-beginning 9) ;; Z | |
7dafe00b MB |
551 | "+0000") |
552 | (t ;; nil if zone is not provided. | |
01c52d31 | 553 | (match-string 8 date)))))) |
7dafe00b MB |
554 | (if month |
555 | (progn | |
556 | (setq cts (current-time-string (encode-time 0 0 0 day month year))) | |
557 | (format "%s, %02d %s %04d %s%s" | |
558 | (substring cts 0 3) day (substring cts 4 7) year time | |
559 | (if zone | |
560 | (concat " " zone) | |
561 | ""))) | |
562 | (message-make-date)))) | |
563 | ||
23f87bed MB |
564 | ;;; data functions |
565 | ||
566 | (defun nnrss-read-server-data (server) | |
567 | (setq nnrss-server-data nil) | |
01c52d31 MB |
568 | (let ((file (nnrss-make-filename "nnrss" server)) |
569 | (file-name-coding-system nnmail-pathname-coding-system)) | |
23f87bed | 570 | (when (file-exists-p file) |
91472578 MB |
571 | ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII |
572 | ;; file names. So, we use `insert-file-contents' instead. | |
573 | (mm-with-multibyte-buffer | |
01c52d31 | 574 | (let ((coding-system-for-read nnrss-file-coding-system)) |
23f87bed | 575 | (insert-file-contents file) |
91472578 | 576 | (eval-region (point-min) (point-max))))))) |
23f87bed MB |
577 | |
578 | (defun nnrss-save-server-data (server) | |
579 | (gnus-make-directory nnrss-directory) | |
91472578 MB |
580 | (let ((coding-system-for-write nnrss-file-coding-system) |
581 | (file-name-coding-system nnmail-pathname-coding-system)) | |
582 | (with-temp-file (nnrss-make-filename "nnrss" server) | |
583 | (insert (format ";; -*- coding: %s; -*-\n" | |
584 | nnrss-file-coding-system)) | |
585 | (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist)) | |
586 | (insert "\n") | |
587 | (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data))))) | |
23f87bed MB |
588 | |
589 | (defun nnrss-read-group-data (group server) | |
590 | (setq nnrss-group-data nil) | |
01c52d31 MB |
591 | (if (hash-table-p nnrss-group-hashtb) |
592 | (clrhash nnrss-group-hashtb) | |
593 | (setq nnrss-group-hashtb (make-hash-table :test 'equal))) | |
23f87bed MB |
594 | (let ((pair (assoc group nnrss-server-data))) |
595 | (setq nnrss-group-max (or (cadr pair) 0)) | |
596 | (setq nnrss-group-min (+ nnrss-group-max 1))) | |
01c52d31 MB |
597 | (let ((file (nnrss-make-filename group server)) |
598 | (file-name-coding-system nnmail-pathname-coding-system)) | |
23f87bed | 599 | (when (file-exists-p file) |
91472578 MB |
600 | ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII |
601 | ;; file names. So, we use `insert-file-contents' instead. | |
602 | (mm-with-multibyte-buffer | |
01c52d31 | 603 | (let ((coding-system-for-read nnrss-file-coding-system)) |
23f87bed | 604 | (insert-file-contents file) |
91472578 | 605 | (eval-region (point-min) (point-max)))) |
23f87bed | 606 | (dolist (e nnrss-group-data) |
01c52d31 | 607 | (puthash (nth 9 e) t nnrss-group-hashtb) |
91472578 MB |
608 | (when (and (car e) (> nnrss-group-min (car e))) |
609 | (setq nnrss-group-min (car e))) | |
610 | (when (and (car e) (< nnrss-group-max (car e))) | |
611 | (setq nnrss-group-max (car e))))))) | |
23f87bed MB |
612 | |
613 | (defun nnrss-save-group-data (group server) | |
614 | (gnus-make-directory nnrss-directory) | |
91472578 MB |
615 | (let ((coding-system-for-write nnrss-file-coding-system) |
616 | (file-name-coding-system nnmail-pathname-coding-system)) | |
617 | (with-temp-file (nnrss-make-filename group server) | |
618 | (insert (format ";; -*- coding: %s; -*-\n" | |
619 | nnrss-file-coding-system)) | |
620 | (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data))))) | |
621 | ||
622 | (defun nnrss-make-filename (name server) | |
623 | (expand-file-name | |
624 | (nnrss-translate-file-chars | |
625 | (concat name | |
626 | (and server | |
627 | (not (equal server "")) | |
628 | "-") | |
629 | server | |
630 | ".el")) | |
631 | nnrss-directory)) | |
632 | ||
633 | (gnus-add-shutdown 'nnrss-close 'gnus) | |
634 | ||
635 | (defun nnrss-close () | |
636 | "Clear internal nnrss variables." | |
637 | (setq nnrss-group-data nil | |
638 | nnrss-server-data nil | |
639 | nnrss-group-hashtb nil | |
640 | nnrss-group-alist nil)) | |
23f87bed MB |
641 | |
642 | ;;; URL interface | |
643 | ||
644 | (defun nnrss-no-cache (url) | |
645 | "") | |
646 | ||
647 | (defun nnrss-insert-w3 (url) | |
648 | (mm-with-unibyte-current-buffer | |
7dafe00b MB |
649 | (condition-case err |
650 | (mm-url-insert url) | |
651 | (error (if (or debug-on-quit debug-on-error) | |
652 | (signal (car err) (cdr err)) | |
653 | (message "nnrss: Failed to fetch %s" url)))))) | |
23f87bed | 654 | |
91472578 | 655 | (defun nnrss-decode-entities-string (string) |
23f87bed | 656 | (if string |
91472578 | 657 | (mm-with-multibyte-buffer |
23f87bed MB |
658 | (insert string) |
659 | (mm-url-decode-entities-nbsp) | |
660 | (buffer-string)))) | |
661 | ||
662 | (defalias 'nnrss-insert 'nnrss-insert-w3) | |
663 | ||
91472578 MB |
664 | (defun nnrss-mime-encode-string (string) |
665 | (mm-with-multibyte-buffer | |
666 | (insert string) | |
667 | (mm-url-decode-entities-nbsp) | |
668 | (goto-char (point-min)) | |
669 | (while (re-search-forward "[\t\n ]+" nil t) | |
670 | (replace-match " ")) | |
671 | (goto-char (point-min)) | |
672 | (skip-chars-forward " ") | |
673 | (delete-region (point-min) (point)) | |
674 | (goto-char (point-max)) | |
675 | (skip-chars-forward " ") | |
676 | (delete-region (point) (point-max)) | |
677 | (let ((rfc2047-encoding-type 'mime) | |
678 | rfc2047-encode-max-chars) | |
679 | (rfc2047-encode-region (point-min) (point-max))) | |
680 | (goto-char (point-min)) | |
681 | (while (search-forward "\n" nil t) | |
682 | (delete-backward-char 1)) | |
683 | (buffer-string))) | |
684 | ||
23f87bed MB |
685 | ;;; Snarf functions |
686 | ||
01c52d31 MB |
687 | (defun nnrss-make-hash-index (item) |
688 | (setq item (gnus-remove-if | |
689 | (lambda (field) | |
690 | (when (listp field) | |
691 | (memq (car field) nnrss-ignore-article-fields))) | |
692 | item)) | |
693 | (md5 (gnus-prin1-to-string item) | |
694 | nil nil | |
695 | nnrss-file-coding-system)) | |
696 | ||
23f87bed | 697 | (defun nnrss-check-group (group server) |
7dafe00b | 698 | (let (file xml subject url extra changed author date feed-subject |
01c52d31 MB |
699 | enclosure comments rss-ns rdf-ns content-ns dc-ns |
700 | hash-index) | |
23f87bed MB |
701 | (if (and nnrss-use-local |
702 | (file-exists-p (setq file (expand-file-name | |
703 | (nnrss-translate-file-chars | |
704 | (concat group ".xml")) | |
705 | nnrss-directory)))) | |
706 | (setq xml (nnrss-fetch file t)) | |
707 | (setq url (or (nth 2 (assoc group nnrss-server-data)) | |
708 | (second (assoc group nnrss-group-alist)))) | |
709 | (unless url | |
710 | (setq url | |
91472578 MB |
711 | (cdr |
712 | (assoc 'href | |
713 | (nnrss-discover-feed | |
714 | (read-string | |
715 | (format "URL to search for %s: " group) "http://"))))) | |
23f87bed MB |
716 | (let ((pair (assoc group nnrss-server-data))) |
717 | (if pair | |
718 | (setcdr (cdr pair) (list url)) | |
719 | (push (list group nnrss-group-max url) nnrss-server-data))) | |
720 | (setq changed t)) | |
721 | (setq xml (nnrss-fetch url))) | |
722 | ;; See | |
723 | ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html | |
724 | ;; for more RSS namespaces. | |
725 | (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/") | |
726 | rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#") | |
727 | rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/") | |
728 | content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/")) | |
729 | (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml))) | |
730 | (when (and (listp item) | |
91472578 | 731 | (string= (concat rss-ns "item") (car item)) |
01c52d31 MB |
732 | (progn (setq hash-index (nnrss-make-hash-index item)) |
733 | (not (gethash hash-index nnrss-group-hashtb)))) | |
23f87bed | 734 | (setq subject (nnrss-node-text rss-ns 'title item)) |
01c52d31 MB |
735 | (setq url (nnrss-decode-entities-string |
736 | (nnrss-node-text rss-ns 'link (cddr item)))) | |
737 | (setq extra (or (nnrss-node-text content-ns 'encoded item) | |
23f87bed | 738 | (nnrss-node-text rss-ns 'description item))) |
7dafe00b MB |
739 | (if (setq feed-subject (nnrss-node-text dc-ns 'subject item)) |
740 | (setq extra (concat feed-subject "<br /><br />" extra))) | |
23f87bed MB |
741 | (setq author (or (nnrss-node-text rss-ns 'author item) |
742 | (nnrss-node-text dc-ns 'creator item) | |
743 | (nnrss-node-text dc-ns 'contributor item))) | |
7dafe00b MB |
744 | (setq date (nnrss-normalize-date |
745 | (or (nnrss-node-text dc-ns 'date item) | |
746 | (nnrss-node-text rss-ns 'pubDate item)))) | |
d3a597b7 | 747 | (setq comments (nnrss-node-text rss-ns 'comments item)) |
31640842 MB |
748 | (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item))) |
749 | (let ((url (cdr (assq 'url enclosure))) | |
750 | (len (cdr (assq 'length enclosure))) | |
751 | (type (cdr (assq 'type enclosure))) | |
752 | (name)) | |
753 | (setq len | |
754 | (if (and len (integerp (setq len (string-to-number len)))) | |
755 | ;; actually already in `ls-lisp-format-file-size' but | |
756 | ;; probably not worth to require it for one function | |
757 | (do ((size (/ len 1.0) (/ size 1024.0)) | |
758 | (post-fixes (list "" "k" "M" "G" "T" "P" "E") | |
759 | (cdr post-fixes))) | |
760 | ((< size 1024) | |
761 | (format "%.1f%s" size (car post-fixes)))) | |
762 | "0")) | |
763 | (setq url (or url "")) | |
764 | (setq name (if (string-match "/\\([^/]*\\)$" url) | |
765 | (match-string 1 url) | |
766 | "file")) | |
767 | (setq type (or type "")) | |
768 | (setq enclosure (list url name len type)))) | |
23f87bed MB |
769 | (push |
770 | (list | |
771 | (incf nnrss-group-max) | |
772 | (current-time) | |
773 | url | |
91472578 MB |
774 | (and subject (nnrss-mime-encode-string subject)) |
775 | (and author (nnrss-mime-encode-string author)) | |
23f87bed | 776 | date |
31640842 | 777 | (and extra (nnrss-decode-entities-string extra)) |
d3a597b7 | 778 | enclosure |
01c52d31 MB |
779 | comments |
780 | hash-index) | |
23f87bed | 781 | nnrss-group-data) |
01c52d31 | 782 | (puthash hash-index t nnrss-group-hashtb) |
91472578 MB |
783 | (setq changed t)) |
784 | (setq extra nil)) | |
23f87bed MB |
785 | (when changed |
786 | (nnrss-save-group-data group server) | |
787 | (let ((pair (assoc group nnrss-server-data))) | |
788 | (if pair | |
789 | (setcar (cdr pair) nnrss-group-max) | |
790 | (push (list group nnrss-group-max) nnrss-server-data))) | |
791 | (nnrss-save-server-data server)))) | |
792 | ||
0f7cbeb9 GM |
793 | (declare-function gnus-group-make-rss-group "gnus-group" (&optional url)) |
794 | ||
91472578 MB |
795 | (defun nnrss-opml-import (opml-file) |
796 | "OPML subscriptions import. | |
797 | Read the file and attempt to subscribe to each Feed in the file." | |
798 | (interactive "fImport file: ") | |
c4288669 MB |
799 | (mapc |
800 | (lambda (node) | |
801 | (let ((xmlurl (cdr (assq 'xmlUrl (cadr node))))) | |
802 | (when (and xmlurl | |
803 | (not (string-match "\\`[\t ]*\\'" xmlurl)) | |
804 | (prog1 | |
805 | (y-or-n-p (format "Subscribe to %s " xmlurl)) | |
806 | (message ""))) | |
807 | (condition-case err | |
808 | (progn | |
809 | (gnus-group-make-rss-group xmlurl) | |
810 | (forward-line 1)) | |
811 | (error | |
812 | (message | |
813 | "Failed to subscribe to %s (%s); type any key to continue: " | |
814 | xmlurl | |
815 | (error-message-string err)) | |
816 | (let ((echo-keystrokes 0)) | |
817 | (read-char))))))) | |
91472578 | 818 | (nnrss-find-el 'outline |
c4288669 MB |
819 | (mm-with-multibyte-buffer |
820 | (insert-file-contents opml-file) | |
821 | (xml-parse-region (point-min) (point-max)))))) | |
91472578 MB |
822 | |
823 | (defun nnrss-opml-export () | |
824 | "OPML subscription export. | |
825 | Export subscriptions to a buffer in OPML Format." | |
826 | (interactive) | |
827 | (with-current-buffer (get-buffer-create "*OPML Export*") | |
828 | (mm-set-buffer-file-coding-system 'utf-8) | |
829 | (insert "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" | |
830 | "<!-- OPML generated by Emacs Gnus' nnrss.el -->\n" | |
831 | "<opml version=\"1.1\">\n" | |
832 | " <head>\n" | |
833 | " <title>mySubscriptions</title>\n" | |
834 | " <dateCreated>" (format-time-string "%a, %d %b %Y %T %z") | |
835 | "</dateCreated>\n" | |
836 | " <ownerEmail>" user-mail-address "</ownerEmail>\n" | |
837 | " <ownerName>" (user-full-name) "</ownerName>\n" | |
838 | " </head>\n" | |
839 | " <body>\n") | |
840 | (dolist (sub nnrss-group-alist) | |
841 | (insert " <outline text=\"" (car sub) | |
842 | "\" xmlUrl=\"" (cadr sub) "\"/>\n")) | |
843 | (insert " </body>\n" | |
844 | "</opml>\n")) | |
845 | (pop-to-buffer "*OPML Export*") | |
846 | (when (fboundp 'sgml-mode) | |
847 | (sgml-mode))) | |
848 | ||
23f87bed MB |
849 | (defun nnrss-generate-download-script () |
850 | "Generate a download script in the current buffer. | |
851 | It is useful when `(setq nnrss-use-local t)'." | |
852 | (interactive) | |
853 | (insert "#!/bin/sh\n") | |
854 | (insert "WGET=wget\n") | |
855 | (insert "RSSDIR='" (expand-file-name nnrss-directory) "'\n") | |
856 | (dolist (elem nnrss-server-data) | |
857 | (let ((url (or (nth 2 elem) | |
858 | (second (assoc (car elem) nnrss-group-alist))))) | |
859 | (insert "$WGET -q -O \"$RSSDIR\"/'" | |
860 | (nnrss-translate-file-chars (concat (car elem) ".xml")) | |
861 | "' '" url "'\n")))) | |
862 | ||
863 | (defun nnrss-translate-file-chars (name) | |
864 | (let ((nnheader-file-name-translation-alist | |
865 | (append nnheader-file-name-translation-alist '((?' . ?_))))) | |
866 | (nnheader-translate-file-chars name))) | |
867 | ||
868 | (defvar nnrss-moreover-url | |
869 | "http://w.moreover.com/categories/category_list_rss.html" | |
870 | "The url of moreover.com categories.") | |
871 | ||
872 | (defun nnrss-snarf-moreover-categories () | |
873 | "Snarf RSS links from moreover.com." | |
874 | (interactive) | |
875 | (let (category name url changed) | |
876 | (with-temp-buffer | |
877 | (nnrss-insert nnrss-moreover-url) | |
878 | (goto-char (point-min)) | |
879 | (while (re-search-forward | |
880 | "<a name=\"\\([^\"]+\\)\">\\|<a href=\"\\(http://[^\"]*moreover\\.com[^\"]+page\\?c=\\([^\"&]+\\)&o=rss\\)" nil t) | |
881 | (if (match-string 1) | |
882 | (setq category (match-string 1)) | |
883 | (setq url (match-string 2) | |
884 | name (mm-url-decode-entities-string | |
885 | (rfc2231-decode-encoded-string | |
886 | (match-string 3)))) | |
887 | (if category | |
888 | (setq name (concat category "." name))) | |
889 | (unless (assoc name nnrss-server-data) | |
890 | (setq changed t) | |
891 | (push (list name 0 url) nnrss-server-data))))) | |
892 | (if changed | |
893 | (nnrss-save-server-data "")))) | |
894 | ||
23f87bed MB |
895 | (defun nnrss-node-text (namespace local-name element) |
896 | (let* ((node (assq (intern (concat namespace (symbol-name local-name))) | |
897 | element)) | |
898 | (text (if (and node (listp node)) | |
899 | (nnrss-node-just-text node) | |
900 | node)) | |
ff5e68bf MB |
901 | (cleaned-text (if text |
902 | (gnus-replace-in-string | |
903 | (gnus-replace-in-string | |
904 | text "^[\000-\037\177]+\\|^ +\\| +$" "") | |
905 | "\r\n" "\n")))) | |
23f87bed MB |
906 | (if (string-equal "" cleaned-text) |
907 | nil | |
908 | cleaned-text))) | |
909 | ||
910 | (defun nnrss-node-just-text (node) | |
911 | (if (and node (listp node)) | |
912 | (mapconcat 'nnrss-node-just-text (cddr node) " ") | |
913 | node)) | |
914 | ||
915 | (defun nnrss-find-el (tag data &optional found-list) | |
91472578 MB |
916 | "Find the all matching elements in the data. |
917 | Careful with this on large documents!" | |
918 | (when (consp data) | |
919 | (dolist (bit data) | |
920 | (when (car-safe bit) | |
921 | (when (equal tag (car bit)) | |
922 | ;; Old xml.el may return a list of string. | |
923 | (when (and (consp (caddr bit)) | |
924 | (stringp (caaddr bit))) | |
925 | (setcar (cddr bit) (caaddr bit))) | |
926 | (setq found-list | |
927 | (append found-list | |
928 | (list bit)))) | |
929 | (if (and (consp (car-safe (caddr bit))) | |
930 | (not (stringp (caddr bit)))) | |
931 | (setq found-list | |
932 | (append found-list | |
933 | (nnrss-find-el | |
934 | tag (caddr bit)))) | |
935 | (setq found-list | |
936 | (append found-list | |
937 | (nnrss-find-el | |
938 | tag (cddr bit)))))))) | |
23f87bed MB |
939 | found-list) |
940 | ||
941 | (defun nnrss-rsslink-p (el) | |
942 | "Test if the element we are handed is an RSS autodiscovery link." | |
943 | (and (eq (car-safe el) 'link) | |
944 | (string-equal (cdr (assoc 'rel (cadr el))) "alternate") | |
91472578 | 945 | (or (string-equal (cdr (assoc 'type (cadr el))) |
23f87bed MB |
946 | "application/rss+xml") |
947 | (string-equal (cdr (assoc 'type (cadr el))) "text/xml")))) | |
948 | ||
949 | (defun nnrss-get-rsslinks (data) | |
950 | "Extract the <link> elements that are links to RSS from the parsed data." | |
91472578 | 951 | (delq nil (mapcar |
23f87bed MB |
952 | (lambda (el) |
953 | (if (nnrss-rsslink-p el) el)) | |
954 | (nnrss-find-el 'link data)))) | |
955 | ||
956 | (defun nnrss-extract-hrefs (data) | |
91472578 MB |
957 | "Recursively extract hrefs from a page's source. |
958 | DATA should be the output of `xml-parse-region' or | |
959 | `w3-parse-buffer'." | |
23f87bed MB |
960 | (mapcar (lambda (ahref) |
961 | (cdr (assoc 'href (cadr ahref)))) | |
962 | (nnrss-find-el 'a data))) | |
963 | ||
91472578 | 964 | (defmacro nnrss-match-macro (base-uri item onsite-list offsite-list) |
23f87bed | 965 | `(cond ((or (string-match (concat "^" ,base-uri) ,item) |
91472578 MB |
966 | (not (string-match "://" ,item))) |
967 | (setq ,onsite-list (append ,onsite-list (list ,item)))) | |
968 | (t (setq ,offsite-list (append ,offsite-list (list ,item)))))) | |
23f87bed MB |
969 | |
970 | (defun nnrss-order-hrefs (base-uri hrefs) | |
971 | "Given a list of hrefs, sort them using the following priorities: | |
972 | 1. links ending in .rss | |
973 | 2. links ending in .rdf | |
974 | 3. links ending in .xml | |
975 | 4. links containing the above | |
976 | 5. offsite links | |
977 | ||
978 | BASE-URI is used to determine the location of the links and | |
979 | whether they are `offsite' or `onsite'." | |
980 | (let (rss-onsite-end rdf-onsite-end xml-onsite-end | |
981 | rss-onsite-in rdf-onsite-in xml-onsite-in | |
982 | rss-offsite-end rdf-offsite-end xml-offsite-end | |
01c52d31 | 983 | rss-offsite-in rdf-offsite-in xml-offsite-in) |
91472578 MB |
984 | (dolist (href hrefs) |
985 | (cond ((null href)) | |
986 | ((string-match "\\.rss$" href) | |
987 | (nnrss-match-macro | |
988 | base-uri href rss-onsite-end rss-offsite-end)) | |
989 | ((string-match "\\.rdf$" href) | |
990 | (nnrss-match-macro | |
991 | base-uri href rdf-onsite-end rdf-offsite-end)) | |
992 | ((string-match "\\.xml$" href) | |
993 | (nnrss-match-macro | |
994 | base-uri href xml-onsite-end xml-offsite-end)) | |
995 | ((string-match "rss" href) | |
996 | (nnrss-match-macro | |
997 | base-uri href rss-onsite-in rss-offsite-in)) | |
998 | ((string-match "rdf" href) | |
999 | (nnrss-match-macro | |
1000 | base-uri href rdf-onsite-in rdf-offsite-in)) | |
1001 | ((string-match "xml" href) | |
1002 | (nnrss-match-macro | |
1003 | base-uri href xml-onsite-in xml-offsite-in)))) | |
1004 | (append | |
23f87bed MB |
1005 | rss-onsite-end rdf-onsite-end xml-onsite-end |
1006 | rss-onsite-in rdf-onsite-in xml-onsite-in | |
1007 | rss-offsite-end rdf-offsite-end xml-offsite-end | |
1008 | rss-offsite-in rdf-offsite-in xml-offsite-in))) | |
1009 | ||
1010 | (defun nnrss-discover-feed (url) | |
1011 | "Given a page, find an RSS feed using Mark Pilgrim's | |
1012 | `ultra-liberal rss locator' (http://diveintomark.org/2002/08/15.html)." | |
1013 | ||
1014 | (let ((parsed-page (nnrss-fetch url))) | |
1015 | ||
1016 | ;; 1. if this url is the rss, use it. | |
1017 | (if (nnrss-rss-p parsed-page) | |
1018 | (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/"))) | |
1019 | (nnrss-rss-title-description rss-ns parsed-page url)) | |
1020 | ||
1021 | ;; 2. look for the <link rel="alternate" | |
1022 | ;; type="application/rss+xml" and use that if it is there. | |
1023 | (let ((links (nnrss-get-rsslinks parsed-page))) | |
1024 | (if links | |
1025 | (let* ((xml (nnrss-fetch | |
1026 | (cdr (assoc 'href (cadar links))))) | |
1027 | (rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/"))) | |
1028 | (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links))))) | |
1029 | ||
1030 | ;; 3. look for links on the site in the following order: | |
1031 | ;; - onsite links ending in .rss, .rdf, or .xml | |
1032 | ;; - onsite links containing any of the above | |
1033 | ;; - offsite links ending in .rss, .rdf, or .xml | |
1034 | ;; - offsite links containing any of the above | |
1035 | (let* ((base-uri (progn (string-match ".*://[^/]+/?" url) | |
1036 | (match-string 0 url))) | |
91472578 | 1037 | (hrefs (nnrss-order-hrefs |
23f87bed MB |
1038 | base-uri (nnrss-extract-hrefs parsed-page))) |
1039 | (rss-link nil)) | |
91472578 MB |
1040 | (while (and (eq rss-link nil) (not (eq hrefs nil))) |
1041 | (let ((href-data (nnrss-fetch (car hrefs)))) | |
1042 | (if (nnrss-rss-p href-data) | |
1043 | (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/"))) | |
1044 | (setq rss-link (nnrss-rss-title-description | |
1045 | rss-ns href-data (car hrefs)))) | |
1046 | (setq hrefs (cdr hrefs))))) | |
1047 | (if rss-link rss-link | |
23f87bed MB |
1048 | |
1049 | ;; 4. check syndic8 | |
91472578 | 1050 | (nnrss-find-rss-via-syndic8 url)))))))) |
23f87bed MB |
1051 | |
1052 | (defun nnrss-find-rss-via-syndic8 (url) | |
91472578 | 1053 | "Query syndic8 for the rss feeds it has for URL." |
23f87bed MB |
1054 | (if (not (locate-library "xml-rpc")) |
1055 | (progn | |
1056 | (message "XML-RPC is not available... not checking Syndic8.") | |
1057 | nil) | |
1058 | (require 'xml-rpc) | |
1059 | (let ((feedid (xml-rpc-method-call | |
1060 | "http://www.syndic8.com/xmlrpc.php" | |
1061 | 'syndic8.FindSites | |
1062 | url))) | |
1063 | (when feedid | |
91472578 | 1064 | (let* ((feedinfo (xml-rpc-method-call |
23f87bed MB |
1065 | "http://www.syndic8.com/xmlrpc.php" |
1066 | 'syndic8.GetFeedInfo | |
1067 | feedid)) | |
1068 | (urllist | |
91472578 | 1069 | (delq nil |
23f87bed MB |
1070 | (mapcar |
1071 | (lambda (listinfo) | |
91472578 | 1072 | (if (string-equal |
23f87bed MB |
1073 | (cdr (assoc "status" listinfo)) |
1074 | "Syndicated") | |
1075 | (cons | |
1076 | (cdr (assoc "sitename" listinfo)) | |
1077 | (list | |
1078 | (cons 'title | |
91472578 | 1079 | (cdr (assoc |
23f87bed MB |
1080 | "sitename" listinfo))) |
1081 | (cons 'href | |
1082 | (cdr (assoc | |
1083 | "dataurl" listinfo))))))) | |
1084 | feedinfo)))) | |
1085 | (if (not (> (length urllist) 1)) | |
1086 | (cdar urllist) | |
1087 | (let ((completion-ignore-case t) | |
91472578 | 1088 | (selection |
23f87bed | 1089 | (mapcar (lambda (listinfo) |
91472578 | 1090 | (cons (cdr (assoc "sitename" listinfo)) |
e9bd5782 | 1091 | (string-to-number |
23f87bed MB |
1092 | (cdr (assoc "feedid" listinfo))))) |
1093 | feedinfo))) | |
91472578 | 1094 | (cdr (assoc |
23f87bed MB |
1095 | (completing-read |
1096 | "Multiple feeds found. Select one: " | |
1097 | selection nil t) urllist))))))))) | |
1098 | ||
1099 | (defun nnrss-rss-p (data) | |
91472578 MB |
1100 | "Test if DATA is an RSS feed. |
1101 | Simply ensures that the first element is rss or rdf." | |
23f87bed MB |
1102 | (or (eq (caar data) 'rss) |
1103 | (eq (caar data) 'rdf:RDF))) | |
1104 | ||
1105 | (defun nnrss-rss-title-description (rss-namespace data url) | |
1106 | "Return the title of an RSS feed." | |
1107 | (if (nnrss-rss-p data) | |
1108 | (let ((description (intern (concat rss-namespace "description"))) | |
1109 | (title (intern (concat rss-namespace "title"))) | |
1110 | (channel (nnrss-find-el (intern (concat rss-namespace "channel")) | |
1111 | data))) | |
1112 | (list | |
1113 | (cons 'description (caddr (nth 0 (nnrss-find-el description channel)))) | |
1114 | (cons 'title (caddr (nth 0 (nnrss-find-el title channel)))) | |
1115 | (cons 'href url))))) | |
1116 | ||
1117 | (defun nnrss-get-namespace-prefix (el uri) | |
1118 | "Given EL (containing a parsed element) and URI (containing a string | |
1119 | that gives the URI for which you want to retrieve the namespace | |
1120 | prefix), return the prefix." | |
1121 | (let* ((prefix (car (rassoc uri (cadar el)))) | |
91472578 | 1122 | (nslist (if prefix |
23f87bed MB |
1123 | (split-string (symbol-name prefix) ":"))) |
1124 | (ns (cond ((eq (length nslist) 1) ; no prefix given | |
1125 | "") | |
1126 | ((eq (length nslist) 2) ; extract prefix | |
1127 | (cadr nslist))))) | |
91472578 | 1128 | (if (and ns (not (string= ns ""))) |
23f87bed MB |
1129 | (concat ns ":") |
1130 | ns))) | |
1131 | ||
1132 | (provide 'nnrss) | |
1133 | ||
1134 | ||
1135 | ;;; nnrss.el ends here | |
1136 | ||
cbee283d | 1137 | ;; arch-tag: 12910c07-0cdf-44fb-8d2c-416ded64c267 |