Commit | Line | Data |
---|---|---|
23f87bed | 1 | ;;; nnrss.el --- interfacing with RSS |
e84b4b86 | 2 | |
8abf1b22 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, |
114f9c96 | 4 | ;; 2008, 2009, 2010 Free Software Foundation, Inc. |
23f87bed MB |
5 | |
6 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
7 | ;; Keywords: RSS | |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
5e809f55 GM |
11 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
12 | ;; it under the terms of the GNU General Public License as published by | |
13 | ;; the Free Software Foundation, either version 3 of the License, or | |
14 | ;; (at your option) any later version. | |
23f87bed | 15 | |
5e809f55 GM |
16 | ;; GNU Emacs is distributed in the hope that it will be useful, |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
23f87bed MB |
20 | |
21 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 22 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
23f87bed MB |
23 | |
24 | ;;; Commentary: | |
25 | ||
26 | ;;; Code: | |
27 | ||
0f7cbeb9 GM |
28 | ;; For Emacs < 22.2. |
29 | (eval-and-compile | |
30 | (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) | |
31 | ||
23f87bed MB |
32 | (eval-when-compile (require 'cl)) |
33 | ||
34 | (require 'gnus) | |
35 | (require 'nnoo) | |
36 | (require 'nnmail) | |
37 | (require 'message) | |
38 | (require 'mm-util) | |
39 | (require 'gnus-util) | |
40 | (require 'time-date) | |
41 | (require 'rfc2231) | |
42 | (require 'mm-url) | |
91472578 MB |
43 | (require 'rfc2047) |
44 | (require 'mml) | |
23f87bed MB |
45 | (eval-when-compile |
46 | (ignore-errors | |
91472578 | 47 | (require 'xml))) |
23f87bed MB |
48 | (eval '(require 'xml)) |
49 | ||
50 | (nnoo-declare nnrss) | |
51 | ||
52 | (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/") | |
53 | "Where nnrss will save its files.") | |
54 | ||
01c52d31 MB |
55 | (defvoo nnrss-ignore-article-fields '(slash:comments) |
56 | "*List of fields that should be ignored when comparing RSS articles. | |
57 | Some RSS feeds update article fields during their lives, e.g. to | |
58 | indicate the number of comments or the number of times the | |
59 | articles have been seen. However, if there is a difference | |
60 | between the local article and the distant one, the latter is | |
61 | considered to be new. To avoid this and discard some fields, set | |
62 | this variable to the list of fields to be ignored.") | |
63 | ||
23f87bed MB |
64 | ;; (group max rss-url) |
65 | (defvoo nnrss-server-data nil) | |
66 | ||
67 | ;; (num timestamp url subject author date extra) | |
68 | (defvoo nnrss-group-data nil) | |
69 | (defvoo nnrss-group-max 0) | |
70 | (defvoo nnrss-group-min 1) | |
71 | (defvoo nnrss-group nil) | |
01c52d31 | 72 | (defvoo nnrss-group-hashtb (make-hash-table :test 'equal)) |
23f87bed MB |
73 | (defvoo nnrss-status-string "") |
74 | ||
75 | (defconst nnrss-version "nnrss 1.0") | |
76 | ||
77 | (defvar nnrss-group-alist '() | |
78 | "List of RSS addresses.") | |
79 | ||
80 | (defvar nnrss-use-local nil) | |
81 | ||
82 | (defvar nnrss-description-field 'X-Gnus-Description | |
83 | "Field name used for DESCRIPTION. | |
84 | To use the description in headers, put this name into `nnmail-extra-headers'.") | |
85 | ||
86 | (defvar nnrss-url-field 'X-Gnus-Url | |
87 | "Field name used for URL. | |
88 | To use the description in headers, put this name into `nnmail-extra-headers'.") | |
89 | ||
90 | (defvar nnrss-content-function nil | |
91 | "A function which is called in `nnrss-request-article'. | |
92 | The arguments are (ENTRY GROUP ARTICLE). | |
91472578 | 93 | ENTRY is the record of the current headline. GROUP is the group name. |
23f87bed MB |
94 | ARTICLE is the article number of the current headline.") |
95 | ||
91472578 | 96 | (defvar nnrss-file-coding-system mm-universal-coding-system |
01c52d31 MB |
97 | "*Coding system used when reading and writing files. |
98 | If you run Gnus with various versions of Emacsen, the value of this | |
99 | variable should be the coding system that all those Emacsen support. | |
100 | Note that you have to regenerate all the nnrss groups if you change | |
101 | the value. Moreover, you should be patient even if you are made to | |
102 | read the same articles twice, that arises for the difference of the | |
103 | versions of xml.el.") | |
91472578 | 104 | |
82fe1aed MB |
105 | (defvar nnrss-compatible-encoding-alist |
106 | (delq nil (mapcar (lambda (elem) | |
107 | (if (and (mm-coding-system-p (car elem)) | |
108 | (mm-coding-system-p (cdr elem))) | |
109 | elem)) | |
110 | mm-charset-override-alist)) | |
91472578 MB |
111 | "Alist of encodings and those supersets. |
112 | The cdr of each element is used to decode data if it is available when | |
7dafe00b | 113 | the car is what the data specify as the encoding. Or, the car is used |
91472578 MB |
114 | for decoding when the cdr that the data specify is not available.") |
115 | ||
7dafe00b MB |
116 | (defvar nnrss-wash-html-in-text-plain-parts nil |
117 | "*Non-nil means render text in text/plain parts as HTML. | |
118 | The function specified by the `mm-text-html-renderer' variable will be | |
119 | used to render text. If it is nil, text will simply be folded.") | |
120 | ||
23f87bed MB |
121 | (nnoo-define-basics nnrss) |
122 | ||
123 | ;;; Interface functions | |
124 | ||
91472578 MB |
125 | (defsubst nnrss-format-string (string) |
126 | (gnus-replace-in-string string " *\n *" " ")) | |
127 | ||
128 | (defun nnrss-decode-group-name (group) | |
129 | (if (and group (mm-coding-system-p 'utf-8)) | |
130 | (setq group (mm-decode-coding-string group 'utf-8)) | |
131 | group)) | |
ad136a7c | 132 | |
23f87bed | 133 | (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old) |
91472578 | 134 | (setq group (nnrss-decode-group-name group)) |
23f87bed MB |
135 | (nnrss-possibly-change-group group server) |
136 | (let (e) | |
137 | (save-excursion | |
138 | (set-buffer nntp-server-buffer) | |
139 | (erase-buffer) | |
140 | (dolist (article articles) | |
141 | (if (setq e (assq article nnrss-group-data)) | |
142 | (insert (number-to-string (car e)) "\t" ;; number | |
91472578 MB |
143 | ;; subject |
144 | (or (nth 3 e) "") | |
145 | "\t" | |
146 | ;; from | |
147 | (or (nth 4 e) "(nobody)") | |
148 | "\t" | |
149 | ;; date | |
23f87bed | 150 | (or (nth 5 e) "") |
91472578 MB |
151 | "\t" |
152 | ;; id | |
23f87bed | 153 | (format "<%d@%s.nnrss>" (car e) group) |
91472578 MB |
154 | "\t" |
155 | ;; refs | |
156 | "\t" | |
157 | ;; chars | |
158 | "-1" "\t" | |
159 | ;; lines | |
160 | "-1" "\t" | |
161 | ;; Xref | |
162 | "" "\t" | |
23f87bed MB |
163 | (if (and (nth 6 e) |
164 | (memq nnrss-description-field | |
165 | nnmail-extra-headers)) | |
166 | (concat (symbol-name nnrss-description-field) | |
167 | ": " | |
168 | (nnrss-format-string (nth 6 e)) | |
169 | "\t") | |
170 | "") | |
171 | (if (and (nth 2 e) | |
172 | (memq nnrss-url-field | |
173 | nnmail-extra-headers)) | |
174 | (concat (symbol-name nnrss-url-field) | |
175 | ": " | |
176 | (nnrss-format-string (nth 2 e)) | |
177 | "\t") | |
178 | "") | |
179 | "\n"))))) | |
180 | 'nov) | |
181 | ||
182 | (deffoo nnrss-request-group (group &optional server dont-check) | |
91472578 MB |
183 | (setq group (nnrss-decode-group-name group)) |
184 | (nnheader-message 6 "nnrss: Requesting %s..." group) | |
23f87bed | 185 | (nnrss-possibly-change-group group server) |
91472578 MB |
186 | (prog1 |
187 | (if dont-check | |
188 | t | |
189 | (nnrss-check-group group server) | |
190 | (nnheader-report 'nnrss "Opened group %s" group) | |
191 | (nnheader-insert | |
192 | "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max | |
193 | (prin1-to-string group) | |
194 | t)) | |
195 | (nnheader-message 6 "nnrss: Requesting %s...done" group))) | |
23f87bed MB |
196 | |
197 | (deffoo nnrss-close-group (group &optional server) | |
198 | t) | |
199 | ||
9efa445f DN |
200 | (defvar mm-text-html-renderer) |
201 | (defvar mm-text-html-washer-alist) | |
7dafe00b | 202 | |
23f87bed | 203 | (deffoo nnrss-request-article (article &optional group server buffer) |
91472578 MB |
204 | (setq group (nnrss-decode-group-name group)) |
205 | (when (stringp article) | |
206 | (setq article (if (string-match "\\`<\\([0-9]+\\)@" article) | |
207 | (string-to-number (match-string 1 article)) | |
208 | 0))) | |
23f87bed MB |
209 | (nnrss-possibly-change-group group server) |
210 | (let ((e (assq article nnrss-group-data)) | |
23f87bed MB |
211 | (nntp-server-buffer (or buffer nntp-server-buffer)) |
212 | post err) | |
213 | (when e | |
91472578 MB |
214 | (with-current-buffer nntp-server-buffer |
215 | (erase-buffer) | |
216 | (if group | |
217 | (insert "Newsgroups: " group "\n")) | |
218 | (if (nth 3 e) | |
219 | (insert "Subject: " (nth 3 e) "\n")) | |
220 | (if (nth 4 e) | |
221 | (insert "From: " (nth 4 e) "\n")) | |
222 | (if (nth 5 e) | |
223 | (insert "Date: " (nnrss-format-string (nth 5 e)) "\n")) | |
224 | (let ((header (buffer-string)) | |
7dafe00b | 225 | (text (nth 6 e)) |
91472578 | 226 | (link (nth 2 e)) |
31640842 | 227 | (enclosure (nth 7 e)) |
d3a597b7 | 228 | (comments (nth 8 e)) |
91472578 MB |
229 | (rfc2047-header-encoding-alist |
230 | (if (mm-coding-system-p 'utf-8) | |
231 | (cons '("Newsgroups" . utf-8) | |
232 | rfc2047-header-encoding-alist) | |
233 | rfc2047-header-encoding-alist)) | |
7dafe00b | 234 | rfc2047-encode-encoded-words body fn) |
d3a597b7 | 235 | (when (or text link enclosure comments) |
91472578 MB |
236 | (insert "\n") |
237 | (insert "<#multipart type=alternative>\n" | |
238 | "<#part type=\"text/plain\">\n") | |
239 | (setq body (point)) | |
31640842 | 240 | (when text |
7dafe00b MB |
241 | (insert text) |
242 | (goto-char body) | |
243 | (if (and nnrss-wash-html-in-text-plain-parts | |
244 | (progn | |
245 | (require 'mm-view) | |
246 | (setq fn (or (cdr (assq mm-text-html-renderer | |
247 | mm-text-html-washer-alist)) | |
248 | mm-text-html-renderer)))) | |
249 | (progn | |
250 | (narrow-to-region body (point-max)) | |
251 | (if (functionp fn) | |
252 | (funcall fn) | |
253 | (apply (car fn) (cdr fn))) | |
254 | (widen) | |
255 | (goto-char body) | |
256 | (re-search-forward "[^\t\n ]" nil t) | |
257 | (beginning-of-line) | |
258 | (delete-region body (point)) | |
259 | (goto-char (point-max)) | |
260 | (skip-chars-backward "\t\n ") | |
261 | (end-of-line) | |
262 | (delete-region (point) (point-max)) | |
263 | (insert "\n")) | |
264 | (while (re-search-forward "\n+" nil t) | |
265 | (replace-match " ")) | |
266 | (goto-char body) | |
267 | ;; See `nnrss-check-group', which inserts "<br /><br />". | |
268 | (when (search-forward "<br /><br />" nil t) | |
269 | (if (eobp) | |
270 | (replace-match "\n") | |
271 | (replace-match "\n\n"))) | |
272 | (unless (eobp) | |
d3192c8f | 273 | (let ((fill-column (default-value 'fill-column)) |
7dafe00b MB |
274 | (window (get-buffer-window nntp-server-buffer))) |
275 | (when window | |
276 | (setq fill-column | |
277 | (max 1 (/ (* (window-width window) 7) 8)))) | |
278 | (fill-region (point) (point-max)) | |
279 | (goto-char (point-max)) | |
280 | ;; XEmacs version of `fill-region' inserts newline. | |
281 | (unless (bolp) | |
282 | (insert "\n"))))) | |
31640842 MB |
283 | (when (or link enclosure) |
284 | (insert "\n"))) | |
285 | (when link | |
286 | (insert link "\n")) | |
287 | (when enclosure | |
288 | (insert (car enclosure) " " | |
289 | (nth 2 enclosure) " " | |
290 | (nth 3 enclosure) "\n")) | |
d3a597b7 MB |
291 | (when comments |
292 | (insert comments "\n")) | |
91472578 MB |
293 | (setq body (buffer-substring body (point))) |
294 | (insert "<#/part>\n" | |
295 | "<#part type=\"text/html\">\n" | |
296 | "<html><head></head><body>\n") | |
297 | (when text | |
298 | (insert text "\n")) | |
299 | (when link | |
300 | (insert "<p><a href=\"" link "\">link</a></p>\n")) | |
31640842 MB |
301 | (when enclosure |
302 | (insert "<p><a href=\"" (car enclosure) "\">" | |
303 | (cadr enclosure) "</a> " (nth 2 enclosure) | |
304 | " " (nth 3 enclosure) "</p>\n")) | |
d3a597b7 MB |
305 | (when comments |
306 | (insert "<p><a href=\"" comments "\">comments</a></p>\n")) | |
91472578 MB |
307 | (insert "</body></html>\n" |
308 | "<#/part>\n" | |
309 | "<#/multipart>\n")) | |
310 | (condition-case nil | |
554a69b8 KY |
311 | ;; Allow `mml-to-mime' to generate MIME article without |
312 | ;; making inquiry to a user for unknown encoding. | |
313 | (let ((mml-confirmation-set | |
314 | (cons 'unknown-encoding mml-confirmation-set))) | |
315 | (mml-to-mime)) | |
91472578 MB |
316 | (error |
317 | (erase-buffer) | |
318 | (insert header | |
319 | "Content-Type: text/plain; charset=gnus-decoded\n" | |
320 | "Content-Transfer-Encoding: 8bit\n\n" | |
321 | body) | |
322 | (nnheader-message | |
323 | 3 "Warning - there might be invalid characters")))) | |
324 | (goto-char (point-min)) | |
325 | (search-forward "\n\n") | |
326 | (forward-line -1) | |
327 | (insert (format "Message-ID: <%d@%s.nnrss>\n" | |
328 | (car e) | |
329 | (let ((rfc2047-encoding-type 'mime) | |
330 | rfc2047-encode-max-chars) | |
331 | (rfc2047-encode-string | |
332 | (gnus-replace-in-string group "[\t\n ]+" "_"))))) | |
333 | (when nnrss-content-function | |
334 | (funcall nnrss-content-function e group article)))) | |
23f87bed MB |
335 | (cond |
336 | (err | |
337 | (nnheader-report 'nnrss err)) | |
338 | ((not e) | |
339 | (nnheader-report 'nnrss "no such id: %d" article)) | |
340 | (t | |
341 | (nnheader-report 'nnrss "article %s retrieved" (car e)) | |
342 | ;; we return the article number. | |
343 | (cons nnrss-group (car e)))))) | |
344 | ||
345 | (deffoo nnrss-request-list (&optional server) | |
346 | (nnrss-possibly-change-group nil server) | |
347 | (nnrss-generate-active) | |
348 | t) | |
349 | ||
350 | (deffoo nnrss-open-server (server &optional defs connectionless) | |
351 | (nnrss-read-server-data server) | |
352 | (nnoo-change-server 'nnrss server defs) | |
353 | t) | |
354 | ||
355 | (deffoo nnrss-request-expire-articles | |
356 | (articles group &optional server force) | |
91472578 | 357 | (setq group (nnrss-decode-group-name group)) |
23f87bed MB |
358 | (nnrss-possibly-change-group group server) |
359 | (let (e days not-expirable changed) | |
360 | (dolist (art articles) | |
361 | (if (and (setq e (assq art nnrss-group-data)) | |
362 | (nnmail-expired-article-p | |
363 | group | |
364 | (if (listp (setq days (nth 1 e))) days | |
365 | (days-to-time (- days (time-to-days '(0 0))))) | |
366 | force)) | |
367 | (setq nnrss-group-data (delq e nnrss-group-data) | |
368 | changed t) | |
369 | (push art not-expirable))) | |
370 | (if changed | |
371 | (nnrss-save-group-data group server)) | |
372 | not-expirable)) | |
373 | ||
374 | (deffoo nnrss-request-delete-group (group &optional force server) | |
91472578 | 375 | (setq group (nnrss-decode-group-name group)) |
23f87bed | 376 | (nnrss-possibly-change-group group server) |
91472578 MB |
377 | (let (elem) |
378 | ;; There may be two or more entries in `nnrss-group-alist' since | |
379 | ;; this function didn't delete them formerly. | |
380 | (while (setq elem (assoc group nnrss-group-alist)) | |
381 | (setq nnrss-group-alist (delq elem nnrss-group-alist)))) | |
23f87bed MB |
382 | (setq nnrss-server-data |
383 | (delq (assoc group nnrss-server-data) nnrss-server-data)) | |
384 | (nnrss-save-server-data server) | |
91472578 | 385 | (ignore-errors |
01c52d31 MB |
386 | (let ((file-name-coding-system nnmail-pathname-coding-system)) |
387 | (delete-file (nnrss-make-filename group server)))) | |
23f87bed MB |
388 | t) |
389 | ||
390 | (deffoo nnrss-request-list-newsgroups (&optional server) | |
391 | (nnrss-possibly-change-group nil server) | |
392 | (save-excursion | |
393 | (set-buffer nntp-server-buffer) | |
394 | (erase-buffer) | |
395 | (dolist (elem nnrss-group-alist) | |
396 | (if (third elem) | |
397 | (insert (car elem) "\t" (third elem) "\n")))) | |
398 | t) | |
399 | ||
400 | (nnoo-define-skeleton nnrss) | |
401 | ||
402 | ;;; Internal functions | |
403 | (eval-when-compile (defun xml-rpc-method-call (&rest args))) | |
91472578 MB |
404 | |
405 | (defun nnrss-get-encoding () | |
406 | "Return an encoding attribute specified in the current xml contents. | |
407 | If `nnrss-compatible-encoding-alist' specifies the compatible encoding, | |
408 | it is used instead. If the xml contents doesn't specify the encoding, | |
409 | return `utf-8' which is the default encoding for xml if it is available, | |
410 | otherwise return nil." | |
411 | (goto-char (point-min)) | |
412 | (if (re-search-forward | |
01c52d31 | 413 | "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)" |
91472578 | 414 | nil t) |
01c52d31 MB |
415 | (let ((encoding (intern (downcase (or (match-string 1) |
416 | (match-string 2)))))) | |
91472578 MB |
417 | (or |
418 | (mm-coding-system-p (cdr (assq encoding | |
419 | nnrss-compatible-encoding-alist))) | |
420 | (mm-coding-system-p encoding) | |
421 | (mm-coding-system-p (car (rassq encoding | |
422 | nnrss-compatible-encoding-alist))))) | |
423 | (mm-coding-system-p 'utf-8))) | |
424 | ||
0f7cbeb9 GM |
425 | (declare-function w3-parse-buffer "ext:w3-parse" (&optional buff)) |
426 | ||
23f87bed | 427 | (defun nnrss-fetch (url &optional local) |
91472578 MB |
428 | "Fetch URL and put it in a the expected Lisp structure." |
429 | (mm-with-unibyte-buffer | |
bff3818b | 430 | ;;some versions of url.el need this to close the connection quickly |
91472578 | 431 | (let (cs xmlform htmlform) |
23f87bed MB |
432 | ;; bit o' work necessary for w3 pre-cvs and post-cvs |
433 | (if local | |
434 | (let ((coding-system-for-read 'binary)) | |
435 | (insert-file-contents url)) | |
91472578 MB |
436 | ;; FIXME: shouldn't binding `coding-system-for-read' be moved |
437 | ;; to `mm-url-insert'? | |
438 | (let ((coding-system-for-read 'binary)) | |
7dafe00b MB |
439 | (condition-case err |
440 | (mm-url-insert url) | |
441 | (error (if (or debug-on-quit debug-on-error) | |
442 | (signal (car err) (cdr err)) | |
443 | (message "nnrss: Failed to fetch %s" url)))))) | |
91472578 MB |
444 | (nnheader-remove-cr-followed-by-lf) |
445 | ;; Decode text according to the encoding attribute. | |
446 | (when (setq cs (nnrss-get-encoding)) | |
11e95b02 MB |
447 | (insert (prog1 |
448 | (mm-decode-coding-string (buffer-string) cs) | |
449 | (erase-buffer) | |
450 | (mm-enable-multibyte)))) | |
91472578 MB |
451 | (goto-char (point-min)) |
452 | ||
453 | ;; Because xml-parse-region can't deal with anything that isn't | |
454 | ;; xml and w3-parse-buffer can't deal with some xml, we have to | |
455 | ;; parse with xml-parse-region first and, if that fails, parse | |
456 | ;; with w3-parse-buffer. Yuck. Eventually, someone should find out | |
457 | ;; why w3-parse-buffer fails to parse some well-formed xml and | |
458 | ;; fix it. | |
459 | ||
460 | (condition-case err1 | |
461 | (setq xmlform (xml-parse-region (point-min) (point-max))) | |
462 | (error | |
463 | (condition-case err2 | |
464 | (setq htmlform (caddar (w3-parse-buffer | |
465 | (current-buffer)))) | |
466 | (error | |
467 | (message "\ | |
468 | nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" | |
469 | url err1 err2))))) | |
470 | (if htmlform | |
471 | htmlform | |
472 | xmlform)))) | |
23f87bed MB |
473 | |
474 | (defun nnrss-possibly-change-group (&optional group server) | |
475 | (when (and server | |
476 | (not (nnrss-server-opened server))) | |
477 | (nnrss-open-server server)) | |
478 | (when (and group (not (equal group nnrss-group))) | |
479 | (nnrss-read-group-data group server) | |
480 | (setq nnrss-group group))) | |
481 | ||
482 | (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories)) | |
483 | ||
484 | (defun nnrss-generate-active () | |
91472578 | 485 | (when (y-or-n-p "Fetch extra categories? ") |
01c52d31 | 486 | (mapc 'funcall nnrss-extra-categories)) |
23f87bed MB |
487 | (save-excursion |
488 | (set-buffer nntp-server-buffer) | |
489 | (erase-buffer) | |
490 | (dolist (elem nnrss-group-alist) | |
491 | (insert (prin1-to-string (car elem)) " 0 1 y\n")) | |
492 | (dolist (elem nnrss-server-data) | |
493 | (unless (assoc (car elem) nnrss-group-alist) | |
494 | (insert (prin1-to-string (car elem)) " 0 1 y\n"))))) | |
495 | ||
8abf1b22 | 496 | (autoload 'timezone-parse-date "timezone") |
7dafe00b MB |
497 | |
498 | (defun nnrss-normalize-date (date) | |
499 | "Return a date string of DATE in the RFC822 style. | |
500 | This function handles the ISO 8601 date format described in | |
0b10e437 | 501 | URL `http://www.w3.org/TR/NOTE-datetime', and also the RFC822 style |
7dafe00b | 502 | which RSS 2.0 allows." |
4079589f MB |
503 | (let (case-fold-search vector year month day time zone cts given) |
504 | (cond ((null date)) ; do nothing for this case | |
505 | ;; if the date is just digits (unix time stamp): | |
506 | ((string-match "^[0-9]+$" date) | |
507 | (setq given (seconds-to-time (string-to-number date)))) | |
7dafe00b MB |
508 | ;; RFC822 |
509 | ((string-match " [0-9]+ " date) | |
510 | (setq vector (timezone-parse-date date) | |
511 | year (string-to-number (aref vector 0))) | |
512 | (when (>= year 1969) | |
513 | (setq month (string-to-number (aref vector 1)) | |
514 | day (string-to-number (aref vector 2))) | |
515 | (unless (>= (length (setq time (aref vector 3))) 3) | |
516 | (setq time "00:00:00")) | |
517 | (when (and (setq zone (aref vector 4)) | |
518 | (not (string-match "\\`[A-Z+-]" zone))) | |
519 | (setq zone nil)))) | |
520 | ;; ISO 8601 | |
521 | ((string-match | |
522 | (eval-when-compile | |
523 | (concat | |
524 | ;; 1. year | |
525 | "\\(199[0-9]\\|20[0-9][0-9]\\)" | |
01c52d31 MB |
526 | "\\(?:-" |
527 | ;; 2. month | |
7dafe00b | 528 | "\\([01][0-9]\\)" |
01c52d31 MB |
529 | "\\(?:-" |
530 | ;; 3. day | |
7dafe00b | 531 | "\\([0-3][0-9]\\)" |
01c52d31 MB |
532 | "\\)?\\)?\\(?:T" |
533 | ;; 4. hh:mm | |
7dafe00b | 534 | "\\([012][0-9]:[0-5][0-9]\\)" |
01c52d31 MB |
535 | "\\(?:" |
536 | ;; 5. :ss | |
7dafe00b | 537 | "\\(:[0-5][0-9]\\)" |
01c52d31 MB |
538 | "\\(?:\\.[0-9]+\\)?\\)?\\)?" |
539 | ;; 6+7,8,9. zone | |
540 | "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)" | |
7dafe00b MB |
541 | "\\|\\([+-][012][0-9][0-5][0-9]\\)" |
542 | "\\|\\(Z\\)\\)?")) | |
543 | date) | |
544 | (setq year (string-to-number (match-string 1 date)) | |
01c52d31 MB |
545 | month (string-to-number (or (match-string 2 date) "1")) |
546 | day (string-to-number (or (match-string 3 date) "1")) | |
547 | time (if (match-beginning 5) | |
548 | (substring date (match-beginning 4) (match-end 5)) | |
549 | (concat (or (match-string 4 date) "00:00") ":00")) | |
550 | zone (cond ((match-beginning 6) | |
551 | (concat (match-string 6 date) | |
552 | (match-string 7 date))) | |
553 | ((match-beginning 9) ;; Z | |
7dafe00b MB |
554 | "+0000") |
555 | (t ;; nil if zone is not provided. | |
01c52d31 | 556 | (match-string 8 date)))))) |
7dafe00b MB |
557 | (if month |
558 | (progn | |
559 | (setq cts (current-time-string (encode-time 0 0 0 day month year))) | |
560 | (format "%s, %02d %s %04d %s%s" | |
561 | (substring cts 0 3) day (substring cts 4 7) year time | |
562 | (if zone | |
563 | (concat " " zone) | |
564 | ""))) | |
4079589f | 565 | (message-make-date given)))) |
7dafe00b | 566 | |
23f87bed MB |
567 | ;;; data functions |
568 | ||
569 | (defun nnrss-read-server-data (server) | |
570 | (setq nnrss-server-data nil) | |
01c52d31 MB |
571 | (let ((file (nnrss-make-filename "nnrss" server)) |
572 | (file-name-coding-system nnmail-pathname-coding-system)) | |
23f87bed | 573 | (when (file-exists-p file) |
91472578 MB |
574 | ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII |
575 | ;; file names. So, we use `insert-file-contents' instead. | |
576 | (mm-with-multibyte-buffer | |
01c52d31 | 577 | (let ((coding-system-for-read nnrss-file-coding-system)) |
23f87bed | 578 | (insert-file-contents file) |
91472578 | 579 | (eval-region (point-min) (point-max))))))) |
23f87bed MB |
580 | |
581 | (defun nnrss-save-server-data (server) | |
582 | (gnus-make-directory nnrss-directory) | |
91472578 MB |
583 | (let ((coding-system-for-write nnrss-file-coding-system) |
584 | (file-name-coding-system nnmail-pathname-coding-system)) | |
585 | (with-temp-file (nnrss-make-filename "nnrss" server) | |
586 | (insert (format ";; -*- coding: %s; -*-\n" | |
587 | nnrss-file-coding-system)) | |
588 | (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist)) | |
589 | (insert "\n") | |
590 | (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data))))) | |
23f87bed MB |
591 | |
592 | (defun nnrss-read-group-data (group server) | |
593 | (setq nnrss-group-data nil) | |
01c52d31 MB |
594 | (if (hash-table-p nnrss-group-hashtb) |
595 | (clrhash nnrss-group-hashtb) | |
596 | (setq nnrss-group-hashtb (make-hash-table :test 'equal))) | |
23f87bed MB |
597 | (let ((pair (assoc group nnrss-server-data))) |
598 | (setq nnrss-group-max (or (cadr pair) 0)) | |
599 | (setq nnrss-group-min (+ nnrss-group-max 1))) | |
01c52d31 MB |
600 | (let ((file (nnrss-make-filename group server)) |
601 | (file-name-coding-system nnmail-pathname-coding-system)) | |
23f87bed | 602 | (when (file-exists-p file) |
91472578 MB |
603 | ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII |
604 | ;; file names. So, we use `insert-file-contents' instead. | |
605 | (mm-with-multibyte-buffer | |
01c52d31 | 606 | (let ((coding-system-for-read nnrss-file-coding-system)) |
23f87bed | 607 | (insert-file-contents file) |
91472578 | 608 | (eval-region (point-min) (point-max)))) |
23f87bed | 609 | (dolist (e nnrss-group-data) |
01c52d31 | 610 | (puthash (nth 9 e) t nnrss-group-hashtb) |
91472578 MB |
611 | (when (and (car e) (> nnrss-group-min (car e))) |
612 | (setq nnrss-group-min (car e))) | |
613 | (when (and (car e) (< nnrss-group-max (car e))) | |
614 | (setq nnrss-group-max (car e))))))) | |
23f87bed MB |
615 | |
616 | (defun nnrss-save-group-data (group server) | |
617 | (gnus-make-directory nnrss-directory) | |
91472578 MB |
618 | (let ((coding-system-for-write nnrss-file-coding-system) |
619 | (file-name-coding-system nnmail-pathname-coding-system)) | |
620 | (with-temp-file (nnrss-make-filename group server) | |
621 | (insert (format ";; -*- coding: %s; -*-\n" | |
622 | nnrss-file-coding-system)) | |
623 | (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data))))) | |
624 | ||
625 | (defun nnrss-make-filename (name server) | |
626 | (expand-file-name | |
627 | (nnrss-translate-file-chars | |
628 | (concat name | |
629 | (and server | |
630 | (not (equal server "")) | |
631 | "-") | |
632 | server | |
633 | ".el")) | |
634 | nnrss-directory)) | |
635 | ||
636 | (gnus-add-shutdown 'nnrss-close 'gnus) | |
637 | ||
638 | (defun nnrss-close () | |
639 | "Clear internal nnrss variables." | |
640 | (setq nnrss-group-data nil | |
641 | nnrss-server-data nil | |
642 | nnrss-group-hashtb nil | |
643 | nnrss-group-alist nil)) | |
23f87bed MB |
644 | |
645 | ;;; URL interface | |
646 | ||
647 | (defun nnrss-no-cache (url) | |
648 | "") | |
649 | ||
650 | (defun nnrss-insert-w3 (url) | |
651 | (mm-with-unibyte-current-buffer | |
7dafe00b MB |
652 | (condition-case err |
653 | (mm-url-insert url) | |
654 | (error (if (or debug-on-quit debug-on-error) | |
655 | (signal (car err) (cdr err)) | |
656 | (message "nnrss: Failed to fetch %s" url)))))) | |
23f87bed | 657 | |
91472578 | 658 | (defun nnrss-decode-entities-string (string) |
23f87bed | 659 | (if string |
91472578 | 660 | (mm-with-multibyte-buffer |
23f87bed MB |
661 | (insert string) |
662 | (mm-url-decode-entities-nbsp) | |
663 | (buffer-string)))) | |
664 | ||
665 | (defalias 'nnrss-insert 'nnrss-insert-w3) | |
666 | ||
91472578 MB |
667 | (defun nnrss-mime-encode-string (string) |
668 | (mm-with-multibyte-buffer | |
669 | (insert string) | |
670 | (mm-url-decode-entities-nbsp) | |
671 | (goto-char (point-min)) | |
672 | (while (re-search-forward "[\t\n ]+" nil t) | |
673 | (replace-match " ")) | |
674 | (goto-char (point-min)) | |
675 | (skip-chars-forward " ") | |
676 | (delete-region (point-min) (point)) | |
677 | (goto-char (point-max)) | |
678 | (skip-chars-forward " ") | |
679 | (delete-region (point) (point-max)) | |
680 | (let ((rfc2047-encoding-type 'mime) | |
681 | rfc2047-encode-max-chars) | |
682 | (rfc2047-encode-region (point-min) (point-max))) | |
683 | (goto-char (point-min)) | |
684 | (while (search-forward "\n" nil t) | |
d355a0b7 | 685 | (delete-char -1)) |
91472578 MB |
686 | (buffer-string))) |
687 | ||
23f87bed | 688 | ;;; Snarf functions |
01c52d31 | 689 | (defun nnrss-make-hash-index (item) |
5b51650c | 690 | (gnus-message 9 "nnrss: Making hash index of %s" (gnus-prin1-to-string item)) |
01c52d31 MB |
691 | (setq item (gnus-remove-if |
692 | (lambda (field) | |
693 | (when (listp field) | |
694 | (memq (car field) nnrss-ignore-article-fields))) | |
695 | item)) | |
696 | (md5 (gnus-prin1-to-string item) | |
697 | nil nil | |
698 | nnrss-file-coding-system)) | |
699 | ||
23f87bed | 700 | (defun nnrss-check-group (group server) |
7dafe00b | 701 | (let (file xml subject url extra changed author date feed-subject |
01c52d31 MB |
702 | enclosure comments rss-ns rdf-ns content-ns dc-ns |
703 | hash-index) | |
23f87bed MB |
704 | (if (and nnrss-use-local |
705 | (file-exists-p (setq file (expand-file-name | |
706 | (nnrss-translate-file-chars | |
707 | (concat group ".xml")) | |
708 | nnrss-directory)))) | |
709 | (setq xml (nnrss-fetch file t)) | |
710 | (setq url (or (nth 2 (assoc group nnrss-server-data)) | |
711 | (second (assoc group nnrss-group-alist)))) | |
712 | (unless url | |
713 | (setq url | |
91472578 MB |
714 | (cdr |
715 | (assoc 'href | |
716 | (nnrss-discover-feed | |
717 | (read-string | |
718 | (format "URL to search for %s: " group) "http://"))))) | |
23f87bed MB |
719 | (let ((pair (assoc group nnrss-server-data))) |
720 | (if pair | |
721 | (setcdr (cdr pair) (list url)) | |
722 | (push (list group nnrss-group-max url) nnrss-server-data))) | |
723 | (setq changed t)) | |
724 | (setq xml (nnrss-fetch url))) | |
725 | ;; See | |
726 | ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html | |
727 | ;; for more RSS namespaces. | |
728 | (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/") | |
729 | rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#") | |
730 | rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/") | |
731 | content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/")) | |
732 | (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml))) | |
733 | (when (and (listp item) | |
91472578 | 734 | (string= (concat rss-ns "item") (car item)) |
01c52d31 MB |
735 | (progn (setq hash-index (nnrss-make-hash-index item)) |
736 | (not (gethash hash-index nnrss-group-hashtb)))) | |
23f87bed | 737 | (setq subject (nnrss-node-text rss-ns 'title item)) |
01c52d31 MB |
738 | (setq url (nnrss-decode-entities-string |
739 | (nnrss-node-text rss-ns 'link (cddr item)))) | |
740 | (setq extra (or (nnrss-node-text content-ns 'encoded item) | |
23f87bed | 741 | (nnrss-node-text rss-ns 'description item))) |
7dafe00b MB |
742 | (if (setq feed-subject (nnrss-node-text dc-ns 'subject item)) |
743 | (setq extra (concat feed-subject "<br /><br />" extra))) | |
23f87bed MB |
744 | (setq author (or (nnrss-node-text rss-ns 'author item) |
745 | (nnrss-node-text dc-ns 'creator item) | |
746 | (nnrss-node-text dc-ns 'contributor item))) | |
7dafe00b MB |
747 | (setq date (nnrss-normalize-date |
748 | (or (nnrss-node-text dc-ns 'date item) | |
749 | (nnrss-node-text rss-ns 'pubDate item)))) | |
d3a597b7 | 750 | (setq comments (nnrss-node-text rss-ns 'comments item)) |
31640842 MB |
751 | (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item))) |
752 | (let ((url (cdr (assq 'url enclosure))) | |
753 | (len (cdr (assq 'length enclosure))) | |
754 | (type (cdr (assq 'type enclosure))) | |
755 | (name)) | |
756 | (setq len | |
757 | (if (and len (integerp (setq len (string-to-number len)))) | |
758 | ;; actually already in `ls-lisp-format-file-size' but | |
759 | ;; probably not worth to require it for one function | |
760 | (do ((size (/ len 1.0) (/ size 1024.0)) | |
761 | (post-fixes (list "" "k" "M" "G" "T" "P" "E") | |
762 | (cdr post-fixes))) | |
763 | ((< size 1024) | |
764 | (format "%.1f%s" size (car post-fixes)))) | |
765 | "0")) | |
766 | (setq url (or url "")) | |
767 | (setq name (if (string-match "/\\([^/]*\\)$" url) | |
768 | (match-string 1 url) | |
769 | "file")) | |
770 | (setq type (or type "")) | |
771 | (setq enclosure (list url name len type)))) | |
23f87bed MB |
772 | (push |
773 | (list | |
774 | (incf nnrss-group-max) | |
775 | (current-time) | |
776 | url | |
91472578 MB |
777 | (and subject (nnrss-mime-encode-string subject)) |
778 | (and author (nnrss-mime-encode-string author)) | |
23f87bed | 779 | date |
31640842 | 780 | (and extra (nnrss-decode-entities-string extra)) |
d3a597b7 | 781 | enclosure |
01c52d31 MB |
782 | comments |
783 | hash-index) | |
23f87bed | 784 | nnrss-group-data) |
01c52d31 | 785 | (puthash hash-index t nnrss-group-hashtb) |
91472578 MB |
786 | (setq changed t)) |
787 | (setq extra nil)) | |
23f87bed MB |
788 | (when changed |
789 | (nnrss-save-group-data group server) | |
790 | (let ((pair (assoc group nnrss-server-data))) | |
791 | (if pair | |
792 | (setcar (cdr pair) nnrss-group-max) | |
793 | (push (list group nnrss-group-max) nnrss-server-data))) | |
794 | (nnrss-save-server-data server)))) | |
795 | ||
0f7cbeb9 GM |
796 | (declare-function gnus-group-make-rss-group "gnus-group" (&optional url)) |
797 | ||
91472578 MB |
798 | (defun nnrss-opml-import (opml-file) |
799 | "OPML subscriptions import. | |
800 | Read the file and attempt to subscribe to each Feed in the file." | |
801 | (interactive "fImport file: ") | |
c4288669 MB |
802 | (mapc |
803 | (lambda (node) | |
804 | (let ((xmlurl (cdr (assq 'xmlUrl (cadr node))))) | |
805 | (when (and xmlurl | |
806 | (not (string-match "\\`[\t ]*\\'" xmlurl)) | |
807 | (prog1 | |
808 | (y-or-n-p (format "Subscribe to %s " xmlurl)) | |
809 | (message ""))) | |
810 | (condition-case err | |
811 | (progn | |
812 | (gnus-group-make-rss-group xmlurl) | |
813 | (forward-line 1)) | |
814 | (error | |
815 | (message | |
816 | "Failed to subscribe to %s (%s); type any key to continue: " | |
817 | xmlurl | |
818 | (error-message-string err)) | |
819 | (let ((echo-keystrokes 0)) | |
820 | (read-char))))))) | |
91472578 | 821 | (nnrss-find-el 'outline |
c4288669 MB |
822 | (mm-with-multibyte-buffer |
823 | (insert-file-contents opml-file) | |
824 | (xml-parse-region (point-min) (point-max)))))) | |
91472578 MB |
825 | |
826 | (defun nnrss-opml-export () | |
827 | "OPML subscription export. | |
828 | Export subscriptions to a buffer in OPML Format." | |
829 | (interactive) | |
830 | (with-current-buffer (get-buffer-create "*OPML Export*") | |
831 | (mm-set-buffer-file-coding-system 'utf-8) | |
832 | (insert "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" | |
833 | "<!-- OPML generated by Emacs Gnus' nnrss.el -->\n" | |
834 | "<opml version=\"1.1\">\n" | |
835 | " <head>\n" | |
836 | " <title>mySubscriptions</title>\n" | |
837 | " <dateCreated>" (format-time-string "%a, %d %b %Y %T %z") | |
838 | "</dateCreated>\n" | |
839 | " <ownerEmail>" user-mail-address "</ownerEmail>\n" | |
840 | " <ownerName>" (user-full-name) "</ownerName>\n" | |
841 | " </head>\n" | |
842 | " <body>\n") | |
843 | (dolist (sub nnrss-group-alist) | |
844 | (insert " <outline text=\"" (car sub) | |
845 | "\" xmlUrl=\"" (cadr sub) "\"/>\n")) | |
846 | (insert " </body>\n" | |
847 | "</opml>\n")) | |
848 | (pop-to-buffer "*OPML Export*") | |
849 | (when (fboundp 'sgml-mode) | |
850 | (sgml-mode))) | |
851 | ||
23f87bed MB |
852 | (defun nnrss-generate-download-script () |
853 | "Generate a download script in the current buffer. | |
854 | It is useful when `(setq nnrss-use-local t)'." | |
855 | (interactive) | |
856 | (insert "#!/bin/sh\n") | |
857 | (insert "WGET=wget\n") | |
858 | (insert "RSSDIR='" (expand-file-name nnrss-directory) "'\n") | |
859 | (dolist (elem nnrss-server-data) | |
860 | (let ((url (or (nth 2 elem) | |
861 | (second (assoc (car elem) nnrss-group-alist))))) | |
862 | (insert "$WGET -q -O \"$RSSDIR\"/'" | |
863 | (nnrss-translate-file-chars (concat (car elem) ".xml")) | |
864 | "' '" url "'\n")))) | |
865 | ||
866 | (defun nnrss-translate-file-chars (name) | |
867 | (let ((nnheader-file-name-translation-alist | |
868 | (append nnheader-file-name-translation-alist '((?' . ?_))))) | |
869 | (nnheader-translate-file-chars name))) | |
870 | ||
871 | (defvar nnrss-moreover-url | |
872 | "http://w.moreover.com/categories/category_list_rss.html" | |
873 | "The url of moreover.com categories.") | |
874 | ||
875 | (defun nnrss-snarf-moreover-categories () | |
876 | "Snarf RSS links from moreover.com." | |
877 | (interactive) | |
878 | (let (category name url changed) | |
879 | (with-temp-buffer | |
880 | (nnrss-insert nnrss-moreover-url) | |
881 | (goto-char (point-min)) | |
882 | (while (re-search-forward | |
883 | "<a name=\"\\([^\"]+\\)\">\\|<a href=\"\\(http://[^\"]*moreover\\.com[^\"]+page\\?c=\\([^\"&]+\\)&o=rss\\)" nil t) | |
884 | (if (match-string 1) | |
885 | (setq category (match-string 1)) | |
886 | (setq url (match-string 2) | |
887 | name (mm-url-decode-entities-string | |
888 | (rfc2231-decode-encoded-string | |
889 | (match-string 3)))) | |
890 | (if category | |
891 | (setq name (concat category "." name))) | |
892 | (unless (assoc name nnrss-server-data) | |
893 | (setq changed t) | |
894 | (push (list name 0 url) nnrss-server-data))))) | |
895 | (if changed | |
896 | (nnrss-save-server-data "")))) | |
897 | ||
23f87bed MB |
898 | (defun nnrss-node-text (namespace local-name element) |
899 | (let* ((node (assq (intern (concat namespace (symbol-name local-name))) | |
900 | element)) | |
901 | (text (if (and node (listp node)) | |
902 | (nnrss-node-just-text node) | |
903 | node)) | |
ff5e68bf MB |
904 | (cleaned-text (if text |
905 | (gnus-replace-in-string | |
906 | (gnus-replace-in-string | |
907 | text "^[\000-\037\177]+\\|^ +\\| +$" "") | |
908 | "\r\n" "\n")))) | |
23f87bed MB |
909 | (if (string-equal "" cleaned-text) |
910 | nil | |
911 | cleaned-text))) | |
912 | ||
913 | (defun nnrss-node-just-text (node) | |
914 | (if (and node (listp node)) | |
915 | (mapconcat 'nnrss-node-just-text (cddr node) " ") | |
916 | node)) | |
917 | ||
918 | (defun nnrss-find-el (tag data &optional found-list) | |
91472578 MB |
919 | "Find the all matching elements in the data. |
920 | Careful with this on large documents!" | |
921 | (when (consp data) | |
922 | (dolist (bit data) | |
923 | (when (car-safe bit) | |
924 | (when (equal tag (car bit)) | |
925 | ;; Old xml.el may return a list of string. | |
926 | (when (and (consp (caddr bit)) | |
927 | (stringp (caaddr bit))) | |
928 | (setcar (cddr bit) (caaddr bit))) | |
929 | (setq found-list | |
930 | (append found-list | |
931 | (list bit)))) | |
932 | (if (and (consp (car-safe (caddr bit))) | |
933 | (not (stringp (caddr bit)))) | |
934 | (setq found-list | |
935 | (append found-list | |
936 | (nnrss-find-el | |
937 | tag (caddr bit)))) | |
938 | (setq found-list | |
939 | (append found-list | |
940 | (nnrss-find-el | |
941 | tag (cddr bit)))))))) | |
23f87bed MB |
942 | found-list) |
943 | ||
944 | (defun nnrss-rsslink-p (el) | |
945 | "Test if the element we are handed is an RSS autodiscovery link." | |
946 | (and (eq (car-safe el) 'link) | |
947 | (string-equal (cdr (assoc 'rel (cadr el))) "alternate") | |
91472578 | 948 | (or (string-equal (cdr (assoc 'type (cadr el))) |
23f87bed MB |
949 | "application/rss+xml") |
950 | (string-equal (cdr (assoc 'type (cadr el))) "text/xml")))) | |
951 | ||
952 | (defun nnrss-get-rsslinks (data) | |
953 | "Extract the <link> elements that are links to RSS from the parsed data." | |
91472578 | 954 | (delq nil (mapcar |
23f87bed MB |
955 | (lambda (el) |
956 | (if (nnrss-rsslink-p el) el)) | |
957 | (nnrss-find-el 'link data)))) | |
958 | ||
959 | (defun nnrss-extract-hrefs (data) | |
91472578 MB |
960 | "Recursively extract hrefs from a page's source. |
961 | DATA should be the output of `xml-parse-region' or | |
962 | `w3-parse-buffer'." | |
23f87bed MB |
963 | (mapcar (lambda (ahref) |
964 | (cdr (assoc 'href (cadr ahref)))) | |
965 | (nnrss-find-el 'a data))) | |
966 | ||
91472578 | 967 | (defmacro nnrss-match-macro (base-uri item onsite-list offsite-list) |
23f87bed | 968 | `(cond ((or (string-match (concat "^" ,base-uri) ,item) |
91472578 MB |
969 | (not (string-match "://" ,item))) |
970 | (setq ,onsite-list (append ,onsite-list (list ,item)))) | |
971 | (t (setq ,offsite-list (append ,offsite-list (list ,item)))))) | |
23f87bed MB |
972 | |
973 | (defun nnrss-order-hrefs (base-uri hrefs) | |
974 | "Given a list of hrefs, sort them using the following priorities: | |
975 | 1. links ending in .rss | |
976 | 2. links ending in .rdf | |
977 | 3. links ending in .xml | |
978 | 4. links containing the above | |
979 | 5. offsite links | |
980 | ||
981 | BASE-URI is used to determine the location of the links and | |
982 | whether they are `offsite' or `onsite'." | |
983 | (let (rss-onsite-end rdf-onsite-end xml-onsite-end | |
984 | rss-onsite-in rdf-onsite-in xml-onsite-in | |
985 | rss-offsite-end rdf-offsite-end xml-offsite-end | |
01c52d31 | 986 | rss-offsite-in rdf-offsite-in xml-offsite-in) |
91472578 MB |
987 | (dolist (href hrefs) |
988 | (cond ((null href)) | |
989 | ((string-match "\\.rss$" href) | |
990 | (nnrss-match-macro | |
991 | base-uri href rss-onsite-end rss-offsite-end)) | |
992 | ((string-match "\\.rdf$" href) | |
993 | (nnrss-match-macro | |
994 | base-uri href rdf-onsite-end rdf-offsite-end)) | |
995 | ((string-match "\\.xml$" href) | |
996 | (nnrss-match-macro | |
997 | base-uri href xml-onsite-end xml-offsite-end)) | |
998 | ((string-match "rss" href) | |
999 | (nnrss-match-macro | |
1000 | base-uri href rss-onsite-in rss-offsite-in)) | |
1001 | ((string-match "rdf" href) | |
1002 | (nnrss-match-macro | |
1003 | base-uri href rdf-onsite-in rdf-offsite-in)) | |
1004 | ((string-match "xml" href) | |
1005 | (nnrss-match-macro | |
1006 | base-uri href xml-onsite-in xml-offsite-in)))) | |
1007 | (append | |
23f87bed MB |
1008 | rss-onsite-end rdf-onsite-end xml-onsite-end |
1009 | rss-onsite-in rdf-onsite-in xml-onsite-in | |
1010 | rss-offsite-end rdf-offsite-end xml-offsite-end | |
1011 | rss-offsite-in rdf-offsite-in xml-offsite-in))) | |
1012 | ||
1013 | (defun nnrss-discover-feed (url) | |
1014 | "Given a page, find an RSS feed using Mark Pilgrim's | |
0b10e437 | 1015 | `ultra-liberal rss locator' (URL `http://diveintomark.org/2002/08/15.html')." |
23f87bed MB |
1016 | |
1017 | (let ((parsed-page (nnrss-fetch url))) | |
1018 | ||
1019 | ;; 1. if this url is the rss, use it. | |
1020 | (if (nnrss-rss-p parsed-page) | |
1021 | (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/"))) | |
1022 | (nnrss-rss-title-description rss-ns parsed-page url)) | |
1023 | ||
1024 | ;; 2. look for the <link rel="alternate" | |
1025 | ;; type="application/rss+xml" and use that if it is there. | |
1026 | (let ((links (nnrss-get-rsslinks parsed-page))) | |
1027 | (if links | |
1028 | (let* ((xml (nnrss-fetch | |
1029 | (cdr (assoc 'href (cadar links))))) | |
1030 | (rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/"))) | |
1031 | (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links))))) | |
1032 | ||
1033 | ;; 3. look for links on the site in the following order: | |
1034 | ;; - onsite links ending in .rss, .rdf, or .xml | |
1035 | ;; - onsite links containing any of the above | |
1036 | ;; - offsite links ending in .rss, .rdf, or .xml | |
1037 | ;; - offsite links containing any of the above | |
1038 | (let* ((base-uri (progn (string-match ".*://[^/]+/?" url) | |
1039 | (match-string 0 url))) | |
91472578 | 1040 | (hrefs (nnrss-order-hrefs |
23f87bed MB |
1041 | base-uri (nnrss-extract-hrefs parsed-page))) |
1042 | (rss-link nil)) | |
91472578 MB |
1043 | (while (and (eq rss-link nil) (not (eq hrefs nil))) |
1044 | (let ((href-data (nnrss-fetch (car hrefs)))) | |
1045 | (if (nnrss-rss-p href-data) | |
1046 | (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/"))) | |
1047 | (setq rss-link (nnrss-rss-title-description | |
1048 | rss-ns href-data (car hrefs)))) | |
1049 | (setq hrefs (cdr hrefs))))) | |
1050 | (if rss-link rss-link | |
23f87bed MB |
1051 | |
1052 | ;; 4. check syndic8 | |
91472578 | 1053 | (nnrss-find-rss-via-syndic8 url)))))))) |
23f87bed MB |
1054 | |
1055 | (defun nnrss-find-rss-via-syndic8 (url) | |
91472578 | 1056 | "Query syndic8 for the rss feeds it has for URL." |
23f87bed MB |
1057 | (if (not (locate-library "xml-rpc")) |
1058 | (progn | |
1059 | (message "XML-RPC is not available... not checking Syndic8.") | |
1060 | nil) | |
1061 | (require 'xml-rpc) | |
1062 | (let ((feedid (xml-rpc-method-call | |
1063 | "http://www.syndic8.com/xmlrpc.php" | |
1064 | 'syndic8.FindSites | |
1065 | url))) | |
1066 | (when feedid | |
91472578 | 1067 | (let* ((feedinfo (xml-rpc-method-call |
23f87bed MB |
1068 | "http://www.syndic8.com/xmlrpc.php" |
1069 | 'syndic8.GetFeedInfo | |
1070 | feedid)) | |
1071 | (urllist | |
91472578 | 1072 | (delq nil |
23f87bed MB |
1073 | (mapcar |
1074 | (lambda (listinfo) | |
91472578 | 1075 | (if (string-equal |
23f87bed MB |
1076 | (cdr (assoc "status" listinfo)) |
1077 | "Syndicated") | |
1078 | (cons | |
1079 | (cdr (assoc "sitename" listinfo)) | |
1080 | (list | |
1081 | (cons 'title | |
91472578 | 1082 | (cdr (assoc |
23f87bed MB |
1083 | "sitename" listinfo))) |
1084 | (cons 'href | |
1085 | (cdr (assoc | |
1086 | "dataurl" listinfo))))))) | |
1087 | feedinfo)))) | |
1088 | (if (not (> (length urllist) 1)) | |
1089 | (cdar urllist) | |
1090 | (let ((completion-ignore-case t) | |
91472578 | 1091 | (selection |
23f87bed | 1092 | (mapcar (lambda (listinfo) |
91472578 | 1093 | (cons (cdr (assoc "sitename" listinfo)) |
e9bd5782 | 1094 | (string-to-number |
23f87bed MB |
1095 | (cdr (assoc "feedid" listinfo))))) |
1096 | feedinfo))) | |
91472578 | 1097 | (cdr (assoc |
23f87bed MB |
1098 | (completing-read |
1099 | "Multiple feeds found. Select one: " | |
1100 | selection nil t) urllist))))))))) | |
1101 | ||
1102 | (defun nnrss-rss-p (data) | |
91472578 MB |
1103 | "Test if DATA is an RSS feed. |
1104 | Simply ensures that the first element is rss or rdf." | |
23f87bed MB |
1105 | (or (eq (caar data) 'rss) |
1106 | (eq (caar data) 'rdf:RDF))) | |
1107 | ||
1108 | (defun nnrss-rss-title-description (rss-namespace data url) | |
1109 | "Return the title of an RSS feed." | |
1110 | (if (nnrss-rss-p data) | |
1111 | (let ((description (intern (concat rss-namespace "description"))) | |
1112 | (title (intern (concat rss-namespace "title"))) | |
1113 | (channel (nnrss-find-el (intern (concat rss-namespace "channel")) | |
1114 | data))) | |
1115 | (list | |
1116 | (cons 'description (caddr (nth 0 (nnrss-find-el description channel)))) | |
1117 | (cons 'title (caddr (nth 0 (nnrss-find-el title channel)))) | |
1118 | (cons 'href url))))) | |
1119 | ||
1120 | (defun nnrss-get-namespace-prefix (el uri) | |
1121 | "Given EL (containing a parsed element) and URI (containing a string | |
1122 | that gives the URI for which you want to retrieve the namespace | |
1123 | prefix), return the prefix." | |
1124 | (let* ((prefix (car (rassoc uri (cadar el)))) | |
91472578 | 1125 | (nslist (if prefix |
23f87bed MB |
1126 | (split-string (symbol-name prefix) ":"))) |
1127 | (ns (cond ((eq (length nslist) 1) ; no prefix given | |
1128 | "") | |
1129 | ((eq (length nslist) 2) ; extract prefix | |
1130 | (cadr nslist))))) | |
91472578 | 1131 | (if (and ns (not (string= ns ""))) |
23f87bed MB |
1132 | (concat ns ":") |
1133 | ns))) | |
1134 | ||
1135 | (provide 'nnrss) | |
1136 | ||
cbee283d | 1137 | ;; arch-tag: 12910c07-0cdf-44fb-8d2c-416ded64c267 |
23f87bed | 1138 | ;;; nnrss.el ends here |