Commit | Line | Data |
---|---|---|
eec82323 | 1 | ;;; nnweb.el --- retrieving articles via web search engines |
e84b4b86 TTN |
2 | |
3 | ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, | |
e3fe4da0 | 4 | ;; 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
eec82323 | 5 | |
6748645f | 6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
eec82323 LMI |
7 | ;; Keywords: news |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published by | |
5a9dffec | 13 | ;; the Free Software Foundation; either version 3, or (at your option) |
eec82323 LMI |
14 | ;; any later version. |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
23 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
24 | ;; Boston, MA 02110-1301, USA. | |
eec82323 LMI |
25 | |
26 | ;;; Commentary: | |
27 | ||
23f87bed | 28 | ;; Note: You need to have `w3' installed for some functions to work. |
eec82323 LMI |
29 | |
30 | ;;; Code: | |
31 | ||
5ab7173c RS |
32 | (eval-when-compile (require 'cl)) |
33 | ||
eec82323 LMI |
34 | (require 'nnoo) |
35 | (require 'message) | |
36 | (require 'gnus-util) | |
37 | (require 'gnus) | |
eec82323 | 38 | (require 'nnmail) |
16409b0b | 39 | (require 'mm-util) |
23f87bed MB |
40 | (require 'mm-url) |
41 | (eval-and-compile | |
0d5dc4a5 | 42 | (ignore-errors |
23f87bed MB |
43 | (require 'url))) |
44 | (autoload 'w3-parse-buffer "w3-parse") | |
eec82323 LMI |
45 | |
46 | (nnoo-declare nnweb) | |
47 | ||
(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
  "Directory in which nnweb saves its files.
The \"active\" file and the per-group \".overview\" files are
written below this directory.")

(defvoo nnweb-type 'google
  "Symbol naming the search engine type in use.
It selects an entry of `nnweb-type-definition'.
Valid types include `google', `dejanews', and `gmane'.")
eec82323 | 54 | |
(defvar nnweb-type-definition
  '((google
     (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source")
     (result . "http://groups.google.com/group/%s/msg/%s?dmode=source")
     (article . nnweb-google-wash-article)
     (reference . identity)
     (map . nnweb-google-create-mapping)
     (search . nnweb-google-search)
     (address . "http://groups.google.com/groups")
     (base . "http://groups.google.com")
     (identifier . nnweb-google-identity))
    (dejanews ;; alias of google
     (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source")
     (result . "http://groups.google.com/group/%s/msg/%s?dmode=source")
     (article . nnweb-google-wash-article)
     (reference . identity)
     (map . nnweb-google-create-mapping)
     (search . nnweb-google-search)
     (address . "http://groups.google.com/groups")
     (base . "http://groups.google.com")
     (identifier . nnweb-google-identity))
    (gmane
     (article . nnweb-gmane-wash-article)
     (id . "http://gmane.org/view.php?group=%s")
     (reference . identity)
     (map . nnweb-gmane-create-mapping)
     (search . nnweb-gmane-search)
     (address . "http://search.gmane.org/nov.php")
     (identifier . nnweb-gmane-identity)))
  "Type-definition alist.
Each element has the form (TYPE (KEY . VALUE)...), where TYPE is a
value of `nnweb-type'.  Entries are looked up with `nnweb-definition'.
The keys used in this file are:

  id          format string giving the URL for fetching by Message-ID
  result      format string giving the URL of a single search hit
  article     function that cleans up a fetched article buffer
  reference   function applied to a Message-ID after it was fetched
  map         function that runs the search and fills `nnweb-articles'
  search      function that fetches the result page for a search string
  address     URL of the search form
  base        URL prefix prepended to relative \"next page\" links
  identifier  function turning an article URL into a hash table key")
85 | ||
(defvoo nnweb-search nil
  "Search string to feed to the search engine.
It is passed to the `search' function of the current `nnweb-type'.")

(defvoo nnweb-max-hits 999
  "Maximum number of hits to display.")

(defvoo nnweb-ephemeral-p nil
  "Whether this nnweb server is ephemeral.
When non-nil, the active and overview files are neither read nor
written.")
94 | ||
95 | ;;; Internal variables | |
96 | ||
(defvoo nnweb-articles nil
  "List of known articles; each element starts with (NUMBER HEADER).")

(defvoo nnweb-buffer nil
  "Work buffer of the server, created by `nnweb-init'.")

(defvoo nnweb-group-alist nil
  "Alist of groups; each element has the form (GROUP (LOW . HIGH)).")

(defvoo nnweb-group nil
  "Name of the group selected by `nnweb-possibly-change-server'.")

(defvoo nnweb-hashtb nil
  "Hash table mapping article identifiers to `nnweb-articles' elements.")
102 | ||
103 | ;;; Interface functions | |
104 | ||
105 | (nnoo-define-basics nnweb) | |
106 | ||
(deffoo nnweb-retrieve-headers (articles &optional group server fetch-old)
  "Insert NOV lines for ARTICLES into `nntp-server-buffer'.
GROUP and SERVER are handed to `nnweb-possibly-change-server'.
FETCH-OLD is not used by this function.
Article numbers without an entry in `nnweb-articles' are skipped.
Return the symbol `nov'."
  (nnweb-possibly-change-server group server)
  ;; `with-current-buffer' replaces the discouraged
  ;; `save-excursion' + `set-buffer' combination.
  (with-current-buffer nntp-server-buffer
    (erase-buffer)
    (let (article header)
      (mm-with-unibyte-current-buffer
        (while (setq article (pop articles))
          (when (setq header (cadr (assq article nnweb-articles)))
            (nnheader-insert-nov header))))
      'nov)))
118 | ||
(deffoo nnweb-request-scan (&optional group server)
  "Run the search for GROUP on SERVER and record the hits.
An ephemeral server starts from a fresh hash table; otherwise the
stored overview is loaded first when no articles are known yet.  The
`map' function of the current `nnweb-type' is then called, and for
non-ephemeral servers the active and overview files are rewritten."
  (nnweb-possibly-change-server group server)
  (cond
   (nnweb-ephemeral-p
    (setq nnweb-hashtb (gnus-make-hashtable 4095)))
   ((null nnweb-articles)
    (nnweb-read-overview group)))
  (funcall (nnweb-definition 'map))
  (when (not nnweb-ephemeral-p)
    (nnweb-write-active)
    (nnweb-write-overview group)))
129 | ||
(deffoo nnweb-request-group (group &optional server dont-check)
  "Select GROUP on SERVER and report its article range.
Unless the server is ephemeral, DONT-CHECK is non-nil, or articles
are already known, the stored overview of GROUP is read first.
With no articles at all, report \"No matching articles\"; otherwise
insert a \"211\" status line with the article count and range."
  (nnweb-possibly-change-server group server)
  (when (and (not nnweb-ephemeral-p)
             (not dont-check)
             (null nnweb-articles))
    (nnweb-read-overview group))
  (if (null nnweb-articles)
      (nnheader-report 'nnweb "No matching articles")
    ;; Ephemeral groups derive the range from the article list itself;
    ;; stored groups take it from `nnweb-group-alist'.
    (let ((range (if nnweb-ephemeral-p
                     (cons (caar nnweb-articles)
                           (caar (last nnweb-articles)))
                   (cadr (assoc group nnweb-group-alist)))))
      (nnheader-report 'nnweb "Opened group %s" group)
      (nnheader-insert
       "211 %d %d %d %s\n"
       (length nnweb-articles) (car range) (cdr range) group))))
148 | ||
(deffoo nnweb-close-group (group &optional server)
  "Close GROUP on SERVER by killing the live `nnweb-buffer', if any.
Always return t."
  (nnweb-possibly-change-server group server)
  (when (gnus-buffer-live-p nnweb-buffer)
    ;; `with-current-buffer' replaces the discouraged
    ;; `save-excursion' + `set-buffer' combination.
    (with-current-buffer nnweb-buffer
      ;; Clear the modified flag before killing the buffer.
      (set-buffer-modified-p nil)
      (kill-buffer nnweb-buffer)))
  t)
157 | ||
(deffoo nnweb-request-article (article &optional group server buffer)
  "Fetch ARTICLE into BUFFER (default `nntp-server-buffer').
If ARTICLE has an entry in `nnweb-articles', the URL stored in its
header's Xref field is fetched.  Otherwise, if ARTICLE is a string of
the form \"<...>\" and the current `nnweb-type' defines an `id' URL
template, the article is fetched through that template and ARTICLE is
passed through the type's `reference' function.
Unless `nnheader-callback-function' is set, the type's `article'
function is then run on the fetched text.
Return (GROUP . NUMBER), where NUMBER is nil if ARTICLE is not a
number, or nil when neither lookup produced a non-nil result."
  (nnweb-possibly-change-server group server)
  ;; `with-current-buffer' replaces the discouraged
  ;; `save-excursion' + `set-buffer' combination.
  (with-current-buffer (or buffer nntp-server-buffer)
    (let* ((header (cadr (assq article nnweb-articles)))
           (url (and header (mail-header-xref header))))
      (when (or (and url
                     (mm-with-unibyte-current-buffer
                       (mm-url-insert url)))
                (and (stringp article)
                     (nnweb-definition 'id t)
                     (let ((fetch (nnweb-definition 'id))
                           art)
                       ;; Strip the angle brackets from the Message-ID.
                       (when (string-match "^<\\(.*\\)>$" article)
                         (setq art (match-string 1 article)))
                       (when (and fetch art)
                         (setq url (format fetch
                                           (mm-url-form-encode-xwfu art)))
                         (mm-with-unibyte-current-buffer
                           (mm-url-insert url))
                         (if (nnweb-definition 'reference t)
                             (setq article
                                   (funcall (nnweb-definition
                                             'reference) article)))))))
        (unless nnheader-callback-function
          (funcall (nnweb-definition 'article)))
        (nnheader-report 'nnweb "Fetched article %s" article)
        (cons group (and (numberp article) article))))))
eec82323 LMI |
186 | |
(deffoo nnweb-close-server (&optional server)
  "Close SERVER.
If SERVER is open and `nnweb-buffer' is live, kill that buffer first.
Return the value of `nnoo-close-server'."
  (when (and (nnweb-server-opened server)
             (gnus-buffer-live-p nnweb-buffer))
    ;; `with-current-buffer' replaces the discouraged
    ;; `save-excursion' + `set-buffer' combination.
    (with-current-buffer nnweb-buffer
      ;; Clear the modified flag before killing the buffer.
      (set-buffer-modified-p nil)
      (kill-buffer nnweb-buffer)))
  (nnoo-close-server 'nnweb server))
195 | ||
(deffoo nnweb-request-list (&optional server)
  "Generate active data in `nntp-server-buffer' and return t.
The entry of `nnweb-group-alist' whose key is `equal' to SERVER is
handed to `nnmail-generate-active'."
  (nnweb-possibly-change-server nil server)
  ;; `with-current-buffer' replaces the discouraged
  ;; `save-excursion' + `set-buffer' combination.
  (with-current-buffer nntp-server-buffer
    (nnmail-generate-active (list (assoc server nnweb-group-alist)))
    t))
202 | ||
(deffoo nnweb-request-update-info (group info &optional server)
  "Switch to GROUP on SERVER; INFO is not used by this function.
Return the value of `nnweb-possibly-change-server'."
  (nnweb-possibly-change-server group server))
eec82323 LMI |
205 | |
(deffoo nnweb-asynchronous-p ()
  "Return nil unconditionally."
  nil)
eec82323 LMI |
208 | |
(deffoo nnweb-request-create-group (group &optional server args)
  "Create GROUP on SERVER with the empty article range (1 . 0).
Any existing group of the same name is deleted first.  The active
file is rewritten.  ARGS is not used.  Always return t."
  (nnweb-possibly-change-server nil server)
  (nnweb-request-delete-group group)
  (setq nnweb-group-alist
        (cons (list group (cons 1 0)) nnweb-group-alist))
  (nnweb-write-active)
  t)
215 | ||
(deffoo nnweb-request-delete-group (group &optional force server)
  "Delete GROUP on SERVER.
Remove GROUP from `nnweb-group-alist', rewrite the active file and
delete the overview file of GROUP.  FORCE is not used.
Always return t."
  (nnweb-possibly-change-server group server)
  (gnus-pull group nnweb-group-alist t)
  (nnweb-write-active)
  (let ((overview-file (nnweb-overview-file group)))
    (gnus-delete-file overview-file))
  t)
222 | ||
223 | (nnoo-define-skeleton nnweb) | |
224 | ||
225 | ;;; Internal functions | |
226 | ||
(defun nnweb-read-overview (group)
  "Load the overview file of GROUP, if it exists.
Every NOV line becomes a (NUMBER HEADER XREF) element that is pushed
onto `nnweb-articles' and recorded in `nnweb-hashtb'."
  (let ((overview-file (nnweb-overview-file group)))
    (when (file-exists-p overview-file)
      (mm-with-unibyte-buffer
        (nnheader-insert-file-contents overview-file)
        (goto-char (point-min))
        (while (not (eobp))
          (let ((header (nnheader-parse-nov)))
            (forward-line 1)
            (setq nnweb-articles
                  (cons (list (mail-header-number header)
                              header
                              (mail-header-xref header))
                        nnweb-articles))
            (nnweb-set-hashtb header (car nnweb-articles))))))))
241 | ||
(defun nnweb-write-overview (group)
  "Write one NOV line per element of `nnweb-articles' to GROUP's overview file."
  (with-temp-file (nnweb-overview-file group)
    (dolist (entry nnweb-articles)
      ;; The header is the second element of each entry.
      (nnheader-insert-nov (cadr entry)))))
248 | ||
(defun nnweb-set-hashtb (header data)
  "Store DATA in `nnweb-hashtb' under the identifier of HEADER's Xref."
  (let ((ident (nnweb-identifier (mail-header-xref header))))
    (gnus-sethash ident data nnweb-hashtb)))
252 | ||
(defun nnweb-get-hashtb (url)
  "Return the `nnweb-hashtb' entry stored under the identifier of URL."
  (let ((ident (nnweb-identifier url)))
    (gnus-gethash ident nnweb-hashtb)))
255 | ||
(defun nnweb-identifier (ident)
  "Apply the `identifier' function of the current `nnweb-type' to IDENT."
  (let ((identify (nnweb-definition 'identifier)))
    (funcall identify ident)))
258 | ||
(defun nnweb-overview-file (group)
  "Return the overview file name of GROUP.
It is built by `nnheader-concat' from `nnweb-directory', GROUP and
the suffix \".overview\"."
  (nnheader-concat nnweb-directory group ".overview"))
262 | ||
(defun nnweb-write-active ()
  "Save `nnweb-group-alist' to the active file.
`nnweb-directory' is created if necessary.  The file holds a single
`setq' form, which `nnweb-read-active' evaluates with `load'."
  (gnus-make-directory nnweb-directory)
  (with-temp-file (nnheader-concat nnweb-directory "active")
    (prin1 (list 'setq 'nnweb-group-alist
                 (list 'quote nnweb-group-alist))
           (current-buffer))))
268 | ||
(defun nnweb-read-active ()
  "Load the active file from `nnweb-directory'.
A missing file is not an error, no message is shown, and the file
name is used without trying any suffix."
  (let ((active-file (nnheader-concat nnweb-directory "active")))
    (load active-file t t t)))
272 | ||
(defun nnweb-definition (type &optional noerror)
  "Return the value stored under TYPE for the current `nnweb-type'.
The lookup is done in `nnweb-type-definition'.  If there is no such
value, signal an error unless NOERROR is non-nil, in which case
return nil."
  (let ((value (cdr (assq type (assq nnweb-type nnweb-type-definition)))))
    (unless (or value noerror)
      (error "Undefined definition %s" type))
    value))
280 | ||
(defun nnweb-possibly-change-server (&optional group server)
  "Make SERVER and GROUP current and set up the server state.
A non-nil SERVER is opened if necessary and its buffer initialized.
The active file is read when `nnweb-group-alist' is empty, and
`nnweb-hashtb' is created when missing.  A non-nil GROUP is stored
in `nnweb-group'."
  (when server
    (or (nnweb-server-opened server)
        (nnweb-open-server server))
    (nnweb-init server))
  (or nnweb-group-alist
      (nnweb-read-active))
  (or nnweb-hashtb
      (setq nnweb-hashtb (gnus-make-hashtable 4095)))
  (if group
      (setq nnweb-group group)))
eec82323 LMI |
292 | |
(defun nnweb-init (server)
  "Create the work buffer for SERVER unless `nnweb-buffer' is live.
The buffer is named after `nnweb-type', `nnweb-search' and SERVER."
  (when (not (gnus-buffer-live-p nnweb-buffer))
    (let ((work-buffer
           (save-excursion
             (mm-with-unibyte
               (nnheader-set-temp-buffer
                (format " *nnweb %s %s %s*"
                        nnweb-type nnweb-search server))
               (current-buffer)))))
      (setq nnweb-buffer work-buffer))))
eec82323 | 303 | |
95fa1ff7 | 304 | ;;; |
4a2358e9 | 305 | ;;; groups.google.com |
95fa1ff7 SZ |
306 | ;;; |
307 | ||
(defun nnweb-google-wash-article ()
  "Reduce the Google page in the current buffer to the raw article.
Everything up to and including the opening <pre> and everything from
the closing </pre> onwards is deleted, then HTML entities are
decoded.  If the page reports a missing message or has no <pre>
section, a message is shown and the buffer is erased."
  ;; We have Google's masked e-mail addresses here.  :-/
  (let ((case-fold-search t)
        (start-re "<pre>[\r\n ]*")
        (end-re "[\r\n ]*</pre>"))
    (goto-char (point-min))
    (cond
     ((save-excursion
        (or (re-search-forward "The requested message.*could not be found."
                               nil t)
            (not (and (re-search-forward start-re nil t)
                      (re-search-forward end-re nil t)))))
      ;; FIXME: Don't know how to indicate "not found".
      ;; Should this function throw an error?  --rsteib
      (gnus-message 3 "Requested article not found")
      (erase-buffer))
     (t
      ;; Drop everything before the article text ...
      (re-search-forward start-re)
      (delete-region (point-min) (point))
      ;; ... and everything after it.
      (goto-char (point-min))
      (re-search-forward end-re)
      (delete-region (match-beginning 0) (point-max))
      (mm-url-decode-entities)))))
95fa1ff7 SZ |
332 | |
(defun nnweb-google-parse-1 (&optional Message-ID)
  "Parse the search result page in the current buffer.
Return a list of (NUMBER HEADER) elements, in reverse page order, for
every hit whose URL is not yet recorded in `nnweb-hashtb'; each new
element is recorded there as well.  Article numbers are allocated by
advancing the range of `nnweb-group' in `nnweb-group-alist'; an entry
with the empty range (1 . 0) is created if the group has none.
If MESSAGE-ID is non-nil, it is used as the Message-ID of every
header instead of the ID scraped from the page."
  (let ((case-fold-search t)
        (active (cadr (assoc nnweb-group nnweb-group-alist)))
        Subject Date Newsgroups From
        map url mid)
    (unless active
      (push (list nnweb-group (setq active (cons 1 0)))
            nnweb-group-alist))
    ;; Go through all the article hits on this page.
    (goto-char (point-min))
    (while
        (re-search-forward
         "a +href=\"/group/\\([^>\"]+\\)/browse_thread/[^>]+#\\([0-9a-f]+\\)"
         nil t)
      (setq Newsgroups (match-string-no-properties 1)
            ;; Note: Starting with Google Groups 2, `mid' is a Google-internal
            ;; ID, not a proper Message-ID.
            mid (match-string-no-properties 2)
            url (format
                 (nnweb-definition 'result) Newsgroups mid)
            ;; Reset the per-hit fields so that a hit whose author line
            ;; cannot be parsed does not inherit the previous hit's values.
            Date nil
            From nil)
      ;; The link text is the subject.
      (narrow-to-region (search-forward ">" nil t)
                        (search-forward "</a>" nil t))
      (mm-url-remove-markup)
      (mm-url-decode-entities)
      (setq Subject (buffer-string))
      (goto-char (point-max))
      (widen)
      ;; The rest of the hit, up to the end of its table, holds the
      ;; date and author line.
      (narrow-to-region (point)
                        (search-forward "</table" nil t))

      (mm-url-remove-markup)
      (mm-url-decode-entities)
      (goto-char (point-max))
      (when
          (re-search-backward
           "^\\(?:\\(\\w+\\) \\([0-9]+\\)\\|\\S-+\\)\\(?: \\([0-9]\\{4\\}\\)\\)? by ?\\(.*\\)"
           nil t)
        ;; Without a parsed date, or without a year, fall back to the
        ;; current time or the current year respectively.
        (setq Date (if (match-string 1)
                       (format "%s %s 00:00:00 %s"
                               (match-string 1)
                               (match-string 2)
                               (or (match-string 3)
                                   (substring (current-time-string) -4)))
                     (current-time-string)))
        (setq From (match-string 4)))
      (widen)
      (unless (nnweb-get-hashtb url)
        (push
         (list
          (incf (cdr active))
          (make-full-mail-header
           (cdr active) (if Newsgroups
                            (concat "(" Newsgroups ") " Subject)
                          Subject)
           From Date (or Message-ID mid)
           nil 0 0 url))
         map)
        (nnweb-set-hashtb (cadar map) (car map))))
    map))
395 | ||
(defun nnweb-google-reference (id)
  "Parse the current buffer as a search result, using ID as Message-ID.
The hits are appended to `nnweb-articles'.  If there is at least one
hit, fetch the URL in the Xref field of the first element returned by
`nnweb-google-parse-1' into the current buffer and return that
element's article number; otherwise return nil."
  (let ((hits (nnweb-google-parse-1 id)))
    (setq nnweb-articles (nconc nnweb-articles hits))
    (let ((header (cadar hits)))
      (when header
        (mm-with-unibyte-current-buffer
          (mm-url-insert (mail-header-xref header)))
        (caar hits)))))
404 | ||
(defun nnweb-google-create-mapping ()
  "Perform the search and create a number-to-url alist.
The search runs in `nnweb-buffer'.  Result pages are parsed with
`nnweb-google-parse-1' and appended to `nnweb-articles'; \"next\"
links are followed until none is left or `nnweb-max-hits' is reached,
counting 100 hits per page.  Finally `nnweb-articles' is sorted by
article number."
  ;; `with-current-buffer' replaces the discouraged
  ;; `save-excursion' + `set-buffer' combination.
  (with-current-buffer nnweb-buffer
    (erase-buffer)
    (nnheader-message 7 "Searching google...")
    (when (funcall (nnweb-definition 'search) nnweb-search)
      (let ((more t)
            (i 0))
        (while more
          (setq nnweb-articles
                (nconc nnweb-articles (nnweb-google-parse-1)))
          ;; Check if there are more articles to fetch.
          (goto-char (point-min))
          (incf i 100)
          (if (or (not (re-search-forward
                        "<a [^>]+href=\"\n?\\([^>\" \n\t]+\\)[^<]*<img[^>]+src=[^>]+next"
                        nil t))
                  (>= i nnweb-max-hits))
              (setq more nil)
            ;; Yup, there are more articles.  `concat' always returns a
            ;; string, so no further check of `more' is needed here.
            (setq more (concat (nnweb-definition 'base) (match-string 1)))
            (erase-buffer)
            (nnheader-message 7 "Searching google...(%d)" i)
            (mm-url-insert more)))
        ;; Return the articles in the right order.
        (nnheader-message 7 "Searching google...done")
        (setq nnweb-articles
              (sort nnweb-articles 'car-less-than-car))))))
435 | ||
(defun nnweb-google-search (search)
  "Insert the first result page for SEARCH into the current buffer.
The page is requested from the `address' URL of the current
`nnweb-type', asking for at most 100 hits and never more than
`nnweb-max-hits'.  Always return t."
  (mm-url-insert
   (let* ((address (nnweb-definition 'address))
          (page-size (number-to-string (min 100 nnweb-max-hits)))
          (fields (list (cons "q" search)
                        (cons "num" page-size)
                        (cons "hq" "")
                        (cons "hl" "en")
                        (cons "lr" "")
                        (cons "safe" "off")
                        (cons "sites" "groups")
                        (cons "filter" "0"))))
     (concat address "?" (mm-url-encode-www-form-urlencoded fields))))
  t)
452 | ||
(defun nnweb-google-identity (url)
  "Return a unique identifier based on URL.
This is the value of the \"selm=\" parameter if URL has one, and URL
itself otherwise."
  (or (and (string-match "selm=\\([^ &>]+\\)" url)
           (match-string 1 url))
      url))
458 | ||
23f87bed MB |
459 | ;;; |
460 | ;;; gmane.org | |
461 | ;;; | |
(defun nnweb-gmane-create-mapping ()
  "Perform the search and create a number-to-url alist.
The search runs in `nnweb-buffer'.  The first line of the reply is
skipped and every following non-empty line is parsed as a NOV line.
For each header, the Xref field is rewritten to the raw article URL
on article.gmane.org, a From address ending in \"@\" is completed
with \"public.gmane.org\", and the subject is RFC 2047 encoded.
Headers whose URL is not yet in `nnweb-hashtb' get a new article
number and are merged into `nnweb-articles', which is sorted by
article number."
  ;; `with-current-buffer' replaces the discouraged
  ;; `save-excursion' + `set-buffer' combination.
  (with-current-buffer nnweb-buffer
    (let ((case-fold-search t)
          (active (or (cadr (assoc nnweb-group nnweb-group-alist))
                      (cons 1 0)))
          map)
      (erase-buffer)
      (nnheader-message 7 "Searching Gmane..." )
      (when (funcall (nnweb-definition 'search) nnweb-search)
        (goto-char (point-min))
        ;; Skip the status line
        (forward-line 1)
        ;; Thanks to Olly Betts we now have NOV lines in our buffer!
        (while (not (eobp))
          (unless (or (eolp) (looking-at "\x0d"))
            (let* ((header (nnheader-parse-nov))
                   (xref (mail-header-xref header))
                   (from (mail-header-from header))
                   (subject (mail-header-subject header))
                   (rfc2047-encoding-type 'mime))
              (when (string-match " \\([^:]+\\)[:/]\\([0-9]+\\)" xref)
                (mail-header-set-xref
                 header
                 (format "http://article.gmane.org/%s/%s/raw"
                         (match-string 1 xref)
                         (match-string 2 xref))))

              ;; Add host part to gmane-encrypted addresses
              (when (string-match "@$" from)
                (mail-header-set-from header
                                      (concat from "public.gmane.org")))

              (mail-header-set-subject header
                                       (rfc2047-encode-string subject))

              ;; Only articles not seen before get a new number.
              (unless (nnweb-get-hashtb (mail-header-xref header))
                (mail-header-set-number header (incf (cdr active)))
                (push (list (mail-header-number header) header) map)
                (nnweb-set-hashtb (cadar map) (car map)))))
          (forward-line 1)))
      (nnheader-message 7 "Searching Gmane...done")
      (setq nnweb-articles
            (sort (nconc nnweb-articles map) 'car-less-than-car)))))
23f87bed MB |
507 | |
(defun nnweb-gmane-wash-article ()
  "Reduce the Gmane page in the current buffer to the article text.
Everything up to and including the \"<!--X-Head-of-Message-->\"
marker is deleted.  Leading lines of the form
\"<li><em>NAME</em>REST</li>\" are rewritten to \"NAMEREST\", and the
remaining markup is removed.  Nothing is done if the marker is
missing."
  (let ((case-fold-search t))
    (goto-char (point-min))
    (when (search-forward "<!--X-Head-of-Message-->" nil t)
      (delete-region (point-min) (point))
      (goto-char (point-min))
      ;; The second group keeps the header value; the replacement
      ;; refers to it as \\2, so it must exist in the regexp.
      (while (looking-at "^<li><em>\\([^ ]+\\)</em>\\(.*\\)</li>")
        (replace-match "\\1\\2" t)
        (forward-line 1))
      (mm-url-remove-markup))))
23f87bed MB |
518 | |
(defun nnweb-gmane-search (search)
  "Insert the Gmane reply for SEARCH into the current buffer.
The reply is requested from the `address' URL of the current
`nnweb-type' with `nnweb-max-hits' hits per page.  Afterwards the
buffer is detached from any file, made multibyte and decoded as
UTF-8.  Always return t."
  (mm-url-insert
   (let* ((address (nnweb-definition 'address))
          (fields (list (cons "query" search)
                        (cons "HITSPERPAGE"
                              (number-to-string nnweb-max-hits))
                        ;;("TOPDOC" . "1000")
                        )))
     (concat address "?" (mm-url-encode-www-form-urlencoded fields))))
  (setq buffer-file-name nil)
  (set-buffer-multibyte t)
  (mm-decode-coding-region (point-min) (point-max) 'utf-8)
  t)
533 | ||
23f87bed MB |
(defun nnweb-gmane-identity (url)
  "Return a unique identifier based on URL.
This is the text following \"group=\" if URL contains it, and URL
itself otherwise."
  (or (and (string-match "group=\\(.+\\)" url)
           (match-string 1 url))
      url))
539 | ||
16409b0b GM |
540 | ;;; |
541 | ;;; General web/w3 interface utility functions | |
542 | ;;; | |
543 | ||
(defun nnweb-insert-html (parse)
  "Insert HTML for the w3 parse tree PARSE at point.
A string is inserted as is.  Any other node is treated as
\(TAG ATTRIBUTES CHILDREN): the opening tag with its attributes is
inserted, then every child recursively, then the closing tag."
  (cond
   ((stringp parse)
    ;; We used to call nnheader-string-as-multibyte here, but it cannot
    ;; be right, so I removed it.  If a bug shows up because of this change,
    ;; please do not blindly revert the change, but help me find the real
    ;; cause of the bug instead.  --Stef
    (insert parse))
   (t
    (let ((tag (symbol-name (car parse))))
      (insert "<" tag " ")
      (insert (mapconcat
               (lambda (attr)
                 ;; An attribute value is either the cdr itself or,
                 ;; when the cdr is a cons, its first element.
                 (let ((value (if (consp (cdr attr))
                                  (cadr attr)
                                (cdr attr))))
                   (concat (symbol-name (car attr)) "="
                           (prin1-to-string value))))
               (nth 1 parse)
               " "))
      (insert ">\n")
      (dolist (child (nth 2 parse))
        (nnweb-insert-html child))
      (insert "</" tag ">\n")))))
565 | ||
16409b0b GM |
(defun nnweb-parse-find (type parse &optional maxdepth)
  "Return the first element of TYPE in PARSE, or nil if there is none.
An element is a cons whose car is `eq' to TYPE.  If MAXDEPTH is
non-nil, at most that many nesting levels are searched."
  (catch 'found
    (nnweb-parse-find-1 type parse maxdepth)))

(defun nnweb-parse-find-1 (type contents maxdepth)
  "Throw CONTENTS, or the first matching element below it, to `found'.
TYPE and MAXDEPTH are as in `nnweb-parse-find'."
  (when (and (not (and maxdepth (zerop maxdepth)))
             (consp contents))
    (if (eq (car contents) type)
        (throw 'found contents))
    ;; Only descend into proper sub-lists, not into dotted pairs.
    (if (listp (cdr contents))
        (dolist (child contents)
          (if (consp child)
              (nnweb-parse-find-1 type child
                                  (and maxdepth (1- maxdepth))))))))
582 | ||
(defun nnweb-parse-find-all (type parse)
  "Return a list of all elements of TYPE in PARSE.
An element is a cons whose car is `eq' to TYPE.  Matching elements
are not searched for further matches nested inside them."
  ;; No `catch' is needed: the helper collects its results and never
  ;; throws.
  (nnweb-parse-find-all-1 type parse))

(defun nnweb-parse-find-all-1 (type contents)
  "Return the elements of TYPE found in CONTENTS, in document order."
  (let (result)
    (when (consp contents)
      (if (eq (car contents) type)
          (push contents result)
        ;; Only descend into proper sub-lists, not into dotted pairs.
        (when (listp (cdr contents))
          (dolist (element contents)
            (when (consp element)
              (setq result
                    (nconc result (nnweb-parse-find-all-1 type element))))))))
    result))
599 | ||
;; Dynamically bound accumulator shared by `nnweb-text' and
;; `nnweb-text-1'.
(defvar nnweb-text)
(defun nnweb-text (parse)
  "Return a list of the strings contained in PARSE, in document order."
  (let ((nnweb-text nil))
    (nnweb-text-1 parse)
    ;; The helper pushes, so the strings were collected in reverse.
    (nreverse nnweb-text)))

(defun nnweb-text-1 (contents)
  "Push every string found in CONTENTS onto the variable `nnweb-text'.
Sub-lists are searched recursively; dotted pairs are skipped."
  (dolist (item contents)
    (cond
     ((stringp item)
      (push item nnweb-text))
     ((and (consp item)
           (listp (cdr item)))
      (nnweb-text-1 item)))))
614 | ||
eec82323 LMI |
615 | (provide 'nnweb) |
616 | ||
944c87e0 | 617 | ;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697 |
eec82323 | 618 | ;;; nnweb.el ends here |