| 1 | ;;; nnweb.el --- retrieving articles via web search engines |
| 2 | |
| 3 | ;; Copyright (C) 1996-2012 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
| 6 | ;; Keywords: news |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation, either version 3 of the License, or |
| 13 | ;; (at your option) any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 22 | |
| 23 | ;;; Commentary: |
| 24 | |
| 25 | ;; Note: You need to have `w3' installed for some functions to work. |
| 26 | |
| 27 | ;;; Code: |
| 28 | |
| 29 | (eval-when-compile (require 'cl)) |
| 30 | |
| 31 | (require 'nnoo) |
| 32 | (require 'message) |
| 33 | (require 'gnus-util) |
| 34 | (require 'gnus) |
| 35 | (require 'nnmail) |
| 36 | (require 'mm-util) |
| 37 | (require 'mm-url) |
| 38 | (eval-and-compile |
| 39 | (ignore-errors |
| 40 | (require 'url))) |
| 41 | (autoload 'w3-parse-buffer "w3-parse") |
| 42 | |
;; Declare `nnweb' as an nnoo-style back end.
(nnoo-declare nnweb)

(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
  "Directory where nnweb saves its files.
The active file and the per-group overview files are kept here.")

(defvoo nnweb-type 'google
  "What search engine type is being used.
The value selects an entry of `nnweb-type-definition'.
Valid types include `google', `dejanews', and `gmane'.")
| 51 | |
(defvar nnweb-type-definition
  '((google
     (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source")
     (result . "http://groups.google.com/group/%s/msg/%s?dmode=source")
     (article . nnweb-google-wash-article)
     (reference . identity)
     (map . nnweb-google-create-mapping)
     (search . nnweb-google-search)
     (address . "http://groups.google.com/groups")
     (base    . "http://groups.google.com")
     (identifier . nnweb-google-identity))
    (dejanews ;; alias of google
     (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source")
     (result . "http://groups.google.com/group/%s/msg/%s?dmode=source")
     (article . nnweb-google-wash-article)
     (reference . identity)
     (map . nnweb-google-create-mapping)
     (search . nnweb-google-search)
     (address . "http://groups.google.com/groups")
     (base    . "http://groups.google.com")
     (identifier . nnweb-google-identity))
    (gmane
     (article . nnweb-gmane-wash-article)
     (id . "http://gmane.org/view.php?group=%s")
     (reference . identity)
     (map . nnweb-gmane-create-mapping)
     (search . nnweb-gmane-search)
     (address . "http://search.gmane.org/nov.php")
     (identifier . nnweb-gmane-identity)))
  "Alist mapping each search engine type to its definition alist.
Each element has the form (TYPE . DEFINITION), where TYPE is a value
of `nnweb-type' and DEFINITION is an alist looked up through
`nnweb-definition'.  The keys used in this file are:

`id'          Format string giving the URL of an article from its
              form-encoded Message-ID (without the angle brackets).
`result'      Format string giving an article URL from a group name
              and an engine-specific article id.
`article'     Function called with no arguments in the buffer that
              holds a freshly fetched article, to clean it up.
`reference'   Function called with the article that was requested by
              Message-ID; its value replaces the article.
`map'         Function called with no arguments to run the search and
              update `nnweb-articles'.
`search'      Function called with the search string; it inserts the
              result page into the current buffer, and a non-nil
              return value lets the mapping proceed.
`address'     URL the encoded search query is appended to.
`base'        URL prefix put in front of the next-page links found in
              result pages.
`identifier'  Function mapping an article URL to the key used in
              `nnweb-hashtb'.")
| 82 | |
(defvoo nnweb-search nil
  "Search string to feed to the search engine.
It is passed to the `search' function of the current `nnweb-type'.")

(defvoo nnweb-max-hits 999
  "Maximum number of hits to display.
The search functions use it to limit how many results are requested.")

(defvoo nnweb-ephemeral-p nil
  "Whether this nnweb server is ephemeral.
When non-nil, `nnweb-request-scan' neither reads the overview file
nor writes the active and overview files.")
| 91 | |
| 92 | ;;; Internal variables |
| 93 | |
;; List of article entries.  Each entry starts with (NUMBER HEADER ...),
;; where HEADER is a mail header whose Xref field holds the article URL.
(defvoo nnweb-articles nil)
;; Scratch buffer created by `nnweb-init'; search result pages are
;; fetched into it.
(defvoo nnweb-buffer nil)
;; Alist of (GROUP ACTIVE) elements; ACTIVE is a cons whose cdr is the
;; highest article number handed out so far.
(defvoo nnweb-group-alist nil)
;; Name of the group most recently selected through
;; `nnweb-possibly-change-server'.
(defvoo nnweb-group nil)
;; Hash table from article identifier (see `nnweb-identifier') to the
;; matching entry of `nnweb-articles'.
(defvoo nnweb-hashtb nil)

;;; Interface functions

;; NOTE(review): `nnweb-open-server' and `nnweb-server-opened' are called
;; below but not defined in this file; presumably this form provides
;; them -- confirm against nnoo.el.
(nnoo-define-basics nnweb)
| 103 | |
(deffoo nnweb-retrieve-headers (articles &optional group server fetch-old)
  "Insert NOV lines for ARTICLES into `nntp-server-buffer'.
Articles that have no entry in `nnweb-articles' are skipped.
GROUP and SERVER select the server state to use; FETCH-OLD is
ignored.  Return the symbol `nov'."
  (nnweb-possibly-change-server group server)
  (with-current-buffer nntp-server-buffer
    (erase-buffer)
    (mm-with-unibyte-current-buffer
      (let (number)
        ;; Like the classic pop loop, this stops at the first nil element.
        (while (setq number (pop articles))
          (let ((head (cadr (assq number nnweb-articles))))
            (when head
              (nnheader-insert-nov head))))))
    'nov))
| 114 | |
(deffoo nnweb-request-scan (&optional group server)
  "Run the search for GROUP on SERVER and record the hits.
An ephemeral server starts from a fresh hash table; otherwise the
saved overview is loaded first when no articles are known yet, and
the active and overview files are written back afterwards."
  (nnweb-possibly-change-server group server)
  (cond
   (nnweb-ephemeral-p
    (setq nnweb-hashtb (gnus-make-hashtable 4095)))
   ((null nnweb-articles)
    (nnweb-read-overview group)))
  ;; The `map' function of the current type performs the actual search.
  (funcall (nnweb-definition 'map))
  (when (not nnweb-ephemeral-p)
    (nnweb-write-active)
    (nnweb-write-overview group)))
| 125 | |
(deffoo nnweb-request-group (group &optional server dont-check info)
  "Select GROUP on SERVER and report its article range.
Unless the server is ephemeral, DONT-CHECK is non-nil or articles are
already known, the saved overview is read first.  With no articles
at all, report \"No matching articles\"; otherwise insert a \"211\"
status line giving the article count and the active range.  INFO is
ignored."
  (nnweb-possibly-change-server group server)
  (when (and (not nnweb-ephemeral-p)
             (not dont-check)
             (not nnweb-articles))
    (nnweb-read-overview group))
  (if (null nnweb-articles)
      (nnheader-report 'nnweb "No matching articles")
    (let ((active
           (if nnweb-ephemeral-p
               ;; Ephemeral: derive the range from the article list.
               (cons (caar nnweb-articles)
                     (caar (last nnweb-articles)))
             (cadr (assoc group nnweb-group-alist)))))
      (nnheader-report 'nnweb "Opened group %s" group)
      (nnheader-insert
       "211 %d %d %d %s\n" (length nnweb-articles)
       (car active) (cdr active) group))))
| 144 | |
(deffoo nnweb-close-group (group &optional server)
  "Close GROUP on SERVER by killing the nnweb scratch buffer.
Always return t."
  (nnweb-possibly-change-server group server)
  (if (gnus-buffer-live-p nnweb-buffer)
      (with-current-buffer nnweb-buffer
        ;; Clear the modified flag before killing the buffer.
        (set-buffer-modified-p nil)
        (kill-buffer nnweb-buffer)))
  t)
| 152 | |
(deffoo nnweb-request-article (article &optional group server buffer)
  "Fetch ARTICLE into BUFFER (default `nntp-server-buffer').
ARTICLE is either an article number found in `nnweb-articles' or a
Message-ID string of the form \"<...>\".  A known article is fetched
from the URL stored in the Xref field of its header.  A Message-ID is
fetched through the `id' URL template of the current `nnweb-type',
when that type defines one, and is then passed through the type's
`reference' function.

Unless `nnheader-callback-function' is set, the fetched page is
cleaned up by the type's `article' function.  Return a cons
\(GROUP . NUMBER) when something was fetched, where NUMBER is nil
unless ARTICLE is a number; return nil otherwise."
  (nnweb-possibly-change-server group server)
  (with-current-buffer (or buffer nntp-server-buffer)
    (let* ((header (cadr (assq article nnweb-articles)))
           (url (and header (mail-header-xref header))))
      (when (or (and url
                     (mm-with-unibyte-current-buffer
                       (mm-url-insert url)))
                (and (stringp article)
                     (nnweb-definition 'id t)
                     (let ((fetch (nnweb-definition 'id))
                           art)
                       ;; Strip the angle brackets from the Message-ID.
                       (when (string-match "^<\\(.*\\)>$" article)
                         (setq art (match-string 1 article)))
                       (when (and fetch art)
                         (setq url (format fetch
                                           (mm-url-form-encode-xwfu art)))
                         (mm-with-unibyte-current-buffer
                           (mm-url-insert url))
                         ;; The value of this `if' decides whether the
                         ;; Message-ID fetch counts as a success.
                         (if (nnweb-definition 'reference t)
                             (setq article
                                   (funcall (nnweb-definition
                                             'reference) article)))))))
        (unless nnheader-callback-function
          (funcall (nnweb-definition 'article)))
        (nnheader-report 'nnweb "Fetched article %s" article)
        (cons group (and (numberp article) article))))))
| 180 | |
(deffoo nnweb-close-server (&optional server)
  "Close SERVER, killing the nnweb scratch buffer if it is still alive.
The buffer is only touched when SERVER is currently open.  Return the
value of `nnoo-close-server'."
  (if (and (nnweb-server-opened server)
           (gnus-buffer-live-p nnweb-buffer))
      (with-current-buffer nnweb-buffer
        ;; Clear the modified flag before killing the buffer.
        (set-buffer-modified-p nil)
        (kill-buffer nnweb-buffer)))
  (nnoo-close-server 'nnweb server))
| 188 | |
(deffoo nnweb-request-list (&optional server)
  "Generate the active list for SERVER in `nntp-server-buffer'.
Only the `nnweb-group-alist' entry whose key equals SERVER is listed.
Always return t."
  (nnweb-possibly-change-server nil server)
  (with-current-buffer nntp-server-buffer
    (let ((entries (list (assoc server nnweb-group-alist))))
      (nnmail-generate-active entries)))
  t)
| 194 | |
(deffoo nnweb-request-update-info (group info &optional server)
  "Do nothing for GROUP, INFO and SERVER; always return nil."
  ;; The explicit nil keeps the docstring from becoming the return value.
  nil)
| 196 | |
(deffoo nnweb-asynchronous-p ()
  "Return nil."
  nil)
| 199 | |
(deffoo nnweb-request-create-group (group &optional server args)
  "Create GROUP on SERVER with an empty active range.
Any existing group of the same name is deleted first, then the new
entry is saved to the active file.  ARGS is ignored.  Return t."
  (nnweb-possibly-change-server nil server)
  (nnweb-request-delete-group group)
  ;; (1 . 0) is the active range of a group without articles.
  (setq nnweb-group-alist
        (cons (list group (cons 1 0)) nnweb-group-alist))
  (nnweb-write-active)
  t)
| 206 | |
(deffoo nnweb-request-delete-group (group &optional force server)
  "Delete GROUP from SERVER.
Remove it from `nnweb-group-alist', save the active file and delete
the group's overview file.  FORCE is ignored.  Return t."
  (nnweb-possibly-change-server group server)
  (gnus-alist-pull group nnweb-group-alist t)
  (nnweb-write-active)
  (let ((overview (nnweb-overview-file group)))
    (gnus-delete-file overview))
  t)
| 213 | |
;; NOTE(review): presumably supplies default definitions for the back end
;; interface functions not defined above -- confirm against nnoo.el.
(nnoo-define-skeleton nnweb)
| 215 | |
| 216 | ;;; Internal functions |
| 217 | |
(defun nnweb-read-overview (group)
  "Load the saved overview of GROUP into `nnweb-articles'.
Every NOV line of the overview file becomes an entry
\(NUMBER HEADER URL) that is pushed onto `nnweb-articles' and
registered in `nnweb-hashtb'.  Do nothing if the file does not exist."
  (let ((overview (nnweb-overview-file group)))
    (when (file-exists-p overview)
      (mm-with-unibyte-buffer
        (nnheader-insert-file-contents overview)
        (goto-char (point-min))
        (while (not (eobp))
          (let ((head (nnheader-parse-nov)))
            (forward-line 1)
            (let ((entry (list (mail-header-number head)
                               head
                               (mail-header-xref head))))
              (setq nnweb-articles (cons entry nnweb-articles))
              (nnweb-set-hashtb head entry))))))))
| 232 | |
(defun nnweb-write-overview (group)
  "Save the headers in `nnweb-articles' to the overview file of GROUP.
One NOV line is written per article entry."
  (with-temp-file (nnweb-overview-file group)
    (dolist (entry nnweb-articles)
      ;; The header is the second element of each entry.
      (nnheader-insert-nov (cadr entry)))))
| 239 | |
(defun nnweb-set-hashtb (header data)
  "Store DATA in `nnweb-hashtb' under the identifier of HEADER's URL.
The URL is taken from the Xref field of HEADER."
  (let ((key (nnweb-identifier (mail-header-xref header))))
    (gnus-sethash key data nnweb-hashtb)))
| 243 | |
(defun nnweb-get-hashtb (url)
  "Return the `nnweb-hashtb' value stored for the identifier of URL."
  (let ((key (nnweb-identifier url)))
    (gnus-gethash key nnweb-hashtb)))
| 246 | |
(defun nnweb-identifier (ident)
  "Return the hash key for IDENT.
The key is computed by the `identifier' function of the current
`nnweb-type'."
  (let ((keyfn (nnweb-definition 'identifier)))
    (funcall keyfn ident)))
| 249 | |
(defun nnweb-overview-file (group)
  "Return the overview file name of GROUP.
The name is GROUP with \".overview\" appended, under `nnweb-directory'."
  (let ((suffix ".overview"))
    (nnheader-concat nnweb-directory group suffix)))
| 253 | |
(defun nnweb-write-active ()
  "Save `nnweb-group-alist' to the active file in `nnweb-directory'.
The file holds a single `setq' form, so that `nnweb-read-active' can
restore the alist by loading it.  The directory is created if needed."
  (gnus-make-directory nnweb-directory)
  (with-temp-file (nnheader-concat nnweb-directory "active")
    (let ((form (list 'setq 'nnweb-group-alist
                      (list 'quote nnweb-group-alist))))
      (prin1 form (current-buffer)))))
| 259 | |
(defun nnweb-read-active ()
  "Load the active file from `nnweb-directory'.
A missing file is not an error and loading is silent."
  ;; NOTE(review): the file is evaluated as Lisp by `load'; it is expected
  ;; to contain only the form written by `nnweb-write-active'.
  (let ((active-file (nnheader-concat nnweb-directory "active")))
    (load active-file t t t)))
| 263 | |
(defun nnweb-definition (type &optional noerror)
  "Return the definition of TYPE for the current `nnweb-type'.
TYPE is a key such as `search' or `article' in the matching entry of
`nnweb-type-definition'.  If there is no such definition, signal an
error unless NOERROR is non-nil, in which case return nil."
  (let* ((engine (assq nnweb-type nnweb-type-definition))
         (def (cdr (assq type engine))))
    (if (or def noerror)
        def
      (error "Undefined definition %s" type))))
| 271 | |
(defun nnweb-possibly-change-server (&optional group server)
  "Make sure the nnweb state for GROUP and SERVER is ready.
When SERVER is non-nil, open it if necessary and initialize its
buffer.  Then load the active file if `nnweb-group-alist' is empty,
create `nnweb-hashtb' if it is missing, and, when GROUP is non-nil,
remember it in `nnweb-group'."
  (when server
    (or (nnweb-server-opened server)
        (nnweb-open-server server))
    (nnweb-init server))
  (if (null nnweb-group-alist)
      (nnweb-read-active))
  (if (null nnweb-hashtb)
      (setq nnweb-hashtb (gnus-make-hashtable 4095)))
  (when group
    (setq nnweb-group group)))
| 283 | |
(defun nnweb-init (server)
  "Create the scratch buffer for SERVER unless a live one exists.
The buffer is named after `nnweb-type', `nnweb-search' and SERVER,
is made unibyte, and is stored in `nnweb-buffer'."
  (when (not (gnus-buffer-live-p nnweb-buffer))
    (let ((name (format " *nnweb %s %s %s*"
                        nnweb-type nnweb-search server)))
      (setq nnweb-buffer
            (save-current-buffer
              (nnheader-set-temp-buffer name)
              (mm-disable-multibyte)
              (current-buffer))))))
| 294 | |
| 295 | ;;; |
| 296 | ;;; groups.google.com |
| 297 | ;;; |
| 298 | |
(defun nnweb-google-wash-article ()
  "Reduce the Google Groups page in the current buffer to the article.
Keep only the text between the first \"<pre>\" and the following
\"</pre>\" and decode HTML entities in it.  If the page reports that
the message could not be found, or contains no such block, display a
message and erase the buffer instead."
  ;; We have Google's masked e-mail addresses here.  :-/
  (let* ((case-fold-search t)
         (start-re "<pre>[\r\n ]*")
         (end-re "[\r\n ]*</pre>")
         (missing
          (progn
            (goto-char (point-min))
            (save-excursion
              (or (re-search-forward
                   "The requested message.*could not be found."
                   nil t)
                  (not (and (re-search-forward start-re nil t)
                            (re-search-forward end-re nil t))))))))
    (cond
     (missing
      ;; FIXME: Don't know how to indicate "not found".
      ;; Should this function throw an error?  --rsteib
      (gnus-message 3 "Requested article not found")
      (erase-buffer))
     (t
      ;; Drop everything up to and including the opening tag.
      (delete-region (point-min)
                     (re-search-forward start-re))
      (goto-char (point-min))
      ;; Drop everything from the closing tag onwards.
      (re-search-forward end-re)
      (delete-region (match-beginning 0) (point-max))
      (mm-url-decode-entities)))))
| 323 | |
(defun nnweb-google-parse-1 (&optional Message-ID)
  "Parse the Google Groups search result page in the current buffer.
Return a list of (NUMBER HEADER) entries, most recently numbered
first, for the hits whose article URL is not yet in `nnweb-hashtb';
each new entry is also registered there.  Article numbers continue
from the active range of `nnweb-group', which is created in
`nnweb-group-alist' if it is missing.

If MESSAGE-ID is non-nil it is used as the Message-ID of every header
created; otherwise Google's internal article id is used."
  (let ((case-fold-search t)
        (active (cadr (assoc nnweb-group nnweb-group-alist)))
        Subject Date Newsgroups From
        map url mid)
    (unless active
      (push (list nnweb-group (setq active (cons 1 0)))
            nnweb-group-alist))
    ;; Go through all the article hits on this page.
    (goto-char (point-min))
    (while
        (re-search-forward
         "a +href=\"/group/\\([^>\"]+\\)/browse_thread/[^>]+#\\([0-9a-f]+\\)"
         nil t)
      (setq Newsgroups (match-string-no-properties 1)
            ;; Note: Starting with Google Groups 2, `mid' is a Google-internal
            ;; ID, not a proper Message-ID.
            mid (match-string-no-properties 2)
            url (format
                 (nnweb-definition 'result) Newsgroups mid)
            ;; Forget the previous hit's byline, so that a hit whose own
            ;; byline cannot be parsed does not inherit someone else's
            ;; author and date.
            Date nil
            From nil)
      ;; The link text is the subject.
      (narrow-to-region (search-forward ">" nil t)
                        (search-forward "</a>" nil t))
      (mm-url-remove-markup)
      (mm-url-decode-entities)
      (setq Subject (buffer-string))
      (goto-char (point-max))
      (widen)
      ;; The rest of the hit, up to the end of its table, holds the
      ;; "DATE by AUTHOR" byline.
      (narrow-to-region (point)
                        (search-forward "</table" nil t))

      (mm-url-remove-markup)
      (mm-url-decode-entities)
      (goto-char (point-max))
      (when
          (re-search-backward
           "^\\(?:\\(\\w+\\) \\([0-9]+\\)\\|\\S-+\\)\\(?: \\([0-9]\\{4\\}\\)\\)? by ?\\(.*\\)"
           nil t)
        (setq Date (if (match-string 1)
                       (format "%s %s 00:00:00 %s"
                               (match-string 1)
                               (match-string 2)
                               ;; No year given: use the current one.
                               (or (match-string 3)
                                   (substring (current-time-string) -4)))
                     (current-time-string)))
        (setq From (match-string 4)))
      (widen)
      (unless (nnweb-get-hashtb url)
        (push
         (list
          (incf (cdr active))
          (make-full-mail-header
           (cdr active) (if Newsgroups
                            (concat "(" Newsgroups ") " Subject)
                          Subject)
           From Date (or Message-ID mid)
           nil 0 0 url))
         map)
        (nnweb-set-hashtb (cadar map) (car map))))
    map))
| 386 | |
(defun nnweb-google-reference (id)
  "Resolve Message-ID ID from the result page in the current buffer.
The hits parsed from the page are appended to `nnweb-articles'.  If
there is at least one, fetch the article of the first entry of the
parsed list into the current buffer and return its number; otherwise
return nil."
  (let* ((hits (nnweb-google-parse-1 id))
         (head nil))
    (setq nnweb-articles (nconc nnweb-articles hits))
    (setq head (cadar hits))
    (when head
      (mm-with-unibyte-current-buffer
        (mm-url-insert (mail-header-xref head)))
      (caar hits))))
| 395 | |
(defun nnweb-google-create-mapping ()
  "Run the Google search and merge the hits into `nnweb-articles'.
Result pages are fetched into `nnweb-buffer' and parsed one after the
other, following the \"next\" link until there is none or until
`nnweb-max-hits' is reached, counting 100 hits per page.  Finally
`nnweb-articles' is sorted by article number."
  (with-current-buffer nnweb-buffer
    (erase-buffer)
    (nnheader-message 7 "Searching google...")
    (when (funcall (nnweb-definition 'search) nnweb-search)
      (let ((fetched 0)
            (next-page t))
        (while next-page
          (setq nnweb-articles
                (nconc nnweb-articles (nnweb-google-parse-1)))
          ;; Check if there are more articles to fetch.
          (goto-char (point-min))
          (setq fetched (+ fetched 100))
          (cond
           ((and (re-search-forward
                  "<a [^>]+href=\"\n?\\([^>\" \n\t]+\\)[^<]*<img[^>]+src=[^>]+next"
                  nil t)
                 (< fetched nnweb-max-hits))
            ;; Yup, there are more articles.
            (setq next-page
                  (concat (nnweb-definition 'base) (match-string 1)))
            (erase-buffer)
            (nnheader-message 7 "Searching google...(%d)" fetched)
            (mm-url-insert next-page))
           (t
            (setq next-page nil))))
        ;; Return the articles in the right order.
        (nnheader-message 7 "Searching google...done")
        (setq nnweb-articles
              (sort nnweb-articles 'car-less-than-car))))))
| 425 | |
(defun nnweb-google-search (search)
  "Insert the Google Groups result page for SEARCH into the current buffer.
At most 100 hits, or `nnweb-max-hits' if that is smaller, are
requested.  Always return t."
  (let ((query (list (cons "q" search)
                     (cons "num" (number-to-string
                                  (min 100 nnweb-max-hits)))
                     (cons "hq" "")
                     (cons "hl" "en")
                     (cons "lr" "")
                     (cons "safe" "off")
                     (cons "sites" "groups")
                     (cons "filter" "0"))))
    (mm-url-insert
     (concat (nnweb-definition 'address)
             "?"
             (mm-url-encode-www-form-urlencoded query))))
  t)
| 442 | |
(defun nnweb-google-identity (url)
  "Return a unique identifier based on URL.
This is the value of the \"selm=\" parameter of URL if it has one,
and URL itself otherwise."
  (or (and (string-match "selm=\\([^ &>]+\\)" url)
           (match-string 1 url))
      url))
| 448 | |
| 449 | ;;; |
| 450 | ;;; gmane.org |
| 451 | ;;; |
(defun nnweb-gmane-create-mapping ()
  "Run the Gmane search and merge the hits into `nnweb-articles'.
The result is fetched into `nnweb-buffer' and parsed as NOV lines
after one status line.  For every hit, the Xref is rewritten into
the URL of the raw article, a From address ending in \"@\" gets the
host \"public.gmane.org\", and the subject is RFC 2047 encoded.  Hits
not yet in `nnweb-hashtb' receive the next article number.  Finally
`nnweb-articles' is sorted by article number."
  (with-current-buffer nnweb-buffer
    (let ((case-fold-search t)
          (active (or (cadr (assoc nnweb-group nnweb-group-alist))
                      (cons 1 0)))
          map)
      (erase-buffer)
      (nnheader-message 7 "Searching Gmane..." )
      (when (funcall (nnweb-definition 'search) nnweb-search)
        (goto-char (point-min))
        ;; Skip the status line
        (forward-line 1)
        ;; Thanks to Olly Betts we now have NOV lines in our buffer!
        (while (not (eobp))
          ;; Ignore empty lines and lines that start with a carriage return.
          (when (and (not (eolp))
                     (not (looking-at "\x0d")))
            (let* ((header (nnheader-parse-nov))
                   (xref (mail-header-xref header))
                   (from (mail-header-from header))
                   (subject (mail-header-subject header))
                   (rfc2047-encoding-type 'mime))
              (if (string-match " \\([^:]+\\)[:/]\\([0-9]+\\)" xref)
                  (mail-header-set-xref
                   header
                   (format "http://article.gmane.org/%s/%s/raw"
                           (match-string 1 xref)
                           (match-string 2 xref))))

              ;; Add host part to gmane-encrypted addresses
              (if (string-match "@$" from)
                  (mail-header-set-from header
                                        (concat from "public.gmane.org")))

              (mail-header-set-subject header
                                       (rfc2047-encode-string subject))

              (unless (nnweb-get-hashtb (mail-header-xref header))
                (mail-header-set-number
                 header (setcdr active (1+ (cdr active))))
                (let ((entry (list (mail-header-number header) header)))
                  (setq map (cons entry map))
                  (nnweb-set-hashtb header entry)))))
          (forward-line 1)))
      (nnheader-message 7 "Searching Gmane...done")
      (setq nnweb-articles
            (sort (nconc nnweb-articles map) 'car-less-than-car)))))
| 496 | |
(defun nnweb-gmane-wash-article ()
  "Reduce the Gmane HTML page in the current buffer to a plain article.
If the page contains the marker \"<!--X-Head-of-Message-->\", delete
everything up to and including it, turn each following line of the
form \"<li><em>NAME</em>REST</li>\" into \"NAMEREST\", and remove the
remaining markup.  Without the marker the buffer is left untouched."
  (let ((case-fold-search t))
    (goto-char (point-min))
    (when (search-forward "<!--X-Head-of-Message-->" nil t)
      (delete-region (point-min) (point))
      (goto-char (point-min))
      ;; Group 1 is the header name, group 2 the rest of the line.  The
      ;; second group has to exist: a reference to a missing group is
      ;; replaced with nothing, which would throw the header value away.
      (while (looking-at "^<li><em>\\([^ ]+\\)</em>\\(.*\\)</li>")
        (replace-match "\\1\\2" t)
        (forward-line 1))
      (mm-url-remove-markup))))
| 507 | |
(defun nnweb-gmane-search (search)
  "Insert the Gmane search result for SEARCH into the current buffer.
Up to `nnweb-max-hits' hits are requested.  Afterwards the buffer no
longer visits a file, is made multibyte (except on XEmacs) and is
decoded as UTF-8.  Always return t."
  (let ((query (list (cons "query" search)
                     (cons "HITSPERPAGE" (number-to-string nnweb-max-hits))
                     ;;("TOPDOC" . "1000")
                     )))
    (mm-url-insert
     (concat (nnweb-definition 'address)
             "?"
             (mm-url-encode-www-form-urlencoded query))))
  (setq buffer-file-name nil)
  (or (featurep 'xemacs)
      (set-buffer-multibyte t))
  (mm-decode-coding-region (point-min) (point-max) 'utf-8)
  t)
| 522 | |
(defun nnweb-gmane-identity (url)
  "Return a unique identifier based on URL.
This is whatever follows \"group=\" in URL, up to the end of the
line, if URL contains that parameter, and URL itself otherwise."
  (or (and (string-match "group=\\(.+\\)" url)
           (match-string 1 url))
      url))
| 528 | |
| 529 | ;;; |
| 530 | ;;; General web/w3 interface utility functions |
| 531 | ;;; |
| 532 | |
(defun nnweb-insert-html (parse)
  "Insert HTML for the w3 parse tree PARSE at point.
A string is inserted as is.  Any other PARSE must be a list
\(TAG ATTRIBUTES CHILDREN): it is inserted as an opening tag with its
attributes, followed by a newline, its children, and a closing tag
followed by a newline.  Each attribute is a cons of a symbol and
either a value or a list whose first element is the value."
  (cond
   ((stringp parse)
    ;; We used to call nnheader-string-as-multibyte here, but it cannot
    ;; be right, so I removed it.  If a bug shows up because of this change,
    ;; please do not blindly revert the change, but help me find the real
    ;; cause of the bug instead.  --Stef
    (insert parse))
   (t
    (let ((tag (symbol-name (car parse)))
          (attributes
           (mapcar (lambda (param)
                     (let ((value (if (consp (cdr param))
                                      (cadr param)
                                    (cdr param))))
                       (concat (symbol-name (car param)) "="
                               (prin1-to-string value))))
                   (nth 1 parse))))
      (insert "<" tag " ")
      (insert (mapconcat 'identity attributes " "))
      (insert ">\n")
      (dolist (child (nth 2 parse))
        (nnweb-insert-html child))
      (insert "</" tag ">\n")))))
| 554 | |
(defun nnweb-parse-find (type parse &optional maxdepth)
  "Return the first element of TYPE in the parse tree PARSE.
An element is a list whose car is the symbol TYPE.  If MAXDEPTH is
non-nil, it limits how many levels of nesting are searched.  Return
nil if nothing is found."
  ;; The helper delivers its result by throwing to `found'.
  (catch 'found
    (let ((tree parse))
      (nnweb-parse-find-1 type tree maxdepth))))
| 559 | |
(defun nnweb-parse-find-1 (type contents maxdepth)
  "Search CONTENTS for an element of TYPE and throw it to `found'.
CONTENTS itself is checked first, then its sub-lists are searched
recursively.  MAXDEPTH, if non-nil, is the number of levels that may
still be examined; nothing is examined once it reaches zero.  Return
nil when there is no match."
  (when (and (or (null maxdepth)
                 (not (zerop maxdepth)))
             (consp contents))
    (if (eq (car contents) type)
        (throw 'found contents))
    ;; Only descend into proper-looking lists, not dotted pairs.
    (when (listp (cdr contents))
      (let ((remaining (and maxdepth (1- maxdepth))))
        (dolist (child contents)
          (if (consp child)
              (nnweb-parse-find-1 type child remaining)))))))
| 571 | |
(defun nnweb-parse-find-all (type parse)
  "Return a list of all elements of TYPE in the parse tree PARSE.
An element is a list whose car is the symbol TYPE; the search does
not look inside a matching element."
  ;; NOTE(review): the helper never throws to `found', so this catch
  ;; looks vestigial; it is kept so behavior stays exactly the same.
  (catch 'found
    (let ((tree parse))
      (nnweb-parse-find-all-1 type tree))))
| 576 | |
(defun nnweb-parse-find-all-1 (type contents)
  "Return the elements of TYPE found in CONTENTS, in traversal order.
If CONTENTS itself is of TYPE, return a list holding just CONTENTS
without looking inside it; otherwise collect the matches from its
sub-lists recursively."
  (cond
   ((not (consp contents))
    nil)
   ((eq (car contents) type)
    (list contents))
   ;; Only descend into proper-looking lists, not dotted pairs.
   ((listp (cdr contents))
    (let (matches)
      (dolist (child contents)
        (if (consp child)
            (setq matches
                  (nconc matches
                         (nnweb-parse-find-all-1 type child)))))
      matches))))
| 588 | |
;; Dynamically bound by the function `nnweb-text' while
;; `nnweb-text-1' collects strings into it.
(defvar nnweb-text)
(defun nnweb-text (parse)
  "Return a list of the text strings in PARSE, in traversal order."
  (let (nnweb-text)
    (nnweb-text-1 parse)
    ;; The helper pushes, so the collected strings are in reverse order.
    (nreverse nnweb-text)))
| 595 | |
(defun nnweb-text-1 (contents)
  "Push every string found in CONTENTS onto the variable `nnweb-text'.
Sub-lists of CONTENTS are searched recursively; dotted pairs and all
other elements are ignored.  Return nil."
  (dolist (item contents)
    (cond
     ((stringp item)
      (setq nnweb-text (cons item nnweb-text)))
     ((and (consp item)
           (listp (cdr item)))
      (nnweb-text-1 item)))))
| 603 | |
;; Announce the `nnweb' feature so that `require' can find it.
(provide 'nnweb)
| 605 | |
| 606 | ;;; nnweb.el ends here |