| 1 | ;;; url.el --- Uniform Resource Locator retrieval tool -*- lexical-binding: t -*- |
| 2 | |
| 3 | ;; Copyright (C) 1996-1999, 2001, 2004-2012 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Bill Perry <wmperry@gnu.org> |
| 6 | ;; Keywords: comm, data, processes, hypermedia |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | ;; |
| 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation, either version 3 of the License, or |
| 13 | ;; (at your option) any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 22 | |
| 23 | ;;; Commentary: |
| 24 | |
| 25 | ;; Registered URI schemes: http://www.iana.org/assignments/uri-schemes |
| 26 | |
| 27 | ;;; Code: |
| 28 | |
| 29 | |
| 30 | (require 'mailcap) |
| 31 | |
| 32 | (eval-when-compile |
| 33 | (require 'mm-decode) |
| 34 | (require 'mm-view)) |
| 35 | |
| 36 | (require 'url-vars) |
| 37 | (require 'url-cookie) |
| 38 | (require 'url-history) |
| 39 | (require 'url-expand) |
| 40 | (require 'url-privacy) |
| 41 | (require 'url-methods) |
| 42 | (require 'url-proxy) |
| 43 | (require 'url-parse) |
| 44 | (require 'url-util) |
| 45 | |
| 46 | |
(defcustom url-configuration-directory
  (locate-user-emacs-file "url/" ".url/")
  "Directory in which the URL package keeps its data files.
The default cookie and history files are created under this directory."
  :group 'url
  :type 'directory)
| 52 | |
(defun url-do-setup ()
  "Initialize the URL package, unless that has already been done.
Nothing happens when `url-setup-done' is non-nil.  Performing the
setup lazily avoids conflicts with user settings if URL is dumped
with Emacs."
  (unless url-setup-done

    ;; Make OS/2 happy
    ;;(push '("http" "80") tcp-binary-process-input-services)

    (mailcap-parse-mailcaps)
    (mailcap-parse-mimetypes)

    ;; The authentication schemes we know how to handle.
    (url-register-auth-scheme "basic" nil 4)
    (url-register-auth-scheme "digest" nil 7)

    ;; Unless already set, the cookie and history files default to
    ;; entries below `url-configuration-directory'.
    (setq url-cookie-file
          (or url-cookie-file
              (expand-file-name "cookies" url-configuration-directory))
          url-history-file
          (or url-history-file
              (expand-file-name "history" url-configuration-directory)))

    ;; Load the global history file, if there is one, so that it can be
    ;; used for URL completion, etc.
    (url-history-parse-history)
    (url-history-setup-save-timer)

    ;; Likewise for cookies.
    (url-cookie-setup-save-timer)
    (url-cookie-parse-file url-cookie-file)

    ;; Unless a "no_proxy" entry is already configured, build one from
    ;; the environment by translating the value into a regexp, one
    ;; character at a time.
    (let ((no-proxy-env (unless (assoc "no_proxy" url-proxy-services)
                          (or (getenv "NO_PROXY")
                              (getenv "no_PROXY")
                              (getenv "no_proxy"))))
          ;; Characters with a special translation; any other character
          ;; is copied verbatim.
          (translations '((?,  . "\\|")   ; separator -> alternation
                          (?\s . "")      ; spaces are dropped
                          (?.  . "\\.")   ; literal dot
                          (?*  . ".*")    ; glob star
                          (??  . "."))))  ; glob question mark
      (when no-proxy-env
        (push (cons "no_proxy"
                    (concat "\\("
                            (mapconcat
                             (lambda (ch)
                               (or (cdr (assq ch translations))
                                   (char-to-string ch)))
                             no-proxy-env "")
                            "\\)"))
              url-proxy-services)))

    (url-setup-privacy-info)
    (run-hooks 'url-load-hook)
    (setq url-setup-done t)))
| 110 | |
| 111 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 112 | ;;; Retrieval functions |
| 113 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 114 | |
(defvar url-redirect-buffer nil
  "Buffer that has taken over a retrieval from the current buffer.
While retrieving a URL, the URL library sometimes has to switch to a
buffer other than the one `url-retrieve' returned initially.  When that
happens, it sets this variable in the original buffer so that it acts
as a forwarding pointer to the new buffer.")

(defvar url-retrieve-number-of-calls 0
  "Number of retrievals started through `url-retrieve-internal' so far.
Used to decide when old entries should be removed from the URL cache.")

(autoload 'url-cache-prune-cache "url-cache")
| 123 | |
;;;###autoload
(defun url-retrieve (url callback &optional cbargs silent inhibit-cookies)
  "Fetch URL asynchronously, then apply CALLBACK to a status list and CBARGS.
URL may be a string or an already parsed URL.  If it is a string
containing characters that are not valid in a URI, those
characters are percent-encoded; see `url-encode-url'.  A multibyte
string is encoded as utf-8 and URL-encoded before it is used.

Once the object has been retrieved completely, CALLBACK is invoked
as (apply CALLBACK STATUS CBARGS), with the current buffer holding the
object together with any MIME headers associated with it.
STATUS is a list with an even number of elements recording what
happened during the request, most recent event first; it is empty
when no events have occurred.  Each pair is one of:

\(:redirect REDIRECTED-TO) - the request was redirected to this URL
\(:error (ERROR-SYMBOL . DATA)) - an error occurred.  The error can be
signaled with (signal ERROR-SYMBOL DATA).

The return value is the buffer URL will load into, or nil when the
process has already completed (i.e. URL was a mailto URL or similar);
in that case CALLBACK is not called.

`url-request-data', `url-request-method' and
`url-request-extra-headers' may be dynamically bound around the
request; dynamic binding of other variables doesn't necessarily
take effect.

A non-nil SILENT suppresses progress messages and the like.
A non-nil INHIBIT-COOKIES means cookies are neither stored nor sent
to the server."
;;; XXX: There is code in Emacs that does dynamic binding
;;; of the following variables around url-retrieve:
;;; url-standalone-mode, url-gateway-unplugged, w3-honor-stylesheets,
;;; url-confirmation-func, url-cookie-multiple-line,
;;; url-cookie-{{,secure-}storage,confirmation}
;;; url-standalone-mode and url-gateway-unplugged should work as
;;; usual.  url-confirmation-func is only used in nnwarchive.el and
;;; webmail.el; the latter should be updated.  Is
;;; url-cookie-multiple-line needed anymore?  The other url-cookie-*
;;; are (for now) only used in synchronous retrievals.
  ;; The callback's full argument list starts with the (initially
  ;; empty) STATUS list, followed by the caller's CBARGS.
  (let ((callback-args (cons nil cbargs)))
    (url-retrieve-internal url callback callback-args
                           silent inhibit-cookies)))
| 168 | |
(defun url-retrieve-internal (url callback cbargs &optional silent
                                  inhibit-cookies)
  "Do the work of `url-retrieve', which is the external interface.
URL is a string or a parsed URL.  A string has its text properties
removed and is passed through `url-encode-url' before being parsed,
so a multibyte string is encoded as utf-8 and URL-encoded before
it's used.

CALLBACK is applied to CBARGS.  CBARGS is what the callback will
actually receive - its first item is the list of events, as
described in the docstring of `url-retrieve'.

If SILENT, don't message progress reports and the like.
If INHIBIT-COOKIES, cookies will neither be stored nor sent to
the server.

Signal an error if CALLBACK is not a function or if URL has no
type.  Return the buffer returned by the loader for URL's scheme."
  (url-do-setup)
  (url-gc-dead-buffers)
  (when (stringp url)
    (set-text-properties 0 (length url) nil url)
    (setq url (url-encode-url url)))
  (unless (vectorp url)
    (setq url (url-generic-parse-url url)))
  (unless (functionp callback)
    (error "Must provide a callback function to url-retrieve"))
  (unless (url-type url)
    (error "Bad url: %s" (url-recreate-url url)))
  (setf (url-silent url) silent
        (url-use-cookies url) (not inhibit-cookies))
  ;; Every 1000th call (the very first one included), remove old
  ;; entries from the URL cache.
  (when (= 0 (% url-retrieve-number-of-calls 1000))
    (condition-case err
        (url-cache-prune-cache)
      (file-error
       (message "Error when expiring the cache: %s" err))))
  (setq url-retrieve-number-of-calls (+ url-retrieve-number-of-calls 1))
  ;; A plain `let' on purpose: the dynamic binding of `url-using-proxy'
  ;; must only take effect in the body, not in the other init forms.
  (let ((handler (url-scheme-get-property (url-type url) 'loader))
        (url-using-proxy (and (url-host url)
                              (url-find-proxy-for-url url (url-host url))))
        (result nil)
        (async-p (url-scheme-get-property (url-type url) 'asynchronous-p)))
    ;; Going through a proxy is always asynchronous and handled by
    ;; `url-proxy', whatever the scheme says.
    (when url-using-proxy
      (setq async-p t
            handler 'url-proxy))
    (cond
     (async-p
      ;; An asynchronous loader takes care of calling CALLBACK itself.
      (let ((url-current-object url))
        (setq result (funcall handler url callback cbargs))))
     (t
      ;; A synchronous loader only returns a buffer (or nil); CALLBACK
      ;; is called here, in that buffer.
      (setq result (funcall handler url))
      (when result
        (with-current-buffer result
          (apply callback cbargs)))))
    (when url-history-track
      (url-history-update-url url (current-time)))
    result))
| 218 | |
;;;###autoload
(defun url-retrieve-synchronously (url &optional silent inhibit-cookies)
  "Retrieve URL synchronously.
Return the buffer containing the data, or nil if there are no data
associated with it (the case for dired, info, or mailto URLs that need
no further processing).  URL is either a string or a parsed URL.

Optional arguments SILENT and INHIBIT-COOKIES are passed on to
`url-retrieve': if SILENT is non-nil, don't message progress reports
and the like; if INHIBIT-COOKIES is non-nil, cookies will neither be
stored nor sent to the server.

The retrieval is started with `url-retrieve' and this function then
waits, accepting process output, until the callback has run or the
process serving the buffer is found to have terminated."
  (url-do-setup)

  (let ((retrieval-done nil)
        (asynch-buffer nil))
    (setq asynch-buffer
          (url-retrieve url
                        ;; The callback only records completion and the
                        ;; buffer it ran in; its arguments are unused.
                        (lambda (&rest _ignored)
                          (url-debug 'retrieval "Synchronous fetching done (%S)" (current-buffer))
                          (setq retrieval-done t
                                asynch-buffer (current-buffer)))
                        nil silent inhibit-cookies))
    (if (null asynch-buffer)
        ;; We do not need to do anything, it was a mailto or something
        ;; similar that takes processing completely outside of the URL
        ;; package.
        nil
      (let ((proc (get-buffer-process asynch-buffer)))
        ;; If the access method was synchronous, `retrieval-done' should
        ;; hopefully already be set to t.  If it is nil, and `proc' is also
        ;; nil, it implies that the async process is not running in
        ;; asynch-buffer.  This happens e.g. for FTP files.  In such a case
        ;; url-file.el should probably set something like a `url-process'
        ;; buffer-local variable so we can find the exact process that we
        ;; should be waiting for.  In the mean time, we'll just wait for any
        ;; process output.
        (while (not retrieval-done)
          (url-debug 'retrieval
                     "Spinning in url-retrieve-synchronously: %S (%S)"
                     retrieval-done asynch-buffer)
          ;; Follow the forwarding pointer if the retrieval moved to
          ;; another buffer.
          (if (buffer-local-value 'url-redirect-buffer asynch-buffer)
              (setq proc (get-buffer-process
                          (setq asynch-buffer
                                (buffer-local-value 'url-redirect-buffer
                                                    asynch-buffer))))
            (if (and proc (memq (process-status proc)
                                '(closed exit signal failed))
                     ;; Make sure another process hasn't been started.
                     (eq proc (or (get-buffer-process asynch-buffer) proc)))
                ;; FIXME: It's not clear whether url-retrieve's callback is
                ;; guaranteed to be called or not.  It seems that url-http
                ;; decides sometimes consciously not to call it, so it's not
                ;; clear that it's a bug, but even then we need to decide how
                ;; url-http can then warn us that the download has completed.
                ;; In the mean time, we use this here workaround.
                ;; XXX: The callback must always be called.  Any
                ;; exception is a bug that should be fixed, not worked
                ;; around.
                (progn ;; Call delete-process so we run any sentinel now.
                  (delete-process proc)
                  (setq retrieval-done t)))
            ;; We used to use `sit-for' here, but in some cases it wouldn't
            ;; work because apparently pending keyboard input would always
            ;; interrupt it before it got a chance to handle process input.
            ;; `sleep-for' was tried but it led to other forms of
            ;; hanging.  --Stef
            (unless (or (with-local-quit
                          (accept-process-output proc))
                        (null proc))
              ;; accept-process-output returned nil, maybe because the process
              ;; exited (and may have been replaced with another).  If we got
              ;; a quit, just stop.
              (when quit-flag
                (delete-process proc))
              (setq proc (and (not quit-flag)
                              (get-buffer-process asynch-buffer)))))))
      asynch-buffer)))
| 289 | |
(defun url-mm-callback (&rest _ignored)
  "Dissect the MIME data in the current buffer and display it.
This is the `url-retrieve' callback used by `url-mm-url'; all of its
arguments are ignored.

The current buffer is dissected with `mm-dissect-buffer' and marked
as dead, and the resulting handle is displayed from a newly generated
buffer named after `url-current-object'.  If the part is displayed
externally, that new buffer is killed again and the handle is
destroyed by the sentinel of the viewer process.  Otherwise the new
buffer is displayed and the handle is destroyed when that buffer is
killed."
  (let ((handle (mm-dissect-buffer t)))
    (url-mark-buffer-as-dead (current-buffer))
    (with-current-buffer
        (generate-new-buffer (url-recreate-url url-current-object))
      (if (eq (mm-display-part handle) 'external)
          (progn
            (set-process-sentinel
             ;; Fixme: this shouldn't have to know the form of the
             ;; undisplayer produced by `mm-display-part'.
             (get-buffer-process (cdr (mm-handle-undisplayer handle)))
             ;; A real closure over HANDLE (this file uses lexical
             ;; binding), so it gets byte-compiled like any other code.
             (lambda (_proc _event)
               (mm-destroy-parts handle)))
            (message "Viewing externally")
            (kill-buffer (current-buffer)))
        (display-buffer (current-buffer))
        ;; Buffer-local hook: release the MIME parts together with the
        ;; buffer that displays them.
        (add-hook 'kill-buffer-hook
                  (lambda () (mm-destroy-parts handle))
                  nil
                  t)))))
| 310 | |
(defun url-mm-url (url)
  "Fetch URL and hand the result to the appropriate viewing application.
The retrieval is started with `url-retrieve', using `url-mm-callback'
as the callback."
  ;; Loading these libraries could advantageously be moved to
  ;; url-mm-callback or be turned into autoloads, but I suspect that it
  ;; would introduce some bugs because loading those files from a process
  ;; sentinel or filter may result in some undesirable corner cases.
  (mapc #'require '(mm-decode mm-view))
  (url-retrieve url #'url-mm-callback nil))
| 320 | |
| 321 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 322 | ;;; Miscellaneous |
| 323 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
(defvar url-dead-buffer-list nil
  "List of buffers waiting to be killed by `url-gc-dead-buffers'.
Buffers are added to it with `url-mark-buffer-as-dead'.")

(defun url-mark-buffer-as-dead (buff)
  "Add BUFF to the front of `url-dead-buffer-list'."
  (setq url-dead-buffer-list (cons buff url-dead-buffer-list)))

(defun url-gc-dead-buffers ()
  "Kill the buffers recorded in `url-dead-buffer-list'.
Entries are popped off the list one at a time until the list is empty
or a nil entry is popped; entries that are no longer live buffers are
simply dropped."
  (let (doomed)
    (while (setq doomed (pop url-dead-buffer-list))
      (when (buffer-live-p doomed)
        (kill-buffer doomed)))))
| 334 | |
;; Define `url-warn' on top of the best warning facility available:
;; `display-warning' if it exists, else `warn', else a private buffer.
(if (fboundp 'display-warning)
    (defalias 'url-warn #'display-warning)
  (if (fboundp 'warn)
      (defun url-warn (class message &optional level)
        "Report MESSAGE for CLASS through `warn'.
LEVEL defaults to `warning'."
        (warn "(%s/%s) %s" class (or level 'warning) message))
    (defun url-warn (class message &optional level)
      "Append MESSAGE for CLASS to the buffer *URL-WARNINGS* and display it.
LEVEL defaults to `warning'."
      (let ((log (get-buffer-create "*URL-WARNINGS*")))
        (with-current-buffer log
          (goto-char (point-max))
          ;; Point is left in front of the newly inserted text.
          (save-excursion
            (insert (format "(%s/%s) %s\n" class (or level 'warning) message)))
          (display-buffer log))))))
| 348 | |
| 349 | (provide 'url) |
| 350 | |
| 351 | ;;; url.el ends here |