;;; url-queue.el --- Fetching web pages in parallel

;; Copyright (C) 2011-2014 Free Software Foundation, Inc.

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: comm

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; The point of this package is to allow fetching web pages in
;; parallel -- but control the level of parallelism to avoid DoS-ing
;; web servers and Emacs.

;;; Code:

(eval-when-compile (require 'cl-lib))
(require 'browse-url)
(require 'url-parse)

;; Upper bound on simultaneous fetches; enforced by the queue runners.
(defcustom url-queue-parallel-processes 6
  "The number of concurrent url-queue retrieval processes."
  :version "24.1"
  :type 'integer
  :group 'url)
40 | ||
;; Jobs running longer than this are killed by `url-queue-prune-old-entries'.
(defcustom url-queue-timeout 5
  "How long to let a job live once it's started (in seconds)."
  :version "24.1"
  :type 'integer
  :group 'url)
46 | ||
;;; Internal variables.

;; FIFO list of `url-queue' job structs.  Jobs are appended at the tail
;; by `url-queue-retrieve' and removed when they finish, fail, or time out.
(defvar url-queue nil)
50 | ||
;; One queued fetch job.
(cl-defstruct url-queue
  url                   ; URL to fetch
  callback              ; function applied to (status . cbargs) when done
  cbargs                ; extra arguments passed to CALLBACK
  silentp               ; SILENT argument to pass to `url-retrieve'
  buffer                ; buffer returned by `url-retrieve', or nil
  start-time            ; `float-time' when started, or nil while queued
  pre-triggered         ; non-nil if a queue runner was already scheduled
  inhibit-cookiesp)     ; INHIBIT-COOKIES argument to pass to `url-retrieve'
5c77c3ed | 55 | |
;;;###autoload
(defun url-queue-retrieve (url callback &optional cbargs silent inhibit-cookies)
  "Retrieve URL asynchronously and call CALLBACK with CBARGS when finished.
This is like `url-retrieve' (which see for details of the arguments),
but with limits on the degree of parallelism.  The variable
`url-queue-parallel-processes' sets the number of concurrent processes.
The variable `url-queue-timeout' sets a timeout."
  ;; Append at the tail so jobs are fetched in FIFO order.
  (let ((job (make-url-queue :url url
                             :callback callback
                             :cbargs cbargs
                             :silentp silent
                             :inhibit-cookiesp inhibit-cookies)))
    (setq url-queue (append url-queue (list job))))
  (url-queue-setup-runners))
71 | ||
;; To ensure asynchronous behavior, we start the required number of
;; queue runners from `run-with-idle-timer'.  So we're basically going
;; through the queue in two ways: 1) synchronously when a program
;; calls `url-queue-retrieve' (which will then start the required
;; number of queue runners), and 2) at the exit of each job, which
;; will then not start any further threads, but just reuse the
;; previous "slot".
79 | ||
(defun url-queue-setup-runners ()
  "Schedule a queue runner if there is spare capacity.
Count jobs that are running or already have a runner scheduled; when
that count is below `url-queue-parallel-processes' and an idle job
exists, mark it pre-triggered and fire `url-queue-run-queue' from an
idle timer so the fetch starts asynchronously."
  (let ((active 0)
        (next nil))
    (dolist (job url-queue)
      (if (or (url-queue-start-time job)
              (url-queue-pre-triggered job))
          (cl-incf active)
        ;; Remember only the first idle job -- FIFO order.
        (unless next
          (setq next job))))
    (when (and next
               (< active url-queue-parallel-processes))
      (setf (url-queue-pre-triggered next) t)
      (run-with-idle-timer 0.01 nil #'url-queue-run-queue))))
(defun url-queue-run-queue ()
  "Prune timed-out jobs, then start the first unstarted job if a slot is free."
  (url-queue-prune-old-entries)
  (let ((active 0)
        (next nil))
    (dolist (job url-queue)
      (if (url-queue-start-time job)
          (cl-incf active)
        ;; Remember only the first unstarted job -- FIFO order.
        (unless next
          (setq next job))))
    (when (and next
               (< active url-queue-parallel-processes))
      (setf (url-queue-start-time next) (float-time))
      (url-queue-start-retrieve next))))
109 | ||
(defun url-queue-callback-function (status job)
  "Finish JOB: dequeue it, react to connection errors, and run its callback.
STATUS is the status list handed to us by `url-retrieve'."
  (setq url-queue (delq job url-queue))
  ;; If we get a connection error, then flush all other jobs for the
  ;; same host from the queue.  This particularly makes sense if the
  ;; error really is a DNS resolver issue, which happens synchronously
  ;; and totally halts Emacs.
  (let ((err (cadr status)))
    (when (and (eq (car status) :error)
               (eq (cadr err) 'connection-failed))
      (url-queue-remove-jobs-from-host
       (plist-get (nthcdr 3 err) :host))))
  (url-queue-run-queue)
  (apply (url-queue-callback job) status (url-queue-cbargs job)))
122 | ||
(defun url-queue-remove-jobs-from-host (host)
  "Kill and drop every queued job whose URL points at HOST."
  (let ((doomed nil))
    ;; Collect first, then delete, so we never mutate `url-queue'
    ;; while `dolist' is walking it.
    (dolist (job url-queue)
      (when (equal host
                   (url-host (url-generic-parse-url (url-queue-url job))))
        (push job doomed)))
    (dolist (job doomed)
      (url-queue-kill-job job)
      (setq url-queue (delq job url-queue)))))
b74c9672 | 132 | |
(defun url-queue-start-retrieve (job)
  "Start fetching JOB's URL via `url-retrieve', recording its buffer.
Errors signaled while starting the fetch are suppressed, leaving
JOB's buffer slot nil."
  (setf (url-queue-buffer job)
        (ignore-errors
          (url-retrieve (url-queue-url job)
                        #'url-queue-callback-function (list job)
                        (url-queue-silentp job)
                        (url-queue-inhibit-cookiesp job)))))
5c77c3ed LMI |
140 | |
(defun url-queue-prune-old-entries ()
  "Kill and drop every started job that has outlived `url-queue-timeout'."
  (let ((expired nil))
    ;; Collect first, then delete, so we never mutate `url-queue'
    ;; while `dolist' is walking it.
    (dolist (job url-queue)
      (let ((started (url-queue-start-time job)))
        (when (and started
                   (> (- (float-time) started) url-queue-timeout))
          (push job expired))))
    (dolist (job expired)
      (url-queue-kill-job job)
      (setq url-queue (delq job url-queue)))))
5c77c3ed | 152 | |
(defun url-queue-kill-job (job)
  "Forcibly terminate JOB and notify its callback of the failure.
Detaches and deletes any network process attached to JOB's buffer,
then applies JOB's callback to an `url-queue-timeout' error status."
  (when (bufferp (url-queue-buffer job))
    (let (process)
      (while (setq process (get-buffer-process (url-queue-buffer job)))
        ;; Silence the sentinel so deleting the process does not run
        ;; the normal completion machinery.
        (set-process-sentinel process 'ignore)
        (ignore-errors
          (delete-process process)))))
  ;; Call the callback with an error message to ensure that the caller
  ;; is notified that the job has failed.
  (with-current-buffer
      (if (and (bufferp (url-queue-buffer job))
               (buffer-live-p (url-queue-buffer job)))
          ;; Use the (partially filled) process buffer if it exists.
          (url-queue-buffer job)
        ;; If not, just create a new buffer, which will probably be
        ;; killed again by the caller.
        (generate-new-buffer " *temp*"))
    (apply (url-queue-callback job)
           (cons (list :error (list 'error 'url-queue-timeout
                                    "Queue timeout exceeded"))
                 (url-queue-cbargs job)))))
f15bcb40 | 174 | |
(provide 'url-queue)

;;; url-queue.el ends here