Commit | Line | Data |
---|---|---|
23f87bed | 1 | ;;; mm-url.el --- a wrapper of url functions/commands for Gnus |
e84b4b86 | 2 | |
ba318903 | 3 | ;; Copyright (C) 2001-2014 Free Software Foundation, Inc. |
23f87bed MB |
4 | |
5 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
6 | ||
7 | ;; This file is part of GNU Emacs. | |
8 | ||
5e809f55 GM |
9 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
10 | ;; it under the terms of the GNU General Public License as published by | |
11 | ;; the Free Software Foundation, either version 3 of the License, or | |
12 | ;; (at your option) any later version. | |
23f87bed | 13 | |
5e809f55 GM |
14 | ;; GNU Emacs is distributed in the hope that it will be useful, |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | ;; GNU General Public License for more details. | |
23f87bed MB |
18 | |
19 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
23f87bed MB |
21 | |
22 | ;;; Commentary: | |
23 | ||
4d2226bf | 24 | ;; Some code is stolen from w3 and url packages. Some are moved from |
23f87bed MB |
25 | ;; nnweb. |
26 | ||
27 | ;; TODO: Support POST, cookie. | |
28 | ||
29 | ;;; Code: | |
30 | ||
31 | (eval-when-compile (require 'cl)) | |
32 | ||
33 | (require 'mm-util) | |
34 | (require 'gnus) | |
35 | ||
8d892d7f JB |
36 | (defvar url-current-object) |
37 | (defvar url-package-name) | |
38 | (defvar url-package-version) | |
39 | ||
23f87bed MB |
40 | (defgroup mm-url nil |
41 | "A wrapper of url package and external url command for Gnus." | |
42 | :group 'gnus) | |
43 | ||
44 | (defcustom mm-url-use-external (not | |
45 | (condition-case nil | |
46 | (require 'url) | |
47 | (error nil))) | |
48 | "*If non-nil, use external grab program `mm-url-program'." | |
bf247b6e | 49 | :version "22.1" |
23f87bed MB |
50 | :type 'boolean |
51 | :group 'mm-url) | |
52 | ||
53 | (defvar mm-url-predefined-programs | |
54 | '((wget "wget" "--user-agent=mm-url" "-q" "-O" "-") | |
55 | (w3m "w3m" "-dump_source") | |
56 | (lynx "lynx" "-source") | |
3aa84d30 | 57 | (curl "curl" "--silent" "--user-agent" "mm-url" "--location"))) |
23f87bed MB |
58 | |
59 | (defcustom mm-url-program | |
60 | (cond | |
61 | ((executable-find "wget") 'wget) | |
62 | ((executable-find "w3m") 'w3m) | |
63 | ((executable-find "lynx") 'lynx) | |
64 | ((executable-find "curl") 'curl) | |
65 | (t "GET")) | |
66 | "The url grab program. | |
67 | Likely values are `wget', `w3m', `lynx' and `curl'." | |
bf247b6e | 68 | :version "22.1" |
23f87bed MB |
69 | :type '(choice |
70 | (symbol :tag "wget" wget) | |
71 | (symbol :tag "w3m" w3m) | |
72 | (symbol :tag "lynx" lynx) | |
73 | (symbol :tag "curl" curl) | |
74 | (string :tag "other")) | |
75 | :group 'mm-url) | |
76 | ||
77 | (defcustom mm-url-arguments nil | |
78 | "The arguments for `mm-url-program'." | |
bf247b6e | 79 | :version "22.1" |
23f87bed MB |
80 | :type '(repeat string) |
81 | :group 'mm-url) | |
82 | ||
83 | \f | |
84 | ;;; Internal variables | |
85 | ||
23f87bed MB |
86 | ;; Stolen from w3. |
87 | (defvar mm-url-html-entities | |
88 | '( | |
89 | ;;(excl . 33) | |
90 | (quot . 34) | |
91 | ;;(num . 35) | |
92 | ;;(dollar . 36) | |
93 | ;;(percent . 37) | |
94 | (amp . 38) | |
95 | (rsquo . 39) ; should be U+8217 | |
96 | ;;(apos . 39) | |
97 | ;;(lpar . 40) | |
98 | ;;(rpar . 41) | |
99 | ;;(ast . 42) | |
100 | ;;(plus . 43) | |
101 | ;;(comma . 44) | |
102 | ;;(period . 46) | |
103 | ;;(colon . 58) | |
104 | ;;(semi . 59) | |
105 | (lt . 60) | |
106 | ;;(equals . 61) | |
107 | (gt . 62) | |
108 | ;;(quest . 63) | |
109 | ;;(commat . 64) | |
110 | ;;(lsqb . 91) | |
111 | ;;(rsqb . 93) | |
112 | (uarr . 94) ; should be U+8593 | |
113 | ;;(lowbar . 95) | |
114 | (lsquo . 96) ; should be U+8216 | |
115 | (lcub . 123) | |
116 | ;;(verbar . 124) | |
117 | (rcub . 125) | |
118 | (tilde . 126) | |
119 | (nbsp . 160) | |
120 | (iexcl . 161) | |
121 | (cent . 162) | |
122 | (pound . 163) | |
123 | (curren . 164) | |
124 | (yen . 165) | |
125 | (brvbar . 166) | |
126 | (sect . 167) | |
127 | (uml . 168) | |
128 | (copy . 169) | |
129 | (ordf . 170) | |
130 | (laquo . 171) | |
131 | (not . 172) | |
132 | (shy . 173) | |
133 | (reg . 174) | |
134 | (macr . 175) | |
135 | (deg . 176) | |
136 | (plusmn . 177) | |
137 | (sup2 . 178) | |
138 | (sup3 . 179) | |
139 | (acute . 180) | |
140 | (micro . 181) | |
141 | (para . 182) | |
142 | (middot . 183) | |
143 | (cedil . 184) | |
144 | (sup1 . 185) | |
145 | (ordm . 186) | |
146 | (raquo . 187) | |
147 | (frac14 . 188) | |
148 | (frac12 . 189) | |
149 | (frac34 . 190) | |
150 | (iquest . 191) | |
151 | (Agrave . 192) | |
152 | (Aacute . 193) | |
153 | (Acirc . 194) | |
154 | (Atilde . 195) | |
155 | (Auml . 196) | |
156 | (Aring . 197) | |
157 | (AElig . 198) | |
158 | (Ccedil . 199) | |
159 | (Egrave . 200) | |
160 | (Eacute . 201) | |
161 | (Ecirc . 202) | |
162 | (Euml . 203) | |
163 | (Igrave . 204) | |
164 | (Iacute . 205) | |
165 | (Icirc . 206) | |
166 | (Iuml . 207) | |
167 | (ETH . 208) | |
168 | (Ntilde . 209) | |
169 | (Ograve . 210) | |
170 | (Oacute . 211) | |
171 | (Ocirc . 212) | |
172 | (Otilde . 213) | |
173 | (Ouml . 214) | |
174 | (times . 215) | |
175 | (Oslash . 216) | |
176 | (Ugrave . 217) | |
177 | (Uacute . 218) | |
178 | (Ucirc . 219) | |
179 | (Uuml . 220) | |
180 | (Yacute . 221) | |
181 | (THORN . 222) | |
182 | (szlig . 223) | |
183 | (agrave . 224) | |
184 | (aacute . 225) | |
185 | (acirc . 226) | |
186 | (atilde . 227) | |
187 | (auml . 228) | |
188 | (aring . 229) | |
189 | (aelig . 230) | |
190 | (ccedil . 231) | |
191 | (egrave . 232) | |
192 | (eacute . 233) | |
193 | (ecirc . 234) | |
194 | (euml . 235) | |
195 | (igrave . 236) | |
196 | (iacute . 237) | |
197 | (icirc . 238) | |
198 | (iuml . 239) | |
199 | (eth . 240) | |
200 | (ntilde . 241) | |
201 | (ograve . 242) | |
202 | (oacute . 243) | |
203 | (ocirc . 244) | |
204 | (otilde . 245) | |
205 | (ouml . 246) | |
206 | (divide . 247) | |
207 | (oslash . 248) | |
208 | (ugrave . 249) | |
209 | (uacute . 250) | |
210 | (ucirc . 251) | |
211 | (uuml . 252) | |
212 | (yacute . 253) | |
213 | (thorn . 254) | |
214 | (yuml . 255) | |
215 | ||
216 | ;; Special handling of these | |
217 | (frac56 . "5/6") | |
218 | (frac16 . "1/6") | |
219 | (frac45 . "4/5") | |
220 | (frac35 . "3/5") | |
221 | (frac25 . "2/5") | |
222 | (frac15 . "1/5") | |
223 | (frac23 . "2/3") | |
224 | (frac13 . "1/3") | |
225 | (frac78 . "7/8") | |
226 | (frac58 . "5/8") | |
227 | (frac38 . "3/8") | |
228 | (frac18 . "1/8") | |
229 | ||
230 | ;; The following 5 entities are not mentioned in the HTML 2.0 | |
231 | ;; standard, nor in any other HTML proposed standard of which I | |
232 | ;; am aware. I am not even sure they are ISO entity names. *** | |
233 | ;; Hence, some arrangement should be made to give a bad HTML | |
234 | ;; message when they are seen. | |
235 | (ndash . 45) | |
236 | (mdash . 45) | |
237 | (emsp . 32) | |
238 | (ensp . 32) | |
239 | (sim . 126) | |
240 | (le . "<=") | |
241 | (agr . "alpha") | |
242 | (rdquo . "''") | |
243 | (ldquo . "``") | |
244 | (trade . "(TM)") | |
245 | ;; To be done | |
246 | ;; (shy . ????) ; soft hyphen | |
247 | ) | |
248 | "*An assoc list of entity names and how to actually display them.") | |
249 | ||
250 | (defconst mm-url-unreserved-chars | |
251 | '( | |
252 | ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z | |
253 | ?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z | |
254 | ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 | |
255 | ?- ?_ ?. ?! ?~ ?* ?' ?\( ?\)) | |
256 | "A list of characters that are _NOT_ reserved in the URL spec. | |
257 | This is taken from RFC 2396.") | |
258 | ||
259 | (defun mm-url-load-url () | |
260 | "Load `url-insert-file-contents'." | |
261 | (unless (condition-case () | |
e2328c7a RS |
262 | (progn |
263 | (require 'url-handlers) | |
264 | (require 'url-parse) | |
265 | (require 'url-vars)) | |
23f87bed | 266 | (error nil)) |
23f87bed MB |
267 | (require 'url))) |
268 | ||
269 | ;;;###autoload | |
270 | (defun mm-url-insert-file-contents (url) | |
271 | "Insert file contents of URL. | |
272 | If `mm-url-use-external' is non-nil, use `mm-url-program'." | |
273 | (if mm-url-use-external | |
274 | (progn | |
275 | (if (string-match "^file:/+" url) | |
276 | (insert-file-contents (substring url (1- (match-end 0)))) | |
277 | (mm-url-insert-file-contents-external url)) | |
278 | (goto-char (point-min)) | |
279 | (if (fboundp 'url-generic-parse-url) | |
280 | (setq url-current-object | |
281 | (url-generic-parse-url url))) | |
282 | (list url (buffer-size))) | |
283 | (mm-url-load-url) | |
284 | (let ((name buffer-file-name) | |
bd876f90 MB |
285 | (url-request-extra-headers |
286 | ;; ISTM setting a Connection header was a workaround for | |
287 | ;; older versions of url included with w3, but it does more | |
288 | ;; harm than good with the one shipped with Emacs. --ansel | |
289 | (if (not (and (boundp 'url-version) | |
290 | (equal url-version "Emacs"))) | |
291 | (list (cons "Connection" "Close")))) | |
23f87bed MB |
292 | result) |
293 | (setq result (url-insert-file-contents url)) | |
294 | (save-excursion | |
295 | (goto-char (point-min)) | |
296 | (while (re-search-forward "\r 1000\r ?" nil t) | |
297 | (replace-match ""))) | |
298 | (setq buffer-file-name name) | |
299 | (if (and (fboundp 'url-generic-parse-url) | |
300 | (listp result)) | |
301 | (setq url-current-object (url-generic-parse-url | |
302 | (car result)))) | |
303 | result))) | |
304 | ||
305 | ;;;###autoload | |
306 | (defun mm-url-insert-file-contents-external (url) | |
307 | "Insert file contents of URL using `mm-url-program'." | |
308 | (let (program args) | |
309 | (if (symbolp mm-url-program) | |
310 | (let ((item (cdr (assq mm-url-program mm-url-predefined-programs)))) | |
311 | (setq program (car item) | |
312 | args (append (cdr item) (list url)))) | |
313 | (setq program mm-url-program | |
314 | args (append mm-url-arguments (list url)))) | |
315 | (unless (eq 0 (apply 'call-process program nil t nil args)) | |
316 | (error "Couldn't fetch %s" url)))) | |
317 | ||
318 | (defvar mm-url-timeout 30 | |
319 | "The number of seconds before timing out an URL fetch.") | |
320 | ||
321 | (defvar mm-url-retries 10 | |
322 | "The number of retries after timing out when fetching an URL.") | |
323 | ||
324 | (defun mm-url-insert (url &optional follow-refresh) | |
325 | "Insert the contents from an URL in the current buffer. | |
326 | If FOLLOW-REFRESH is non-nil, redirect refresh url in META." | |
327 | (let ((times mm-url-retries) | |
328 | (done nil) | |
329 | (first t) | |
330 | result) | |
331 | (while (and (not (zerop (decf times))) | |
332 | (not done)) | |
333 | (with-timeout (mm-url-timeout) | |
334 | (unless first | |
335 | (message "Trying again (%s)..." (- mm-url-retries times))) | |
336 | (setq first nil) | |
337 | (if follow-refresh | |
338 | (save-restriction | |
339 | (narrow-to-region (point) (point)) | |
340 | (mm-url-insert-file-contents url) | |
341 | (goto-char (point-min)) | |
342 | (when (re-search-forward | |
343 | "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) | |
344 | (let ((url (match-string 1))) | |
345 | (delete-region (point-min) (point-max)) | |
346 | (setq result (mm-url-insert url t))))) | |
347 | (setq result (mm-url-insert-file-contents url))) | |
348 | (setq done t))) | |
349 | result)) | |
350 | ||
351 | (defun mm-url-decode-entities () | |
352 | "Decode all HTML entities." | |
353 | (goto-char (point-min)) | |
c912b478 KY |
354 | (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" |
355 | nil t) | |
8bff7c00 KY |
356 | (let* ((entity (match-string 1)) |
357 | (elem (if (eq (aref entity 0) ?\#) | |
c912b478 KY |
358 | (let ((c |
359 | ;; Hex number: ㈒ | |
360 | (if (eq (aref entity 1) ?x) | |
361 | (string-to-number (substring entity 2) | |
362 | 16) | |
363 | ;; Decimal number:  | |
364 | (string-to-number (substring entity 1))))) | |
365 | (setq c (or (cdr (assq c mm-extra-numeric-entities)) | |
366 | (mm-ucs-to-char c))) | |
8bff7c00 KY |
367 | (if (mm-char-or-char-int-p c) c ?#)) |
368 | (or (cdr (assq (intern entity) | |
369 | mm-url-html-entities)) | |
370 | ?#)))) | |
23f87bed MB |
371 | (unless (stringp elem) |
372 | (setq elem (char-to-string elem))) | |
373 | (replace-match elem t t)))) | |
374 | ||
375 | (defun mm-url-decode-entities-nbsp () | |
376 | "Decode all HTML entities and to a space." | |
377 | (let ((mm-url-html-entities (cons '(nbsp . 32) mm-url-html-entities))) | |
378 | (mm-url-decode-entities))) | |
379 | ||
380 | (defun mm-url-decode-entities-string (string) | |
381 | (with-temp-buffer | |
382 | (insert string) | |
383 | (mm-url-decode-entities) | |
384 | (buffer-string))) | |
385 | ||
386 | (defun mm-url-form-encode-xwfu (chunk) | |
387 | "Escape characters in a string for application/x-www-form-urlencoded. | |
388 | Blasphemous crap because someone didn't think %20 was good enough for encoding | |
389 | spaces. Die Die Die." | |
390 | ;; This will get rid of the 'attributes' specified by the file type, | |
391 | ;; which are useless for an application/x-www-form-urlencoded form. | |
392 | (if (consp chunk) | |
393 | (setq chunk (cdr chunk))) | |
394 | ||
395 | (mapconcat | |
396 | (lambda (char) | |
397 | (cond | |
398 | ((= char ? ) "+") | |
399 | ((memq char mm-url-unreserved-chars) (char-to-string char)) | |
400 | (t (upcase (format "%%%02x" char))))) | |
765d4319 KY |
401 | (mm-encode-coding-string chunk |
402 | (if (fboundp 'find-coding-systems-string) | |
403 | (car (find-coding-systems-string chunk)) | |
404 | buffer-file-coding-system)) | |
23f87bed MB |
405 | "")) |
406 | ||
407 | (defun mm-url-encode-www-form-urlencoded (pairs) | |
408 | "Return PAIRS encoded for forms." | |
409 | (mapconcat | |
410 | (lambda (data) | |
411 | (concat (mm-url-form-encode-xwfu (car data)) "=" | |
412 | (mm-url-form-encode-xwfu (cdr data)))) | |
413 | pairs "&")) | |
414 | ||
8e7d4ca1 GM |
415 | (autoload 'mml-compute-boundary "mml") |
416 | ||
23f87bed MB |
417 | (defun mm-url-remove-markup () |
418 | "Remove all HTML markup, leaving just plain text." | |
419 | (goto-char (point-min)) | |
420 | (while (search-forward "<!--" nil t) | |
421 | (delete-region (match-beginning 0) | |
422 | (or (search-forward "-->" nil t) | |
423 | (point-max)))) | |
424 | (goto-char (point-min)) | |
425 | (while (re-search-forward "<[^>]+>" nil t) | |
426 | (replace-match "" t t))) | |
427 | ||
428 | (provide 'mm-url) | |
429 | ||
23f87bed | 430 | ;;; mm-url.el ends here |