Commit | Line | Data |
---|---|---|
23f87bed | 1 | ;;; mm-url.el --- a wrapper of url functions/commands for Gnus |
e84b4b86 | 2 | |
e3fe4da0 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
23f87bed MB |
4 | |
5 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
6 | ||
7 | ;; This file is part of GNU Emacs. | |
8 | ||
9 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
10 | ;; it under the terms of the GNU General Public License as published | |
5a9dffec | 11 | ;; by the Free Software Foundation; either version 3, or (at your |
23f87bed MB |
12 | ;; option) any later version. |
13 | ||
14 | ;; GNU Emacs is distributed in the hope that it will be useful, but | |
15 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | ;; General Public License for more details. | |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
20 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
21 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
22 | ;; Boston, MA 02110-1301, USA. | |
23f87bed MB |
23 | |
24 | ;;; Commentary: | |
25 | ||
26 | ;; Some codes are stolen from w3 and url packages. Some are moved from | |
27 | ;; nnweb. | |
28 | ||
29 | ;; TODO: Support POST, cookie. | |
30 | ||
31 | ;;; Code: | |
32 | ||
33 | (eval-when-compile (require 'cl)) | |
34 | ||
35 | (require 'mm-util) | |
36 | (require 'gnus) | |
37 | ||
8d892d7f JB |
38 | (defvar url-current-object) |
39 | (defvar url-package-name) | |
40 | (defvar url-package-version) | |
41 | ||
23f87bed MB |
42 | (defgroup mm-url nil |
43 | "A wrapper of url package and external url command for Gnus." | |
44 | :group 'gnus) | |
45 | ||
46 | (defcustom mm-url-use-external (not | |
47 | (condition-case nil | |
48 | (require 'url) | |
49 | (error nil))) | |
50 | "*If non-nil, use external grab program `mm-url-program'." | |
bf247b6e | 51 | :version "22.1" |
23f87bed MB |
52 | :type 'boolean |
53 | :group 'mm-url) | |
54 | ||
55 | (defvar mm-url-predefined-programs | |
56 | '((wget "wget" "--user-agent=mm-url" "-q" "-O" "-") | |
57 | (w3m "w3m" "-dump_source") | |
58 | (lynx "lynx" "-source") | |
3aa84d30 | 59 | (curl "curl" "--silent" "--user-agent" "mm-url" "--location"))) |
23f87bed MB |
60 | |
61 | (defcustom mm-url-program | |
62 | (cond | |
63 | ((executable-find "wget") 'wget) | |
64 | ((executable-find "w3m") 'w3m) | |
65 | ((executable-find "lynx") 'lynx) | |
66 | ((executable-find "curl") 'curl) | |
67 | (t "GET")) | |
68 | "The url grab program. | |
69 | Likely values are `wget', `w3m', `lynx' and `curl'." | |
bf247b6e | 70 | :version "22.1" |
23f87bed MB |
71 | :type '(choice |
72 | (symbol :tag "wget" wget) | |
73 | (symbol :tag "w3m" w3m) | |
74 | (symbol :tag "lynx" lynx) | |
75 | (symbol :tag "curl" curl) | |
76 | (string :tag "other")) | |
77 | :group 'mm-url) | |
78 | ||
79 | (defcustom mm-url-arguments nil | |
80 | "The arguments for `mm-url-program'." | |
bf247b6e | 81 | :version "22.1" |
23f87bed MB |
82 | :type '(repeat string) |
83 | :group 'mm-url) | |
84 | ||
85 | \f | |
86 | ;;; Internal variables | |
87 | ||
88 | (defvar mm-url-package-name | |
89 | (gnus-replace-in-string | |
90 | (gnus-replace-in-string gnus-version " v.*$" "") | |
91 | " " "-")) | |
92 | ||
93 | (defvar mm-url-package-version gnus-version-number) | |
94 | ||
95 | ;; Stolen from w3. | |
96 | (defvar mm-url-html-entities | |
97 | '( | |
98 | ;;(excl . 33) | |
99 | (quot . 34) | |
100 | ;;(num . 35) | |
101 | ;;(dollar . 36) | |
102 | ;;(percent . 37) | |
103 | (amp . 38) | |
104 | (rsquo . 39) ; should be U+8217 | |
105 | ;;(apos . 39) | |
106 | ;;(lpar . 40) | |
107 | ;;(rpar . 41) | |
108 | ;;(ast . 42) | |
109 | ;;(plus . 43) | |
110 | ;;(comma . 44) | |
111 | ;;(period . 46) | |
112 | ;;(colon . 58) | |
113 | ;;(semi . 59) | |
114 | (lt . 60) | |
115 | ;;(equals . 61) | |
116 | (gt . 62) | |
117 | ;;(quest . 63) | |
118 | ;;(commat . 64) | |
119 | ;;(lsqb . 91) | |
120 | ;;(rsqb . 93) | |
121 | (uarr . 94) ; should be U+8593 | |
122 | ;;(lowbar . 95) | |
123 | (lsquo . 96) ; should be U+8216 | |
124 | (lcub . 123) | |
125 | ;;(verbar . 124) | |
126 | (rcub . 125) | |
127 | (tilde . 126) | |
128 | (nbsp . 160) | |
129 | (iexcl . 161) | |
130 | (cent . 162) | |
131 | (pound . 163) | |
132 | (curren . 164) | |
133 | (yen . 165) | |
134 | (brvbar . 166) | |
135 | (sect . 167) | |
136 | (uml . 168) | |
137 | (copy . 169) | |
138 | (ordf . 170) | |
139 | (laquo . 171) | |
140 | (not . 172) | |
141 | (shy . 173) | |
142 | (reg . 174) | |
143 | (macr . 175) | |
144 | (deg . 176) | |
145 | (plusmn . 177) | |
146 | (sup2 . 178) | |
147 | (sup3 . 179) | |
148 | (acute . 180) | |
149 | (micro . 181) | |
150 | (para . 182) | |
151 | (middot . 183) | |
152 | (cedil . 184) | |
153 | (sup1 . 185) | |
154 | (ordm . 186) | |
155 | (raquo . 187) | |
156 | (frac14 . 188) | |
157 | (frac12 . 189) | |
158 | (frac34 . 190) | |
159 | (iquest . 191) | |
160 | (Agrave . 192) | |
161 | (Aacute . 193) | |
162 | (Acirc . 194) | |
163 | (Atilde . 195) | |
164 | (Auml . 196) | |
165 | (Aring . 197) | |
166 | (AElig . 198) | |
167 | (Ccedil . 199) | |
168 | (Egrave . 200) | |
169 | (Eacute . 201) | |
170 | (Ecirc . 202) | |
171 | (Euml . 203) | |
172 | (Igrave . 204) | |
173 | (Iacute . 205) | |
174 | (Icirc . 206) | |
175 | (Iuml . 207) | |
176 | (ETH . 208) | |
177 | (Ntilde . 209) | |
178 | (Ograve . 210) | |
179 | (Oacute . 211) | |
180 | (Ocirc . 212) | |
181 | (Otilde . 213) | |
182 | (Ouml . 214) | |
183 | (times . 215) | |
184 | (Oslash . 216) | |
185 | (Ugrave . 217) | |
186 | (Uacute . 218) | |
187 | (Ucirc . 219) | |
188 | (Uuml . 220) | |
189 | (Yacute . 221) | |
190 | (THORN . 222) | |
191 | (szlig . 223) | |
192 | (agrave . 224) | |
193 | (aacute . 225) | |
194 | (acirc . 226) | |
195 | (atilde . 227) | |
196 | (auml . 228) | |
197 | (aring . 229) | |
198 | (aelig . 230) | |
199 | (ccedil . 231) | |
200 | (egrave . 232) | |
201 | (eacute . 233) | |
202 | (ecirc . 234) | |
203 | (euml . 235) | |
204 | (igrave . 236) | |
205 | (iacute . 237) | |
206 | (icirc . 238) | |
207 | (iuml . 239) | |
208 | (eth . 240) | |
209 | (ntilde . 241) | |
210 | (ograve . 242) | |
211 | (oacute . 243) | |
212 | (ocirc . 244) | |
213 | (otilde . 245) | |
214 | (ouml . 246) | |
215 | (divide . 247) | |
216 | (oslash . 248) | |
217 | (ugrave . 249) | |
218 | (uacute . 250) | |
219 | (ucirc . 251) | |
220 | (uuml . 252) | |
221 | (yacute . 253) | |
222 | (thorn . 254) | |
223 | (yuml . 255) | |
224 | ||
225 | ;; Special handling of these | |
226 | (frac56 . "5/6") | |
227 | (frac16 . "1/6") | |
228 | (frac45 . "4/5") | |
229 | (frac35 . "3/5") | |
230 | (frac25 . "2/5") | |
231 | (frac15 . "1/5") | |
232 | (frac23 . "2/3") | |
233 | (frac13 . "1/3") | |
234 | (frac78 . "7/8") | |
235 | (frac58 . "5/8") | |
236 | (frac38 . "3/8") | |
237 | (frac18 . "1/8") | |
238 | ||
239 | ;; The following 5 entities are not mentioned in the HTML 2.0 | |
240 | ;; standard, nor in any other HTML proposed standard of which I | |
241 | ;; am aware. I am not even sure they are ISO entity names. *** | |
242 | ;; Hence, some arrangement should be made to give a bad HTML | |
243 | ;; message when they are seen. | |
244 | (ndash . 45) | |
245 | (mdash . 45) | |
246 | (emsp . 32) | |
247 | (ensp . 32) | |
248 | (sim . 126) | |
249 | (le . "<=") | |
250 | (agr . "alpha") | |
251 | (rdquo . "''") | |
252 | (ldquo . "``") | |
253 | (trade . "(TM)") | |
254 | ;; To be done | |
255 | ;; (shy . ????) ; soft hyphen | |
256 | ) | |
257 | "*An assoc list of entity names and how to actually display them.") | |
258 | ||
259 | (defconst mm-url-unreserved-chars | |
260 | '( | |
261 | ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z | |
262 | ?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z | |
263 | ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 | |
264 | ?- ?_ ?. ?! ?~ ?* ?' ?\( ?\)) | |
265 | "A list of characters that are _NOT_ reserved in the URL spec. | |
266 | This is taken from RFC 2396.") | |
267 | ||
268 | (defun mm-url-load-url () | |
269 | "Load `url-insert-file-contents'." | |
270 | (unless (condition-case () | |
e2328c7a RS |
271 | (progn |
272 | (require 'url-handlers) | |
273 | (require 'url-parse) | |
274 | (require 'url-vars)) | |
23f87bed MB |
275 | (error nil)) |
276 | ;; w3-4.0pre0.46 or earlier version. | |
277 | (require 'w3-vars) | |
278 | (require 'url))) | |
279 | ||
280 | ;;;###autoload | |
281 | (defun mm-url-insert-file-contents (url) | |
282 | "Insert file contents of URL. | |
283 | If `mm-url-use-external' is non-nil, use `mm-url-program'." | |
284 | (if mm-url-use-external | |
285 | (progn | |
286 | (if (string-match "^file:/+" url) | |
287 | (insert-file-contents (substring url (1- (match-end 0)))) | |
288 | (mm-url-insert-file-contents-external url)) | |
289 | (goto-char (point-min)) | |
290 | (if (fboundp 'url-generic-parse-url) | |
291 | (setq url-current-object | |
292 | (url-generic-parse-url url))) | |
293 | (list url (buffer-size))) | |
294 | (mm-url-load-url) | |
295 | (let ((name buffer-file-name) | |
bd876f90 MB |
296 | (url-request-extra-headers |
297 | ;; ISTM setting a Connection header was a workaround for | |
298 | ;; older versions of url included with w3, but it does more | |
299 | ;; harm than good with the one shipped with Emacs. --ansel | |
300 | (if (not (and (boundp 'url-version) | |
301 | (equal url-version "Emacs"))) | |
302 | (list (cons "Connection" "Close")))) | |
23f87bed MB |
303 | (url-package-name (or mm-url-package-name |
304 | url-package-name)) | |
305 | (url-package-version (or mm-url-package-version | |
306 | url-package-version)) | |
307 | result) | |
308 | (setq result (url-insert-file-contents url)) | |
309 | (save-excursion | |
310 | (goto-char (point-min)) | |
311 | (while (re-search-forward "\r 1000\r ?" nil t) | |
312 | (replace-match ""))) | |
313 | (setq buffer-file-name name) | |
314 | (if (and (fboundp 'url-generic-parse-url) | |
315 | (listp result)) | |
316 | (setq url-current-object (url-generic-parse-url | |
317 | (car result)))) | |
318 | result))) | |
319 | ||
320 | ;;;###autoload | |
321 | (defun mm-url-insert-file-contents-external (url) | |
322 | "Insert file contents of URL using `mm-url-program'." | |
323 | (let (program args) | |
324 | (if (symbolp mm-url-program) | |
325 | (let ((item (cdr (assq mm-url-program mm-url-predefined-programs)))) | |
326 | (setq program (car item) | |
327 | args (append (cdr item) (list url)))) | |
328 | (setq program mm-url-program | |
329 | args (append mm-url-arguments (list url)))) | |
330 | (unless (eq 0 (apply 'call-process program nil t nil args)) | |
331 | (error "Couldn't fetch %s" url)))) | |
332 | ||
333 | (defvar mm-url-timeout 30 | |
334 | "The number of seconds before timing out an URL fetch.") | |
335 | ||
336 | (defvar mm-url-retries 10 | |
337 | "The number of retries after timing out when fetching an URL.") | |
338 | ||
339 | (defun mm-url-insert (url &optional follow-refresh) | |
340 | "Insert the contents from an URL in the current buffer. | |
341 | If FOLLOW-REFRESH is non-nil, redirect refresh url in META." | |
342 | (let ((times mm-url-retries) | |
343 | (done nil) | |
344 | (first t) | |
345 | result) | |
346 | (while (and (not (zerop (decf times))) | |
347 | (not done)) | |
348 | (with-timeout (mm-url-timeout) | |
349 | (unless first | |
350 | (message "Trying again (%s)..." (- mm-url-retries times))) | |
351 | (setq first nil) | |
352 | (if follow-refresh | |
353 | (save-restriction | |
354 | (narrow-to-region (point) (point)) | |
355 | (mm-url-insert-file-contents url) | |
356 | (goto-char (point-min)) | |
357 | (when (re-search-forward | |
358 | "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) | |
359 | (let ((url (match-string 1))) | |
360 | (delete-region (point-min) (point-max)) | |
361 | (setq result (mm-url-insert url t))))) | |
362 | (setq result (mm-url-insert-file-contents url))) | |
363 | (setq done t))) | |
364 | result)) | |
365 | ||
366 | (defun mm-url-decode-entities () | |
367 | "Decode all HTML entities." | |
368 | (goto-char (point-min)) | |
5fae1cae | 369 | (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+[0-9]*\\);" nil t) |
23f87bed MB |
370 | (let ((elem (if (eq (aref (match-string 1) 0) ?\#) |
371 | (let ((c | |
372 | (string-to-number (substring | |
373 | (match-string 1) 1)))) | |
374 | (if (mm-char-or-char-int-p c) c 32)) | |
375 | (or (cdr (assq (intern (match-string 1)) | |
376 | mm-url-html-entities)) | |
377 | ?#)))) | |
378 | (unless (stringp elem) | |
379 | (setq elem (char-to-string elem))) | |
380 | (replace-match elem t t)))) | |
381 | ||
382 | (defun mm-url-decode-entities-nbsp () | |
383 | "Decode all HTML entities and to a space." | |
384 | (let ((mm-url-html-entities (cons '(nbsp . 32) mm-url-html-entities))) | |
385 | (mm-url-decode-entities))) | |
386 | ||
387 | (defun mm-url-decode-entities-string (string) | |
388 | (with-temp-buffer | |
389 | (insert string) | |
390 | (mm-url-decode-entities) | |
391 | (buffer-string))) | |
392 | ||
393 | (defun mm-url-form-encode-xwfu (chunk) | |
394 | "Escape characters in a string for application/x-www-form-urlencoded. | |
395 | Blasphemous crap because someone didn't think %20 was good enough for encoding | |
396 | spaces. Die Die Die." | |
397 | ;; This will get rid of the 'attributes' specified by the file type, | |
398 | ;; which are useless for an application/x-www-form-urlencoded form. | |
399 | (if (consp chunk) | |
400 | (setq chunk (cdr chunk))) | |
401 | ||
402 | (mapconcat | |
403 | (lambda (char) | |
404 | (cond | |
405 | ((= char ? ) "+") | |
406 | ((memq char mm-url-unreserved-chars) (char-to-string char)) | |
407 | (t (upcase (format "%%%02x" char))))) | |
408 | ;; Fixme: Should this actually be accepting multibyte? Is there a | |
409 | ;; better way in XEmacs? | |
410 | (if (featurep 'mule) | |
411 | (encode-coding-string chunk | |
412 | (if (fboundp 'find-coding-systems-string) | |
413 | (car (find-coding-systems-string chunk)) | |
414 | buffer-file-coding-system)) | |
415 | chunk) | |
416 | "")) | |
417 | ||
418 | (defun mm-url-encode-www-form-urlencoded (pairs) | |
419 | "Return PAIRS encoded for forms." | |
420 | (mapconcat | |
421 | (lambda (data) | |
422 | (concat (mm-url-form-encode-xwfu (car data)) "=" | |
423 | (mm-url-form-encode-xwfu (cdr data)))) | |
424 | pairs "&")) | |
425 | ||
426 | (defun mm-url-fetch-form (url pairs) | |
427 | "Fetch a form from URL with PAIRS as the data using the POST method." | |
428 | (mm-url-load-url) | |
429 | (let ((url-request-data (mm-url-encode-www-form-urlencoded pairs)) | |
430 | (url-request-method "POST") | |
431 | (url-request-extra-headers | |
432 | '(("Content-type" . "application/x-www-form-urlencoded")))) | |
433 | (url-insert-file-contents url) | |
434 | (setq buffer-file-name nil)) | |
435 | t) | |
436 | ||
437 | (defun mm-url-fetch-simple (url content) | |
438 | (mm-url-load-url) | |
439 | (let ((url-request-data content) | |
440 | (url-request-method "POST") | |
441 | (url-request-extra-headers | |
442 | '(("Content-type" . "application/x-www-form-urlencoded")))) | |
443 | (url-insert-file-contents url) | |
444 | (setq buffer-file-name nil)) | |
445 | t) | |
446 | ||
447 | (defun mm-url-remove-markup () | |
448 | "Remove all HTML markup, leaving just plain text." | |
449 | (goto-char (point-min)) | |
450 | (while (search-forward "<!--" nil t) | |
451 | (delete-region (match-beginning 0) | |
452 | (or (search-forward "-->" nil t) | |
453 | (point-max)))) | |
454 | (goto-char (point-min)) | |
455 | (while (re-search-forward "<[^>]+>" nil t) | |
456 | (replace-match "" t t))) | |
457 | ||
458 | (provide 'mm-url) | |
459 | ||
cbee283d | 460 | ;; arch-tag: 0594f9b3-417c-48b0-adc2-5082e1e7917f |
23f87bed | 461 | ;;; mm-url.el ends here |