Commit | Line | Data |
---|---|---|
23f87bed MB |
1 | ;;; mm-url.el --- a wrapper of url functions/commands for Gnus |
2 | ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. | |
3 | ||
4 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
5 | ||
6 | ;; This file is part of GNU Emacs. | |
7 | ||
8 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 | ;; it under the terms of the GNU General Public License as published | |
10 | ;; by the Free Software Foundation; either version 2, or (at your | |
11 | ;; option) any later version. | |
12 | ||
13 | ;; GNU Emacs is distributed in the hope that it will be useful, but | |
14 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | ;; General Public License for more details. | |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
19 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 | ;; Boston, MA 02111-1307, USA. | |
22 | ||
23 | ;;; Commentary: | |
24 | ||
25 | ;; Some codes are stolen from w3 and url packages. Some are moved from | |
26 | ;; nnweb. | |
27 | ||
28 | ;; TODO: Support POST, cookie. | |
29 | ||
30 | ;;; Code: | |
31 | ||
32 | (eval-when-compile (require 'cl)) | |
33 | ||
34 | (require 'mm-util) | |
35 | (require 'gnus) | |
36 | ||
37 | (eval-and-compile | |
38 | (autoload 'executable-find "executable")) | |
39 | ||
40 | (eval-when-compile | |
fa9a04e1 MB |
41 | (if (featurep 'xemacs) |
42 | (require 'timer-funcs) | |
43 | (require 'timer))) | |
23f87bed MB |
44 | |
45 | (defgroup mm-url nil | |
46 | "A wrapper of url package and external url command for Gnus." | |
47 | :group 'gnus) | |
48 | ||
49 | (defcustom mm-url-use-external (not | |
50 | (condition-case nil | |
51 | (require 'url) | |
52 | (error nil))) | |
53 | "*If non-nil, use external grab program `mm-url-program'." | |
bf247b6e | 54 | :version "22.1" |
23f87bed MB |
55 | :type 'boolean |
56 | :group 'mm-url) | |
57 | ||
58 | (defvar mm-url-predefined-programs | |
59 | '((wget "wget" "--user-agent=mm-url" "-q" "-O" "-") | |
60 | (w3m "w3m" "-dump_source") | |
61 | (lynx "lynx" "-source") | |
62 | (curl "curl"))) | |
63 | ||
64 | (defcustom mm-url-program | |
65 | (cond | |
66 | ((executable-find "wget") 'wget) | |
67 | ((executable-find "w3m") 'w3m) | |
68 | ((executable-find "lynx") 'lynx) | |
69 | ((executable-find "curl") 'curl) | |
70 | (t "GET")) | |
71 | "The url grab program. | |
72 | Likely values are `wget', `w3m', `lynx' and `curl'." | |
bf247b6e | 73 | :version "22.1" |
23f87bed MB |
74 | :type '(choice |
75 | (symbol :tag "wget" wget) | |
76 | (symbol :tag "w3m" w3m) | |
77 | (symbol :tag "lynx" lynx) | |
78 | (symbol :tag "curl" curl) | |
79 | (string :tag "other")) | |
80 | :group 'mm-url) | |
81 | ||
82 | (defcustom mm-url-arguments nil | |
83 | "The arguments for `mm-url-program'." | |
bf247b6e | 84 | :version "22.1" |
23f87bed MB |
85 | :type '(repeat string) |
86 | :group 'mm-url) | |
87 | ||
88 | \f | |
89 | ;;; Internal variables | |
90 | ||
91 | (defvar mm-url-package-name | |
92 | (gnus-replace-in-string | |
93 | (gnus-replace-in-string gnus-version " v.*$" "") | |
94 | " " "-")) | |
95 | ||
96 | (defvar mm-url-package-version gnus-version-number) | |
97 | ||
98 | ;; Stolen from w3. | |
99 | (defvar mm-url-html-entities | |
100 | '( | |
101 | ;;(excl . 33) | |
102 | (quot . 34) | |
103 | ;;(num . 35) | |
104 | ;;(dollar . 36) | |
105 | ;;(percent . 37) | |
106 | (amp . 38) | |
107 | (rsquo . 39) ; should be U+8217 | |
108 | ;;(apos . 39) | |
109 | ;;(lpar . 40) | |
110 | ;;(rpar . 41) | |
111 | ;;(ast . 42) | |
112 | ;;(plus . 43) | |
113 | ;;(comma . 44) | |
114 | ;;(period . 46) | |
115 | ;;(colon . 58) | |
116 | ;;(semi . 59) | |
117 | (lt . 60) | |
118 | ;;(equals . 61) | |
119 | (gt . 62) | |
120 | ;;(quest . 63) | |
121 | ;;(commat . 64) | |
122 | ;;(lsqb . 91) | |
123 | ;;(rsqb . 93) | |
124 | (uarr . 94) ; should be U+8593 | |
125 | ;;(lowbar . 95) | |
126 | (lsquo . 96) ; should be U+8216 | |
127 | (lcub . 123) | |
128 | ;;(verbar . 124) | |
129 | (rcub . 125) | |
130 | (tilde . 126) | |
131 | (nbsp . 160) | |
132 | (iexcl . 161) | |
133 | (cent . 162) | |
134 | (pound . 163) | |
135 | (curren . 164) | |
136 | (yen . 165) | |
137 | (brvbar . 166) | |
138 | (sect . 167) | |
139 | (uml . 168) | |
140 | (copy . 169) | |
141 | (ordf . 170) | |
142 | (laquo . 171) | |
143 | (not . 172) | |
144 | (shy . 173) | |
145 | (reg . 174) | |
146 | (macr . 175) | |
147 | (deg . 176) | |
148 | (plusmn . 177) | |
149 | (sup2 . 178) | |
150 | (sup3 . 179) | |
151 | (acute . 180) | |
152 | (micro . 181) | |
153 | (para . 182) | |
154 | (middot . 183) | |
155 | (cedil . 184) | |
156 | (sup1 . 185) | |
157 | (ordm . 186) | |
158 | (raquo . 187) | |
159 | (frac14 . 188) | |
160 | (frac12 . 189) | |
161 | (frac34 . 190) | |
162 | (iquest . 191) | |
163 | (Agrave . 192) | |
164 | (Aacute . 193) | |
165 | (Acirc . 194) | |
166 | (Atilde . 195) | |
167 | (Auml . 196) | |
168 | (Aring . 197) | |
169 | (AElig . 198) | |
170 | (Ccedil . 199) | |
171 | (Egrave . 200) | |
172 | (Eacute . 201) | |
173 | (Ecirc . 202) | |
174 | (Euml . 203) | |
175 | (Igrave . 204) | |
176 | (Iacute . 205) | |
177 | (Icirc . 206) | |
178 | (Iuml . 207) | |
179 | (ETH . 208) | |
180 | (Ntilde . 209) | |
181 | (Ograve . 210) | |
182 | (Oacute . 211) | |
183 | (Ocirc . 212) | |
184 | (Otilde . 213) | |
185 | (Ouml . 214) | |
186 | (times . 215) | |
187 | (Oslash . 216) | |
188 | (Ugrave . 217) | |
189 | (Uacute . 218) | |
190 | (Ucirc . 219) | |
191 | (Uuml . 220) | |
192 | (Yacute . 221) | |
193 | (THORN . 222) | |
194 | (szlig . 223) | |
195 | (agrave . 224) | |
196 | (aacute . 225) | |
197 | (acirc . 226) | |
198 | (atilde . 227) | |
199 | (auml . 228) | |
200 | (aring . 229) | |
201 | (aelig . 230) | |
202 | (ccedil . 231) | |
203 | (egrave . 232) | |
204 | (eacute . 233) | |
205 | (ecirc . 234) | |
206 | (euml . 235) | |
207 | (igrave . 236) | |
208 | (iacute . 237) | |
209 | (icirc . 238) | |
210 | (iuml . 239) | |
211 | (eth . 240) | |
212 | (ntilde . 241) | |
213 | (ograve . 242) | |
214 | (oacute . 243) | |
215 | (ocirc . 244) | |
216 | (otilde . 245) | |
217 | (ouml . 246) | |
218 | (divide . 247) | |
219 | (oslash . 248) | |
220 | (ugrave . 249) | |
221 | (uacute . 250) | |
222 | (ucirc . 251) | |
223 | (uuml . 252) | |
224 | (yacute . 253) | |
225 | (thorn . 254) | |
226 | (yuml . 255) | |
227 | ||
228 | ;; Special handling of these | |
229 | (frac56 . "5/6") | |
230 | (frac16 . "1/6") | |
231 | (frac45 . "4/5") | |
232 | (frac35 . "3/5") | |
233 | (frac25 . "2/5") | |
234 | (frac15 . "1/5") | |
235 | (frac23 . "2/3") | |
236 | (frac13 . "1/3") | |
237 | (frac78 . "7/8") | |
238 | (frac58 . "5/8") | |
239 | (frac38 . "3/8") | |
240 | (frac18 . "1/8") | |
241 | ||
242 | ;; The following 5 entities are not mentioned in the HTML 2.0 | |
243 | ;; standard, nor in any other HTML proposed standard of which I | |
244 | ;; am aware. I am not even sure they are ISO entity names. *** | |
245 | ;; Hence, some arrangement should be made to give a bad HTML | |
246 | ;; message when they are seen. | |
247 | (ndash . 45) | |
248 | (mdash . 45) | |
249 | (emsp . 32) | |
250 | (ensp . 32) | |
251 | (sim . 126) | |
252 | (le . "<=") | |
253 | (agr . "alpha") | |
254 | (rdquo . "''") | |
255 | (ldquo . "``") | |
256 | (trade . "(TM)") | |
257 | ;; To be done | |
258 | ;; (shy . ????) ; soft hyphen | |
259 | ) | |
260 | "*An assoc list of entity names and how to actually display them.") | |
261 | ||
262 | (defconst mm-url-unreserved-chars | |
263 | '( | |
264 | ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z | |
265 | ?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z | |
266 | ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 | |
267 | ?- ?_ ?. ?! ?~ ?* ?' ?\( ?\)) | |
268 | "A list of characters that are _NOT_ reserved in the URL spec. | |
269 | This is taken from RFC 2396.") | |
270 | ||
271 | (defun mm-url-load-url () | |
272 | "Load `url-insert-file-contents'." | |
273 | (unless (condition-case () | |
274 | (require 'url-handlers) | |
275 | (error nil)) | |
276 | ;; w3-4.0pre0.46 or earlier version. | |
277 | (require 'w3-vars) | |
278 | (require 'url))) | |
279 | ||
280 | ;;;###autoload | |
281 | (defun mm-url-insert-file-contents (url) | |
282 | "Insert file contents of URL. | |
283 | If `mm-url-use-external' is non-nil, use `mm-url-program'." | |
284 | (if mm-url-use-external | |
285 | (progn | |
286 | (if (string-match "^file:/+" url) | |
287 | (insert-file-contents (substring url (1- (match-end 0)))) | |
288 | (mm-url-insert-file-contents-external url)) | |
289 | (goto-char (point-min)) | |
290 | (if (fboundp 'url-generic-parse-url) | |
291 | (setq url-current-object | |
292 | (url-generic-parse-url url))) | |
293 | (list url (buffer-size))) | |
294 | (mm-url-load-url) | |
295 | (let ((name buffer-file-name) | |
296 | (url-request-extra-headers (list (cons "Connection" "Close"))) | |
297 | (url-package-name (or mm-url-package-name | |
298 | url-package-name)) | |
299 | (url-package-version (or mm-url-package-version | |
300 | url-package-version)) | |
301 | result) | |
302 | (setq result (url-insert-file-contents url)) | |
303 | (save-excursion | |
304 | (goto-char (point-min)) | |
305 | (while (re-search-forward "\r 1000\r ?" nil t) | |
306 | (replace-match ""))) | |
307 | (setq buffer-file-name name) | |
308 | (if (and (fboundp 'url-generic-parse-url) | |
309 | (listp result)) | |
310 | (setq url-current-object (url-generic-parse-url | |
311 | (car result)))) | |
312 | result))) | |
313 | ||
314 | ;;;###autoload | |
315 | (defun mm-url-insert-file-contents-external (url) | |
316 | "Insert file contents of URL using `mm-url-program'." | |
317 | (let (program args) | |
318 | (if (symbolp mm-url-program) | |
319 | (let ((item (cdr (assq mm-url-program mm-url-predefined-programs)))) | |
320 | (setq program (car item) | |
321 | args (append (cdr item) (list url)))) | |
322 | (setq program mm-url-program | |
323 | args (append mm-url-arguments (list url)))) | |
324 | (unless (eq 0 (apply 'call-process program nil t nil args)) | |
325 | (error "Couldn't fetch %s" url)))) | |
326 | ||
327 | (defvar mm-url-timeout 30 | |
328 | "The number of seconds before timing out an URL fetch.") | |
329 | ||
330 | (defvar mm-url-retries 10 | |
331 | "The number of retries after timing out when fetching an URL.") | |
332 | ||
333 | (defun mm-url-insert (url &optional follow-refresh) | |
334 | "Insert the contents from an URL in the current buffer. | |
335 | If FOLLOW-REFRESH is non-nil, redirect refresh url in META." | |
336 | (let ((times mm-url-retries) | |
337 | (done nil) | |
338 | (first t) | |
339 | result) | |
340 | (while (and (not (zerop (decf times))) | |
341 | (not done)) | |
342 | (with-timeout (mm-url-timeout) | |
343 | (unless first | |
344 | (message "Trying again (%s)..." (- mm-url-retries times))) | |
345 | (setq first nil) | |
346 | (if follow-refresh | |
347 | (save-restriction | |
348 | (narrow-to-region (point) (point)) | |
349 | (mm-url-insert-file-contents url) | |
350 | (goto-char (point-min)) | |
351 | (when (re-search-forward | |
352 | "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) | |
353 | (let ((url (match-string 1))) | |
354 | (delete-region (point-min) (point-max)) | |
355 | (setq result (mm-url-insert url t))))) | |
356 | (setq result (mm-url-insert-file-contents url))) | |
357 | (setq done t))) | |
358 | result)) | |
359 | ||
360 | (defun mm-url-decode-entities () | |
361 | "Decode all HTML entities." | |
362 | (goto-char (point-min)) | |
363 | (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) | |
364 | (let ((elem (if (eq (aref (match-string 1) 0) ?\#) | |
365 | (let ((c | |
366 | (string-to-number (substring | |
367 | (match-string 1) 1)))) | |
368 | (if (mm-char-or-char-int-p c) c 32)) | |
369 | (or (cdr (assq (intern (match-string 1)) | |
370 | mm-url-html-entities)) | |
371 | ?#)))) | |
372 | (unless (stringp elem) | |
373 | (setq elem (char-to-string elem))) | |
374 | (replace-match elem t t)))) | |
375 | ||
376 | (defun mm-url-decode-entities-nbsp () | |
377 | "Decode all HTML entities and to a space." | |
378 | (let ((mm-url-html-entities (cons '(nbsp . 32) mm-url-html-entities))) | |
379 | (mm-url-decode-entities))) | |
380 | ||
381 | (defun mm-url-decode-entities-string (string) | |
382 | (with-temp-buffer | |
383 | (insert string) | |
384 | (mm-url-decode-entities) | |
385 | (buffer-string))) | |
386 | ||
387 | (defun mm-url-form-encode-xwfu (chunk) | |
388 | "Escape characters in a string for application/x-www-form-urlencoded. | |
389 | Blasphemous crap because someone didn't think %20 was good enough for encoding | |
390 | spaces. Die Die Die." | |
391 | ;; This will get rid of the 'attributes' specified by the file type, | |
392 | ;; which are useless for an application/x-www-form-urlencoded form. | |
393 | (if (consp chunk) | |
394 | (setq chunk (cdr chunk))) | |
395 | ||
396 | (mapconcat | |
397 | (lambda (char) | |
398 | (cond | |
399 | ((= char ? ) "+") | |
400 | ((memq char mm-url-unreserved-chars) (char-to-string char)) | |
401 | (t (upcase (format "%%%02x" char))))) | |
402 | ;; Fixme: Should this actually be accepting multibyte? Is there a | |
403 | ;; better way in XEmacs? | |
404 | (if (featurep 'mule) | |
405 | (encode-coding-string chunk | |
406 | (if (fboundp 'find-coding-systems-string) | |
407 | (car (find-coding-systems-string chunk)) | |
408 | buffer-file-coding-system)) | |
409 | chunk) | |
410 | "")) | |
411 | ||
412 | (defun mm-url-encode-www-form-urlencoded (pairs) | |
413 | "Return PAIRS encoded for forms." | |
414 | (mapconcat | |
415 | (lambda (data) | |
416 | (concat (mm-url-form-encode-xwfu (car data)) "=" | |
417 | (mm-url-form-encode-xwfu (cdr data)))) | |
418 | pairs "&")) | |
419 | ||
420 | (defun mm-url-fetch-form (url pairs) | |
421 | "Fetch a form from URL with PAIRS as the data using the POST method." | |
422 | (mm-url-load-url) | |
423 | (let ((url-request-data (mm-url-encode-www-form-urlencoded pairs)) | |
424 | (url-request-method "POST") | |
425 | (url-request-extra-headers | |
426 | '(("Content-type" . "application/x-www-form-urlencoded")))) | |
427 | (url-insert-file-contents url) | |
428 | (setq buffer-file-name nil)) | |
429 | t) | |
430 | ||
431 | (defun mm-url-fetch-simple (url content) | |
432 | (mm-url-load-url) | |
433 | (let ((url-request-data content) | |
434 | (url-request-method "POST") | |
435 | (url-request-extra-headers | |
436 | '(("Content-type" . "application/x-www-form-urlencoded")))) | |
437 | (url-insert-file-contents url) | |
438 | (setq buffer-file-name nil)) | |
439 | t) | |
440 | ||
441 | (defun mm-url-remove-markup () | |
442 | "Remove all HTML markup, leaving just plain text." | |
443 | (goto-char (point-min)) | |
444 | (while (search-forward "<!--" nil t) | |
445 | (delete-region (match-beginning 0) | |
446 | (or (search-forward "-->" nil t) | |
447 | (point-max)))) | |
448 | (goto-char (point-min)) | |
449 | (while (re-search-forward "<[^>]+>" nil t) | |
450 | (replace-match "" t t))) | |
451 | ||
452 | (provide 'mm-url) | |
453 | ||
454 | ;;; arch-tag: 0594f9b3-417c-48b0-adc2-5082e1e7917f | |
455 | ;;; mm-url.el ends here |