Commit | Line | Data |
---|---|---|
23f87bed MB |
1 | ;;; mm-url.el --- a wrapper of url functions/commands for Gnus |
2 | ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. | |
3 | ||
4 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
5 | ||
6 | ;; This file is part of GNU Emacs. | |
7 | ||
8 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 | ;; it under the terms of the GNU General Public License as published | |
10 | ;; by the Free Software Foundation; either version 2, or (at your | |
11 | ;; option) any later version. | |
12 | ||
13 | ;; GNU Emacs is distributed in the hope that it will be useful, but | |
14 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | ;; General Public License for more details. | |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
19 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 | ;; Boston, MA 02111-1307, USA. | |
22 | ||
23 | ;;; Commentary: | |
24 | ||
25 | ;; Some codes are stolen from w3 and url packages. Some are moved from | |
26 | ;; nnweb. | |
27 | ||
28 | ;; TODO: Support POST, cookie. | |
29 | ||
30 | ;;; Code: | |
31 | ||
32 | (eval-when-compile (require 'cl)) | |
33 | ||
34 | (require 'mm-util) | |
35 | (require 'gnus) | |
36 | ||
37 | (eval-and-compile | |
38 | (autoload 'executable-find "executable")) | |
39 | ||
40 | (eval-when-compile | |
41 | (require 'timer)) | |
42 | ||
43 | (defgroup mm-url nil | |
44 | "A wrapper of url package and external url command for Gnus." | |
45 | :group 'gnus) | |
46 | ||
47 | (defcustom mm-url-use-external (not | |
48 | (condition-case nil | |
49 | (require 'url) | |
50 | (error nil))) | |
51 | "*If non-nil, use external grab program `mm-url-program'." | |
bf247b6e | 52 | :version "22.1" |
23f87bed MB |
53 | :type 'boolean |
54 | :group 'mm-url) | |
55 | ||
56 | (defvar mm-url-predefined-programs | |
57 | '((wget "wget" "--user-agent=mm-url" "-q" "-O" "-") | |
58 | (w3m "w3m" "-dump_source") | |
59 | (lynx "lynx" "-source") | |
60 | (curl "curl"))) | |
61 | ||
62 | (defcustom mm-url-program | |
63 | (cond | |
64 | ((executable-find "wget") 'wget) | |
65 | ((executable-find "w3m") 'w3m) | |
66 | ((executable-find "lynx") 'lynx) | |
67 | ((executable-find "curl") 'curl) | |
68 | (t "GET")) | |
69 | "The url grab program. | |
70 | Likely values are `wget', `w3m', `lynx' and `curl'." | |
bf247b6e | 71 | :version "22.1" |
23f87bed MB |
72 | :type '(choice |
73 | (symbol :tag "wget" wget) | |
74 | (symbol :tag "w3m" w3m) | |
75 | (symbol :tag "lynx" lynx) | |
76 | (symbol :tag "curl" curl) | |
77 | (string :tag "other")) | |
78 | :group 'mm-url) | |
79 | ||
80 | (defcustom mm-url-arguments nil | |
81 | "The arguments for `mm-url-program'." | |
bf247b6e | 82 | :version "22.1" |
23f87bed MB |
83 | :type '(repeat string) |
84 | :group 'mm-url) | |
85 | ||
86 | \f | |
87 | ;;; Internal variables | |
88 | ||
89 | (defvar mm-url-package-name | |
90 | (gnus-replace-in-string | |
91 | (gnus-replace-in-string gnus-version " v.*$" "") | |
92 | " " "-")) | |
93 | ||
94 | (defvar mm-url-package-version gnus-version-number) | |
95 | ||
96 | ;; Stolen from w3. | |
97 | (defvar mm-url-html-entities | |
98 | '( | |
99 | ;;(excl . 33) | |
100 | (quot . 34) | |
101 | ;;(num . 35) | |
102 | ;;(dollar . 36) | |
103 | ;;(percent . 37) | |
104 | (amp . 38) | |
105 | (rsquo . 39) ; should be U+8217 | |
106 | ;;(apos . 39) | |
107 | ;;(lpar . 40) | |
108 | ;;(rpar . 41) | |
109 | ;;(ast . 42) | |
110 | ;;(plus . 43) | |
111 | ;;(comma . 44) | |
112 | ;;(period . 46) | |
113 | ;;(colon . 58) | |
114 | ;;(semi . 59) | |
115 | (lt . 60) | |
116 | ;;(equals . 61) | |
117 | (gt . 62) | |
118 | ;;(quest . 63) | |
119 | ;;(commat . 64) | |
120 | ;;(lsqb . 91) | |
121 | ;;(rsqb . 93) | |
122 | (uarr . 94) ; should be U+8593 | |
123 | ;;(lowbar . 95) | |
124 | (lsquo . 96) ; should be U+8216 | |
125 | (lcub . 123) | |
126 | ;;(verbar . 124) | |
127 | (rcub . 125) | |
128 | (tilde . 126) | |
129 | (nbsp . 160) | |
130 | (iexcl . 161) | |
131 | (cent . 162) | |
132 | (pound . 163) | |
133 | (curren . 164) | |
134 | (yen . 165) | |
135 | (brvbar . 166) | |
136 | (sect . 167) | |
137 | (uml . 168) | |
138 | (copy . 169) | |
139 | (ordf . 170) | |
140 | (laquo . 171) | |
141 | (not . 172) | |
142 | (shy . 173) | |
143 | (reg . 174) | |
144 | (macr . 175) | |
145 | (deg . 176) | |
146 | (plusmn . 177) | |
147 | (sup2 . 178) | |
148 | (sup3 . 179) | |
149 | (acute . 180) | |
150 | (micro . 181) | |
151 | (para . 182) | |
152 | (middot . 183) | |
153 | (cedil . 184) | |
154 | (sup1 . 185) | |
155 | (ordm . 186) | |
156 | (raquo . 187) | |
157 | (frac14 . 188) | |
158 | (frac12 . 189) | |
159 | (frac34 . 190) | |
160 | (iquest . 191) | |
161 | (Agrave . 192) | |
162 | (Aacute . 193) | |
163 | (Acirc . 194) | |
164 | (Atilde . 195) | |
165 | (Auml . 196) | |
166 | (Aring . 197) | |
167 | (AElig . 198) | |
168 | (Ccedil . 199) | |
169 | (Egrave . 200) | |
170 | (Eacute . 201) | |
171 | (Ecirc . 202) | |
172 | (Euml . 203) | |
173 | (Igrave . 204) | |
174 | (Iacute . 205) | |
175 | (Icirc . 206) | |
176 | (Iuml . 207) | |
177 | (ETH . 208) | |
178 | (Ntilde . 209) | |
179 | (Ograve . 210) | |
180 | (Oacute . 211) | |
181 | (Ocirc . 212) | |
182 | (Otilde . 213) | |
183 | (Ouml . 214) | |
184 | (times . 215) | |
185 | (Oslash . 216) | |
186 | (Ugrave . 217) | |
187 | (Uacute . 218) | |
188 | (Ucirc . 219) | |
189 | (Uuml . 220) | |
190 | (Yacute . 221) | |
191 | (THORN . 222) | |
192 | (szlig . 223) | |
193 | (agrave . 224) | |
194 | (aacute . 225) | |
195 | (acirc . 226) | |
196 | (atilde . 227) | |
197 | (auml . 228) | |
198 | (aring . 229) | |
199 | (aelig . 230) | |
200 | (ccedil . 231) | |
201 | (egrave . 232) | |
202 | (eacute . 233) | |
203 | (ecirc . 234) | |
204 | (euml . 235) | |
205 | (igrave . 236) | |
206 | (iacute . 237) | |
207 | (icirc . 238) | |
208 | (iuml . 239) | |
209 | (eth . 240) | |
210 | (ntilde . 241) | |
211 | (ograve . 242) | |
212 | (oacute . 243) | |
213 | (ocirc . 244) | |
214 | (otilde . 245) | |
215 | (ouml . 246) | |
216 | (divide . 247) | |
217 | (oslash . 248) | |
218 | (ugrave . 249) | |
219 | (uacute . 250) | |
220 | (ucirc . 251) | |
221 | (uuml . 252) | |
222 | (yacute . 253) | |
223 | (thorn . 254) | |
224 | (yuml . 255) | |
225 | ||
226 | ;; Special handling of these | |
227 | (frac56 . "5/6") | |
228 | (frac16 . "1/6") | |
229 | (frac45 . "4/5") | |
230 | (frac35 . "3/5") | |
231 | (frac25 . "2/5") | |
232 | (frac15 . "1/5") | |
233 | (frac23 . "2/3") | |
234 | (frac13 . "1/3") | |
235 | (frac78 . "7/8") | |
236 | (frac58 . "5/8") | |
237 | (frac38 . "3/8") | |
238 | (frac18 . "1/8") | |
239 | ||
240 | ;; The following 5 entities are not mentioned in the HTML 2.0 | |
241 | ;; standard, nor in any other HTML proposed standard of which I | |
242 | ;; am aware. I am not even sure they are ISO entity names. *** | |
243 | ;; Hence, some arrangement should be made to give a bad HTML | |
244 | ;; message when they are seen. | |
245 | (ndash . 45) | |
246 | (mdash . 45) | |
247 | (emsp . 32) | |
248 | (ensp . 32) | |
249 | (sim . 126) | |
250 | (le . "<=") | |
251 | (agr . "alpha") | |
252 | (rdquo . "''") | |
253 | (ldquo . "``") | |
254 | (trade . "(TM)") | |
255 | ;; To be done | |
256 | ;; (shy . ????) ; soft hyphen | |
257 | ) | |
258 | "*An assoc list of entity names and how to actually display them.") | |
259 | ||
260 | (defconst mm-url-unreserved-chars | |
261 | '( | |
262 | ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z | |
263 | ?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z | |
264 | ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 | |
265 | ?- ?_ ?. ?! ?~ ?* ?' ?\( ?\)) | |
266 | "A list of characters that are _NOT_ reserved in the URL spec. | |
267 | This is taken from RFC 2396.") | |
268 | ||
269 | (defun mm-url-load-url () | |
270 | "Load `url-insert-file-contents'." | |
271 | (unless (condition-case () | |
272 | (require 'url-handlers) | |
273 | (error nil)) | |
274 | ;; w3-4.0pre0.46 or earlier version. | |
275 | (require 'w3-vars) | |
276 | (require 'url))) | |
277 | ||
278 | ;;;###autoload | |
279 | (defun mm-url-insert-file-contents (url) | |
280 | "Insert file contents of URL. | |
281 | If `mm-url-use-external' is non-nil, use `mm-url-program'." | |
282 | (if mm-url-use-external | |
283 | (progn | |
284 | (if (string-match "^file:/+" url) | |
285 | (insert-file-contents (substring url (1- (match-end 0)))) | |
286 | (mm-url-insert-file-contents-external url)) | |
287 | (goto-char (point-min)) | |
288 | (if (fboundp 'url-generic-parse-url) | |
289 | (setq url-current-object | |
290 | (url-generic-parse-url url))) | |
291 | (list url (buffer-size))) | |
292 | (mm-url-load-url) | |
293 | (let ((name buffer-file-name) | |
294 | (url-request-extra-headers (list (cons "Connection" "Close"))) | |
295 | (url-package-name (or mm-url-package-name | |
296 | url-package-name)) | |
297 | (url-package-version (or mm-url-package-version | |
298 | url-package-version)) | |
299 | result) | |
300 | (setq result (url-insert-file-contents url)) | |
301 | (save-excursion | |
302 | (goto-char (point-min)) | |
303 | (while (re-search-forward "\r 1000\r ?" nil t) | |
304 | (replace-match ""))) | |
305 | (setq buffer-file-name name) | |
306 | (if (and (fboundp 'url-generic-parse-url) | |
307 | (listp result)) | |
308 | (setq url-current-object (url-generic-parse-url | |
309 | (car result)))) | |
310 | result))) | |
311 | ||
312 | ;;;###autoload | |
313 | (defun mm-url-insert-file-contents-external (url) | |
314 | "Insert file contents of URL using `mm-url-program'." | |
315 | (let (program args) | |
316 | (if (symbolp mm-url-program) | |
317 | (let ((item (cdr (assq mm-url-program mm-url-predefined-programs)))) | |
318 | (setq program (car item) | |
319 | args (append (cdr item) (list url)))) | |
320 | (setq program mm-url-program | |
321 | args (append mm-url-arguments (list url)))) | |
322 | (unless (eq 0 (apply 'call-process program nil t nil args)) | |
323 | (error "Couldn't fetch %s" url)))) | |
324 | ||
325 | (defvar mm-url-timeout 30 | |
326 | "The number of seconds before timing out an URL fetch.") | |
327 | ||
328 | (defvar mm-url-retries 10 | |
329 | "The number of retries after timing out when fetching an URL.") | |
330 | ||
331 | (defun mm-url-insert (url &optional follow-refresh) | |
332 | "Insert the contents from an URL in the current buffer. | |
333 | If FOLLOW-REFRESH is non-nil, redirect refresh url in META." | |
334 | (let ((times mm-url-retries) | |
335 | (done nil) | |
336 | (first t) | |
337 | result) | |
338 | (while (and (not (zerop (decf times))) | |
339 | (not done)) | |
340 | (with-timeout (mm-url-timeout) | |
341 | (unless first | |
342 | (message "Trying again (%s)..." (- mm-url-retries times))) | |
343 | (setq first nil) | |
344 | (if follow-refresh | |
345 | (save-restriction | |
346 | (narrow-to-region (point) (point)) | |
347 | (mm-url-insert-file-contents url) | |
348 | (goto-char (point-min)) | |
349 | (when (re-search-forward | |
350 | "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) | |
351 | (let ((url (match-string 1))) | |
352 | (delete-region (point-min) (point-max)) | |
353 | (setq result (mm-url-insert url t))))) | |
354 | (setq result (mm-url-insert-file-contents url))) | |
355 | (setq done t))) | |
356 | result)) | |
357 | ||
358 | (defun mm-url-decode-entities () | |
359 | "Decode all HTML entities." | |
360 | (goto-char (point-min)) | |
361 | (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) | |
362 | (let ((elem (if (eq (aref (match-string 1) 0) ?\#) | |
363 | (let ((c | |
364 | (string-to-number (substring | |
365 | (match-string 1) 1)))) | |
366 | (if (mm-char-or-char-int-p c) c 32)) | |
367 | (or (cdr (assq (intern (match-string 1)) | |
368 | mm-url-html-entities)) | |
369 | ?#)))) | |
370 | (unless (stringp elem) | |
371 | (setq elem (char-to-string elem))) | |
372 | (replace-match elem t t)))) | |
373 | ||
374 | (defun mm-url-decode-entities-nbsp () | |
375 | "Decode all HTML entities and to a space." | |
376 | (let ((mm-url-html-entities (cons '(nbsp . 32) mm-url-html-entities))) | |
377 | (mm-url-decode-entities))) | |
378 | ||
379 | (defun mm-url-decode-entities-string (string) | |
380 | (with-temp-buffer | |
381 | (insert string) | |
382 | (mm-url-decode-entities) | |
383 | (buffer-string))) | |
384 | ||
385 | (defun mm-url-form-encode-xwfu (chunk) | |
386 | "Escape characters in a string for application/x-www-form-urlencoded. | |
387 | Blasphemous crap because someone didn't think %20 was good enough for encoding | |
388 | spaces. Die Die Die." | |
389 | ;; This will get rid of the 'attributes' specified by the file type, | |
390 | ;; which are useless for an application/x-www-form-urlencoded form. | |
391 | (if (consp chunk) | |
392 | (setq chunk (cdr chunk))) | |
393 | ||
394 | (mapconcat | |
395 | (lambda (char) | |
396 | (cond | |
397 | ((= char ? ) "+") | |
398 | ((memq char mm-url-unreserved-chars) (char-to-string char)) | |
399 | (t (upcase (format "%%%02x" char))))) | |
400 | ;; Fixme: Should this actually be accepting multibyte? Is there a | |
401 | ;; better way in XEmacs? | |
402 | (if (featurep 'mule) | |
403 | (encode-coding-string chunk | |
404 | (if (fboundp 'find-coding-systems-string) | |
405 | (car (find-coding-systems-string chunk)) | |
406 | buffer-file-coding-system)) | |
407 | chunk) | |
408 | "")) | |
409 | ||
410 | (defun mm-url-encode-www-form-urlencoded (pairs) | |
411 | "Return PAIRS encoded for forms." | |
412 | (mapconcat | |
413 | (lambda (data) | |
414 | (concat (mm-url-form-encode-xwfu (car data)) "=" | |
415 | (mm-url-form-encode-xwfu (cdr data)))) | |
416 | pairs "&")) | |
417 | ||
418 | (defun mm-url-fetch-form (url pairs) | |
419 | "Fetch a form from URL with PAIRS as the data using the POST method." | |
420 | (mm-url-load-url) | |
421 | (let ((url-request-data (mm-url-encode-www-form-urlencoded pairs)) | |
422 | (url-request-method "POST") | |
423 | (url-request-extra-headers | |
424 | '(("Content-type" . "application/x-www-form-urlencoded")))) | |
425 | (url-insert-file-contents url) | |
426 | (setq buffer-file-name nil)) | |
427 | t) | |
428 | ||
429 | (defun mm-url-fetch-simple (url content) | |
430 | (mm-url-load-url) | |
431 | (let ((url-request-data content) | |
432 | (url-request-method "POST") | |
433 | (url-request-extra-headers | |
434 | '(("Content-type" . "application/x-www-form-urlencoded")))) | |
435 | (url-insert-file-contents url) | |
436 | (setq buffer-file-name nil)) | |
437 | t) | |
438 | ||
439 | (defun mm-url-remove-markup () | |
440 | "Remove all HTML markup, leaving just plain text." | |
441 | (goto-char (point-min)) | |
442 | (while (search-forward "<!--" nil t) | |
443 | (delete-region (match-beginning 0) | |
444 | (or (search-forward "-->" nil t) | |
445 | (point-max)))) | |
446 | (goto-char (point-min)) | |
447 | (while (re-search-forward "<[^>]+>" nil t) | |
448 | (replace-match "" t t))) | |
449 | ||
450 | (provide 'mm-url) | |
451 | ||
452 | ;;; arch-tag: 0594f9b3-417c-48b0-adc2-5082e1e7917f | |
453 | ;;; mm-url.el ends here |