(decode_coding_utf_8): When eol-type of CODING is
[bpt/emacs.git] / lisp / url / url-util.el
CommitLineData
8c8b8430 1;;; url-util.el --- Miscellaneous helper routines for URL library
a2fd1462 2
71ddfde5 3;; Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004,
12dc447f 4;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
a2fd1462 5
8c8b8430 6;; Author: Bill Perry <wmperry@gnu.org>
8c8b8430
SM
7;; Keywords: comm, data, processes
8
a2fd1462
SM
9;; This file is part of GNU Emacs.
10;;
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
8c0ee52a 13;; the Free Software Foundation; either version 3, or (at your option)
a2fd1462
SM
14;; any later version.
15;;
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20;;
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
4fc5845f
LK
23;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24;; Boston, MA 02110-1301, USA.
a2fd1462
SM
25
26;;; Commentary:
27
28;;; Code:
8c8b8430
SM
29
30(require 'url-parse)
31(autoload 'timezone-parse-date "timezone")
32(autoload 'timezone-make-date-arpa-standard "timezone")
c6bfe6e7 33(autoload 'mail-header-extract "mailheader")
8c8b8430
SM
34
35(defvar url-parse-args-syntax-table
36 (copy-syntax-table emacs-lisp-mode-syntax-table)
37 "A syntax table for parsing sgml attributes.")
38
39(modify-syntax-entry ?' "\"" url-parse-args-syntax-table)
40(modify-syntax-entry ?` "\"" url-parse-args-syntax-table)
41(modify-syntax-entry ?{ "(" url-parse-args-syntax-table)
42(modify-syntax-entry ?} ")" url-parse-args-syntax-table)
43
44;;;###autoload
45(defcustom url-debug nil
46 "*What types of debug messages from the URL library to show.
47Debug messages are logged to the *URL-DEBUG* buffer.
48
49If t, all messages will be logged.
50If a number, all messages will be logged, as well shown via `message'.
51If a list, it is a list of the types of messages to be logged."
52 :type '(choice (const :tag "none" nil)
53 (const :tag "all" t)
54 (checklist :tag "custom"
55 (const :tag "HTTP" :value http)
56 (const :tag "DAV" :value dav)
57 (const :tag "General" :value retrieval)
58 (const :tag "Filename handlers" :value handlers)
59 (symbol :tag "Other")))
60 :group 'url-hairy)
61
62;;;###autoload
63(defun url-debug (tag &rest args)
64 (if quit-flag
65 (error "Interrupted!"))
66 (if (or (eq url-debug t)
67 (numberp url-debug)
68 (and (listp url-debug) (memq tag url-debug)))
a2fd1462 69 (with-current-buffer (get-buffer-create "*URL-DEBUG*")
8c8b8430
SM
70 (goto-char (point-max))
71 (insert (symbol-name tag) " -> " (apply 'format args) "\n")
72 (if (numberp url-debug)
73 (apply 'message args)))))
74
75;;;###autoload
76(defun url-parse-args (str &optional nodowncase)
77 ;; Return an assoc list of attribute/value pairs from an RFC822-type string
78 (let (
79 name ; From name=
80 value ; its value
81 results ; Assoc list of results
82 name-pos ; Start of XXXX= position
83 val-pos ; Start of value position
84 st
85 nd
86 )
87 (save-excursion
88 (save-restriction
89 (set-buffer (get-buffer-create " *urlparse-temp*"))
90 (set-syntax-table url-parse-args-syntax-table)
91 (erase-buffer)
92 (insert str)
93 (setq st (point-min)
94 nd (point-max))
95 (set-syntax-table url-parse-args-syntax-table)
96 (narrow-to-region st nd)
97 (goto-char (point-min))
98 (while (not (eobp))
99 (skip-chars-forward "; \n\t")
100 (setq name-pos (point))
101 (skip-chars-forward "^ \n\t=;")
102 (if (not nodowncase)
103 (downcase-region name-pos (point)))
104 (setq name (buffer-substring name-pos (point)))
105 (skip-chars-forward " \t\n")
106 (if (/= (or (char-after (point)) 0) ?=) ; There is no value
107 (setq value nil)
108 (skip-chars-forward " \t\n=")
109 (setq val-pos (point)
110 value
111 (cond
112 ((or (= (or (char-after val-pos) 0) ?\")
113 (= (or (char-after val-pos) 0) ?'))
114 (buffer-substring (1+ val-pos)
115 (condition-case ()
116 (prog2
117 (forward-sexp 1)
118 (1- (point))
119 (skip-chars-forward "\""))
120 (error
121 (skip-chars-forward "^ \t\n")
122 (point)))))
123 (t
124 (buffer-substring val-pos
125 (progn
126 (skip-chars-forward "^;")
127 (skip-chars-backward " \t")
128 (point)))))))
129 (setq results (cons (cons name value) results))
130 (skip-chars-forward "; \n\t"))
131 results))))
132
133;;;###autoload
134(defun url-insert-entities-in-string (string)
135 "Convert HTML markup-start characters to entity references in STRING.
136Also replaces the \" character, so that the result may be safely used as
137 an attribute value in a tag. Returns a new string with the result of the
138 conversion. Replaces these characters as follows:
139 & ==> &amp;
140 < ==> &lt;
141 > ==> &gt;
142 \" ==> &quot;"
143 (if (string-match "[&<>\"]" string)
144 (save-excursion
145 (set-buffer (get-buffer-create " *entity*"))
146 (erase-buffer)
147 (buffer-disable-undo (current-buffer))
148 (insert string)
149 (goto-char (point-min))
150 (while (progn
151 (skip-chars-forward "^&<>\"")
152 (not (eobp)))
153 (insert (cdr (assq (char-after (point))
154 '((?\" . "&quot;")
155 (?& . "&amp;")
156 (?< . "&lt;")
157 (?> . "&gt;")))))
158 (delete-char 1))
159 (buffer-string))
160 string))
161
162;;;###autoload
163(defun url-normalize-url (url)
164 "Return a 'normalized' version of URL.
165Strips out default port numbers, etc."
07e9b3f0 166 (let (type data retval)
8c8b8430
SM
167 (setq data (url-generic-parse-url url)
168 type (url-type data))
169 (if (member type '("www" "about" "mailto" "info"))
170 (setq retval url)
d18ec89f 171 (setf (url-target data) nil)
8c8b8430
SM
172 (setq retval (url-recreate-url data)))
173 retval))
174
175;;;###autoload
176(defun url-lazy-message (&rest args)
177 "Just like `message', but is a no-op if called more than once a second.
a2fd1462 178Will not do anything if `url-show-status' is nil."
8c8b8430
SM
179 (if (or (null url-show-status)
180 (active-minibuffer-window)
181 (= url-lazy-message-time
182 (setq url-lazy-message-time (nth 1 (current-time)))))
183 nil
184 (apply 'message args)))
185
186;;;###autoload
187(defun url-get-normalized-date (&optional specified-time)
d626f888
TZ
188 "Return a 'real' date string that most HTTP servers can understand."
189 (let ((system-time-locale "C"))
190 (format-time-string "%a, %d %b %Y %T GMT"
191 (or specified-time (current-time)) t)))
8c8b8430
SM
192
193;;;###autoload
194(defun url-eat-trailing-space (x)
195 "Remove spaces/tabs at the end of a string."
196 (let ((y (1- (length x)))
197 (skip-chars (list ? ?\t ?\n)))
198 (while (and (>= y 0) (memq (aref x y) skip-chars))
199 (setq y (1- y)))
200 (substring x 0 (1+ y))))
201
202;;;###autoload
203(defun url-strip-leading-spaces (x)
204 "Remove spaces at the front of a string."
205 (let ((y (1- (length x)))
206 (z 0)
207 (skip-chars (list ? ?\t ?\n)))
208 (while (and (<= z y) (memq (aref x z) skip-chars))
209 (setq z (1+ z)))
210 (substring x z nil)))
211
212;;;###autoload
213(defun url-pretty-length (n)
214 (cond
215 ((< n 1024)
216 (format "%d bytes" n))
217 ((< n (* 1024 1024))
218 (format "%dk" (/ n 1024.0)))
219 (t
220 (format "%2.2fM" (/ n (* 1024 1024.0))))))
221
222;;;###autoload
223(defun url-display-percentage (fmt perc &rest args)
48d2bac4
MH
224 (when url-show-status
225 (if (null fmt)
226 (if (fboundp 'clear-progress-display)
227 (clear-progress-display))
228 (if (and (fboundp 'progress-display) perc)
229 (apply 'progress-display fmt perc args)
230 (apply 'message fmt args)))))
8c8b8430
SM
231
232;;;###autoload
233(defun url-percentage (x y)
234 (if (fboundp 'float)
235 (round (* 100 (/ x (float y))))
236 (/ (* x 100) y)))
237
238;;;###autoload
76fb5862
RS
239(defun url-file-directory (file)
240 "Return the directory part of FILE, for a URL."
8c8b8430
SM
241 (cond
242 ((null file) "")
243 ((string-match (eval-when-compile (regexp-quote "?")) file)
76fb5862 244 (file-name-directory (substring file 0 (match-beginning 0))))
8c8b8430
SM
245 (t (file-name-directory file))))
246
76fb5862
RS
247;;;###autoload
248(defun url-file-nondirectory (file)
249 "Return the nondirectory part of FILE, for a URL."
250 (cond
251 ((null file) "")
252 ((string-match (eval-when-compile (regexp-quote "?")) file)
253 (file-name-nondirectory (substring file 0 (match-beginning 0))))
254 (t (file-name-nondirectory file))))
255
8c8b8430 256;;;###autoload
6fd388f3 257(defun url-parse-query-string (query &optional downcase allow-newlines)
8c8b8430
SM
258 (let (retval pairs cur key val)
259 (setq pairs (split-string query "&"))
260 (while pairs
261 (setq cur (car pairs)
262 pairs (cdr pairs))
263 (if (not (string-match "=" cur))
264 nil ; Grace
6fd388f3
CY
265 (setq key (url-unhex-string (substring cur 0 (match-beginning 0))
266 allow-newlines))
267 (setq val (url-unhex-string (substring cur (match-end 0) nil)
268 allow-newlines))
8c8b8430
SM
269 (if downcase
270 (setq key (downcase key)))
271 (setq cur (assoc key retval))
272 (if cur
273 (setcdr cur (cons val (cdr cur)))
274 (setq retval (cons (list key val) retval)))))
275 retval))
276
277(defun url-unhex (x)
278 (if (> x ?9)
279 (if (>= x ?a)
280 (+ 10 (- x ?a))
281 (+ 10 (- x ?A)))
282 (- x ?0)))
283
c6bfe6e7
SM
284;; Fixme: Is this definition better, and does it ever matter?
285
286;; (defun url-unhex-string (str &optional allow-newlines)
287;; "Remove %XX, embedded spaces, etc in a url.
288;; If optional second argument ALLOW-NEWLINES is non-nil, then allow the
289;; decoding of carriage returns and line feeds in the string, which is normally
290;; forbidden in URL encoding."
291;; (setq str (or str ""))
292;; (setq str (replace-regexp-in-string "%[[:xdigit:]]\\{2\\}"
293;; (lambda (match)
294;; (string (string-to-number
295;; (substring match 1) 16)))
296;; str t t))
297;; (if allow-newlines
298;; (replace-regexp-in-string "[\n\r]" (lambda (match)
299;; (format "%%%.2X" (aref match 0)))
300;; str t t)
301;; str))
302
8c8b8430
SM
303;;;###autoload
304(defun url-unhex-string (str &optional allow-newlines)
c6bfe6e7 305 "Remove %XX embedded spaces, etc in a url.
8c8b8430
SM
306If optional second argument ALLOW-NEWLINES is non-nil, then allow the
307decoding of carriage returns and line feeds in the string, which is normally
308forbidden in URL encoding."
309 (setq str (or str ""))
310 (let ((tmp "")
311 (case-fold-search t))
312 (while (string-match "%[0-9a-f][0-9a-f]" str)
313 (let* ((start (match-beginning 0))
314 (ch1 (url-unhex (elt str (+ start 1))))
315 (code (+ (* 16 ch1)
316 (url-unhex (elt str (+ start 2))))))
317 (setq tmp (concat
318 tmp (substring str 0 start)
319 (cond
320 (allow-newlines
321 (char-to-string code))
322 ((or (= code ?\n) (= code ?\r))
323 " ")
324 (t (char-to-string code))))
325 str (substring str (match-end 0)))))
326 (setq tmp (concat tmp str))
327 tmp))
328
329(defconst url-unreserved-chars
330 '(
331 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z
332 ?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z
333 ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9
334 ?- ?_ ?. ?! ?~ ?* ?' ?\( ?\))
335 "A list of characters that are _NOT_ reserved in the URL spec.
336This is taken from RFC 2396.")
337
338;;;###autoload
ecfbb488
TTN
339(defun url-hexify-string (string)
340 "Return a new string that is STRING URI-encoded.
341First, STRING is converted to utf-8, if necessary. Then, for each
342character in the utf-8 string, those found in `url-unreserved-chars'
343are left as-is, all others are represented as a three-character
344string: \"%\" followed by two lowercase hex digits."
07e9b3f0
SM
345 ;; To go faster and avoid a lot of consing, we could do:
346 ;;
347 ;; (defconst url-hexify-table
348 ;; (let ((map (make-vector 256 nil)))
349 ;; (dotimes (byte 256) (aset map byte
350 ;; (if (memq byte url-unreserved-chars)
351 ;; (char-to-string byte)
352 ;; (format "%%%02x" byte))))
353 ;; map))
354 ;;
355 ;; (mapconcat (curry 'aref url-hexify-table) ...)
356 (mapconcat (lambda (byte)
357 (if (memq byte url-unreserved-chars)
358 (char-to-string byte)
359 (format "%%%02x" byte)))
360 (if (multibyte-string-p string)
361 (encode-coding-string string 'utf-8)
362 string)
ecfbb488 363 ""))
8c8b8430
SM
364
365;;;###autoload
366(defun url-file-extension (fname &optional x)
367 "Return the filename extension of FNAME.
368If optional variable X is t,
369then return the basename of the file with the extension stripped off."
370 (if (and fname
76fb5862 371 (setq fname (url-file-nondirectory fname))
8c8b8430
SM
372 (string-match "\\.[^./]+$" fname))
373 (if x (substring fname 0 (match-beginning 0))
374 (substring fname (match-beginning 0) nil))
375 ;;
376 ;; If fname has no extension, and x then return fname itself instead of
377 ;; nothing. When caching it allows the correct .hdr file to be produced
378 ;; for filenames without extension.
379 ;;
380 (if x
381 fname
382 "")))
383
384;;;###autoload
385(defun url-truncate-url-for-viewing (url &optional width)
386 "Return a shortened version of URL that is WIDTH characters or less wide.
387WIDTH defaults to the current frame width."
388 (let* ((fr-width (or width (frame-width)))
389 (str-width (length url))
8c8b8430
SM
390 (fname nil)
391 (modified 0)
392 (urlobj nil))
393 ;; The first thing that can go are the search strings
394 (if (and (>= str-width fr-width)
395 (string-match "?" url))
396 (setq url (concat (substring url 0 (match-beginning 0)) "?...")
07e9b3f0 397 str-width (length url)))
8c8b8430
SM
398 (if (< str-width fr-width)
399 nil ; Hey, we are done!
400 (setq urlobj (url-generic-parse-url url)
401 fname (url-filename urlobj)
402 fr-width (- fr-width 4))
403 (while (and (>= str-width fr-width)
404 (string-match "/" fname))
405 (setq fname (substring fname (match-end 0) nil)
406 modified (1+ modified))
d18ec89f 407 (setf (url-filename urlobj) fname)
8c8b8430
SM
408 (setq url (url-recreate-url urlobj)
409 str-width (length url)))
410 (if (> modified 1)
411 (setq fname (concat "/.../" fname))
412 (setq fname (concat "/" fname)))
d18ec89f 413 (setf (url-filename urlobj) fname)
8c8b8430
SM
414 (setq url (url-recreate-url urlobj)))
415 url))
416
417;;;###autoload
418(defun url-view-url (&optional no-show)
419 "View the current document's URL.
420Optional argument NO-SHOW means just return the URL, don't show it in
421the minibuffer.
422
423This uses `url-current-object', set locally to the buffer."
424 (interactive)
425 (if (not url-current-object)
426 nil
427 (if no-show
428 (url-recreate-url url-current-object)
429 (message "%s" (url-recreate-url url-current-object)))))
430
431(eval-and-compile
432 (defvar url-get-url-filename-chars "-%.?@a-zA-Z0-9()_/:~=&"
433 "Valid characters in a URL")
434 )
435
436(defun url-get-url-at-point (&optional pt)
437 "Get the URL closest to point, but don't change position.
438Has a preference for looking backward when not directly on a symbol."
439 ;; Not at all perfect - point must be right in the name.
440 (save-excursion
441 (if pt (goto-char pt))
442 (let (start url)
443 (save-excursion
444 ;; first see if you're just past a filename
445 (if (not (eobp))
446 (if (looking-at "[] \t\n[{}()]") ; whitespace or some parens
447 (progn
448 (skip-chars-backward " \n\t\r({[]})")
449 (if (not (bobp))
450 (backward-char 1)))))
451 (if (and (char-after (point))
452 (string-match (eval-when-compile
453 (concat "[" url-get-url-filename-chars "]"))
454 (char-to-string (char-after (point)))))
455 (progn
456 (skip-chars-backward url-get-url-filename-chars)
457 (setq start (point))
458 (skip-chars-forward url-get-url-filename-chars))
459 (setq start (point)))
460 (setq url (buffer-substring-no-properties start (point))))
461 (if (and url (string-match "^(.*)\\.?$" url))
462 (setq url (match-string 1 url)))
463 (if (and url (string-match "^URL:" url))
464 (setq url (substring url 4 nil)))
465 (if (and url (string-match "\\.$" url))
466 (setq url (substring url 0 -1)))
467 (if (and url (string-match "^www\\." url))
468 (setq url (concat "http://" url)))
469 (if (and url (not (string-match url-nonrelative-link url)))
470 (setq url nil))
471 url)))
472
473(defun url-generate-unique-filename (&optional fmt)
474 "Generate a unique filename in `url-temporary-directory'."
475 (if (not fmt)
476 (let ((base (format "url-tmp.%d" (user-real-uid)))
477 (fname "")
478 (x 0))
479 (setq fname (format "%s%d" base x))
480 (while (file-exists-p
481 (expand-file-name fname url-temporary-directory))
482 (setq x (1+ x)
483 fname (concat base (int-to-string x))))
484 (expand-file-name fname url-temporary-directory))
485 (let ((base (concat "url" (int-to-string (user-real-uid))))
486 (fname "")
487 (x 0))
488 (setq fname (format fmt (concat base (int-to-string x))))
489 (while (file-exists-p
490 (expand-file-name fname url-temporary-directory))
491 (setq x (1+ x)
492 fname (format fmt (concat base (int-to-string x)))))
493 (expand-file-name fname url-temporary-directory))))
494
495(defun url-extract-mime-headers ()
496 "Set `url-current-mime-headers' in current buffer."
497 (save-excursion
498 (goto-char (point-min))
499 (unless url-current-mime-headers
500 (set (make-local-variable 'url-current-mime-headers)
501 (mail-header-extract)))))
502
8703ea53
GM
503(defun url-make-private-file (file)
504 "Make FILE only readable and writable by the current user.
505Creates FILE and its parent directories if they do not exist."
506 (let ((dir (file-name-directory file)))
507 (when dir
508 ;; For historical reasons.
509 (make-directory dir t)))
510 ;; Based on doc-view-make-safe-dir.
511 (condition-case nil
512 (let ((umask (default-file-modes)))
513 (unwind-protect
514 (progn
515 (set-default-file-modes #o0600)
516 (with-temp-buffer
517 (write-region (point-min) (point-max)
518 file nil 'silent nil 'excl)))
519 (set-default-file-modes umask)))
520 (file-already-exists
521 (if (file-symlink-p file)
522 (error "Danger: `%s' is a symbolic link" file))
523 (set-file-modes file #o0600))))
524
8c8b8430 525(provide 'url-util)
e5566bd5 526
a2fd1462
SM
527;; arch-tag: 24352abc-5a5a-412e-90cd-313b26bed5c9
528;;; url-util.el ends here