1 ;;; rng-uri.el --- URI parsing and manipulation
3 ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 3, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
29 (defun rng-file-name-uri (f)
30 "Return a URI for the filename F.
31 Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
32 escape them using %HH."
33 (setq f
(expand-file-name f
))
35 (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]"
39 (if (and (> (length url
) 0)
45 (defun rng-uri-escape-multibyte (uri)
46 "Escape multibyte characters in URI."
47 (replace-regexp-in-string "[:nonascii:]"
49 (encode-coding-string uri
'utf-8
)))
51 (defun rng-percent-encode (str)
54 (format "%%%x%x" (/ ch
16) (% ch
16)))
55 (string-to-list str
))))
58 (defun rng-uri-file-name (uri)
59 "Return the filename represented by a URI.
60 Signal an error if URI is not a valid file URL."
61 (rng-uri-file-name-1 uri nil
))
63 (defun rng-uri-pattern-file-name-regexp (pattern)
64 "Return a regexp for filenames represented by URIs that match PATTERN."
65 (rng-uri-file-name-1 pattern
'match
))
67 (defun rng-uri-pattern-file-name-replace-match (pattern)
68 (rng-uri-file-name-1 pattern
'replace
))
70 ;; pattern is either nil or match or replace
71 (defun rng-uri-file-name-1 (uri pattern
)
72 (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri
)
73 (rng-uri-error "Bad escapes in URI `%s'" uri
))
74 (setq uri
(rng-uri-unescape-multibyte uri
))
76 (or (rng-uri-split uri
)
77 (rng-uri-error "Cannot split URI `%s' into its components" uri
)))
78 (scheme (nth 0 components
))
79 (authority (nth 1 components
))
80 (path (nth 2 components
))
81 (absolutep (string-match "\\`/" path
))
82 (query (nth 3 components
))
83 (fragment-id (nth 4 components
)))
86 (rng-uri-error "URI `%s' does not have a scheme" uri
)))
87 ((not (string= (downcase scheme
) "file"))
88 (rng-uri-error "URI `%s' does not use the `file:' scheme" uri
)))
89 (when (not (member authority
90 (cons system-name
'(nil "" "localhost"))))
91 (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
94 (rng-uri-error "`?' not escaped in file URI `%s'" uri
))
96 (rng-uri-error "URI `%s' has a fragment identifier" uri
))
97 (when (string-match ";" path
)
98 (rng-uri-error "`;' not escaped in URI `%s'" uri
))
99 (when (string-match "%2[fF]" path
) ;; 2f is hex code of slash
100 (rng-uri-error "Escaped slash in URI `%s'" uri
))
101 (when (and (eq system-type
'windows-nt
)
103 (file-name-absolute-p (substring path
1)))
104 (setq path
(substring path
1)))
105 (when (and pattern
(string-match "\\`\\./" path
))
106 (setq path
(substring path
2)))
108 (cond ((eq pattern
'match
)
109 (rng-uri-unescape-unibyte-match path
))
110 ((eq pattern
'replace
)
111 (rng-uri-unescape-unibyte-replace path
2))
113 (rng-uri-unescape-unibyte path
))))
114 (when (string-match "\000" path
)
115 (rng-uri-error "URI `%s' has NUL character in path" uri
))
116 (when (eq pattern
'match
)
118 (concat (if absolutep
120 "\\(\\(?:[^/]*/\\)*\\)")
122 (cond ((eq pattern
'match
)
123 (concat "\\`" path
"\\'"))
124 ((and (eq pattern
'replace
)
129 (defun rng-uri-error (&rest args
)
130 (signal 'rng-uri-error
(list (apply 'format args
))))
132 (put 'rng-uri-error
'error-conditions
'(error rng-uri-error
))
133 (put 'rng-uri-error
'error-message
"Invalid URI")
135 (defun rng-uri-split (str)
136 (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
137 \\(?://\\([^/?#]*\\)\\)?\
139 \\(?:\\?\\([^#]*\\)\\)?\
140 \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
142 (list (match-string 1 str
)
146 (match-string 5 str
))))
148 (defun rng-uri-join (scheme authority path
&optional query fragment-id
)
152 (setq parts
(list "#" fragment-id
)))
156 (cons query parts
))))
157 (setq parts
(cons path parts
))
161 (cons authority parts
))))
166 (apply 'concat parts
))))
168 (defun rng-uri-resolve (uri-ref base-uri
)
169 "Resolve a possibly relative URI reference into absolute form.
170 URI-REF is the URI reference to be resolved.
171 BASE-URI is the base URI to use for resolving it.
172 The algorithm is specified by RFC 2396.
173 If there is some problem with URI-REF or BASE-URI, then
174 URI-REF will be returned."
175 (let* ((components (rng-uri-split uri-ref
))
176 (scheme (nth 0 components
))
177 (authority (nth 1 components
))
178 (path (nth 2 components
))
179 (query (nth 3 components
))
180 (fragment-id (nth 4 components
))
181 (base-components (rng-uri-split base-uri
)))
182 (if (or (not components
)
184 (not base-components
)
185 (not (nth 0 base-components
)))
187 (setq scheme
(nth 0 base-components
))
188 (when (not authority
)
189 (setq authority
(nth 1 base-components
))
190 (if (and (equal path
"") (not query
))
191 ;; Handle same document reference by returning
192 ;; same URI (RFC 2396bis does this too).
193 (setq path
(nth 2 base-components
)
194 query
(nth 3 base-components
))
195 (setq path
(rng-resolve-path path
(nth 2 base-components
)))))
202 ;; See RFC 2396 5.2, steps 5 and 6
203 (defun rng-resolve-path (path base-path
)
205 (if (or (string-match "\\`/" path
)
206 (not (string-match "\\`/" base-path
)))
210 (let ((segments (rng-split-path path
))
211 (base-segments (rng-split-path base-path
)))
212 (if (> (length base-segments
) 1)
213 (setq segments
(nconc (nbutlast base-segments
)
216 (concat (car base-segments
) (car segments
))))
218 (let ((last-segment (last segments
)))
219 (when (equal (car last-segment
) ".")
220 (setcar last-segment
"")))
222 (setq segments
(delete "." segments
))
227 (setq iter
(cdr segments
))
228 (while (and iter
(not matched
))
229 (if (or (not (equal (cadr iter
) ".."))
230 (equal (car iter
) ".."))
231 (setq iter
(cdr iter
))
235 (if (cddr iter
) nil
""))
237 (setq segments
(delq nil segments
))))
239 (rng-join-path segments
))))
241 (defun rng-relative-uri (full base
)
242 "Return a URI that relative to BASE is equivalent to FULL.
243 The returned URI will be relative if possible.
244 Both FULL and BASE must be absolute URIs."
245 (let* ((components (rng-uri-split full
))
246 (scheme (nth 0 components
))
247 (authority (nth 1 components
))
248 (path (nth 2 components
))
249 (query (nth 3 components
))
250 (fragment-id (nth 4 components
))
251 (base-components (rng-uri-split base
)))
256 (nth 0 base-components
)))
261 (nth 1 base-components
)))
263 (setq path
(rng-relative-path path
(nth 2 base-components
))))
264 (rng-uri-join scheme authority path query fragment-id
))
267 (defun rng-relative-path (path base-path
)
268 (let ((segments (rng-split-path path
))
269 (base-segments (rng-split-path base-path
)))
270 (when (> (length base-segments
) 1)
271 (setq base-segments
(nbutlast base-segments
)))
272 (if (or (member "." segments
)
273 (member ".." segments
)
274 (member "." base-segments
)
275 (member ".." base-segments
))
279 (string= (car segments
)
280 (car base-segments
)))
281 (setq segments
(cdr segments
))
282 (setq base-segments
(cdr base-segments
)))
284 (setq base-segments
(cdr base-segments
))
285 (setq segments
(cons ".." segments
)))
286 (when (equal (car segments
) "")
287 (setq segments
(cons "." segments
)))
288 (rng-join-path segments
))))
290 (defun rng-split-path (path)
293 (while (string-match "/" path start
)
294 (setq segments
(cons (substring path start
(match-beginning 0))
296 (setq start
(match-end 0)))
297 (nreverse (cons (substring path start
) segments
))))
299 (defun rng-join-path (segments)
301 (mapconcat 'identity segments
"/")))
303 (defun rng-uri-unescape-multibyte (str)
304 (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+"
305 'rng-multibyte-percent-decode
308 (defun rng-multibyte-percent-decode (str)
309 (decode-coding-string (apply 'string
310 (mapcar (lambda (h) (string-to-number h
16))
311 (split-string str
"%")))
314 (defun rng-uri-unescape-unibyte (str)
315 (replace-regexp-in-string "%[0-7][0-9a-fA-F]"
317 (string-to-number (substring h
1) 16))
322 (defun rng-uri-unescape-unibyte-match (str)
323 (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]"
325 (if (string= match
"*")
328 (if (= (length match
) 1)
330 (string-to-number (substring match
1)
336 (defun rng-uri-unescape-unibyte-replace (str next-match-index
)
337 (replace-regexp-in-string
338 "%[0-7][0-9a-fA-F]\\|[^%]"
340 (if (string= match
"*")
341 (let ((n next-match-index
))
342 (setq next-match-index
(1+ n
))
344 (let ((ch (if (= (length match
) 1)
346 (string-to-number (substring match
1)
357 ;; arch-tag: c7b7b8b8-61d1-48ec-82bc-7001c70b2e9d
358 ;;; rng-uri.el ends here