f18012abcfe8435f99e95fb3a03d541fe1428384
[bpt/emacs.git] / lisp / nxml / rng-uri.el
1 ;;; rng-uri.el --- URI parsing and manipulation
2
3 ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
4
5 ;; Author: James Clark
6 ;; Keywords: XML
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 3, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
24
25 ;;; Commentary:
26
27 ;;; Code:
28
29 (defun rng-file-name-uri (f)
30 "Return a URI for the filename F.
31 Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
32 escape them using %HH."
33 (setq f (expand-file-name f))
34 (let ((url
35 (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]"
36 'rng-percent-encode
37 f)))
38 (concat "file:"
39 (if (and (> (length url) 0)
40 (= (aref url 0) ?/))
41 "//"
42 "///")
43 url)))
44
45 (defun rng-uri-escape-multibyte (uri)
46 "Escape multibyte characters in URI."
47 (replace-regexp-in-string "[:nonascii:]"
48 'rng-percent-encode
49 (encode-coding-string uri 'utf-8)))
50
51 (defun rng-percent-encode (str)
52 (apply 'concat
53 (mapcar (lambda (ch)
54 (format "%%%x%x" (/ ch 16) (% ch 16)))
55 (string-to-list str))))
56
57
58 (defun rng-uri-file-name (uri)
59 "Return the filename represented by a URI.
60 Signal an error if URI is not a valid file URL."
61 (rng-uri-file-name-1 uri nil))
62
63 (defun rng-uri-pattern-file-name-regexp (pattern)
64 "Return a regexp for filenames represented by URIs that match PATTERN."
65 (rng-uri-file-name-1 pattern 'match))
66
67 (defun rng-uri-pattern-file-name-replace-match (pattern)
68 (rng-uri-file-name-1 pattern 'replace))
69
70 ;; pattern is either nil or match or replace
71 (defun rng-uri-file-name-1 (uri pattern)
72 (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri)
73 (rng-uri-error "Bad escapes in URI `%s'" uri))
74 (setq uri (rng-uri-unescape-multibyte uri))
75 (let* ((components
76 (or (rng-uri-split uri)
77 (rng-uri-error "Cannot split URI `%s' into its components" uri)))
78 (scheme (nth 0 components))
79 (authority (nth 1 components))
80 (path (nth 2 components))
81 (absolutep (string-match "\\`/" path))
82 (query (nth 3 components))
83 (fragment-id (nth 4 components)))
84 (cond ((not scheme)
85 (unless pattern
86 (rng-uri-error "URI `%s' does not have a scheme" uri)))
87 ((not (string= (downcase scheme) "file"))
88 (rng-uri-error "URI `%s' does not use the `file:' scheme" uri)))
89 (when (not (member authority
90 (cons system-name '(nil "" "localhost"))))
91 (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
92 uri))
93 (when query
94 (rng-uri-error "`?' not escaped in file URI `%s'" uri))
95 (when fragment-id
96 (rng-uri-error "URI `%s' has a fragment identifier" uri))
97 (when (string-match ";" path)
98 (rng-uri-error "`;' not escaped in URI `%s'" uri))
99 (when (string-match "%2[fF]" path) ;; 2f is hex code of slash
100 (rng-uri-error "Escaped slash in URI `%s'" uri))
101 (when (and (eq system-type 'windows-nt)
102 absolutep
103 (file-name-absolute-p (substring path 1)))
104 (setq path (substring path 1)))
105 (when (and pattern (string-match "\\`\\./" path))
106 (setq path (substring path 2)))
107 (setq path
108 (cond ((eq pattern 'match)
109 (rng-uri-unescape-unibyte-match path))
110 ((eq pattern 'replace)
111 (rng-uri-unescape-unibyte-replace path 2))
112 (t
113 (rng-uri-unescape-unibyte path))))
114 (when (string-match "\000" path)
115 (rng-uri-error "URI `%s' has NUL character in path" uri))
116 (when (eq pattern 'match)
117 (setq path
118 (concat (if absolutep
119 "\\(\\)"
120 "\\(\\(?:[^/]*/\\)*\\)")
121 path)))
122 (cond ((eq pattern 'match)
123 (concat "\\`" path "\\'"))
124 ((and (eq pattern 'replace)
125 (not absolutep))
126 (concat "\\1" path))
127 (t path))))
128
129 (defun rng-uri-error (&rest args)
130 (signal 'rng-uri-error (list (apply 'format args))))
131
132 (put 'rng-uri-error 'error-conditions '(error rng-uri-error))
133 (put 'rng-uri-error 'error-message "Invalid URI")
134
135 (defun rng-uri-split (str)
136 (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
137 \\(?://\\([^/?#]*\\)\\)?\
138 \\([^?#]*\\)\
139 \\(?:\\?\\([^#]*\\)\\)?\
140 \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
141 str)
142 (list (match-string 1 str)
143 (match-string 2 str)
144 (match-string 3 str)
145 (match-string 4 str)
146 (match-string 5 str))))
147
148 (defun rng-uri-join (scheme authority path &optional query fragment-id)
149 (when path
150 (let (parts)
151 (when fragment-id
152 (setq parts (list "#" fragment-id)))
153 (when query
154 (setq parts
155 (cons "?"
156 (cons query parts))))
157 (setq parts (cons path parts))
158 (when authority
159 (setq parts
160 (cons "//"
161 (cons authority parts))))
162 (when scheme
163 (setq parts
164 (cons scheme
165 (cons ":" parts))))
166 (apply 'concat parts))))
167
168 (defun rng-uri-resolve (uri-ref base-uri)
169 "Resolve a possibly relative URI reference into absolute form.
170 URI-REF is the URI reference to be resolved.
171 BASE-URI is the base URI to use for resolving it.
172 The algorithm is specified by RFC 2396.
173 If there is some problem with URI-REF or BASE-URI, then
174 URI-REF will be returned."
175 (let* ((components (rng-uri-split uri-ref))
176 (scheme (nth 0 components))
177 (authority (nth 1 components))
178 (path (nth 2 components))
179 (query (nth 3 components))
180 (fragment-id (nth 4 components))
181 (base-components (rng-uri-split base-uri)))
182 (if (or (not components)
183 scheme
184 (not base-components)
185 (not (nth 0 base-components)))
186 uri-ref
187 (setq scheme (nth 0 base-components))
188 (when (not authority)
189 (setq authority (nth 1 base-components))
190 (if (and (equal path "") (not query))
191 ;; Handle same document reference by returning
192 ;; same URI (RFC 2396bis does this too).
193 (setq path (nth 2 base-components)
194 query (nth 3 base-components))
195 (setq path (rng-resolve-path path (nth 2 base-components)))))
196 (rng-uri-join scheme
197 authority
198 path
199 query
200 fragment-id))))
201
202 ;; See RFC 2396 5.2, steps 5 and 6
203 (defun rng-resolve-path (path base-path)
204 ;; Step 5
205 (if (or (string-match "\\`/" path)
206 (not (string-match "\\`/" base-path)))
207 path
208 ;; Step 6
209 ;; (a), (b)
210 (let ((segments (rng-split-path path))
211 (base-segments (rng-split-path base-path)))
212 (if (> (length base-segments) 1)
213 (setq segments (nconc (nbutlast base-segments)
214 segments))
215 (setcar segments
216 (concat (car base-segments) (car segments))))
217 ;; (d)
218 (let ((last-segment (last segments)))
219 (when (equal (car last-segment) ".")
220 (setcar last-segment "")))
221 ;; (c)
222 (setq segments (delete "." segments))
223 ;; (e)
224 (let (iter matched)
225 (while (progn
226 (setq matched nil)
227 (setq iter (cdr segments))
228 (while (and iter (not matched))
229 (if (or (not (equal (cadr iter) ".."))
230 (equal (car iter) ".."))
231 (setq iter (cdr iter))
232 (setcar iter nil)
233 (setcar (cdr iter)
234 ;; (f)
235 (if (cddr iter) nil ""))
236 (setq matched t)
237 (setq segments (delq nil segments))))
238 matched)))
239 (rng-join-path segments))))
240
241 (defun rng-relative-uri (full base)
242 "Return a URI that relative to BASE is equivalent to FULL.
243 The returned URI will be relative if possible.
244 Both FULL and BASE must be absolute URIs."
245 (let* ((components (rng-uri-split full))
246 (scheme (nth 0 components))
247 (authority (nth 1 components))
248 (path (nth 2 components))
249 (query (nth 3 components))
250 (fragment-id (nth 4 components))
251 (base-components (rng-uri-split base)))
252 (if (and components
253 base-components
254 scheme
255 (equal scheme
256 (nth 0 base-components)))
257 (progn
258 (setq scheme nil)
259 (when (and authority
260 (equal authority
261 (nth 1 base-components)))
262 (setq authority nil)
263 (setq path (rng-relative-path path (nth 2 base-components))))
264 (rng-uri-join scheme authority path query fragment-id))
265 full)))
266
267 (defun rng-relative-path (path base-path)
268 (let ((segments (rng-split-path path))
269 (base-segments (rng-split-path base-path)))
270 (when (> (length base-segments) 1)
271 (setq base-segments (nbutlast base-segments)))
272 (if (or (member "." segments)
273 (member ".." segments)
274 (member "." base-segments)
275 (member ".." base-segments))
276 path
277 (while (and segments
278 base-segments
279 (string= (car segments)
280 (car base-segments)))
281 (setq segments (cdr segments))
282 (setq base-segments (cdr base-segments)))
283 (while base-segments
284 (setq base-segments (cdr base-segments))
285 (setq segments (cons ".." segments)))
286 (when (equal (car segments) "")
287 (setq segments (cons "." segments)))
288 (rng-join-path segments))))
289
290 (defun rng-split-path (path)
291 (let ((start 0)
292 segments)
293 (while (string-match "/" path start)
294 (setq segments (cons (substring path start (match-beginning 0))
295 segments))
296 (setq start (match-end 0)))
297 (nreverse (cons (substring path start) segments))))
298
299 (defun rng-join-path (segments)
300 (and segments
301 (mapconcat 'identity segments "/")))
302
303 (defun rng-uri-unescape-multibyte (str)
304 (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+"
305 'rng-multibyte-percent-decode
306 str))
307
308 (defun rng-multibyte-percent-decode (str)
309 (decode-coding-string (apply 'string
310 (mapcar (lambda (h) (string-to-number h 16))
311 (split-string str "%")))
312 'utf-8))
313
314 (defun rng-uri-unescape-unibyte (str)
315 (replace-regexp-in-string "%[0-7][0-9a-fA-F]"
316 (lambda (h)
317 (string-to-number (substring h 1) 16))
318 str
319 t
320 t))
321
322 (defun rng-uri-unescape-unibyte-match (str)
323 (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]"
324 (lambda (match)
325 (if (string= match "*")
326 "\\([^/]*\\)"
327 (regexp-quote
328 (if (= (length match) 1)
329 match
330 (string-to-number (substring match 1)
331 16)))))
332 str
333 t
334 t))
335
336 (defun rng-uri-unescape-unibyte-replace (str next-match-index)
337 (replace-regexp-in-string
338 "%[0-7][0-9a-fA-F]\\|[^%]"
339 (lambda (match)
340 (if (string= match "*")
341 (let ((n next-match-index))
342 (setq next-match-index (1+ n))
343 (format "\\%s" n))
344 (let ((ch (if (= (length match) 1)
345 (aref match 0)
346 (string-to-number (substring match 1)
347 16))))
348 (if (eq ch ?\\)
349 (string ?\\ ?\\)
350 (string ch)))))
351 str
352 t
353 t))
354
355 (provide 'rng-uri)
356
357 ;; arch-tag: c7b7b8b8-61d1-48ec-82bc-7001c70b2e9d
358 ;;; rng-uri.el ends here