Commit | Line | Data |
---|---|---|
8cd39fb3 MH |
1 | ;;; rng-uri.el --- URI parsing and manipulation |
2 | ||
09aa73e6 | 3 | ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc. |
8cd39fb3 MH |
4 | |
5 | ;; Author: James Clark | |
6 | ;; Keywords: XML | |
7 | ||
09aa73e6 | 8 | ;; This file is part of GNU Emacs. |
8cd39fb3 | 9 | |
09aa73e6 GM |
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 3, or (at your option) | |
13 | ;; any later version. | |
8cd39fb3 | 14 | |
09aa73e6 GM |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
22 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
23 | ;; Boston, MA 02110-1301, USA. | |
8cd39fb3 MH |
24 | |
25 | ;;; Commentary: | |
26 | ||
27 | ;;; Code: | |
28 | ||
29 | (defun rng-file-name-uri (f) | |
30 | "Return a URI for the filename F. | |
31 | Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to | |
32 | escape them using %HH." | |
33 | (setq f (expand-file-name f)) | |
34 | (let ((url | |
35 | (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]" | |
36 | 'rng-percent-encode | |
37 | f))) | |
38 | (concat "file:" | |
39 | (if (and (> (length url) 0) | |
40 | (= (aref url 0) ?/)) | |
41 | "//" | |
42 | "///") | |
43 | url))) | |
44 | ||
45 | (defun rng-uri-escape-multibyte (uri) | |
46 | "Escape multibyte characters in URI." | |
47 | (replace-regexp-in-string "[:nonascii:]" | |
48 | 'rng-percent-encode | |
49 | (encode-coding-string uri 'utf-8))) | |
50 | ||
51 | (defun rng-percent-encode (str) | |
52 | (apply 'concat | |
53 | (mapcar (lambda (ch) | |
54 | (format "%%%x%x" (/ ch 16) (% ch 16))) | |
55 | (string-to-list str)))) | |
56 | ||
57 | ||
58 | (defun rng-uri-file-name (uri) | |
59 | "Return the filename represented by a URI. | |
60 | Signal an error if URI is not a valid file URL." | |
61 | (rng-uri-file-name-1 uri nil)) | |
62 | ||
63 | (defun rng-uri-pattern-file-name-regexp (pattern) | |
64 | "Return a regexp for filenames represented by URIs that match PATTERN." | |
65 | (rng-uri-file-name-1 pattern 'match)) | |
66 | ||
67 | (defun rng-uri-pattern-file-name-replace-match (pattern) | |
68 | (rng-uri-file-name-1 pattern 'replace)) | |
69 | ||
70 | ;; pattern is either nil or match or replace | |
71 | (defun rng-uri-file-name-1 (uri pattern) | |
72 | (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri) | |
73 | (rng-uri-error "Bad escapes in URI `%s'" uri)) | |
74 | (setq uri (rng-uri-unescape-multibyte uri)) | |
75 | (let* ((components | |
76 | (or (rng-uri-split uri) | |
77 | (rng-uri-error "Cannot split URI `%s' into its components" uri))) | |
78 | (scheme (nth 0 components)) | |
79 | (authority (nth 1 components)) | |
80 | (path (nth 2 components)) | |
81 | (absolutep (string-match "\\`/" path)) | |
82 | (query (nth 3 components)) | |
83 | (fragment-id (nth 4 components))) | |
84 | (cond ((not scheme) | |
85 | (unless pattern | |
86 | (rng-uri-error "URI `%s' does not have a scheme" uri))) | |
87 | ((not (string= (downcase scheme) "file")) | |
88 | (rng-uri-error "URI `%s' does not use the `file:' scheme" uri))) | |
89 | (when (not (member authority | |
90 | (cons system-name '(nil "" "localhost")))) | |
91 | (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'" | |
92 | uri)) | |
93 | (when query | |
94 | (rng-uri-error "`?' not escaped in file URI `%s'" uri)) | |
95 | (when fragment-id | |
96 | (rng-uri-error "URI `%s' has a fragment identifier" uri)) | |
97 | (when (string-match ";" path) | |
98 | (rng-uri-error "`;' not escaped in URI `%s'" uri)) | |
99 | (when (string-match "%2[fF]" path) ;; 2f is hex code of slash | |
100 | (rng-uri-error "Escaped slash in URI `%s'" uri)) | |
101 | (when (and (eq system-type 'windows-nt) | |
102 | absolutep | |
103 | (file-name-absolute-p (substring path 1))) | |
104 | (setq path (substring path 1))) | |
105 | (when (and pattern (string-match "\\`\\./" path)) | |
106 | (setq path (substring path 2))) | |
107 | (setq path | |
108 | (cond ((eq pattern 'match) | |
109 | (rng-uri-unescape-unibyte-match path)) | |
110 | ((eq pattern 'replace) | |
111 | (rng-uri-unescape-unibyte-replace path 2)) | |
112 | (t | |
113 | (rng-uri-unescape-unibyte path)))) | |
114 | (when (string-match "\000" path) | |
115 | (rng-uri-error "URI `%s' has NUL character in path" uri)) | |
116 | (when (eq pattern 'match) | |
117 | (setq path | |
118 | (concat (if absolutep | |
119 | "\\(\\)" | |
120 | "\\(\\(?:[^/]*/\\)*\\)") | |
121 | path))) | |
122 | (cond ((eq pattern 'match) | |
123 | (concat "\\`" path "\\'")) | |
124 | ((and (eq pattern 'replace) | |
125 | (not absolutep)) | |
126 | (concat "\\1" path)) | |
127 | (t path)))) | |
128 | ||
129 | (defun rng-uri-error (&rest args) | |
130 | (signal 'rng-uri-error (list (apply 'format args)))) | |
131 | ||
132 | (put 'rng-uri-error 'error-conditions '(error rng-uri-error)) | |
133 | (put 'rng-uri-error 'error-message "Invalid URI") | |
134 | ||
135 | (defun rng-uri-split (str) | |
136 | (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\ | |
137 | \\(?://\\([^/?#]*\\)\\)?\ | |
138 | \\([^?#]*\\)\ | |
139 | \\(?:\\?\\([^#]*\\)\\)?\ | |
140 | \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'" | |
141 | str) | |
142 | (list (match-string 1 str) | |
143 | (match-string 2 str) | |
144 | (match-string 3 str) | |
145 | (match-string 4 str) | |
146 | (match-string 5 str)))) | |
147 | ||
148 | (defun rng-uri-join (scheme authority path &optional query fragment-id) | |
149 | (when path | |
150 | (let (parts) | |
151 | (when fragment-id | |
152 | (setq parts (list "#" fragment-id))) | |
153 | (when query | |
154 | (setq parts | |
155 | (cons "?" | |
156 | (cons query parts)))) | |
157 | (setq parts (cons path parts)) | |
158 | (when authority | |
159 | (setq parts | |
160 | (cons "//" | |
161 | (cons authority parts)))) | |
162 | (when scheme | |
163 | (setq parts | |
164 | (cons scheme | |
165 | (cons ":" parts)))) | |
166 | (apply 'concat parts)))) | |
167 | ||
168 | (defun rng-uri-resolve (uri-ref base-uri) | |
169 | "Resolve a possibly relative URI reference into absolute form. | |
170 | URI-REF is the URI reference to be resolved. | |
171 | BASE-URI is the base URI to use for resolving it. | |
172 | The algorithm is specified by RFC 2396. | |
173 | If there is some problem with URI-REF or BASE-URI, then | |
174 | URI-REF will be returned." | |
175 | (let* ((components (rng-uri-split uri-ref)) | |
176 | (scheme (nth 0 components)) | |
177 | (authority (nth 1 components)) | |
178 | (path (nth 2 components)) | |
179 | (query (nth 3 components)) | |
180 | (fragment-id (nth 4 components)) | |
181 | (base-components (rng-uri-split base-uri))) | |
182 | (if (or (not components) | |
183 | scheme | |
184 | (not base-components) | |
185 | (not (nth 0 base-components))) | |
186 | uri-ref | |
187 | (setq scheme (nth 0 base-components)) | |
188 | (when (not authority) | |
189 | (setq authority (nth 1 base-components)) | |
190 | (if (and (equal path "") (not query)) | |
191 | ;; Handle same document reference by returning | |
192 | ;; same URI (RFC 2396bis does this too). | |
193 | (setq path (nth 2 base-components) | |
194 | query (nth 3 base-components)) | |
195 | (setq path (rng-resolve-path path (nth 2 base-components))))) | |
196 | (rng-uri-join scheme | |
197 | authority | |
198 | path | |
199 | query | |
200 | fragment-id)))) | |
201 | ||
202 | ;; See RFC 2396 5.2, steps 5 and 6 | |
203 | (defun rng-resolve-path (path base-path) | |
204 | ;; Step 5 | |
205 | (if (or (string-match "\\`/" path) | |
206 | (not (string-match "\\`/" base-path))) | |
207 | path | |
208 | ;; Step 6 | |
209 | ;; (a), (b) | |
210 | (let ((segments (rng-split-path path)) | |
211 | (base-segments (rng-split-path base-path))) | |
212 | (if (> (length base-segments) 1) | |
213 | (setq segments (nconc (nbutlast base-segments) | |
214 | segments)) | |
215 | (setcar segments | |
216 | (concat (car base-segments) (car segments)))) | |
217 | ;; (d) | |
218 | (let ((last-segment (last segments))) | |
219 | (when (equal (car last-segment) ".") | |
220 | (setcar last-segment ""))) | |
221 | ;; (c) | |
222 | (setq segments (delete "." segments)) | |
223 | ;; (e) | |
224 | (let (iter matched) | |
225 | (while (progn | |
226 | (setq matched nil) | |
227 | (setq iter (cdr segments)) | |
228 | (while (and iter (not matched)) | |
229 | (if (or (not (equal (cadr iter) "..")) | |
230 | (equal (car iter) "..")) | |
231 | (setq iter (cdr iter)) | |
232 | (setcar iter nil) | |
233 | (setcar (cdr iter) | |
234 | ;; (f) | |
235 | (if (cddr iter) nil "")) | |
236 | (setq matched t) | |
237 | (setq segments (delq nil segments)))) | |
238 | matched))) | |
239 | (rng-join-path segments)))) | |
240 | ||
241 | (defun rng-relative-uri (full base) | |
242 | "Return a URI that relative to BASE is equivalent to FULL. | |
243 | The returned URI will be relative if possible. | |
244 | Both FULL and BASE must be absolute URIs." | |
245 | (let* ((components (rng-uri-split full)) | |
246 | (scheme (nth 0 components)) | |
247 | (authority (nth 1 components)) | |
248 | (path (nth 2 components)) | |
249 | (query (nth 3 components)) | |
250 | (fragment-id (nth 4 components)) | |
251 | (base-components (rng-uri-split base))) | |
252 | (if (and components | |
253 | base-components | |
254 | scheme | |
255 | (equal scheme | |
256 | (nth 0 base-components))) | |
257 | (progn | |
258 | (setq scheme nil) | |
259 | (when (and authority | |
260 | (equal authority | |
261 | (nth 1 base-components))) | |
262 | (setq authority nil) | |
263 | (setq path (rng-relative-path path (nth 2 base-components)))) | |
264 | (rng-uri-join scheme authority path query fragment-id)) | |
265 | full))) | |
266 | ||
267 | (defun rng-relative-path (path base-path) | |
268 | (let ((segments (rng-split-path path)) | |
269 | (base-segments (rng-split-path base-path))) | |
270 | (when (> (length base-segments) 1) | |
271 | (setq base-segments (nbutlast base-segments))) | |
272 | (if (or (member "." segments) | |
273 | (member ".." segments) | |
274 | (member "." base-segments) | |
275 | (member ".." base-segments)) | |
276 | path | |
277 | (while (and segments | |
278 | base-segments | |
279 | (string= (car segments) | |
280 | (car base-segments))) | |
281 | (setq segments (cdr segments)) | |
282 | (setq base-segments (cdr base-segments))) | |
283 | (while base-segments | |
284 | (setq base-segments (cdr base-segments)) | |
285 | (setq segments (cons ".." segments))) | |
286 | (when (equal (car segments) "") | |
287 | (setq segments (cons "." segments))) | |
288 | (rng-join-path segments)))) | |
289 | ||
290 | (defun rng-split-path (path) | |
291 | (let ((start 0) | |
292 | segments) | |
293 | (while (string-match "/" path start) | |
294 | (setq segments (cons (substring path start (match-beginning 0)) | |
295 | segments)) | |
296 | (setq start (match-end 0))) | |
297 | (nreverse (cons (substring path start) segments)))) | |
298 | ||
299 | (defun rng-join-path (segments) | |
300 | (and segments | |
301 | (mapconcat 'identity segments "/"))) | |
302 | ||
303 | (defun rng-uri-unescape-multibyte (str) | |
304 | (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+" | |
305 | 'rng-multibyte-percent-decode | |
306 | str)) | |
307 | ||
308 | (defun rng-multibyte-percent-decode (str) | |
309 | (decode-coding-string (apply 'string | |
310 | (mapcar (lambda (h) (string-to-number h 16)) | |
311 | (split-string str "%"))) | |
312 | 'utf-8)) | |
313 | ||
314 | (defun rng-uri-unescape-unibyte (str) | |
315 | (replace-regexp-in-string "%[0-7][0-9a-fA-F]" | |
316 | (lambda (h) | |
317 | (string-to-number (substring h 1) 16)) | |
318 | str | |
319 | t | |
320 | t)) | |
321 | ||
322 | (defun rng-uri-unescape-unibyte-match (str) | |
323 | (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]" | |
324 | (lambda (match) | |
325 | (if (string= match "*") | |
326 | "\\([^/]*\\)" | |
327 | (regexp-quote | |
328 | (if (= (length match) 1) | |
329 | match | |
330 | (string-to-number (substring match 1) | |
331 | 16))))) | |
332 | str | |
333 | t | |
334 | t)) | |
335 | ||
336 | (defun rng-uri-unescape-unibyte-replace (str next-match-index) | |
337 | (replace-regexp-in-string | |
338 | "%[0-7][0-9a-fA-F]\\|[^%]" | |
339 | (lambda (match) | |
340 | (if (string= match "*") | |
341 | (let ((n next-match-index)) | |
342 | (setq next-match-index (1+ n)) | |
343 | (format "\\%s" n)) | |
344 | (let ((ch (if (= (length match) 1) | |
345 | (aref match 0) | |
346 | (string-to-number (substring match 1) | |
347 | 16)))) | |
348 | (if (eq ch ?\\) | |
349 | (string ?\\ ?\\) | |
350 | (string ch))))) | |
351 | str | |
352 | t | |
353 | t)) | |
354 | ||
355 | (provide 'rng-uri) | |
356 | ||
ab4c34c6 | 357 | ;; arch-tag: c7b7b8b8-61d1-48ec-82bc-7001c70b2e9d |
8cd39fb3 | 358 | ;;; rng-uri.el ends here |