Commit | Line | Data |
---|---|---|
8cd39fb3 MH |
1 | ;;; rng-uri.el --- URI parsing and manipulation |
2 | ||
ba318903 | 3 | ;; Copyright (C) 2003, 2007-2014 Free Software Foundation, Inc. |
8cd39fb3 MH |
4 | |
5 | ;; Author: James Clark | |
3e77f05d | 6 | ;; Keywords: wp, hypermedia, languages, XML |
8cd39fb3 | 7 | |
09aa73e6 | 8 | ;; This file is part of GNU Emacs. |
8cd39fb3 | 9 | |
4936186e | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
09aa73e6 | 11 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
8cd39fb3 | 14 | |
09aa73e6 GM |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
8cd39fb3 MH |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;;; Code: | |
26 | ||
27 | (defun rng-file-name-uri (f) | |
28 | "Return a URI for the filename F. | |
10545bd8 | 29 | Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to |
8cd39fb3 MH |
30 | escape them using %HH." |
31 | (setq f (expand-file-name f)) | |
32 | (let ((url | |
33 | (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]" | |
34 | 'rng-percent-encode | |
35 | f))) | |
36 | (concat "file:" | |
37 | (if (and (> (length url) 0) | |
38 | (= (aref url 0) ?/)) | |
39 | "//" | |
40 | "///") | |
41 | url))) | |
42 | ||
43 | (defun rng-uri-escape-multibyte (uri) | |
44 | "Escape multibyte characters in URI." | |
45 | (replace-regexp-in-string "[:nonascii:]" | |
46 | 'rng-percent-encode | |
47 | (encode-coding-string uri 'utf-8))) | |
48 | ||
49 | (defun rng-percent-encode (str) | |
50 | (apply 'concat | |
51 | (mapcar (lambda (ch) | |
52 | (format "%%%x%x" (/ ch 16) (% ch 16))) | |
53 | (string-to-list str)))) | |
54 | ||
55 | ||
56 | (defun rng-uri-file-name (uri) | |
57 | "Return the filename represented by a URI. | |
58 | Signal an error if URI is not a valid file URL." | |
59 | (rng-uri-file-name-1 uri nil)) | |
60 | ||
61 | (defun rng-uri-pattern-file-name-regexp (pattern) | |
62 | "Return a regexp for filenames represented by URIs that match PATTERN." | |
63 | (rng-uri-file-name-1 pattern 'match)) | |
64 | ||
65 | (defun rng-uri-pattern-file-name-replace-match (pattern) | |
66 | (rng-uri-file-name-1 pattern 'replace)) | |
67 | ||
68 | ;; pattern is either nil or match or replace | |
69 | (defun rng-uri-file-name-1 (uri pattern) | |
70 | (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri) | |
71 | (rng-uri-error "Bad escapes in URI `%s'" uri)) | |
72 | (setq uri (rng-uri-unescape-multibyte uri)) | |
73 | (let* ((components | |
74 | (or (rng-uri-split uri) | |
75 | (rng-uri-error "Cannot split URI `%s' into its components" uri))) | |
76 | (scheme (nth 0 components)) | |
77 | (authority (nth 1 components)) | |
78 | (path (nth 2 components)) | |
79 | (absolutep (string-match "\\`/" path)) | |
80 | (query (nth 3 components)) | |
81 | (fragment-id (nth 4 components))) | |
82 | (cond ((not scheme) | |
83 | (unless pattern | |
84 | (rng-uri-error "URI `%s' does not have a scheme" uri))) | |
85 | ((not (string= (downcase scheme) "file")) | |
86 | (rng-uri-error "URI `%s' does not use the `file:' scheme" uri))) | |
87 | (when (not (member authority | |
88 | (cons system-name '(nil "" "localhost")))) | |
89 | (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'" | |
90 | uri)) | |
91 | (when query | |
92 | (rng-uri-error "`?' not escaped in file URI `%s'" uri)) | |
93 | (when fragment-id | |
94 | (rng-uri-error "URI `%s' has a fragment identifier" uri)) | |
95 | (when (string-match ";" path) | |
96 | (rng-uri-error "`;' not escaped in URI `%s'" uri)) | |
97 | (when (string-match "%2[fF]" path) ;; 2f is hex code of slash | |
98 | (rng-uri-error "Escaped slash in URI `%s'" uri)) | |
99 | (when (and (eq system-type 'windows-nt) | |
100 | absolutep | |
101 | (file-name-absolute-p (substring path 1))) | |
102 | (setq path (substring path 1))) | |
103 | (when (and pattern (string-match "\\`\\./" path)) | |
104 | (setq path (substring path 2))) | |
10545bd8 | 105 | (setq path |
8cd39fb3 MH |
106 | (cond ((eq pattern 'match) |
107 | (rng-uri-unescape-unibyte-match path)) | |
108 | ((eq pattern 'replace) | |
109 | (rng-uri-unescape-unibyte-replace path 2)) | |
110 | (t | |
111 | (rng-uri-unescape-unibyte path)))) | |
112 | (when (string-match "\000" path) | |
113 | (rng-uri-error "URI `%s' has NUL character in path" uri)) | |
114 | (when (eq pattern 'match) | |
115 | (setq path | |
116 | (concat (if absolutep | |
117 | "\\(\\)" | |
118 | "\\(\\(?:[^/]*/\\)*\\)") | |
119 | path))) | |
120 | (cond ((eq pattern 'match) | |
121 | (concat "\\`" path "\\'")) | |
122 | ((and (eq pattern 'replace) | |
123 | (not absolutep)) | |
124 | (concat "\\1" path)) | |
125 | (t path)))) | |
126 | ||
127 | (defun rng-uri-error (&rest args) | |
128 | (signal 'rng-uri-error (list (apply 'format args)))) | |
129 | ||
54bd972f | 130 | (define-error 'rng-uri-error "Invalid URI") |
8cd39fb3 MH |
131 | |
132 | (defun rng-uri-split (str) | |
133 | (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\ | |
134 | \\(?://\\([^/?#]*\\)\\)?\ | |
135 | \\([^?#]*\\)\ | |
136 | \\(?:\\?\\([^#]*\\)\\)?\ | |
137 | \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'" | |
138 | str) | |
139 | (list (match-string 1 str) | |
140 | (match-string 2 str) | |
141 | (match-string 3 str) | |
142 | (match-string 4 str) | |
143 | (match-string 5 str)))) | |
144 | ||
145 | (defun rng-uri-join (scheme authority path &optional query fragment-id) | |
146 | (when path | |
147 | (let (parts) | |
148 | (when fragment-id | |
149 | (setq parts (list "#" fragment-id))) | |
150 | (when query | |
151 | (setq parts | |
152 | (cons "?" | |
153 | (cons query parts)))) | |
154 | (setq parts (cons path parts)) | |
155 | (when authority | |
156 | (setq parts | |
157 | (cons "//" | |
158 | (cons authority parts)))) | |
159 | (when scheme | |
160 | (setq parts | |
161 | (cons scheme | |
162 | (cons ":" parts)))) | |
163 | (apply 'concat parts)))) | |
10545bd8 | 164 | |
8cd39fb3 MH |
165 | (defun rng-uri-resolve (uri-ref base-uri) |
166 | "Resolve a possibly relative URI reference into absolute form. | |
167 | URI-REF is the URI reference to be resolved. | |
168 | BASE-URI is the base URI to use for resolving it. | |
169 | The algorithm is specified by RFC 2396. | |
170 | If there is some problem with URI-REF or BASE-URI, then | |
171 | URI-REF will be returned." | |
172 | (let* ((components (rng-uri-split uri-ref)) | |
173 | (scheme (nth 0 components)) | |
174 | (authority (nth 1 components)) | |
175 | (path (nth 2 components)) | |
176 | (query (nth 3 components)) | |
177 | (fragment-id (nth 4 components)) | |
178 | (base-components (rng-uri-split base-uri))) | |
179 | (if (or (not components) | |
180 | scheme | |
181 | (not base-components) | |
182 | (not (nth 0 base-components))) | |
183 | uri-ref | |
184 | (setq scheme (nth 0 base-components)) | |
185 | (when (not authority) | |
186 | (setq authority (nth 1 base-components)) | |
187 | (if (and (equal path "") (not query)) | |
188 | ;; Handle same document reference by returning | |
189 | ;; same URI (RFC 2396bis does this too). | |
190 | (setq path (nth 2 base-components) | |
191 | query (nth 3 base-components)) | |
192 | (setq path (rng-resolve-path path (nth 2 base-components))))) | |
193 | (rng-uri-join scheme | |
194 | authority | |
195 | path | |
196 | query | |
197 | fragment-id)))) | |
198 | ||
199 | ;; See RFC 2396 5.2, steps 5 and 6 | |
200 | (defun rng-resolve-path (path base-path) | |
201 | ;; Step 5 | |
202 | (if (or (string-match "\\`/" path) | |
203 | (not (string-match "\\`/" base-path))) | |
204 | path | |
205 | ;; Step 6 | |
206 | ;; (a), (b) | |
207 | (let ((segments (rng-split-path path)) | |
208 | (base-segments (rng-split-path base-path))) | |
209 | (if (> (length base-segments) 1) | |
210 | (setq segments (nconc (nbutlast base-segments) | |
211 | segments)) | |
212 | (setcar segments | |
213 | (concat (car base-segments) (car segments)))) | |
214 | ;; (d) | |
215 | (let ((last-segment (last segments))) | |
216 | (when (equal (car last-segment) ".") | |
217 | (setcar last-segment ""))) | |
218 | ;; (c) | |
219 | (setq segments (delete "." segments)) | |
220 | ;; (e) | |
221 | (let (iter matched) | |
222 | (while (progn | |
223 | (setq matched nil) | |
224 | (setq iter (cdr segments)) | |
225 | (while (and iter (not matched)) | |
226 | (if (or (not (equal (cadr iter) "..")) | |
227 | (equal (car iter) "..")) | |
228 | (setq iter (cdr iter)) | |
229 | (setcar iter nil) | |
230 | (setcar (cdr iter) | |
231 | ;; (f) | |
232 | (if (cddr iter) nil "")) | |
233 | (setq matched t) | |
234 | (setq segments (delq nil segments)))) | |
235 | matched))) | |
236 | (rng-join-path segments)))) | |
237 | ||
238 | (defun rng-relative-uri (full base) | |
239 | "Return a URI that relative to BASE is equivalent to FULL. | |
240 | The returned URI will be relative if possible. | |
241 | Both FULL and BASE must be absolute URIs." | |
242 | (let* ((components (rng-uri-split full)) | |
243 | (scheme (nth 0 components)) | |
244 | (authority (nth 1 components)) | |
245 | (path (nth 2 components)) | |
246 | (query (nth 3 components)) | |
247 | (fragment-id (nth 4 components)) | |
248 | (base-components (rng-uri-split base))) | |
249 | (if (and components | |
250 | base-components | |
251 | scheme | |
252 | (equal scheme | |
253 | (nth 0 base-components))) | |
254 | (progn | |
255 | (setq scheme nil) | |
256 | (when (and authority | |
257 | (equal authority | |
258 | (nth 1 base-components))) | |
259 | (setq authority nil) | |
260 | (setq path (rng-relative-path path (nth 2 base-components)))) | |
261 | (rng-uri-join scheme authority path query fragment-id)) | |
262 | full))) | |
263 | ||
264 | (defun rng-relative-path (path base-path) | |
265 | (let ((segments (rng-split-path path)) | |
266 | (base-segments (rng-split-path base-path))) | |
267 | (when (> (length base-segments) 1) | |
268 | (setq base-segments (nbutlast base-segments))) | |
269 | (if (or (member "." segments) | |
270 | (member ".." segments) | |
271 | (member "." base-segments) | |
272 | (member ".." base-segments)) | |
273 | path | |
274 | (while (and segments | |
275 | base-segments | |
276 | (string= (car segments) | |
277 | (car base-segments))) | |
278 | (setq segments (cdr segments)) | |
279 | (setq base-segments (cdr base-segments))) | |
280 | (while base-segments | |
281 | (setq base-segments (cdr base-segments)) | |
282 | (setq segments (cons ".." segments))) | |
283 | (when (equal (car segments) "") | |
284 | (setq segments (cons "." segments))) | |
285 | (rng-join-path segments)))) | |
286 | ||
287 | (defun rng-split-path (path) | |
288 | (let ((start 0) | |
289 | segments) | |
290 | (while (string-match "/" path start) | |
291 | (setq segments (cons (substring path start (match-beginning 0)) | |
292 | segments)) | |
293 | (setq start (match-end 0))) | |
294 | (nreverse (cons (substring path start) segments)))) | |
295 | ||
296 | (defun rng-join-path (segments) | |
297 | (and segments | |
298 | (mapconcat 'identity segments "/"))) | |
299 | ||
300 | (defun rng-uri-unescape-multibyte (str) | |
301 | (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+" | |
302 | 'rng-multibyte-percent-decode | |
303 | str)) | |
304 | ||
305 | (defun rng-multibyte-percent-decode (str) | |
306 | (decode-coding-string (apply 'string | |
307 | (mapcar (lambda (h) (string-to-number h 16)) | |
308 | (split-string str "%"))) | |
309 | 'utf-8)) | |
10545bd8 | 310 | |
8cd39fb3 MH |
311 | (defun rng-uri-unescape-unibyte (str) |
312 | (replace-regexp-in-string "%[0-7][0-9a-fA-F]" | |
313 | (lambda (h) | |
314 | (string-to-number (substring h 1) 16)) | |
315 | str | |
316 | t | |
317 | t)) | |
318 | ||
319 | (defun rng-uri-unescape-unibyte-match (str) | |
320 | (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]" | |
321 | (lambda (match) | |
322 | (if (string= match "*") | |
323 | "\\([^/]*\\)" | |
324 | (regexp-quote | |
325 | (if (= (length match) 1) | |
326 | match | |
327 | (string-to-number (substring match 1) | |
328 | 16))))) | |
329 | str | |
330 | t | |
331 | t)) | |
332 | ||
333 | (defun rng-uri-unescape-unibyte-replace (str next-match-index) | |
334 | (replace-regexp-in-string | |
335 | "%[0-7][0-9a-fA-F]\\|[^%]" | |
336 | (lambda (match) | |
337 | (if (string= match "*") | |
338 | (let ((n next-match-index)) | |
339 | (setq next-match-index (1+ n)) | |
340 | (format "\\%s" n)) | |
341 | (let ((ch (if (= (length match) 1) | |
342 | (aref match 0) | |
343 | (string-to-number (substring match 1) | |
344 | 16)))) | |
345 | (if (eq ch ?\\) | |
346 | (string ?\\ ?\\) | |
347 | (string ch))))) | |
348 | str | |
349 | t | |
350 | t)) | |
351 | ||
352 | (provide 'rng-uri) | |
353 | ||
354 | ;;; rng-uri.el ends here |