| 1 | ;;; rng-uri.el --- URI parsing and manipulation |
| 2 | |
| 3 | ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: James Clark |
| 6 | ;; Keywords: XML |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation; either version 3, or (at your option) |
| 13 | ;; any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 22 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 23 | ;; Boston, MA 02110-1301, USA. |
| 24 | |
| 25 | ;;; Commentary: |
| 26 | |
| 27 | ;;; Code: |
| 28 | |
| 29 | (defun rng-file-name-uri (f) |
| 30 | "Return a URI for the filename F. |
| 31 | Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to |
| 32 | escape them using %HH." |
| 33 | (setq f (expand-file-name f)) |
| 34 | (let ((url |
| 35 | (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]" |
| 36 | 'rng-percent-encode |
| 37 | f))) |
| 38 | (concat "file:" |
| 39 | (if (and (> (length url) 0) |
| 40 | (= (aref url 0) ?/)) |
| 41 | "//" |
| 42 | "///") |
| 43 | url))) |
| 44 | |
| 45 | (defun rng-uri-escape-multibyte (uri) |
| 46 | "Escape multibyte characters in URI." |
| 47 | (replace-regexp-in-string "[:nonascii:]" |
| 48 | 'rng-percent-encode |
| 49 | (encode-coding-string uri 'utf-8))) |
| 50 | |
| 51 | (defun rng-percent-encode (str) |
| 52 | (apply 'concat |
| 53 | (mapcar (lambda (ch) |
| 54 | (format "%%%x%x" (/ ch 16) (% ch 16))) |
| 55 | (string-to-list str)))) |
| 56 | |
| 57 | |
| 58 | (defun rng-uri-file-name (uri) |
| 59 | "Return the filename represented by a URI. |
| 60 | Signal an error if URI is not a valid file URL." |
| 61 | (rng-uri-file-name-1 uri nil)) |
| 62 | |
| 63 | (defun rng-uri-pattern-file-name-regexp (pattern) |
| 64 | "Return a regexp for filenames represented by URIs that match PATTERN." |
| 65 | (rng-uri-file-name-1 pattern 'match)) |
| 66 | |
| 67 | (defun rng-uri-pattern-file-name-replace-match (pattern) |
| 68 | (rng-uri-file-name-1 pattern 'replace)) |
| 69 | |
| 70 | ;; pattern is either nil or match or replace |
| 71 | (defun rng-uri-file-name-1 (uri pattern) |
| 72 | (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri) |
| 73 | (rng-uri-error "Bad escapes in URI `%s'" uri)) |
| 74 | (setq uri (rng-uri-unescape-multibyte uri)) |
| 75 | (let* ((components |
| 76 | (or (rng-uri-split uri) |
| 77 | (rng-uri-error "Cannot split URI `%s' into its components" uri))) |
| 78 | (scheme (nth 0 components)) |
| 79 | (authority (nth 1 components)) |
| 80 | (path (nth 2 components)) |
| 81 | (absolutep (string-match "\\`/" path)) |
| 82 | (query (nth 3 components)) |
| 83 | (fragment-id (nth 4 components))) |
| 84 | (cond ((not scheme) |
| 85 | (unless pattern |
| 86 | (rng-uri-error "URI `%s' does not have a scheme" uri))) |
| 87 | ((not (string= (downcase scheme) "file")) |
| 88 | (rng-uri-error "URI `%s' does not use the `file:' scheme" uri))) |
| 89 | (when (not (member authority |
| 90 | (cons system-name '(nil "" "localhost")))) |
| 91 | (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'" |
| 92 | uri)) |
| 93 | (when query |
| 94 | (rng-uri-error "`?' not escaped in file URI `%s'" uri)) |
| 95 | (when fragment-id |
| 96 | (rng-uri-error "URI `%s' has a fragment identifier" uri)) |
| 97 | (when (string-match ";" path) |
| 98 | (rng-uri-error "`;' not escaped in URI `%s'" uri)) |
| 99 | (when (string-match "%2[fF]" path) ;; 2f is hex code of slash |
| 100 | (rng-uri-error "Escaped slash in URI `%s'" uri)) |
| 101 | (when (and (eq system-type 'windows-nt) |
| 102 | absolutep |
| 103 | (file-name-absolute-p (substring path 1))) |
| 104 | (setq path (substring path 1))) |
| 105 | (when (and pattern (string-match "\\`\\./" path)) |
| 106 | (setq path (substring path 2))) |
| 107 | (setq path |
| 108 | (cond ((eq pattern 'match) |
| 109 | (rng-uri-unescape-unibyte-match path)) |
| 110 | ((eq pattern 'replace) |
| 111 | (rng-uri-unescape-unibyte-replace path 2)) |
| 112 | (t |
| 113 | (rng-uri-unescape-unibyte path)))) |
| 114 | (when (string-match "\000" path) |
| 115 | (rng-uri-error "URI `%s' has NUL character in path" uri)) |
| 116 | (when (eq pattern 'match) |
| 117 | (setq path |
| 118 | (concat (if absolutep |
| 119 | "\\(\\)" |
| 120 | "\\(\\(?:[^/]*/\\)*\\)") |
| 121 | path))) |
| 122 | (cond ((eq pattern 'match) |
| 123 | (concat "\\`" path "\\'")) |
| 124 | ((and (eq pattern 'replace) |
| 125 | (not absolutep)) |
| 126 | (concat "\\1" path)) |
| 127 | (t path)))) |
| 128 | |
| 129 | (defun rng-uri-error (&rest args) |
| 130 | (signal 'rng-uri-error (list (apply 'format args)))) |
| 131 | |
| 132 | (put 'rng-uri-error 'error-conditions '(error rng-uri-error)) |
| 133 | (put 'rng-uri-error 'error-message "Invalid URI") |
| 134 | |
| 135 | (defun rng-uri-split (str) |
| 136 | (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\ |
| 137 | \\(?://\\([^/?#]*\\)\\)?\ |
| 138 | \\([^?#]*\\)\ |
| 139 | \\(?:\\?\\([^#]*\\)\\)?\ |
| 140 | \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'" |
| 141 | str) |
| 142 | (list (match-string 1 str) |
| 143 | (match-string 2 str) |
| 144 | (match-string 3 str) |
| 145 | (match-string 4 str) |
| 146 | (match-string 5 str)))) |
| 147 | |
| 148 | (defun rng-uri-join (scheme authority path &optional query fragment-id) |
| 149 | (when path |
| 150 | (let (parts) |
| 151 | (when fragment-id |
| 152 | (setq parts (list "#" fragment-id))) |
| 153 | (when query |
| 154 | (setq parts |
| 155 | (cons "?" |
| 156 | (cons query parts)))) |
| 157 | (setq parts (cons path parts)) |
| 158 | (when authority |
| 159 | (setq parts |
| 160 | (cons "//" |
| 161 | (cons authority parts)))) |
| 162 | (when scheme |
| 163 | (setq parts |
| 164 | (cons scheme |
| 165 | (cons ":" parts)))) |
| 166 | (apply 'concat parts)))) |
| 167 | |
| 168 | (defun rng-uri-resolve (uri-ref base-uri) |
| 169 | "Resolve a possibly relative URI reference into absolute form. |
| 170 | URI-REF is the URI reference to be resolved. |
| 171 | BASE-URI is the base URI to use for resolving it. |
| 172 | The algorithm is specified by RFC 2396. |
| 173 | If there is some problem with URI-REF or BASE-URI, then |
| 174 | URI-REF will be returned." |
| 175 | (let* ((components (rng-uri-split uri-ref)) |
| 176 | (scheme (nth 0 components)) |
| 177 | (authority (nth 1 components)) |
| 178 | (path (nth 2 components)) |
| 179 | (query (nth 3 components)) |
| 180 | (fragment-id (nth 4 components)) |
| 181 | (base-components (rng-uri-split base-uri))) |
| 182 | (if (or (not components) |
| 183 | scheme |
| 184 | (not base-components) |
| 185 | (not (nth 0 base-components))) |
| 186 | uri-ref |
| 187 | (setq scheme (nth 0 base-components)) |
| 188 | (when (not authority) |
| 189 | (setq authority (nth 1 base-components)) |
| 190 | (if (and (equal path "") (not query)) |
| 191 | ;; Handle same document reference by returning |
| 192 | ;; same URI (RFC 2396bis does this too). |
| 193 | (setq path (nth 2 base-components) |
| 194 | query (nth 3 base-components)) |
| 195 | (setq path (rng-resolve-path path (nth 2 base-components))))) |
| 196 | (rng-uri-join scheme |
| 197 | authority |
| 198 | path |
| 199 | query |
| 200 | fragment-id)))) |
| 201 | |
| 202 | ;; See RFC 2396 5.2, steps 5 and 6 |
| 203 | (defun rng-resolve-path (path base-path) |
| 204 | ;; Step 5 |
| 205 | (if (or (string-match "\\`/" path) |
| 206 | (not (string-match "\\`/" base-path))) |
| 207 | path |
| 208 | ;; Step 6 |
| 209 | ;; (a), (b) |
| 210 | (let ((segments (rng-split-path path)) |
| 211 | (base-segments (rng-split-path base-path))) |
| 212 | (if (> (length base-segments) 1) |
| 213 | (setq segments (nconc (nbutlast base-segments) |
| 214 | segments)) |
| 215 | (setcar segments |
| 216 | (concat (car base-segments) (car segments)))) |
| 217 | ;; (d) |
| 218 | (let ((last-segment (last segments))) |
| 219 | (when (equal (car last-segment) ".") |
| 220 | (setcar last-segment ""))) |
| 221 | ;; (c) |
| 222 | (setq segments (delete "." segments)) |
| 223 | ;; (e) |
| 224 | (let (iter matched) |
| 225 | (while (progn |
| 226 | (setq matched nil) |
| 227 | (setq iter (cdr segments)) |
| 228 | (while (and iter (not matched)) |
| 229 | (if (or (not (equal (cadr iter) "..")) |
| 230 | (equal (car iter) "..")) |
| 231 | (setq iter (cdr iter)) |
| 232 | (setcar iter nil) |
| 233 | (setcar (cdr iter) |
| 234 | ;; (f) |
| 235 | (if (cddr iter) nil "")) |
| 236 | (setq matched t) |
| 237 | (setq segments (delq nil segments)))) |
| 238 | matched))) |
| 239 | (rng-join-path segments)))) |
| 240 | |
| 241 | (defun rng-relative-uri (full base) |
| 242 | "Return a URI that relative to BASE is equivalent to FULL. |
| 243 | The returned URI will be relative if possible. |
| 244 | Both FULL and BASE must be absolute URIs." |
| 245 | (let* ((components (rng-uri-split full)) |
| 246 | (scheme (nth 0 components)) |
| 247 | (authority (nth 1 components)) |
| 248 | (path (nth 2 components)) |
| 249 | (query (nth 3 components)) |
| 250 | (fragment-id (nth 4 components)) |
| 251 | (base-components (rng-uri-split base))) |
| 252 | (if (and components |
| 253 | base-components |
| 254 | scheme |
| 255 | (equal scheme |
| 256 | (nth 0 base-components))) |
| 257 | (progn |
| 258 | (setq scheme nil) |
| 259 | (when (and authority |
| 260 | (equal authority |
| 261 | (nth 1 base-components))) |
| 262 | (setq authority nil) |
| 263 | (setq path (rng-relative-path path (nth 2 base-components)))) |
| 264 | (rng-uri-join scheme authority path query fragment-id)) |
| 265 | full))) |
| 266 | |
| 267 | (defun rng-relative-path (path base-path) |
| 268 | (let ((segments (rng-split-path path)) |
| 269 | (base-segments (rng-split-path base-path))) |
| 270 | (when (> (length base-segments) 1) |
| 271 | (setq base-segments (nbutlast base-segments))) |
| 272 | (if (or (member "." segments) |
| 273 | (member ".." segments) |
| 274 | (member "." base-segments) |
| 275 | (member ".." base-segments)) |
| 276 | path |
| 277 | (while (and segments |
| 278 | base-segments |
| 279 | (string= (car segments) |
| 280 | (car base-segments))) |
| 281 | (setq segments (cdr segments)) |
| 282 | (setq base-segments (cdr base-segments))) |
| 283 | (while base-segments |
| 284 | (setq base-segments (cdr base-segments)) |
| 285 | (setq segments (cons ".." segments))) |
| 286 | (when (equal (car segments) "") |
| 287 | (setq segments (cons "." segments))) |
| 288 | (rng-join-path segments)))) |
| 289 | |
| 290 | (defun rng-split-path (path) |
| 291 | (let ((start 0) |
| 292 | segments) |
| 293 | (while (string-match "/" path start) |
| 294 | (setq segments (cons (substring path start (match-beginning 0)) |
| 295 | segments)) |
| 296 | (setq start (match-end 0))) |
| 297 | (nreverse (cons (substring path start) segments)))) |
| 298 | |
| 299 | (defun rng-join-path (segments) |
| 300 | (and segments |
| 301 | (mapconcat 'identity segments "/"))) |
| 302 | |
| 303 | (defun rng-uri-unescape-multibyte (str) |
| 304 | (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+" |
| 305 | 'rng-multibyte-percent-decode |
| 306 | str)) |
| 307 | |
| 308 | (defun rng-multibyte-percent-decode (str) |
| 309 | (decode-coding-string (apply 'string |
| 310 | (mapcar (lambda (h) (string-to-number h 16)) |
| 311 | (split-string str "%"))) |
| 312 | 'utf-8)) |
| 313 | |
| 314 | (defun rng-uri-unescape-unibyte (str) |
| 315 | (replace-regexp-in-string "%[0-7][0-9a-fA-F]" |
| 316 | (lambda (h) |
| 317 | (string-to-number (substring h 1) 16)) |
| 318 | str |
| 319 | t |
| 320 | t)) |
| 321 | |
| 322 | (defun rng-uri-unescape-unibyte-match (str) |
| 323 | (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]" |
| 324 | (lambda (match) |
| 325 | (if (string= match "*") |
| 326 | "\\([^/]*\\)" |
| 327 | (regexp-quote |
| 328 | (if (= (length match) 1) |
| 329 | match |
| 330 | (string-to-number (substring match 1) |
| 331 | 16))))) |
| 332 | str |
| 333 | t |
| 334 | t)) |
| 335 | |
| 336 | (defun rng-uri-unescape-unibyte-replace (str next-match-index) |
| 337 | (replace-regexp-in-string |
| 338 | "%[0-7][0-9a-fA-F]\\|[^%]" |
| 339 | (lambda (match) |
| 340 | (if (string= match "*") |
| 341 | (let ((n next-match-index)) |
| 342 | (setq next-match-index (1+ n)) |
| 343 | (format "\\%s" n)) |
| 344 | (let ((ch (if (= (length match) 1) |
| 345 | (aref match 0) |
| 346 | (string-to-number (substring match 1) |
| 347 | 16)))) |
| 348 | (if (eq ch ?\\) |
| 349 | (string ?\\ ?\\) |
| 350 | (string ch))))) |
| 351 | str |
| 352 | t |
| 353 | t)) |
| 354 | |
| 355 | (provide 'rng-uri) |
| 356 | |
| 357 | ;; arch-tag: c7b7b8b8-61d1-48ec-82bc-7001c70b2e9d |
| 358 | ;;; rng-uri.el ends here |