Merge from emacs--rel--22
[bpt/emacs.git] / lisp / url / url-parse.el
1 ;;; url-parse.el --- Uniform Resource Locator parser
2
3 ;; Copyright (C) 1996, 1997, 1998, 1999, 2004,
4 ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
5
6 ;; Keywords: comm, data, processes
7
8 ;; This file is part of GNU Emacs.
9 ;;
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 3, or (at your option)
13 ;; any later version.
14 ;;
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19 ;;
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
24
25 ;;; Commentary:
26
27 ;;; Code:
28
29 (require 'url-vars)
30 (eval-when-compile (require 'cl))
31
32 (autoload 'url-scheme-get-property "url-methods")
33
;; Parsed representation of a URL.  Objects are created only through
;; `url-parse-make-urlobj', whose optional positional arguments follow
;; the slot order below; no default constructor and no copier are
;; generated.
(defstruct (url
            (:copier nil)
            (:constructor nil)
            (:constructor url-parse-make-urlobj
                          (&optional type
                                     user
                                     password
                                     host
                                     portspec
                                     filename
                                     target
                                     attributes
                                     fullness)))
  type                                  ; scheme
  user                                  ; user name from the authority part
  password                              ; password from the authority part
  host                                  ; host name
  portspec                              ; port stored in the object; see `url-port'
  filename                              ; path portion
  target                                ; fragment, without the leading "#"
  attributes                            ; alist of ";"-separated attributes
  fullness)                             ; non-nil when the URL had an authority part
41
(defsubst url-port (urlobj)
  "Return the port of URLOBJ.
This is the stored PORTSPEC when there is one.  Otherwise, if
URLOBJ's FULLNESS is non-nil, it is the `default-port' property of
URLOBJ's scheme; if FULLNESS is nil too, the result is nil."
  (or (url-portspec urlobj)
      (and (url-fullness urlobj)
           (url-scheme-get-property (url-type urlobj) 'default-port))))
46
;; Make `url-port' a settable place: storing through it writes the
;; PORTSPEC slot of the URL object.
(defsetf url-port (urlobj) (port)
  (list 'setf (list 'url-portspec urlobj) port))
48
;;;###autoload
(defun url-recreate-url (urlobj)
  "Recreate a URL string from the parsed URLOBJ.
The scheme and its \":\" are written only when URLOBJ has a type, so
an object that holds a relative reference is not given a stray
leading colon.  \"//\" is written when URLOBJ has a host.  The port
is written only when it differs from the `default-port' property of
the scheme.  A nil filename is rendered as \"/\"."
  (concat
   ;; RFC 3986 section 5.3: the scheme is recomposed only if defined.
   (if (url-type urlobj)
       (concat (url-type urlobj) ":"))
   (if (url-host urlobj) "//" "")
   (if (url-user urlobj)
       (concat (url-user urlobj)
               (if (url-password urlobj)
                   (concat ":" (url-password urlobj)))
               "@"))
   (url-host urlobj)
   ;; Leave the port out when it is just the scheme's default.
   (if (and (url-port urlobj)
            (not (equal (url-port urlobj)
                        (url-scheme-get-property (url-type urlobj)
                                                 'default-port))))
       (format ":%d" (url-port urlobj)))
   (or (url-filename urlobj) "/")
   (url-recreate-url-attributes urlobj)
   (if (url-target urlobj)
       (concat "#" (url-target urlobj)))))
67
(defun url-recreate-url-attributes (urlobj)
  "Recreate the attributes of an URL string from the parsed URLOBJ.
Each element of URLOBJ's attribute list is a cons (NAME . VALUE).
It is written as \"NAME=VALUE\", or as just \"NAME\" when VALUE is
nil.  Every attribute is preceded by \";\".  Return nil when URLOBJ
has no attributes."
  (let ((attrs (url-attributes urlobj)))
    (and attrs
         (concat ";"
                 (mapconcat (lambda (pair)
                              (let ((name (car pair))
                                    (value (cdr pair)))
                                (if value
                                    (concat name "=" value)
                                  name)))
                            attrs
                            ";")))))
77
;;;###autoload
(defun url-generic-parse-url (url)
  "Return an URL-struct of the parts of URL.
The CL-style struct contains the following fields:
TYPE USER PASSWORD HOST PORTSPEC FILENAME TARGET ATTRIBUTES FULLNESS.

A nil URL yields a struct whose fields are all nil.  A URL that does
not match `url-nonrelative-link', or that starts with \"/\", is
stored whole in FILENAME with every other field nil.  Otherwise the
scheme, the authority (user, password, host, port), the path, the
attributes and the fragment are split out.  When URL names no port,
PORTSPEC is set to the `default-port' property of the scheme."
  ;; See RFC 3986.
  (cond
   ((null url)
    (url-parse-make-urlobj))
   ((or (not (string-match url-nonrelative-link url))
        (= ?/ (string-to-char url)))
    ;; This isn't correct, as a relative URL can be a fragment link
    ;; (e.g. "#foo") and many other things (see section 4.2).
    ;; However, let's not fix something that isn't broken, especially
    ;; when close to a release.
    (url-parse-make-urlobj nil nil nil nil nil url))
   (t
    (with-temp-buffer
      (set-syntax-table url-parse-syntax-table)
      (let ((save-pos nil)
            (prot nil)
            (user nil)
            (pass nil)
            (host nil)
            (port nil)
            (file nil)
            (refs nil)
            (attr nil)
            (full nil)
            (inhibit-read-only t))
        (erase-buffer)
        (insert url)
        (goto-char (point-min))
        (setq save-pos (point))

        ;; 3.1. Scheme
        (if (not (looking-at "//"))
            (progn
              ;; A scheme may contain digits as well as letters,
              ;; "+", "." and "-".
              (skip-chars-forward "a-zA-Z0-9+.\\-")
              (downcase-region save-pos (point))
              (setq prot (buffer-substring save-pos (point)))
              (skip-chars-forward ":")
              (setq save-pos (point))))

        ;; 3.2. Authority
        (if (looking-at "//")
            (progn
              (setq full t)
              (forward-char 2)
              (setq save-pos (point))
              ;; The authority ends at the next "/", "?" or "#", so a
              ;; query or fragment that directly follows the host is
              ;; not swallowed into it.
              (skip-chars-forward "^/?#")
              (setq host (buffer-substring save-pos (point)))
              ;; 3.2.1. User information: everything before the "@".
              (if (string-match "^\\([^@]+\\)@" host)
                  (setq user (match-string 1 host)
                        host (substring host (match-end 0) nil)))
              (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user))
                  (setq pass (match-string 2 user)
                        user (match-string 1 user)))
              ;; 3.2.3. Port: only a run of digits after the last ":"
              ;; at the very end of the authority.  Anchoring the
              ;; match keeps the colons inside a bracketed IPv6
              ;; literal from being taken for a port separator.
              (if (string-match ":\\([0-9]+\\)\\'" host)
                  (setq port (string-to-number (match-string 1 host))
                        host (substring host 0 (match-beginning 0))))
              ;; Drop the ":" of an empty port ("host:").
              (if (string-match ":$" host)
                  (setq host (substring host 0 (match-beginning 0))))
              (setq host (downcase host)
                    save-pos (point))))

        ;; No explicit port: fall back to the scheme's default.
        (if (not port)
            (setq port (url-scheme-get-property prot 'default-port)))

        ;; 3.3. Path (the query, if any, stays part of it).
        (setq save-pos (point))

        (if (string= "data" prot)
            ;; Gross hack to preserve ';' in data URLs: the whole rest
            ;; of the URL becomes the filename.
            (goto-char (point-max))
          ;; 3.5. Fragment: record the text after "#" in REFS and
          ;; remove it from the buffer.
          (skip-chars-forward "^#")
          (unless (eobp)
            (delete-region
             (point)
             (progn
               (skip-chars-forward "#")
               (setq refs (buffer-substring (point) (point-max)))
               (point-max))))
          ;; Attributes: everything from the first ";" to the end of
          ;; what is left; the filename stops just before that ";".
          (goto-char save-pos)
          (skip-chars-forward "^;")
          (if (not (eobp))
              (setq attr (url-parse-args (buffer-substring (point) (point-max)) t)
                    attr (nreverse attr))))

        (setq file (buffer-substring save-pos (point)))
        ;; Decode percent-escapes in the host; an escape is "%"
        ;; followed by two hexadecimal digits.
        (if (and host (string-match "%[0-9a-fA-F][0-9a-fA-F]" host))
            (setq host (url-unhex-string host)))
        (url-parse-make-urlobj
         prot user pass host port file refs attr full))))))
176
177 (provide 'url-parse)
178
179 ;; arch-tag: f338325f-71ab-4bee-93cc-78fb9a03d403
180 ;;; url-parse.el ends here