Commit | Line | Data |
---|---|---|
381408e2 | 1 | /* Interface to libxml2. |
ab422c4d | 2 | Copyright (C) 2010-2013 Free Software Foundation, Inc. |
381408e2 LMI |
3 | |
4 | This file is part of GNU Emacs. | |
5 | ||
6 | GNU Emacs is free software: you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation, either version 3 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | GNU Emacs is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #include <config.h> | |
20 | ||
21 | #ifdef HAVE_LIBXML2 | |
22 | ||
381408e2 LMI |
23 | #include <libxml/tree.h> |
24 | #include <libxml/parser.h> | |
25 | #include <libxml/HTMLparser.h> | |
26 | ||
27 | #include "lisp.h" | |
e5560ff7 | 28 | #include "character.h" |
381408e2 LMI |
29 | #include "buffer.h" |
30 | ||
9078ead6 EZ |
31 | \f |
32 | static Lisp_Object Qlibxml2_dll; | |
33 | ||
#ifdef WINDOWSNT

#include <windows.h>
#include "w32.h"

/* Declare fn_FUNC, a static pointer through which the libxml2 function
   FUNC is called once its address has been looked up in the DLL.
   RETTYPE is FUNC's return type and ARGS its parenthesized parameter
   list.  */
#define DEF_XML2_FN(rettype,func,args) static rettype (FAR CDECL *fn_##func)args

/* Look FUNC up in the library LIB and store its address in fn_FUNC.
   If the lookup fails, jump to the label `bad_library', which the
   calling function must provide.  The body is wrapped in do/while (0)
   so that an invocation followed by a semicolon is exactly one
   statement and stays safe inside an unbraced if/else.  */
#define LOAD_XML2_FN(lib,func)                                  \
  do                                                            \
    {                                                           \
      fn_##func = (void *) GetProcAddress (lib, #func);         \
      if (!fn_##func)                                           \
        goto bad_library;                                       \
    }                                                           \
  while (0)

DEF_XML2_FN (htmlDocPtr, htmlReadMemory,
             (const char *, int, const char *, const char *, int));
DEF_XML2_FN (xmlDocPtr, xmlReadMemory,
             (const char *, int, const char *, const char *, int));
DEF_XML2_FN (xmlNodePtr, xmlDocGetRootElement, (xmlDocPtr));
DEF_XML2_FN (void, xmlFreeDoc, (xmlDocPtr));
DEF_XML2_FN (void, xmlCleanupParser, (void));
DEF_XML2_FN (void, xmlCheckVersion, (int));

/* Return 1 if Vlibrary_cache has an entry for Qlibxml2_dll whose value
   is t, i.e. the library was recorded as successfully loaded.  Return
   0 if there is no entry or the entry records a failure.  */
static int
libxml2_loaded_p (void)
{
  Lisp_Object found = Fassq (Qlibxml2_dll, Vlibrary_cache);

  return CONSP (found) && EQ (XCDR (found), Qt);
}

#else /* !WINDOWSNT */

/* Without dynamic loading the fn_* names are plain aliases for the
   libxml2 functions themselves.  */
#define fn_htmlReadMemory htmlReadMemory
#define fn_xmlReadMemory xmlReadMemory
#define fn_xmlDocGetRootElement xmlDocGetRootElement
#define fn_xmlFreeDoc xmlFreeDoc
#define fn_xmlCleanupParser xmlCleanupParser
#define fn_xmlCheckVersion xmlCheckVersion

/* The library is linked in directly here, so it always counts as
   loaded.  */
static int
libxml2_loaded_p (void)
{
  return 1;
}

#endif /* !WINDOWSNT */
83 | ||
/* Make sure the libxml2 entry points reachable through the fn_* names
   are usable.  Return 1 on success and 0 on failure.

   On WINDOWSNT the library is loaded on first use and the outcome is
   recorded in Vlibrary_cache under Qlibxml2_dll: t when every function
   was found, nil when a function was missing.  Elsewhere there is
   nothing to load and the result is always 1.  */
static int
init_libxml2_functions (void)
{
#ifdef WINDOWSNT
  HMODULE library;

  /* A previous call already loaded everything.  */
  if (libxml2_loaded_p ())
    return 1;

  library = w32_delayed_load (Qlibxml2_dll);
  if (!library)
    {
      message1 ("libxml2 library not found");
      return 0;
    }

  /* Each LOAD_XML2_FN jumps to bad_library if the named function is
     missing from the library.  */
  LOAD_XML2_FN (library, htmlReadMemory);
  LOAD_XML2_FN (library, xmlReadMemory);
  LOAD_XML2_FN (library, xmlDocGetRootElement);
  LOAD_XML2_FN (library, xmlFreeDoc);
  LOAD_XML2_FN (library, xmlCleanupParser);
  LOAD_XML2_FN (library, xmlCheckVersion);

  /* Every lookup succeeded; record the library as loaded.  */
  Vlibrary_cache = Fcons (Fcons (Qlibxml2_dll, Qt), Vlibrary_cache);
  return 1;

 bad_library:
  /* Record the failure in the cache as well.  */
  Vlibrary_cache = Fcons (Fcons (Qlibxml2_dll, Qnil), Vlibrary_cache);
  return 0;
#else /* !WINDOWSNT */
  return 1;
#endif /* !WINDOWSNT */
}
121 | ||
c9735e30 PE |
122 | static Lisp_Object |
123 | make_dom (xmlNode *node) | |
381408e2 | 124 | { |
4b9832a6 CY |
125 | if (node->type == XML_ELEMENT_NODE) |
126 | { | |
6c6f1994 | 127 | Lisp_Object result = list1 (intern ((char *) node->name)); |
4b9832a6 CY |
128 | xmlNode *child; |
129 | xmlAttr *property; | |
130 | Lisp_Object plist = Qnil; | |
131 | ||
132 | /* First add the attributes. */ | |
133 | property = node->properties; | |
134 | while (property != NULL) | |
135 | { | |
136 | if (property->children && | |
137 | property->children->content) | |
138 | { | |
c45e5276 PE |
139 | char *content = (char *) property->children->content; |
140 | plist = Fcons (Fcons (intern ((char *) property->name), | |
141 | build_string (content)), | |
4b9832a6 CY |
142 | plist); |
143 | } | |
144 | property = property->next; | |
145 | } | |
146 | result = Fcons (Fnreverse (plist), result); | |
147 | ||
148 | /* Then add the children of the node. */ | |
149 | child = node->children; | |
150 | while (child != NULL) | |
151 | { | |
152 | result = Fcons (make_dom (child), result); | |
153 | child = child->next; | |
154 | } | |
155 | ||
156 | return Fnreverse (result); | |
381408e2 | 157 | } |
3c2317e8 | 158 | else if (node->type == XML_TEXT_NODE || node->type == XML_CDATA_SECTION_NODE) |
4b9832a6 CY |
159 | { |
160 | if (node->content) | |
c45e5276 | 161 | return build_string ((char *) node->content); |
4b9832a6 CY |
162 | else |
163 | return Qnil; | |
381408e2 | 164 | } |
1e98674d LMI |
165 | else if (node->type == XML_COMMENT_NODE) |
166 | { | |
167 | if (node->content) | |
168 | return list3 (intern ("comment"), Qnil, | |
169 | build_string ((char *) node->content)); | |
170 | else | |
171 | return Qnil; | |
172 | } | |
4b9832a6 | 173 | else |
381408e2 LMI |
174 | return Qnil; |
175 | } | |
176 | ||
177 | static Lisp_Object | |
1b217849 | 178 | parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int htmlp) |
381408e2 LMI |
179 | { |
180 | xmlDoc *doc; | |
c97c655f | 181 | Lisp_Object result = Qnil; |
55e572ef | 182 | const char *burl = ""; |
6020559a | 183 | ptrdiff_t istart, iend, istart_byte, iend_byte; |
8b620f11 | 184 | |
9078ead6 | 185 | fn_xmlCheckVersion (LIBXML_VERSION); |
8b620f11 | 186 | |
1b217849 | 187 | validate_region (&start, &end); |
20a5e996 | 188 | |
1b217849 LMI |
189 | istart = XINT (start); |
190 | iend = XINT (end); | |
6020559a DA |
191 | istart_byte = CHAR_TO_BYTE (istart); |
192 | iend_byte = CHAR_TO_BYTE (iend); | |
1b217849 LMI |
193 | |
194 | if (istart < GPT && GPT < iend) | |
6020559a | 195 | move_gap_both (iend, iend_byte); |
381408e2 | 196 | |
4b9832a6 CY |
197 | if (! NILP (base_url)) |
198 | { | |
199 | CHECK_STRING (base_url); | |
42a5b22f | 200 | burl = SSDATA (base_url); |
4b9832a6 | 201 | } |
8b620f11 | 202 | |
1b217849 | 203 | if (htmlp) |
6020559a DA |
204 | doc = fn_htmlReadMemory ((char *) BYTE_POS_ADDR (istart_byte), |
205 | iend_byte - istart_byte, burl, "utf-8", | |
9078ead6 EZ |
206 | HTML_PARSE_RECOVER|HTML_PARSE_NONET| |
207 | HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR| | |
208 | HTML_PARSE_NOBLANKS); | |
1b217849 | 209 | else |
6020559a DA |
210 | doc = fn_xmlReadMemory ((char *) BYTE_POS_ADDR (istart_byte), |
211 | iend_byte - istart_byte, burl, "utf-8", | |
9078ead6 EZ |
212 | XML_PARSE_NONET|XML_PARSE_NOWARNING| |
213 | XML_PARSE_NOBLANKS |XML_PARSE_NOERROR); | |
4b9832a6 CY |
214 | |
215 | if (doc != NULL) | |
216 | { | |
3134906c LMI |
217 | /* If the document is just comments, then this should get us the |
218 | nodes anyway. */ | |
36881d16 HK |
219 | xmlNode *n = doc->children->next; |
220 | Lisp_Object r = Qnil; | |
221 | ||
222 | while (n) { | |
14486a10 | 223 | if (!NILP (r)) |
36881d16 HK |
224 | result = Fcons (r, result); |
225 | r = make_dom (n); | |
226 | n = n->next; | |
227 | } | |
228 | ||
3134906c LMI |
229 | if (NILP (result)) { |
230 | /* The document isn't just comments, so get the tree the | |
231 | proper way. */ | |
9078ead6 | 232 | xmlNode *node = fn_xmlDocGetRootElement (doc); |
3134906c LMI |
233 | if (node != NULL) |
234 | result = make_dom (node); | |
235 | } else | |
9abd0532 LMI |
236 | result = Fcons (intern ("top"), |
237 | Fcons (Qnil, Fnreverse (Fcons (r, result)))); | |
36881d16 | 238 | |
9078ead6 | 239 | fn_xmlFreeDoc (doc); |
4b9832a6 | 240 | } |
8b620f11 | 241 | |
381408e2 LMI |
242 | return result; |
243 | } | |
244 | ||
/* Call libxml2's xmlCleanupParser, but only when libxml2_loaded_p
   reports the library as loaded; otherwise do nothing.  */
void
xml_cleanup_parser (void)
{
  if (!libxml2_loaded_p ())
    return;

  fn_xmlCleanupParser ();
}
251 | ||
1b217849 LMI |
252 | DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region, |
253 | Slibxml_parse_html_region, | |
254 | 2, 3, 0, | |
255 | doc: /* Parse the region as an HTML document and return the parse tree. | |
4b9832a6 | 256 | If BASE-URL is non-nil, it is used to expand relative URLs. */) |
1b217849 | 257 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) |
381408e2 | 258 | { |
d07ff9db | 259 | if (init_libxml2_functions ()) |
9078ead6 EZ |
260 | return parse_region (start, end, base_url, 1); |
261 | return Qnil; | |
381408e2 LMI |
262 | } |
263 | ||
1b217849 LMI |
264 | DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region, |
265 | Slibxml_parse_xml_region, | |
266 | 2, 3, 0, | |
267 | doc: /* Parse the region as an XML document and return the parse tree. | |
4b9832a6 | 268 | If BASE-URL is non-nil, it is used to expand relative URLs. */) |
1b217849 | 269 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) |
381408e2 | 270 | { |
d07ff9db | 271 | if (init_libxml2_functions ()) |
9078ead6 EZ |
272 | return parse_region (start, end, base_url, 0); |
273 | return Qnil; | |
381408e2 LMI |
274 | } |
275 | ||
276 | \f | |
277 | /*********************************************************************** | |
278 | Initialization | |
279 | ***********************************************************************/ | |
280 | void | |
281 | syms_of_xml (void) | |
282 | { | |
1b217849 LMI |
283 | defsubr (&Slibxml_parse_html_region); |
284 | defsubr (&Slibxml_parse_xml_region); | |
9078ead6 EZ |
285 | |
286 | DEFSYM (Qlibxml2_dll, "libxml2"); | |
381408e2 LMI |
287 | } |
288 | ||
289 | #endif /* HAVE_LIBXML2 */ |