Commit | Line | Data |
---|---|---|
381408e2 | 1 | /* Interface to libxml2. |
acaf905b | 2 | Copyright (C) 2010-2012 Free Software Foundation, Inc. |
381408e2 LMI |
3 | |
4 | This file is part of GNU Emacs. | |
5 | ||
6 | GNU Emacs is free software: you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation, either version 3 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | GNU Emacs is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #include <config.h> | |
20 | ||
21 | #ifdef HAVE_LIBXML2 | |
22 | ||
23 | #include <setjmp.h> | |
24 | #include <libxml/tree.h> | |
25 | #include <libxml/parser.h> | |
26 | #include <libxml/HTMLparser.h> | |
27 | ||
28 | #include "lisp.h" | |
e5560ff7 | 29 | #include "character.h" |
381408e2 LMI |
30 | #include "buffer.h" |
31 | ||
9078ead6 EZ |
32 | \f |
33 | static Lisp_Object Qlibxml2_dll; | |
34 | ||
35 | #ifdef WINDOWSNT | |
36 | ||
37 | #include <windows.h> | |
38 | #include "w32.h" | |
39 | ||
/* Declare a file-local pointer named fn_FUNC to a libxml2 function
   that returns RETTYPE and takes the parenthesized parameter list
   ARGS.  The pointer is filled in later by LOAD_XML2_FN.  */
#define DEF_XML2_FN(rettype,func,args) static rettype (FAR CDECL *fn_##func)args

/* Look FUNC up by name in the DLL handle LIB and store its address in
   fn_FUNC.  If the lookup fails, jump to the label `bad_library',
   which the invoking function must provide.

   The body is wrapped in do { ... } while (0) so that an invocation
   followed by a semicolon is exactly one statement; a bare { ... }
   block plus `;' would be two statements and would break when used as
   the unbraced body of an `if' that has an `else'.  */
#define LOAD_XML2_FN(lib,func)                                  \
  do                                                            \
    {                                                           \
      fn_##func = (void *) GetProcAddress (lib, #func);         \
      if (!fn_##func)                                           \
        goto bad_library;                                       \
    }                                                           \
  while (0)
48 | ||
49 | DEF_XML2_FN (htmlDocPtr, htmlReadMemory, | |
50 | (const char *, int, const char *, const char *, int)); | |
51 | DEF_XML2_FN (xmlDocPtr, xmlReadMemory, | |
52 | (const char *, int, const char *, const char *, int)); | |
53 | DEF_XML2_FN (xmlNodePtr, xmlDocGetRootElement, (xmlDocPtr)); | |
54 | DEF_XML2_FN (void, xmlFreeDoc, (xmlDocPtr)); | |
55 | DEF_XML2_FN (void, xmlCleanupParser, (void)); | |
56 | DEF_XML2_FN (void, xmlCheckVersion, (int)); | |
57 | ||
58 | static int | |
59 | libxml2_loaded_p (void) | |
60 | { | |
61 | Lisp_Object found = Fassq (Qlibxml2_dll, Vlibrary_cache); | |
62 | ||
63 | if (CONSP (found)) | |
64 | return EQ (XCDR (found), Qt) ? 1 : 0; | |
65 | return 0; | |
66 | } | |
67 | ||
68 | #else /* !WINDOWSNT */ | |
69 | ||
/* Without WINDOWSNT the library is linked in directly, so every fn_*
   name used below is simply an alias for the real libxml2 function.  */

/* Library setup and teardown.  */
#define fn_xmlCheckVersion xmlCheckVersion
#define fn_xmlCleanupParser xmlCleanupParser

/* Parsing a memory buffer as XML or as HTML.  */
#define fn_xmlReadMemory xmlReadMemory
#define fn_htmlReadMemory htmlReadMemory

/* Inspecting and releasing a parsed document.  */
#define fn_xmlDocGetRootElement xmlDocGetRootElement
#define fn_xmlFreeDoc xmlFreeDoc
76 | ||
/* Non-Windows counterpart of the Vlibrary_cache lookup: nothing is
   loaded at run time here, so the library always counts as loaded.
   Always returns 1.  */
static inline int
libxml2_loaded_p (void)
{
  return 1;
}
82 | ||
83 | #endif /* !WINDOWSNT */ | |
84 | ||
85 | static int | |
86 | init_libxml2_functions (Lisp_Object libraries) | |
87 | { | |
88 | #ifdef WINDOWSNT | |
9078ead6 EZ |
89 | if (libxml2_loaded_p ()) |
90 | return 1; | |
91 | else | |
92 | { | |
93 | HMODULE library; | |
94 | ||
95 | if (!(library = w32_delayed_load (libraries, Qlibxml2_dll))) | |
96 | { | |
97 | message ("%s", "libxml2 library not found"); | |
98 | return 0; | |
99 | } | |
100 | ||
101 | /* LOAD_XML2_FN jumps to bad_library if it fails to find the | |
102 | named function. */ | |
103 | LOAD_XML2_FN (library, htmlReadMemory); | |
104 | LOAD_XML2_FN (library, xmlReadMemory); | |
105 | LOAD_XML2_FN (library, xmlDocGetRootElement); | |
106 | LOAD_XML2_FN (library, xmlFreeDoc); | |
107 | LOAD_XML2_FN (library, xmlCleanupParser); | |
108 | LOAD_XML2_FN (library, xmlCheckVersion); | |
109 | ||
110 | Vlibrary_cache = Fcons (Fcons (Qlibxml2_dll, Qt), Vlibrary_cache); | |
111 | return 1; | |
112 | } | |
113 | ||
114 | bad_library: | |
115 | Vlibrary_cache = Fcons (Fcons (Qlibxml2_dll, Qnil), Vlibrary_cache); | |
116 | ||
117 | return 0; | |
118 | #else /* !WINDOWSNT */ | |
119 | return 1; | |
120 | #endif /* !WINDOWSNT */ | |
121 | } | |
122 | ||
c9735e30 PE |
123 | static Lisp_Object |
124 | make_dom (xmlNode *node) | |
381408e2 | 125 | { |
4b9832a6 CY |
126 | if (node->type == XML_ELEMENT_NODE) |
127 | { | |
c45e5276 | 128 | Lisp_Object result = Fcons (intern ((char *) node->name), Qnil); |
4b9832a6 CY |
129 | xmlNode *child; |
130 | xmlAttr *property; | |
131 | Lisp_Object plist = Qnil; | |
132 | ||
133 | /* First add the attributes. */ | |
134 | property = node->properties; | |
135 | while (property != NULL) | |
136 | { | |
137 | if (property->children && | |
138 | property->children->content) | |
139 | { | |
c45e5276 PE |
140 | char *content = (char *) property->children->content; |
141 | plist = Fcons (Fcons (intern ((char *) property->name), | |
142 | build_string (content)), | |
4b9832a6 CY |
143 | plist); |
144 | } | |
145 | property = property->next; | |
146 | } | |
147 | result = Fcons (Fnreverse (plist), result); | |
148 | ||
149 | /* Then add the children of the node. */ | |
150 | child = node->children; | |
151 | while (child != NULL) | |
152 | { | |
153 | result = Fcons (make_dom (child), result); | |
154 | child = child->next; | |
155 | } | |
156 | ||
157 | return Fnreverse (result); | |
381408e2 | 158 | } |
3c2317e8 | 159 | else if (node->type == XML_TEXT_NODE || node->type == XML_CDATA_SECTION_NODE) |
4b9832a6 CY |
160 | { |
161 | if (node->content) | |
c45e5276 | 162 | return build_string ((char *) node->content); |
4b9832a6 CY |
163 | else |
164 | return Qnil; | |
381408e2 | 165 | } |
1e98674d LMI |
166 | else if (node->type == XML_COMMENT_NODE) |
167 | { | |
168 | if (node->content) | |
169 | return list3 (intern ("comment"), Qnil, | |
170 | build_string ((char *) node->content)); | |
171 | else | |
172 | return Qnil; | |
173 | } | |
4b9832a6 | 174 | else |
381408e2 LMI |
175 | return Qnil; |
176 | } | |
177 | ||
178 | static Lisp_Object | |
1b217849 | 179 | parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int htmlp) |
381408e2 LMI |
180 | { |
181 | xmlDoc *doc; | |
c97c655f | 182 | Lisp_Object result = Qnil; |
55e572ef | 183 | const char *burl = ""; |
d311d28c PE |
184 | ptrdiff_t bytes; |
185 | ptrdiff_t istart, iend; | |
8b620f11 | 186 | |
9078ead6 | 187 | fn_xmlCheckVersion (LIBXML_VERSION); |
8b620f11 | 188 | |
1b217849 | 189 | validate_region (&start, &end); |
20a5e996 | 190 | |
1b217849 LMI |
191 | istart = XINT (start); |
192 | iend = XINT (end); | |
193 | ||
194 | if (istart < GPT && GPT < iend) | |
195 | move_gap (iend); | |
381408e2 | 196 | |
4b9832a6 CY |
197 | if (! NILP (base_url)) |
198 | { | |
199 | CHECK_STRING (base_url); | |
42a5b22f | 200 | burl = SSDATA (base_url); |
4b9832a6 | 201 | } |
8b620f11 | 202 | |
1b217849 | 203 | bytes = CHAR_TO_BYTE (iend) - CHAR_TO_BYTE (istart); |
20a5e996 | 204 | |
1b217849 | 205 | if (htmlp) |
9078ead6 EZ |
206 | doc = fn_htmlReadMemory ((char *) BYTE_POS_ADDR (CHAR_TO_BYTE (istart)), |
207 | bytes, burl, "utf-8", | |
208 | HTML_PARSE_RECOVER|HTML_PARSE_NONET| | |
209 | HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR| | |
210 | HTML_PARSE_NOBLANKS); | |
1b217849 | 211 | else |
9078ead6 EZ |
212 | doc = fn_xmlReadMemory ((char *) BYTE_POS_ADDR (CHAR_TO_BYTE (istart)), |
213 | bytes, burl, "utf-8", | |
214 | XML_PARSE_NONET|XML_PARSE_NOWARNING| | |
215 | XML_PARSE_NOBLANKS |XML_PARSE_NOERROR); | |
4b9832a6 CY |
216 | |
217 | if (doc != NULL) | |
218 | { | |
3134906c LMI |
219 | /* If the document is just comments, then this should get us the |
220 | nodes anyway. */ | |
36881d16 HK |
221 | xmlNode *n = doc->children->next; |
222 | Lisp_Object r = Qnil; | |
223 | ||
224 | while (n) { | |
14486a10 | 225 | if (!NILP (r)) |
36881d16 HK |
226 | result = Fcons (r, result); |
227 | r = make_dom (n); | |
228 | n = n->next; | |
229 | } | |
230 | ||
3134906c LMI |
231 | if (NILP (result)) { |
232 | /* The document isn't just comments, so get the tree the | |
233 | proper way. */ | |
9078ead6 | 234 | xmlNode *node = fn_xmlDocGetRootElement (doc); |
3134906c LMI |
235 | if (node != NULL) |
236 | result = make_dom (node); | |
237 | } else | |
9abd0532 LMI |
238 | result = Fcons (intern ("top"), |
239 | Fcons (Qnil, Fnreverse (Fcons (r, result)))); | |
36881d16 | 240 | |
9078ead6 | 241 | fn_xmlFreeDoc (doc); |
4b9832a6 | 242 | } |
8b620f11 | 243 | |
381408e2 LMI |
244 | return result; |
245 | } | |
246 | ||
9078ead6 EZ |
/* Call libxml2's xmlCleanupParser, but only when libxml2_loaded_p
   reports that the library is available; otherwise do nothing.  */
void
xml_cleanup_parser (void)
{
  if (!libxml2_loaded_p ())
    return;

  fn_xmlCleanupParser ();
}
253 | ||
1b217849 LMI |
254 | DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region, |
255 | Slibxml_parse_html_region, | |
256 | 2, 3, 0, | |
257 | doc: /* Parse the region as an HTML document and return the parse tree. | |
4b9832a6 | 258 | If BASE-URL is non-nil, it is used to expand relative URLs. */) |
1b217849 | 259 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) |
381408e2 | 260 | { |
9078ead6 EZ |
261 | if (init_libxml2_functions (Vdynamic_library_alist)) |
262 | return parse_region (start, end, base_url, 1); | |
263 | return Qnil; | |
381408e2 LMI |
264 | } |
265 | ||
1b217849 LMI |
266 | DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region, |
267 | Slibxml_parse_xml_region, | |
268 | 2, 3, 0, | |
269 | doc: /* Parse the region as an XML document and return the parse tree. | |
4b9832a6 | 270 | If BASE-URL is non-nil, it is used to expand relative URLs. */) |
1b217849 | 271 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) |
381408e2 | 272 | { |
9078ead6 EZ |
273 | if (init_libxml2_functions (Vdynamic_library_alist)) |
274 | return parse_region (start, end, base_url, 0); | |
275 | return Qnil; | |
381408e2 LMI |
276 | } |
277 | ||
278 | \f | |
279 | /*********************************************************************** | |
280 | Initialization | |
281 | ***********************************************************************/ | |
282 | void | |
283 | syms_of_xml (void) | |
284 | { | |
1b217849 LMI |
285 | defsubr (&Slibxml_parse_html_region); |
286 | defsubr (&Slibxml_parse_xml_region); | |
9078ead6 EZ |
287 | |
288 | DEFSYM (Qlibxml2_dll, "libxml2"); | |
381408e2 LMI |
289 | } |
290 | ||
291 | #endif /* HAVE_LIBXML2 */ |