Vuo 2.4.4
Loading...
Searching...
No Matches
VuoTextHtml.c
Go to the documentation of this file.
1
10#include "VuoTextHtml.h"
11
12#include <libxml/xpath.h>
13#include <libxml/HTMLparser.h>
14
15#ifdef VUO_COMPILER
17 "title" : "VuoTextHtml",
18 "dependencies" : [
19 "xml2",
20 "z",
21 "VuoText"
22 ]
23 });
24#endif
25
29static void VuoXmlError(void *unused, xmlError *error)
30{
31 char *message = strdup(error->message);
32 size_t len = strlen(message);
33 if (message[len-1] == '\n')
34 message[len-1] = 0;
35
36 VUserLog("Error: %s (line %i)", message, error->line);
37
38 free(message);
39}
40
44static void __attribute__((constructor)) init()
45{
46 xmlInitParser();
47 xmlSetStructuredErrorFunc(NULL, VuoXmlError);
48}
49
55{
56 if (!text)
57 return NULL;
58
59 // First check whether the text contains stuff that looks like HTML,
60 // so we don't waste time parsing it if it won't change anything.
61 size_t length = strlen(text);
62 bool found = false;
63 for (unsigned int i = 0; i < length; ++i)
64 if (text[i] == '<' || text[i] == '&')
65 {
66 found = true;
67 break;
68 }
69 if (!found)
70 return VuoText_make(text);
71
72 int options = HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING;
73 xmlDocPtr doc = htmlReadDoc((const xmlChar *)text, "", "UTF-8", options);
74 if (!doc)
75 {
76 // Sometimes it works on the second try.
77 doc = htmlReadDoc((const xmlChar *)text, "", "UTF-8", options);
78 if (!doc)
79 return NULL;
80 }
81 VuoDefer(^{ xmlFreeDoc(doc); });
82
83
84 // Remove the <style> and <script> tags.
85 {
86 xmlXPathContextPtr xpathContext = xmlXPathNewContext(doc);
87 if (!xpathContext)
88 return NULL;
89 VuoDefer(^{ xmlXPathFreeContext(xpathContext); });
90
91 xmlXPathObjectPtr xpathObject = xmlXPathEvalExpression((const unsigned char *)"//style|//script", xpathContext);
92 if (!xpathObject)
93 return NULL;
94 VuoDefer(^{ xmlXPathFreeObject(xpathObject); });
95
96 if (!xmlXPathNodeSetIsEmpty(xpathObject->nodesetval))
97 {
98 for (int i = 0; i < xpathObject->nodesetval->nodeNr; ++i)
99 {
100 xmlUnlinkNode(xpathObject->nodesetval->nodeTab[i]);
101 xmlFree(xpathObject->nodesetval->nodeTab[i]);
102 xpathObject->nodesetval->nodeTab[i] = NULL;
103 }
104 }
105 }
106
107
108 xmlNodePtr root = xmlDocGetRootElement(doc);
109 xmlChar *content = xmlNodeGetContent(root);
110 VuoDefer(^{ xmlFree(content); });
111
112 return VuoText_make((const char *)content);
113}