In the end I used libxml2, which does exactly what I wanted.
Here is an example:
htmlParserCtxtPtr ctxt = htmlNewParserCtxt();
htmlDocPtr htmlDoc = htmlCtxtReadMemory(ctxt, html.toUtf8().constData(), strlen(html.toUtf8().constData())
, "", NULL, 0);
xmlXPathContextPtr context = xmlXPathNewContext ( htmlDoc );
xmlXPathObjectPtr result = xmlXPathEvalExpression ((xmlChar*) query.toUtf8().constData(), context);
xmlXPathFreeContext (context);
if (result == NULL) {
qDebug()<<"Invalid XQuery ?";
}
else {
xmlNodeSetPtr nodeSet = result->nodesetval;
if ( !xmlXPathNodeSetIsEmpty ( nodeSet ) ) {
for (int i = 0; i < nodeSet->nodeNr; i++ ) {
xmlNodePtr nodePtr;
nodePtr = nodeSet->nodeTab[i];
list.append(decodeXml(xml));
}
}
xmlXPathFreeObject (result);
}
return list;
}
/**
 * Runs an XPath expression against an HTML document and collects the matches.
 *
 * The HTML is parsed with libxml2's HTML parser, the expression is evaluated
 * on the resulting tree, and for every node in the resulting node set the
 * content of the node's first child is converted from UTF-8, passed through
 * decodeXml() and appended to the returned list.
 *
 * @param html  HTML markup to parse.
 * @param query XPath expression to evaluate against the parsed document.
 * @return One entry per matched node, in node-set order. The list is empty
 *         when parsing or evaluation fails, or when nothing matches.
 */
QStringList Core::queryHTML(const QString &html, const QString &query) {
    QStringList list;

    // Convert once and keep the buffers in named variables: the byte count is
    // then taken from the QByteArray itself (strlen would stop at an embedded
    // NUL) and the pointers stay valid for as long as libxml2 needs them.
    const QByteArray htmlUtf8 = html.toUtf8();
    const QByteArray queryUtf8 = query.toUtf8();

    htmlParserCtxtPtr ctxt = htmlNewParserCtxt();
    if (ctxt == NULL) {
        qDebug()<<"Could not create HTML parser context";
        return list;
    }

    htmlDocPtr htmlDoc = htmlCtxtReadMemory(ctxt, htmlUtf8.constData(),
                                            static_cast<int>(htmlUtf8.size()),
                                            "", NULL, 0);
    if (htmlDoc == NULL) {
        qDebug()<<"Could not parse HTML";
        htmlFreeParserCtxt(ctxt);
        return list;
    }

    xmlXPathObjectPtr result = NULL;
    xmlXPathContextPtr context = xmlXPathNewContext(htmlDoc);
    if (context != NULL) {
        result = xmlXPathEvalExpression(
            reinterpret_cast<const xmlChar*>(queryUtf8.constData()), context);
        // The result object does not depend on the XPath context, so the
        // context can be released as soon as evaluation is done.
        xmlXPathFreeContext(context);
    }

    if (result == NULL) {
        qDebug()<<"Invalid XQuery ?";
    }
    else {
        xmlNodeSetPtr nodeSet = result->nodesetval;
        if (!xmlXPathNodeSetIsEmpty(nodeSet)) {
            for (int i = 0; i < nodeSet->nodeNr; i++) {
                xmlNodePtr nodePtr = nodeSet->nodeTab[i];
                // Namespace results are xmlNs structures stored behind an
                // xmlNodePtr; they have no `children` member to read.
                if (nodePtr == NULL || nodePtr->type == XML_NAMESPACE_DECL) {
                    continue;
                }
                // A matched node without children (e.g. an empty element)
                // yields an empty string instead of a NULL dereference.
                const xmlChar *content =
                    (nodePtr->children != NULL) ? nodePtr->children->content : NULL;
                QString xml = QString::fromUtf8(reinterpret_cast<const char*>(content));
                list.append(decodeXml(xml));
            }
        }
        xmlXPathFreeObject(result);
    }

    // The node set only points into the document tree, so the document must
    // outlive the loop above; release it and the parser context afterwards.
    xmlFreeDoc(htmlDoc);
    htmlFreeParserCtxt(ctxt);
    return list;
}
To copy to clipboard, switch view to plain text mode
Bookmarks