/// <summary>
/// Parses an HTML string into an HtmlAgilityPack document.
/// </summary>
/// <param name="html">HTML source text that is passed to <c>LoadHtml</c>.</param>
/// <returns>A new <c>HtmlAgilityPack.HtmlDocument</c> loaded from <paramref name="html"/>.</returns>
public HtmlAgilityPack.HtmlDocument htmlToHtmlDoc(string html)
{
    // Remove the "form" and "option" entries from HtmlNode.ElementsFlags before parsing,
    // so that these tags can have child nodes. Background:
    // http://www.crifan.com/htmlagilitypack_html_tag_form_option_no_child_via_sibling_get_innertext/
    // NOTE: ElementsFlags is reached through the HtmlNode type itself, so the removal
    // is not scoped to the document created below.
    foreach (string tagName in new string[] { "form", "option" })
    {
        HtmlNode.ElementsFlags.Remove(tagName);
    }

    HtmlAgilityPack.HtmlDocument parsedDoc = new HtmlAgilityPack.HtmlDocument();
    parsedDoc.LoadHtml(html);
    return parsedDoc;
}
例 13.2. htmlToHtmlDoc 的使用范例
// Method 2: parse the page with htmlToHtmlDoc
// Test page; judging by the variable name and the sample below, its <html> tag carries an xmlns attribute.
string testUrlWithXmlns = "http://sd.csdn.net/";
// getUrlRespHtml is defined elsewhere; presumably it returns the response HTML for the URL (verify).
string respHtml = getUrlRespHtml(testUrlWithXmlns);
// The response is expected to begin like this:
//<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
//<html xmlns="http://www.w3.org/1999/xhtml">
//<head>
// Turn the downloaded HTML text into an HtmlAgilityPack document.
HtmlAgilityPack.HtmlDocument htmlDoc = htmlToHtmlDoc(respHtml);
注意:使用此函数之前,需要先开启对应的宏 USE_HTML_PARSER_HTMLAGILITYPACK,并添加对应的 dll 库 HtmlAgilityPack.dll 的引用。