/// <summary>
/// Removes from <paramref name="curHtmlNode"/> every node matched by the XPath
/// expression <paramref name="subNodeToRemove"/>, evaluated relative to that node.
/// </summary>
/// <remarks>
/// The node is modified in place: the returned reference is the same instance
/// that was passed in, not a copy.
/// A bare element name such as <c>"script"</c> is an XPath child step, so it matches
/// only direct children (e.g. <c>&lt;script type="text/javascript"&gt;</c>);
/// pass <c>".//script"</c> to match at any depth below the node.
/// </remarks>
/// <param name="curHtmlNode">Node to remove matches from. Must not be null.</param>
/// <param name="subNodeToRemove">XPath expression selecting the nodes to remove.</param>
/// <returns>The same <paramref name="curHtmlNode"/> instance, after the removals.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="curHtmlNode"/> is null.
/// </exception>
public HtmlNode removeSubHtmlNode(HtmlNode curHtmlNode, string subNodeToRemove)
{
    if (curHtmlNode == null)
    {
        throw new System.ArgumentNullException("curHtmlNode");
    }

    // Removing nodes while enumerating curHtmlNode.Descendants(...) throws
    // InvalidOperationException ("Collection was modified"), so the matches are
    // first gathered into a separate collection via SelectNodes.
    HtmlNodeCollection foundAllSub = curHtmlNode.SelectNodes(subNodeToRemove);
    if (foundAllSub == null)
    {
        // Nothing matched; leave the node untouched.
        return curHtmlNode;
    }

    foreach (HtmlNode subNode in foundAllSub)
    {
        // Detach through the match's own parent rather than curHtmlNode: an XPath
        // that reaches below the direct children yields nodes that are not
        // children of curHtmlNode, and curHtmlNode.RemoveChild would fail on them.
        HtmlNode parent = subNode.ParentNode;
        if (parent != null)
        {
            parent.RemoveChild(subNode);
        }
    }

    return curHtmlNode;
}
// 例 13.3. removeSubHtmlNode 的使用范例 (Example 13.3: usage of removeSubHtmlNode)
// Usage sketch: remove the nodes matching "script" and "style" from one bullet node,
// then read the text that is left.
// NOTE(review): allBulletNodeList, idx and crl are defined outside this excerpt; crl is
// presumably an instance of the class that declares removeSubHtmlNode — confirm.
HtmlNode curBulletNode = allBulletNodeList[idx];
// removeSubHtmlNode edits the node it is given and returns that same reference, so
// noJsNode and noStyleNode are aliases of curBulletNode, not independent copies.
HtmlNode noJsNode = crl.removeSubHtmlNode(curBulletNode, "script");
HtmlNode noStyleNode = crl.removeSubHtmlNode(curBulletNode, "style");
// Text content of the node after both removals have been applied.
string bulletStr = noStyleNode.InnerText;