/// <summary>
/// Gets the Alexa rank for a domain URL by querying several web pages in turn:
/// searchbliss.com first, then alexa.com (only when compiled with
/// USE_HTML_PARSER_HTMLAGILITYPACK), then moonsy.com.
/// A later lookup runs only while no earlier lookup has produced a rank string
/// that parses as an integer.
/// </summary>
/// <param name="domainUrl">Domain URL to look up, e.g. http://answers.yahoo.com</param>
/// <returns>The parsed rank (e.g. 4), or 0 when no lookup produced a parsable rank.</returns>
public int getDomainAlexaRank(string domainUrl)
{
    int alexaRank = 0;
    string alexaRankStr = "";
    // Stays true until one lookup yields a rank string that Int32.TryParse accepts.
    bool prevMethodFail = true;

    //Method 1: use http://www.searchbliss.com/rank.asp
    string searchBlissUrl = "http://www.searchbliss.com/rank.asp";
    // The first request fetches the form page, which carries a hidden access code:
    //<input type="hidden" name="RAC" value="EIS">
    string respHtml = getUrlRespHtml_multiTry(searchBlissUrl);
    string accessCode = "";
    if (extractSingleStr(@"<input\s+type=""hidden""\s+name=""RAC""\s+value=""([A-Z]+)"">", respHtml, out accessCode))
    {
        // The second request posts the access code back together with the domain, e.g.:
        //AC   EIS
        //RAC  EIS
        //rank http://hubpages.com
        Dictionary<string, string> postDict = new Dictionary<string, string>();
        postDict.Add("AC", accessCode);
        postDict.Add("RAC", accessCode);
        postDict.Add("rank", domainUrl);
        respHtml = getUrlRespHtml_multiTry(searchBlissUrl, postDict: postDict);

        //<a href="http://www.alexa.com/data/details/main/http://hubpages.com" target="_blank">444</a>
        if (extractSingleStr(@"<a\s+href=""http://www\.alexa\.com/data/details/main/.+?""\s+target=""_blank"">(\d+)</a>", respHtml, out alexaRankStr))
        {
            // Only a successfully parsed number counts as success. A failed parse
            // leaves alexaRank at 0 and must let the next lookup run.
            prevMethodFail = !Int32.TryParse(alexaRankStr, out alexaRank);
        }
    }

#if USE_HTML_PARSER_HTMLAGILITYPACK
    if ((alexaRank == 0) && prevMethodFail)
    {
        //Method 2: use http://www.alexa.com/
        // Visit the home page first to get cookies; the response body itself is not needed.
        getUrlRespHtml_multiTry("http://www.alexa.com");

        // Then run the actual search, e.g.
        //http://www.alexa.com/search?q=crifan.com&r=home_home&p=bigtop
        string queryUrl = "http://www.alexa.com/search"
            + "?q=" + domainUrl
            + "&r=" + "home_home"
            + "&p=" + "bigtop";
        respHtml = getUrlRespHtml_multiTry(queryUrl);
        HtmlAgilityPack.HtmlDocument htmlDoc = htmlToHtmlDoc(respHtml);
        HtmlNode rootHtmlNode = htmlDoc.DocumentNode;

        // Expected markup:
        //<span>
        //<img class="align-top" src="/images/icons/globe-sm.gif" />
        //<span class="traffic-stat-label">Alexa Traffic Rank:</span>
        //<a href="/siteinfo/yahoo.com#trafficstats">
        //4</a>
        //</span>
        // or, with a thousands separator:
        //<a href="/siteinfo/crifan.com#trafficstats">
        //170,557</a>
        HtmlNode trafficHtmlNode = rootHtmlNode.SelectSingleNode("//span/span[@class='traffic-stat-label']");
        if ((trafficHtmlNode != null) && (trafficHtmlNode.InnerText.StartsWith("Alexa Traffic Rank:")))
        {
            // The rank is the text of the <a href> that is a sibling of the label span.
            HtmlNode aHrefNode = trafficHtmlNode.ParentNode.SelectSingleNode(".//a[@href]");
            // SelectSingleNode returns null when the anchor is missing; treat that as a failed lookup.
            if (aHrefNode != null)
            {
                alexaRankStr = aHrefNode.InnerText.Trim().Replace(",", "");
                // Special case noted by the original author: the text can be "No Data",
                // which TryParse rejects, leaving this lookup marked as failed.
                prevMethodFail = !Int32.TryParse(alexaRankStr, out alexaRank);
            }
        }
    }
#endif

    if ((alexaRank == 0) && prevMethodFail)
    {
        //Method 3: use http://moonsy.com/alexa_rank/
        Dictionary<string, string> postDict = new Dictionary<string, string>();
        postDict.Add("domain", domainUrl);
        postDict.Add("Submit", "CHECK");
        respHtml = getUrlRespHtml_multiTry("http://moonsy.com/alexa_rank/", postDict: postDict);

        //<h2>Alexa Rank of <b>ANSWERS.YAHOO.COM</b> is : <b>4</b></h2>
        alexaRankStr = "";
        if (extractSingleStr(@"<h2>Alexa Rank of.+?is.+?(\d+).+?</h2>", respHtml, out alexaRankStr))
        {
            // As above: success means the extracted text actually parsed as an integer.
            prevMethodFail = !Int32.TryParse(alexaRankStr, out alexaRank);
        }
    }

    //TODO:
    //maybe future can use:
    //http://www.dakola.com/tools/alexa/
    return alexaRank;
}
// Example 9.13. Usage example of getDomainAlexaRank
/// <summary>
/// Plain data holder for one search item: its title, URLs and description,
/// plus the domain URL and the two rank values stored for that domain.
/// </summary>
public struct searchItemInfo
{
    /// <summary>Title of the item.</summary>
    public string title;

    /// <summary>URL of the item with the Google appendix.</summary>
    public string googleUrl;

    /// <summary>Original URL of the item.</summary>
    public string originalUrl;

    /// <summary>Description text of the item.</summary>
    public string description;

    // Domain URL and rank values, added alongside the basic item data.

    /// <summary>Domain URL associated with the item.</summary>
    public string domainUrl;

    /// <summary>Page rank value stored for the domain.</summary>
    public int pageRank;

    /// <summary>Alexa rank value stored for the domain.</summary>
    public int alexaRank;
}
// Usage example: fill in the domain-related fields of singleItemInfo.
// The domain URL is computed first from originalUrl, because both rank
// lookups below take singleItemInfo.domainUrl as their argument.
singleItemInfo.domainUrl = crifanLib.getDomainUrl(singleItemInfo.originalUrl);
// Store the result of getDomainPageRank for that domain URL.
singleItemInfo.pageRank = crifanLib.getDomainPageRank(singleItemInfo.domainUrl);
// Store the result of getDomainAlexaRank for that domain URL.
singleItemInfo.alexaRank = crifanLib.getDomainAlexaRank(singleItemInfo.domainUrl);