/// <summary>
/// Gets the page rank of a domain URL by querying up to three public
/// page-rank checker web sites in turn and extracting the rank from the
/// HTML each one returns. The first site that yields a parsable rank wins.
/// </summary>
/// <param name="domainUrl">Domain URL including its scheme, e.g. http://answers.yahoo.com</param>
/// <returns>
/// The extracted rank (e.g. 7), or 0 when <paramref name="domainUrl"/> is
/// null/empty or none of the sites returned a parsable rank.
/// </returns>
public int getDomainPageRank(string domainUrl)
{
    int pageRank = 0;

    // Nothing to look up: avoid sending pointless requests (and a later
    // ArgumentNullException from Regex.Replace on a null input).
    if (String.IsNullOrEmpty(domainUrl))
    {
        return pageRank;
    }

    string queryUrl;
    string respHtml;
    string rankStr;
    Dictionary<string, string> postDict;

    // True once some method produced a valid rank. A valid rank of 0 also
    // counts, so the remaining methods are skipped in that case as well.
    bool gotRank = false;

    //Method 1: use http://www.pagerankme.com/
    queryUrl = "http://www.pagerankme.com/";
    postDict = new Dictionary<string, string>();
    postDict.Add("url", domainUrl);
    respHtml = getUrlRespHtml_multiTry(queryUrl, postDict: postDict);
    //<a href="http://www.pagerankme.com" target="_blank" style="text-decoration:none;color:#000000;">PageRank 7</a>
    rankStr = "";
    if (extractSingleStr(@"<a href=""http://www\.pagerankme\.com"" target=""_blank"" style="".+?"">PageRank (\d+)</a>", respHtml, out rankStr))
    {
        // TryParse instead of Parse: the digits come from third-party HTML, so an
        // oversized or non-ASCII digit run is treated as a failed lookup, not an exception.
        gotRank = Int32.TryParse(rankStr, out pageRank);
    }

    if (!gotRank)
    {
        //Method 2: use http://moonsy.com/pagerank_checker/
        queryUrl = "http://moonsy.com/pagerank_checker/";
        postDict = new Dictionary<string, string>();
        postDict.Add("domain", domainUrl);
        postDict.Add("Submit", "CHECK");
        respHtml = getUrlRespHtml_multiTry(queryUrl, postDict: postDict);
        //<h3>Your Page Rank: 7/10
        rankStr = "";
        if (extractSingleStr(@"<h3>Your Page Rank.+?(\d+)/10", respHtml, out rankStr))
        {
            gotRank = Int32.TryParse(rankStr, out pageRank);
        }
    }

    if (!gotRank)
    {
        //Method 3: use http://pagerank.webmasterhome.cn/
        // Strip only a leading scheme, regardless of letter case, so that text
        // further inside the URL is left untouched.
        string noHttpPreDomainUrl = Regex.Replace(domainUrl, @"^(https?|ftp)://", "", RegexOptions.IgnoreCase);
        Dictionary<string, string> headerDict;

        //(1) visit the main page first to get cookies; the returned HTML itself is not needed
        string pageRankMainUrl = "http://pagerank.webmasterhome.cn/";
        getUrlRespHtml_multiTry(pageRankMainUrl);

        //(2) ask page rank; again only the request matters, not the returned HTML
        //http://pagerank.webmasterhome.cn/?domain=answers.yahoo.com
        string firstWholeUrl = "http://pagerank.webmasterhome.cn/?domain=" + noHttpPreDomainUrl;
        headerDict = new Dictionary<string, string>();
        headerDict.Add("referer", pageRankMainUrl);
        getUrlRespHtml_multiTry(firstWholeUrl, headerDict: headerDict);

        //(3) fetch the page that actually carries the rank
        //http://pagerank.webmasterhome.cn/prLoading.asp?domain=answers.yahoo.com
        queryUrl = "http://pagerank.webmasterhome.cn/prLoading.asp?domain=" + noHttpPreDomainUrl;
        headerDict = new Dictionary<string, string>();
        headerDict.Add("referer", firstWholeUrl);
        respHtml = getUrlRespHtml_multiTry(queryUrl, headerDict: headerDict);
        //'<img src=\"http://primg.webmasterhome.cn/pr7.gif\" style=\"width:40px;height:5px;border:0px;\" alt=PageRank align=absmiddle> (7/10)'
        rankStr = "";
        if (extractSingleStr(@"\((\d+)/10\)", respHtml, out rankStr))
        {
            gotRank = Int32.TryParse(rankStr, out pageRank);
        }
    }

    //TODO:
    //Google PR (PageRank) Checker
    //http://www.searchbliss.com/seo-tools/google-pagerank-checker.php
    //currently answers "We're sorry, the Google PR check is currently being repaired."
    //future: if it works again, maybe it can be used as another method

    // Int32.TryParse leaves 0 in pageRank on failure, so 0 is returned when every method failed.
    return pageRank;
}
// Example 9.12. Usage example of getDomainPageRank
/// <summary>
/// Plain data holder for a single search result item together with the
/// domain URL and rank values stored for it.
/// </summary>
public struct searchItemInfo
{
    public string title;

    // URL that still carries the Google appendix
    public string googleUrl;

    public string originalUrl;
    public string description;

    // Domain URL and the ranks kept alongside it
    public string domainUrl;
    public int pageRank, alexaRank;
}
// Resolve the item's domain URL once, store it, then look up both ranks for it.
string itemDomainUrl = crifanLib.getDomainUrl(singleItemInfo.originalUrl);
singleItemInfo.domainUrl = itemDomainUrl;
singleItemInfo.pageRank = crifanLib.getDomainPageRank(itemDomainUrl);
singleItemInfo.alexaRank = crifanLib.getDomainAlexaRank(itemDomainUrl);