此处顺便把对应的全局变量、初始化代码、私有函数等也贴出来,供参考:
/// <summary>
/// Plain pair of strings: a key and the value that belongs to it.
/// </summary>
public struct pairItem
{
    /// <summary>Key part of the pair.</summary>
    public string key;

    /// <summary>Value part of the pair.</summary>
    public string value;
}
// Timestamps keyed by name, used for time calculation; the dictionary itself is created in the constructor.
private Dictionary<string, DateTime> calcTimeList;
// Stand-in character: _processExpireField swaps ',' for it and _recoverExpireField swaps it back to ','.
const char replacedChar = '_';
// Cookie attribute names; the constructor copies them into cookieFieldList.
string[] cookieFieldArr = { "expires", "domain", "secure", "path", "httponly", "version" };
// Canned User-Agent strings. The parenthesised comment part of each value must be
// balanced, otherwise the header sent to the server is malformed.
//IE7
const string constUserAgent_IE7_x64 = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)";
//IE8 (this is the value the constructor installs as the default user agent)
const string constUserAgent_IE8_x64 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)";
//IE9
const string constUserAgent_IE9_x64 = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"; // x64
const string constUserAgent_IE9_x86 = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"; // x86
//Chrome
const string constUserAgent_Chrome = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4";
//Mozilla Firefox
const string constUserAgent_Firefox = "Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:1.9.2.6) Gecko/20100625 Firefox/3.6.6";
// User-Agent put on every request by _getUrlResponse unless the caller supplies its own header value
private string gUserAgent;
// Proxy assigned to every request's Proxy property; stays null unless set elsewhere
private WebProxy gProxy = null;
//default values:
//getUrlResponse
private const Dictionary<string, string> defHeaderDict = null;
private const Dictionary<string, string> defPostDict = null;
// request timeout, in ms
private const int defTimeout = 30 * 1000;
private const string defPostDataStr = null;
// read/write timeout applied to the request, in ms
private const int defReadWriteTimeout = 30 * 1000;
//getUrlRespHtml
private const string defCharset = null;
//getUrlRespHtml_multiTry
private const int defMaxTryNum = 5;
private const int defRetryFailSleepTime = 100; //sleep time in ms when retry fail for getUrlRespHtml
// List form of cookieFieldArr; filled by the constructor
List<string> cookieFieldList = new List<string>();
// Cookies attached to each outgoing request and refreshed from each response in _getUrlResponse
CookieCollection curCookies = null;
//private long totalLength = 0;
//private long currentLength = 0;
#if USE_GETURLRESPONSE_BW
//indicate background worker complete or not
bool bNotCompleted_resp = true;
//store response of http request
private HttpWebResponse gCurResp = null;
#endif
// Worker that runs the download; replaced by a new instance on every getUrlRespStreamBytes_bw call
private BackgroundWorker gBgwDownload;
//indicate download complete or not
bool bNotCompleted_download = true;
//store current read out data len
private int gRealReadoutLen = 0;
// Optional progress callback; bgwDownload_ProgressChanged passes it the reported percentage
Action<int> gFuncUpdateProgress = null;
/// <summary>
/// Sets up the instance: registers the embedded-DLL resolver, installs the default
/// user agent, raises the process-wide connection limit and creates the cookie and
/// timing containers.
/// </summary>
public crifanLib()
{
    //!!! embedded dll loading, step (1): hook the resolve handler
    AppDomain.CurrentDomain.AssemblyResolve += CurrentDomain_AssemblyResolve;

    // ---- http related ----
    gUserAgent = constUserAgent_IE8_x64;

    // large enough that pending requests do not exhaust the connection pool
    // (which would otherwise stall while waiting for a response)
    System.Net.ServicePointManager.DefaultConnectionLimit = 200;

    curCookies = new CookieCollection();

    // mirror the constant cookie attribute names into the list form
    cookieFieldList.AddRange(cookieFieldArr);

    // ---- time calculation ----
    calcTimeList = new Dictionary<string, DateTime>();

#if USE_GETURLRESPONSE_BW
    gBgwDownload = new BackgroundWorker();
#endif

    // debug only:
    //gProxy = new WebProxy("127.0.0.1", 8087);
}
/*------------------------Private Functions------------------------------*/
//!!! for load embedded dll: (2) implement this handler
/// <summary>
/// AssemblyResolve handler that tries to load the requested assembly from this
/// assembly's "Properties.Resources" resource set.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="args">Carries the name of the assembly that failed to resolve.</param>
/// <returns>
/// The loaded assembly, or null when the request is for a satellite "*.resources"
/// assembly or no matching byte[] resource is embedded. Null tells the runtime this
/// handler could not resolve the name.
/// </returns>
System.Reflection.Assembly CurrentDomain_AssemblyResolve(object sender, ResolveEventArgs args)
{
    // "Some.Lib, Version=..., Culture=..." -> "Some.Lib"; otherwise strip a ".dll" suffix
    int commaPos = args.Name.IndexOf(',');
    string dllName = (commaPos >= 0) ? args.Name.Substring(0, commaPos) : args.Name.Replace(".dll", "");

    // resource keys use '_' where the assembly name has '.'
    dllName = dllName.Replace(".", "_");
    if (dllName.EndsWith("_resources", StringComparison.Ordinal))
    {
        return null;
    }

    System.Resources.ResourceManager rm = new System.Resources.ResourceManager(GetType().Namespace + ".Properties.Resources", System.Reflection.Assembly.GetExecutingAssembly());

    byte[] bytes;
    try
    {
        bytes = rm.GetObject(dllName) as byte[];
    }
    catch (System.Resources.MissingManifestResourceException)
    {
        // no resource set to look in -> this handler cannot resolve the name
        return null;
    }

    if (bytes == null)
    {
        // not embedded (or not a binary resource): decline instead of passing null to Assembly.Load
        return null;
    }
    return System.Reflection.Assembly.Load(bytes);
}
// Match evaluator: turns every replacedChar in the matched text back into ','
private string _recoverExpireField(Match foundPprocessedExpire)
{
    return foundPprocessedExpire.Value.Replace(replacedChar, ',');
}
// Match evaluator: turns every ',' in the matched text into replacedChar
private string _processExpireField(Match foundExpire)
{
    string matchedText = foundExpire.Value;
    return matchedText.Replace(',', replacedChar);
}
// Match evaluator for a hex escape such as \x0A: the two characters at positions 2 and 3
// of the matched text are parsed as a hex number and returned as the corresponding
// one-character string (e.g. "0A" -> "\n").
private string _replaceEscapeSequenceToChar(Match foundEscapeSequence)
{
    string escapeText = foundEscapeSequence.ToString();

    // skip the two leading characters, keep the two hex digits after them
    string hexDigits = new string(new char[] { escapeText[2], escapeText[3] });

    int charCode = int.Parse(hexDigits, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
    return Convert.ToChar(charCode).ToString();
}
// Decide whether a cookie should be added/retained.
// Returns false when:
//   - ck is null or has an empty name
//   - ck has no domain and curDomain is null or empty
// Side effect: when ck has no domain but curDomain is usable, ck.Domain is set to curDomain.
// Note: expiry is not examined here.
private bool needAddThisCookie(Cookie ck, string curDomain)
{
    if ((ck == null) || string.IsNullOrEmpty(ck.Name))
    {
        return false;
    }

    if (!string.IsNullOrEmpty(ck.Domain))
    {
        return true;
    }

    if (string.IsNullOrEmpty(curDomain))
    {
        // no domain on the cookie and none to fall back to: omit it,
        // because adding an empty-domain cookie makes CookieContainer.Add() fail
        return false;
    }

    ck.Domain = curDomain;
    return true;
}
// Build "k1=v1&k2=v2..." from the dictionary, URL-encoding each value.
//   paras            : parameters to join; null or empty gives ""
//   spaceToPercent20 : true  -> spaces in values become "%20"
//                      false -> spaces in values become "+"
// Notes:
//   - keys are emitted as-is (not encoded)
//   - a null value is emitted as an empty value ("key=")
//   - the result has no leading '&'
private string _quoteParas(Dictionary<string, string> paras, bool spaceToPercent20 = true)
{
    if ((paras == null) || (paras.Count == 0))
    {
        return "";
    }

    StringBuilder quotedParas = new StringBuilder();
    foreach (KeyValuePair<string, string> para in paras)
    {
        // UrlEncode(null) returns null; normalise so the Replace below cannot throw
        string encodedVal = HttpUtility.UrlEncode(para.Value) ?? "";
        if (spaceToPercent20)
        {
            // UrlEncode escapes a literal '+' as %2b, so every '+' left is an encoded space
            encodedVal = encodedVal.Replace("+", "%20");
        }

        if (quotedParas.Length > 0)
        {
            quotedParas.Append('&');
        }
        quotedParas.Append(para.Key).Append('=').Append(encodedVal);
    }
    return quotedParas.ToString();
}
/* get url's response
 *
 * Builds an HttpWebRequest for url, sends it and returns the response.
 *   url              : address to request
 *   headerDict       : optional headers; a few well-known names (see _applyRequestHeaders)
 *                      override the matching request property, the rest are set verbatim
 *   postDict         : optional form fields; when non-empty the request is a POST whose body
 *                      is _quoteParas(postDict) (this takes precedence over postDataStr)
 *   timeout          : request timeout in ms; values <= 0 keep the framework default
 *   postDataStr      : optional raw POST body, used when postDict is null/empty
 *   readWriteTimeout : stream read/write timeout in ms; values <= 0 keep the framework default
 * Returns the response, or null when sending the body or getting the response raised a
 * WebException. Other exception types are not caught here.
 * On success the response cookies are handed to updateLocalCookies together with curCookies.
 * */
private HttpWebResponse _getUrlResponse(string url,
Dictionary<string, string> headerDict = defHeaderDict,
Dictionary<string, string> postDict = defPostDict,
int timeout = defTimeout,
string postDataStr = defPostDataStr,
int readWriteTimeout = defReadWriteTimeout)
{
    HttpWebResponse resp = null;
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

    // defaults; the caller may override most of them through headerDict
    req.AllowAutoRedirect = true;
    req.Accept = "*/*";
    req.KeepAlive = true;
    req.UserAgent = gUserAgent;
    req.Headers["Accept-Encoding"] = "gzip, deflate";
    req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    req.Proxy = gProxy;

    if (timeout > 0)
    {
        req.Timeout = timeout;
    }
    if (readWriteTimeout > 0)
    {
        //default ReadWriteTimeout is 300000 ms = 300 seconds = 5 minutes !!!
        //too long, so a shorter value (default 30000 ms = 30 seconds) is applied here
        //to support timeout for later StreamReader's ReadToEnd
        req.ReadWriteTimeout = readWriteTimeout;
    }

    if (curCookies != null)
    {
        req.CookieContainer = new CookieContainer();
        req.CookieContainer.PerDomainCapacity = 40; // following will exceed max default 20 cookie per domain
        req.CookieContainer.Add(curCookies);
    }

    _applyRequestHeaders(req, headerDict);

    bool hasPostDict = (postDict != null) && (postDict.Count > 0);
    if (hasPostDict || (!string.IsNullOrEmpty(postDataStr)))
    {
        req.Method = "POST";
        if (req.ContentType == null)
        {
            req.ContentType = "application/x-www-form-urlencoded";
        }
        if (hasPostDict)
        {
            postDataStr = _quoteParas(postDict);
        }
        byte[] postBytes = Encoding.UTF8.GetBytes(postDataStr);
        req.ContentLength = postBytes.Length;
        try
        {
            Stream postDataStream = req.GetRequestStream();
            postDataStream.Write(postBytes, 0, postBytes.Length);
            postDataStream.Close();
        }
        catch (WebException webEx)
        {
            // ReadWriteTimeout was set above, so writing the body may time out too;
            // in that case give up on the request. Other WebExceptions are ignored here
            // and surface again (if at all) from GetResponse below.
            if (webEx.Status == WebExceptionStatus.Timeout)
            {
                req = null;
            }
        }
    }
    else
    {
        req.Method = "GET";
    }

    if (req != null)
    {
        //may timeout, has fixed in:
        //http://www.crifan.com/fixed_problem_sometime_httpwebrequest_getresponse_timeout/
        try
        {
            resp = (HttpWebResponse)req.GetResponse();
            updateLocalCookies(resp.Cookies, ref curCookies);
        }
        catch (WebException webEx)
        {
            // a failed GetResponse leaves resp null, so any WebException ends in a null return
            if (webEx.Status == WebExceptionStatus.Timeout)
            {
                resp = null;
            }
        }
    }
    return resp;
}

// Apply caller-supplied headers to req. Header names are matched case-insensitively
// (culture-invariant); the names below set the corresponding request property, anything
// else is written to req.Headers under the name exactly as given.
private void _applyRequestHeaders(HttpWebRequest req, Dictionary<string, string> headerDict)
{
    if ((headerDict == null) || (headerDict.Count == 0))
    {
        return;
    }

    foreach (KeyValuePair<string, string> header in headerDict)
    {
        string headerValue = header.Value;
        // invariant lowering: culture-sensitive ToLower() would turn 'I' into a dotless i
        // under e.g. tr-TR and make "KEEP-ALIVE" miss its case label
        switch (header.Key.ToLowerInvariant())
        {
            case "referer":
                req.Referer = headerValue;
                break;

            case "allow-autoredirect":
            case "allowautoredirect":
            case "allow autoredirect":
            {
                // value must parse as a bool, otherwise the default is kept
                bool isAllow;
                if (bool.TryParse(headerValue, out isAllow))
                {
                    req.AllowAutoRedirect = isAllow;
                }
                break;
            }

            case "accept":
                req.Accept = headerValue;
                break;

            case "keep-alive":
            case "keepalive":
            case "keep alive":
            {
                // value must parse as a bool, otherwise the default is kept
                bool isKeepAlive;
                if (bool.TryParse(headerValue, out isKeepAlive))
                {
                    req.KeepAlive = isKeepAlive;
                }
                break;
            }

            case "accept-language":
            case "acceptlanguage":
            case "accept language":
                req.Headers["Accept-Language"] = headerValue;
                break;

            case "user-agent":
            case "useragent":
            case "user agent":
                req.UserAgent = headerValue;
                break;

            case "content-type":
            case "contenttype":
            case "content type":
                req.ContentType = headerValue;
                break;

            default:
                req.Headers[header.Key] = headerValue;
                break;
        }
    }
}
#if USE_GETURLRESPONSE_BW
// Start _getUrlResponse on a background worker. The arguments are forwarded unchanged;
// bgwGetUrlResp_RunWorkerCompleted receives the outcome.
private void getUrlResponse_bw(string url,
Dictionary<string, string> headerDict = defHeaderDict,
Dictionary<string, string> postDict = defPostDict,
int timeout = defTimeout,
string postDataStr = defPostDataStr,
int readWriteTimeout = defReadWriteTimeout)
{
    // mark the request as still pending
    bNotCompleted_resp = true;

    BackgroundWorker bgwGetUrlResp = new BackgroundWorker();
    bgwGetUrlResp.DoWork += bgwGetUrlResp_DoWork;
    bgwGetUrlResp.RunWorkerCompleted += bgwGetUrlResp_RunWorkerCompleted;

    // pack the arguments in the order bgwGetUrlResp_DoWork unpacks them, then run on another thread
    object[] workerArgs = { url, headerDict, postDict, timeout, postDataStr, readWriteTimeout };
    bgwGetUrlResp.RunWorkerAsync(workerArgs);
}
// DoWork handler: unpacks the object[] built by getUrlResponse_bw
// (url, headerDict, postDict, timeout, postDataStr, readWriteTimeout)
// and stores the result of _getUrlResponse in e.Result.
private void bgwGetUrlResp_DoWork(object sender, DoWorkEventArgs e)
{
    object[] workerArgs = (object[])e.Argument;

    e.Result = _getUrlResponse(
        (string)workerArgs[0],
        (Dictionary<string, string>)workerArgs[1],
        (Dictionary<string, string>)workerArgs[2],
        (int)workerArgs[3],
        (string)workerArgs[4],
        (int)workerArgs[5]);
}
//void m_bgWorker_ProgressChanged(object sender, ProgressChangedEventArgs e)
//{
// bRespNotCompleted = true;
//}
// RunWorkerCompleted handler for the worker started by getUrlResponse_bw.
// Stores the response in gCurResp (null when the worker failed or was cancelled) and then
// clears bNotCompleted_resp. The flag is cleared in every case so that code waiting on it
// (presumably the caller that started the worker) is never left waiting.
private void bgwGetUrlResp_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    if ((e.Error != null) || e.Cancelled)
    {
        // e.Result must not be read in these states (the getter throws); report "no response"
        gCurResp = null;
    }
    else
    {
        // Everything completed normally: e.Result is what bgwGetUrlResp_DoWork stored
        gCurResp = (HttpWebResponse)e.Result;
    }

    // publish the result first, then signal completion
    bNotCompleted_resp = false;
}
#endif
// Start a download on a fresh background worker (stored in gBgwDownload).
//   respBytesBuf       : buffer handed to the worker to receive the response bytes
//   url/headerDict/postDict/timeout : forwarded to the worker as its argument array
//   funcUpdateProgress : optional callback invoked with the progress percentage
// Completion is signalled through bgwDownload_RunWorkerCompleted.
private void getUrlRespStreamBytes_bw(ref Byte[] respBytesBuf,
string url,
Dictionary<string, string> headerDict,
Dictionary<string, string> postDict,
int timeout,
Action<int> funcUpdateProgress)
{
    // new worker for every download
    gBgwDownload = new BackgroundWorker();
    BackgroundWorker worker = gBgwDownload;

    worker.DoWork += bgwDownload_DoWork;
    worker.RunWorkerCompleted += bgwDownload_RunWorkerCompleted;
    worker.WorkerReportsProgress = true;
    worker.ProgressChanged += bgwDownload_ProgressChanged;

    // reset per-download state
    bNotCompleted_download = true;
    gFuncUpdateProgress = funcUpdateProgress;

    // argument order must match what bgwDownload_DoWork unpacks; runs on another thread
    object[] workerArgs = { respBytesBuf, url, headerDict, postDict, timeout };
    worker.RunWorkerAsync(workerArgs);
}
// ProgressChanged handler: passes the reported percentage to the registered
// progress callback, if there is one. e.UserState is not used.
private void bgwDownload_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    if (gFuncUpdateProgress == null)
    {
        // nobody asked for progress updates
        return;
    }

    gFuncUpdateProgress(e.ProgressPercentage);
}
// DoWork handler for the download worker.
// e.Argument is the object[] built by getUrlRespStreamBytes_bw:
//   [0] Byte[] buffer to fill, [1] url, [2] headerDict, [3] postDict, [4] timeout.
// Reads the response body into the buffer, reporting a 0..100 percentage after each chunk.
// e.Result is the number of bytes read, or -1 when anything failed (no response, response
// without a usable Content-Length, buffer too small, I/O error, ...).
private void bgwDownload_DoWork(object sender, DoWorkEventArgs e)
{
    // Report through the worker that raised this event rather than the gBgwDownload field,
    // which is replaced whenever another download is started.
    BackgroundWorker worker = (BackgroundWorker)sender;

    object[] paraObj = (object[])e.Argument;
    Byte[] respBytesBuf = (Byte[])paraObj[0];
    string url = (string)paraObj[1];
    Dictionary<string, string> headerDict = (Dictionary<string, string>)paraObj[2];
    Dictionary<string, string> postDict = (Dictionary<string, string>)paraObj[3];
    // paraObj[4] (timeout) is intentionally not forwarded: getUrlResponse is called with
    // its default timeout, as before.

    int realReadoutLen = 0;
    try
    {
        // Dispose both the response and its stream so the underlying connection is released
        // even when reading fails part-way.
        using (HttpWebResponse resp = getUrlResponse(url, headerDict, postDict))
        using (Stream binStream = resp.GetResponseStream())
        {
            long totalLength = resp.ContentLength;
            long expectReadoutLen = totalLength;
            int curBufPos = 0;
            int curReadoutLen;

            // download logic: each Read returns some data; repeat until no more data
            // (the stream does not support seeking, so its Length cannot be used)
            do
            {
                curReadoutLen = binStream.Read(respBytesBuf, curBufPos, (int)expectReadoutLen);
                if (curReadoutLen > 0)
                {
                    curBufPos += curReadoutLen;
                    expectReadoutLen -= curReadoutLen;
                    realReadoutLen += curReadoutLen;

                    // clamp to 0..100 before reporting
                    int currentPercent = (int)(((long)curBufPos * 100) / totalLength);
                    if (currentPercent < 0)
                    {
                        currentPercent = 0;
                    }
                    if (currentPercent > 100)
                    {
                        currentPercent = 100;
                    }
                    worker.ReportProgress(currentPercent);
                }
            } while (curReadoutLen > 0);
        }
    }
    catch (Exception)
    {
        // best effort: every failure is reported to the completion handler as -1
        realReadoutLen = -1;
    }

    e.Result = realReadoutLen;
}
// RunWorkerCompleted handler for the download worker.
// Stores the byte count produced by bgwDownload_DoWork in gRealReadoutLen (-1 when the
// worker failed or was cancelled, the same failure value DoWork itself uses) and then
// clears bNotCompleted_download. The flag is cleared in every case so that code waiting
// on it (presumably the caller that started the download) is never left waiting.
private void bgwDownload_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    if ((e.Error != null) || e.Cancelled)
    {
        // e.Result must not be read in these states (the getter throws)
        gRealReadoutLen = -1;
    }
    else
    {
        // Everything completed normally: e.Result is the length stored by DoWork
        gRealReadoutLen = (int)e.Result;
    }

    // publish the result first, then signal completion
    bNotCompleted_download = false;
}