【问题】
写Python代码,利用urllib2去访问网络,结果期间会出现错误:
| urllib2.URLError: <urlopen error [Errno 10060] > | 
【解决过程】
1.后来发现,程序本身是好的,但是是由于,网络的偶尔的不稳定,而导致了此错误的。
2.所以后来就想到,当发现网络遇到这类错误的时候,多试几次,应该就可以解决此问题了。
所以把原先的:
#itemRespHtml = crifanLib.getUrlRespHtml(itemLink);
改为:
itemRespHtml = crifanLib.getUrlRespHtml_multiTry(itemLink);
其中对应的代码是:
中的:
#------------------------------------------------------------------------------
def getUrlResponse(url, postDict={}, headerDict={}, timeout=0, useGzip=False, postDataDelimiter="&") :
    """Get response from url, support optional postDict,headerDict,timeout,useGzip
    Note:
    1. if postDict not null, url request auto become to POST instead of default GET
    2  if you want to auto handle cookies, should call initAutoHandleCookies() before use this function.
       then following urllib2.Request will auto handle cookies
    """
    # makesure url is string, not unicode, otherwise urllib2.urlopen will error
    url = str(url);
    if (postDict) :
        if(postDataDelimiter=="&"):
            postData = urllib.urlencode(postDict);
        else:
            postData = "";
            for eachKey in postDict.keys() :
                postData += str(eachKey) + "="  + str(postDict[eachKey]) + postDataDelimiter;
        postData = postData.strip();
        logging.info("postData=%s", postData);
        req = urllib2.Request(url, postData);
        logging.info("req=%s", req);
        req.add_header('Content-Type', "application/x-www-form-urlencoded");
    else :
        req = urllib2.Request(url);
    defHeaderDict = {
        'User-Agent'    : gConst['UserAgent'],
        'Cache-Control' : 'no-cache',
        'Accept'        : '*/*',
        'Connection'    : 'Keep-Alive',
    };
    # add default headers firstly
    for eachDefHd in defHeaderDict.keys() :
        #print "add default header: %s=%s"%(eachDefHd,defHeaderDict[eachDefHd]);
        req.add_header(eachDefHd, defHeaderDict[eachDefHd]);
    if(useGzip) :
        #print "use gzip for",url;
        req.add_header('Accept-Encoding', 'gzip, deflate');
    # add customized header later -> allow overwrite default header 
    if(headerDict) :
        #print "added header:",headerDict;
        for key in headerDict.keys() :
            req.add_header(key, headerDict[key]);
    if(timeout > 0) :
        # set timeout value if necessary
        resp = urllib2.urlopen(req, timeout=timeout);
    else :
        resp = urllib2.urlopen(req);
        
    #update cookies into local file
    if(gVal['cookieUseFile']):
        gVal['cj'].save();
        logging.info("gVal['cj']=%s", gVal['cj']);
    
    return resp;
#------------------------------------------------------------------------------
# get response html==body from url
#def getUrlRespHtml(url, postDict={}, headerDict={}, timeout=0, useGzip=False) :
def getUrlRespHtml(url, postDict={}, headerDict={}, timeout=0, useGzip=True, postDataDelimiter="&") :
    resp = getUrlResponse(url, postDict, headerDict, timeout, useGzip, postDataDelimiter);
    respHtml = resp.read();
    if(useGzip) :
        #print "---before unzip, len(respHtml)=",len(respHtml);
        respInfo = resp.info();
        
        # Server: nginx/1.0.8
        # Date: Sun, 08 Apr 2012 12:30:35 GMT
        # Content-Type: text/html
        # Transfer-Encoding: chunked
        # Connection: close
        # Vary: Accept-Encoding
        # ...
        # Content-Encoding: gzip
        
        # sometime, the request use gzip,deflate, but actually returned is un-gzip html
        # -> response info not include above "Content-Encoding: gzip"
        # eg: http://blog.sina.com.cn/s/comment_730793bf010144j7_3.html
        # -> so here only decode when it is indeed is gziped data
        if( ("Content-Encoding" in respInfo) and (respInfo['Content-Encoding'] == "gzip")) :
            respHtml = zlib.decompress(respHtml, 16+zlib.MAX_WBITS);
            #print "+++ after unzip, len(respHtml)=",len(respHtml);
    return respHtml;
def getUrlRespHtml_multiTry(url, postDict={}, headerDict={}, timeout=0, useGzip=True, postDataDelimiter="&", maxTryNum=5):
    """
        get url response html, multiple try version:
            if fail, then retry
    """
    respHtml = "";
    
    # access url
    # mutile retry, if some (mostly is network) error
    for tries in range(maxTryNum) :
        try :
            respHtml = getUrlRespHtml(url, postDict, headerDict, timeout, useGzip, postDataDelimiter);
            #logging.debug("Successfully access url %s", url);
            break # successfully, so break now
        except :
            if tries < (maxTryNum - 1) :
                #logging.warning("Access url %s fail, do %d retry", url, (tries + 1));
                continue;
            else : # last try also failed, so exit
                logging.error("Has tried %d times to access url %s, all failed!", maxTryNum, url);
                break;
    return respHtml;
    
【总结】
当访问网络出错:
| urllib2.URLError: <urlopen error [Errno 10060] > | 
那就多试几次,就可以了。
转载请注明:在路上 » 【已解决】Python访问网络出错:urllib2.URLError: <urlopen error [Errno 10060] >