【背景】
之前写了一段Python代码,去模拟访问menupix.com的菜单链接页面(menu_link.php),
然后获得返回的jsonp字符串。
【scrape_menupix_com代码分享】
1.截图:
(1)运行效果:
返回的jsonp示例:
jsonp1358141152({"menuHtml" :"<script type='text/javascript'>\n\n ......"});
2.Python项目代码下载:
scrape_menupix_com_2013-01-14.7z
3.代码分享:
(1)scrape_menupix_com.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-------------------------------------------------------------------------------
Function:
scrape menupix.com to get the response jsonp string
https://www.elance.com/j/scrape-website/36786225/
Version: 2013-01-14
Author: Crifan Li
Contact: https://www.crifan.org/about/me/
-------------------------------------------------------------------------------
"""
#--------------------------------const values-----------------------------------
# Module-level placeholder dictionaries. All three start out empty and nothing
# in the visible script reads or writes them.
gConst = {}  # presumably reserved for constant values -- TODO confirm
gCfg = {}    # presumably reserved for configuration -- TODO confirm
gVal = {}    # presumably reserved for runtime values -- TODO confirm
#---------------------------------import---------------------------------------
import os;
import re;
import sys;
sys.path.append("libs");
from BeautifulSoup import BeautifulSoup,Tag,CData;
import crifanLib;
import logging;
# import urllib;
# import json;
# import csv;
# import argparse;
# import codecs;
def main():
    """Fetch the menupix.com menu-link page for each ID and request its menu JSONP.

    For every restaurant ID in the hard-coded ``idList``:

    1. download ``menu_link.php?mxresto_id=<id>`` from menupix.com;
    2. extract the key passed to ``MenusApi("...")`` in that page;
    3. request the singleplatform menu URL with that key, ``v=2`` and a
       ``jsonp<timestamp>`` callback name.

    The responses are only written to the log; nothing is returned. An ID
    whose page contains no ``MenusApi("...")`` call is reported with a warning
    and skipped.
    """
    # init cookie
    crifanLib.initAutoHandleCookies()

    prefUrl = "http://www.menupix.com/menudirectory/menu_link.php?mxresto_id="

    # Example of the final request URL:
    # http://menus.singleplatform.co/restaurants/saketini/menu?apiKey=k47dex17opfs7y7nae9a6p8o0&v=2&callback=jsonp1358010219463
    # NOTE(review): the restaurant slug ("saketini") is hard-coded, so every ID
    # in idList is queried against the same restaurant -- confirm whether the
    # slug should be derived per ID before adding more entries to idList.
    menusBaseUrl = "http://menus.singleplatform.co/restaurants/saketini/menu"

    # The page embeds the API key like this:
    #   var menuApi = new MenusApi("k47dex17opfs7y7nae9a6p8o0");
    # Raw string so that \( and \w reach the regex engine unchanged instead of
    # being treated as (invalid) string escapes; compiled once, outside the loop.
    menusApiPattern = re.compile(r'MenusApi\("(?P<menusApi>\w+)"\)')

    idList = [
        "201384",
    ]

    for eachId in idList:
        wholeUrl = prefUrl + str(eachId)
        respHtml = crifanLib.getUrlRespHtml(wholeUrl)
        logging.debug("respHtml=%s", respHtml)

        foundMenusApi = menusApiPattern.search(respHtml)
        logging.info("foundMenusApi=%s", foundMenusApi)
        if not foundMenusApi:
            # Previously this case was skipped silently; make it visible.
            logging.warning("Can not find MenusApi key for id=%s, url=%s", eachId, wholeUrl)
            continue

        menusApi = foundMenusApi.group("menusApi")
        logging.info("menusApi=%s", menusApi)

        # The JSONP callback name is "jsonp" followed by the current timestamp.
        timeStamp10Digit = crifanLib.getCurTimestamp()
        logging.info("timeStamp10Digit=%s", timeStamp10Digit)
        jsonp = "jsonp" + str(timeStamp10Digit)
        logging.info("jsonp=%s", jsonp)

        paraDict = {
            "apiKey": menusApi,
            "v": "2",
            "callback": jsonp,
        }
        menusWholeUrl = crifanLib.genFullUrl(menusBaseUrl, paraDict)
        logging.info("menusWholeUrl=%s", menusWholeUrl)

        menusRespHtml = crifanLib.getUrlRespHtml(menusWholeUrl)
        logging.info("menusRespHtml=%s", menusRespHtml)
        # here successfully got:
        # menusRespHtml=jsonp1358141152({"menuHtml" :"<script type='text/javascript'>\n\n ......"});
###############################################################################
if __name__ == "__main__":
    # The log file is named after whatever crifanLib.extractFilename() returns
    # for the invoked script path (presumably its base name -- TODO confirm),
    # with ".log" appended.
    logFilename = crifanLib.extractFilename(sys.argv[0]) + ".log"

    # Root logger: DEBUG and above go to the log file, opened in 'w' mode.
    logging.basicConfig(
        filename=logFilename,
        filemode='w',
        level=logging.DEBUG,
        format='LINE %(lineno)-4d %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M')

    # Second handler: a stream handler (sys.stderr by default) limited to INFO
    # and above, using a slightly different line format than the file.
    consoleFormatter = logging.Formatter('LINE %(lineno)-4d : %(levelname)-8s %(message)s')
    consoleHandler = logging.StreamHandler()
    consoleHandler.setLevel(logging.INFO)
    consoleHandler.setFormatter(consoleFormatter)
    rootLogger = logging.getLogger('')
    rootLogger.addHandler(consoleHandler)

    try:
        main()
    except:
        # Deliberately catches everything: record the traceback in the log,
        # then let the original exception propagate unchanged.
        logging.exception("Unknown Error !")
        raise
【总结】
转载请注明:在路上 » 【代码分享】Python代码:scrape_menupix_com – 抓取menupix.com以获得返回的jsonp字符串