【背景】
之前自己弄的BlogsToWordPress,后来希望添加支持,导出网易163博客中的心情随笔的内容。
之前已经通过代码,可以获得返回的DWR-REPLY数据了:
【记录】给BlogsToWordPress添加支持导出网易的心情随笔
现在就是想办法,在Python中去分析并解析这些数据了:
LINE 511 DEBUG getFeelingCardDwrUrl=http://api.blog.163.com/ni_chen/dwr/call/plaincall/FeelingsBeanNew.getRecentFeelingCards.dwr
LINE 519 DEBUG feelingCardRespHtml=//#DWR-INSERT
//#DWR-REPLY
var s0={};var s1={};var s2={};var s3={};var s4={};var s5={};var s6={};var s7={};var s8={};var s9={};var s10={};var s11={};var s12={};var s13={};var s14={};var s15={};var s16={};var s17={};var s18={};var s19={};var s20={};var s21={};var s22={};var s23={};var s24={};var s25={};var s26={};var s27={};var s28={};var s29={};var s30={};var s31={};var s32={};var s33={};var s34={};var s35={};var s36={};var s37={};var s38={};var s39={};var s40={};var s41={};var s42={};var s43={};var s44={};var s45={};var s46={};var s47={};var s48={};var s49={};var s50={};var s51={};var s52={};var s53={};var s54={};var s55={};var s56={};var s57={};var s58={};var s59={};var s60={};var s61={};var s62={};var s63={};var s64={};var s65={};var s66={};var s67={};var s68={};var s69={};var s70={};var s71={};var s72={};var s73={};var s74={};var s75={};var s76={};var s77={};var s78={};var s79={};var s80={};var s81={};var s82={};var s83={};var s84={};var s85={};var s86={};var s87={};var s88={};var s89={};var s90={};var s91={};var s92={};var s93={};var s94={};var s95={};var s96={};var s97={};var s98={};var s99={};var s100={};var s101={};var s102={};var s103={};var s104={};var s105={};var s106={};var s107={};var s108={};var s109={};var s110={};var s111={};var s112={};var s113={};var s114={};var s115={};var s116={};var s117={};var s118={};var s119={};var s120={};var s121={};var s122={};var s123={};var s124={};var s125={};var s126={};var s127={};var s128={};var s129={};var s130={};var s131={};var s132={};var s133={};var s134={};var s135={};var s136={};var s137={};var s138={};var s139={};var s140={};var s141={};var s142={};var s143={};var s144={};var s145={};var s146={};var s147={};var s148={};var s149={};var s150={};var s151={};var s152={};var s153={};var s154={};var s155={};var s156={};var s157={};var s158={};var s159={};var s160={};var s161={};var s162={};var s163={};var s164={};var s165={};var s166={};var s167={};var s168={};var s169={};var s170={};var s171={};var s172={};var s173={};var s174={};var 
s175={};var s176={};var s177={};var s178={};var s179={};var s180={};var s181={};var s182={};var s183={};var s184={};var s185={};var s186={};var s187={};var s188={};var s189={};var s190={};var s191={};var s192={};var s193={};var s194={};var s195={};var s196={};var s197={};var s198={};var s199={};var s200={};var s201={};var s202={};var s203={};var s204={};var s205={};var s206={};var s207={};var s208={};var s209={};var s210={};var s211={};var s212={};var s213={};var s214={};var s215={};var s216={};var s217={};var s218={};var s219={};var s220={};var s221={};var s222={};var s223={};var s224={};var s225={};var s226={};var s227={};var s228={};var s229={};var s230={};var s231={};var s232={};var s233={};var s234={};var s235={};var s236={};var s237={};var s238={};var s239={};var s240={};var s241={};var s242={};var s243={};var s244={};var s245={};var s246={};var s247={};var s248={};var s249={};var s250={};var s251={};var s252={};var s253={};var s254={};var s255={};var s256={};var s257={};var s258={};var s259={};var s260={};var s261={};var s262={};var s263={};var s264={};var s265={};var s266={};var s267={};var s268={};var s269={};var s270={};var s271={};var s272={};var s273={};var s274={};var s275={};var s276={};var s277={};var s278={};var s279={};var s280={};var s281={};var s282={};var s283={};var s284={};var s285={};var s286={};var s287={};var s288={};var s289={};var s290={};var s291={};var s292={};var s293={};var s294={};var s295={};var s296={};var s297={};var s298={};var s299={};var s300={};var s301={};var s302={};var s303={};var s304={};var s305={};var s306={};var s307={};var s308={};var s309={};var s310={};var s311={};var s312={};var s313={};var s314={};var s315={};var s316={};var s317={};var s318={};var s319={};var s320={};var s321={};var s322={};var s323={};var s324={};var s325={};var s326={};var s327={};var s328={};var s329={};var s330={};var s331={};var s332={};var s333={};var s334={};var s335={};var s336={};var s337={};var s338={};var s339={};var s340={};var 
s341={};var s342={};var s343={};var s344={};var s345={};var s346={};var s347={};var s348={};var s349={};var s350={};var s351={};var s352={};var s353={};var s354={};var s355={};var s356={};var s357={};var s358={};var s359={};var s360={};var s361={};var s362={};var s363={};var s364={};var s365={};var s366={};var s367={};var s368={};var s369={};var s370={};var s371={};var s372={};var s373={};var s374={};var s375={};var s376={};var s377={};var s378={};var s379={};var s380={};var s381={};var s382={};var s383={};var s384={};var s385={};var s386={};var s387={};var s388={};var s389={};var s390={};var s391={};var s392={};var s393={};var s394={};var s395={};var s396={};var s397={};var s398={};var s399={};var s400={};var s401={};var s402={};var s403={};var s404={};var s405={};var s406={};var s407={};var s408={};var s409={};var s410={};var s411={};var s412={};var s413={};var s414={};var s415={};var s416={};var s417={};var s418={};var s419={};var s420={};var s421={};var s422={};var s423={};var s424={};var s425={};var s426={};var s427={};var s428={};var s429={};var s430={};var s431={};var s432={};var s433={};var s434={};var s435={};var s436={};var s437={};var s438={};var s439={};var s440={};var s441={};var s442={};var s443={};var s444={};var s445={};var s446={};var s447={};var s448={};var s449={};var s450={};var s451={};var s452={};var s453={};var s454={};var s455={};var s456={};var s457={};var s458={};var s459={};var s460={};var s461={};var s462={};var s463={};var s464={};var s465={};var s466={};var s467={};var s468={};var s469={};var s470={};var s471={};var s472={};var s473={};var s474={};var s475={};var s476={};var s477={};var s478={};var s479={};var s480={};var s481={};var s482={};var s483={};var s484={};var s485={};var s486={};var s487={};var s488={};var s489={};var s490={};var s491={};var s492={};var s493={};var s494={};var s495={};var s496={};var s497={};var s498={};var s499={};var s500={};var s501={};var s502={};var s503={};var s504={};var s505={};var s506={};var 
s507={};var s508={};var s509={};var s510={};var s511={};var s512={};var s513={};var s514={};var s515={};var s516={};var s517={};var s518={};var s519={};var s520={};var s521={};var s522={};var s523={};var s524={};var s525={};var s526={};var s527={};var s528={};var s529={};var s530={};var s531={};var s532={};var s533={};var s534={};var s535={};var s536={};var s537={};var s538={};var s539={};var s540={};var s541={};var s542={};var s543={};var s544={};var s545={};var s546={};var s547={};var s548={};var s549={};var s550={};var s551={};var s552={};var s553={};var s554={};var s555={};var s556={};var s557={};var s558={};var s559={};var s560={};var s561={};var s562={};var s563={};var s564={};var s565={};var s566={};var s567={};var s568={};var s569={};var s570={};var s571={};var s572={};var s573={};var s574={};var s575={};var s576={};var s577={};var s578={};var s579={};var s580={};var s581={};var s582={};var s583={};var s584={};var s585={};var s586={};var s587={};var s588={};var s589={};var s590={};var s591={};var s592={};var s593={};var s594={};var s595={};var s596={};var s597={};var s598={};var s599={};var s600={};var s601={};var s602={};var s603={};var s604={};var s605={};var s606={};var s607={};var s608={};var s609={};var s610={};var s611={};var s612={};var s613={};var s614={};var s615={};var s616={};var s617={};var s618={};var s619={};var s620={};var s621={};var s622={};var s623={};var s624={};var s625={};var s626={};var s627={};var s628={};var s629={};var s630={};var s631={};var s632={};var s633={};var s634={};var s635={};var s636={};var s637={};var s638={};var s639={};var s640={};var s641={};var s642={};var s643={};var s644={};var s645={};var s646={};var s647={};var s648={};var s649={};var s650={};var s651={};var s652={};var s653={};var s654={};var s655={};var s656={};var s657={};var s658={};var s659={};var s660={};var s661={};var s662={};var s663={};var s664={};var s665={};var s666={};var s667={};var s668={};var s669={};var s670={};var s671={};var s672={};var 
s673={};var s674={};var s675={};var s676={};var s677={};var s678={};var s679={};var s680={};var s681={};var s682={};var s683={};var s684={};var s685={};var s686={};var s687={};var s688={};var s689={};var s690={};var s691={};var s692={};var s693={};var s694={};var s695={};var s696={};var s697={};var s698={};var s699={};var s700={};var s701={};var s702={};var s703={};var s704={};var s705={};var s706={};var s707={};var s708={};var s709={};var s710={};var s711={};var s712={};var s713={};var s714={};var s715={};var s716={};var s717={};var s718={};var s719={};var s720={};var s721={};var s722={};var s723={};var s724={};var s725={};var s726={};var s727={};var s728={};var s729={};var s730={};var s731={};var s732={};var s733={};var s734={};var s735={};var s736={};var s737={};var s738={};var s739={};var s740={};var s741={};var s742={};var s743={};var s744={};var s745={};var s746={};var s747={};var s748={};var s749={};var s750={};var s751={};var s752={};var s753={};var s754={};var s755={};var s756={};var s757={};var s758={};var s759={};var s760={};var s761={};var s762={};var s763={};var s764={};var s765={};var s766={};var s767={};var s768={};var s769={};var s770={};var s771={};var s772={};var s773={};var s774={};var s775={};var s776={};var s777={};var s778={};var s779={};var s780={};var s781={};var s782={};var s783={};var s784={};var s785={};var s786={};var s787={};var s788={};s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";
s1.commentCount=1;s1.content="\u4E3B\u9875\u7EC8\u4E8E\u7545\u901A\u4E86\uFF0C\u4FFA\u53C8\u8981\u632A\u7A9D\u4E86[P]\u5FAE\u7B11[/P]";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null;s1.publishTime=1350461318140;s1.synchMiniBlog=-1;s1.userAvatar=0;s1.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s1.userId=186541395;s1.userName="ni_chen";s1.userNickname="Neysa";
s2.commentCount=0;s2.content="BK\u529E\u516C\u5BA4\u5927\u5988\u771F\u662F\u540D\u4E0D\u865A\u4F20\uFF0C\u5976\u5976\u7684\u4ECA\u5929\u8FD8\u4E0D\u7ED3\u675F\u6211\u8981\u6297\u8BAE\u4E86";s2.id="134892431";s2.mainCommentCount=0;s2.moodType=1;s2.moveFrom=null;s2.publishTime=1350454487895;s2.synchMiniBlog=-1;s2.userAvatar=0;s2.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s2.userId=186541395;s2.userName="ni_chen";s2.userNickname="Neysa";
s3.commentCount=0;s3.content="\u7814\u7A76\u5BA4\u5C0F\u5B69\u8981\u53BB\u89C1IU\uFF0C\u8FD8\u9884\u7EA6\u4E86\u76AE\u80A4\u7BA1\u7406\uFF0C\u4E24\u4E2A\u4EBA\u5728\u90A3\u5600\u5495\u201C\u8FD9\u6837\u62FF\u4E0D\u5230IU\u7B7E\u540D\u7684.....\u201D[P]\u5931\u671B[/P][P]\u5931\u671B[/P]";s3.id="134892313";s3.mainCommentCount=0;s3.moodType=1;s3.moveFrom=null;s3.publishTime=1350443478140;s3.synchMiniBlog=-1;s3.userAvatar=0;s3.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s3.userId=186541395;s3.userName="ni_chen";s3.userNickname="Neysa";
s4.commentCount=0;s4.content="\u5B66\u957F\u8001\u8BA9\u6211\u5E2E\u4ED6\u4E0B\u7535\u5B50\u4E66\uFF0C\u89C9\u5F97\u6211\u5F88\u5389\u5BB3\uFF0C\u97E9\u56FD\u8FD8\u6CA1\u4E70\u7684\u4E66\u6211\u90FD\u80FD\u4E0B\u6765\uFF0C\u8FD8\u8BF4\u662F\u4E0D\u662F\u7684\u7ED9\u6211\u4E70\u597D\u5403\u7684[P]\u5F00\u6000\u7B11[/P][P]\u5F00\u6000\u7B11[/P]";s4.id="134829323";s4.mainCommentCount=0;s4.moodType=1;s4.moveFrom=null;s4.publishTime=1350366965636;s4.synchMiniBlog=-1;s4.userAvatar=0;s4.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s4.userId=186541395;s4.userName="ni_chen";s4.userNickname="Neysa";
s5.commentCount=0;s5.content="\u6628\u5929\u770B\u5B8C\u300A\u9700\u8981\u6D6A\u6F2B\u300B\uFF0C\u7528\u4E86\u5DEE\u4E0D\u591A\u4E00\u76D2\u7EB8\u5DFE\uFF0C\u773C\u775B\u75BC";s5.id="134829040";s5.mainCommentCount=0;s5.moodType=1;s5.moveFrom=null;s5.publishTime=1350342912629;s5.synchMiniBlog=-1;s5.userAvatar=0;s5.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s5.userId=186541395;s5.userName="ni_chen";s5.userNickname="Neysa";
s6.commentCount=0;s6.content="\"\u751F\u6D3B\u4E2D\u7684\u5899\u4E0D\u662F\u7528\u6765\u649E\u5934\u7684\uFF0C\u662F\u7528\u6765\u8F6C\u5F2F\u7684\". ";s6.id="134829014";s6.mainCommentCount=0;s6.moodType=1;s6.moveFrom=null;s6.publishTime=1350340791901;s6.synchMiniBlog=-1;s6.userAvatar=0;s6.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s6.userId=186541395;s6.userName="ni_chen";s6.userNickname="Neysa";
s7.commentCount=1;s7.content="\u90A3\u5E2E\u4EBA\u53BB\u515C\u98CE\u4E86\uFF0C\u7814\u7A76\u5BA4\u5C31\u5269\u6211\u4E00\u4E2A\u4E86[P]\u5931\u671B[/P][P]\u5931\u671B[/P]";s7.id="134768355";s7.mainCommentCount=1;s7.moodType=1;s7.moveFrom=null;s7.publishTime=1350278946278;s7.synchMiniBlog=-1;s7.userAvatar=0;s7.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s7.userId=186541395;s7.userName="ni_chen";s7.userNickname="Neysa";
s8.commentCount=0;s8.content="\u4E09\u5341\u5C81\u524D\u603B\u7B97\u6709\u4E86\u4E2A\u5C0F\u7A9D[IMG]http://img4.ph.126.net/NdQSyqju67sds7xsP9wWVA==/6597146732471627934.jpg[/IMG]";s8.id="134757722";s8.mainCommentCount=0;s8.moodType=0;s8.moveFrom="wap";s8.publishTime=1350211074706;s8.synchMiniBlog=-1;s8.userAvatar=0;s8.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s8.userId=186541395;s8.userName="ni_chen";s8.userNickname="Neysa";
......
s786.commentCount=0;s786.content="\u6211\u8BBE\u8BA1\u7684\u6A21\u7248\uFF0C\u563B\u563B\u3002";s786.id="111039880";s786.mainCommentCount=0;s786.moodType=1;s786.moveFrom="";s786.publishTime=1231545101647;s786.synchMiniBlog=-1;s786.userAvatar=0;s786.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s786.userId=186541395;s786.userName="ni_chen";s786.userNickname="Niya";
s787.commentCount=0;s787.content="\u6211\u542C\u4EBA\u8BF4\uFF0C\u5BC2\u5BDE\u7684\u4EBA\uFF0C\u611F\u5192\u4F1A\u62D6\u5F97\u7279\u522B\u957F\uFF0C\u56E0\u4E3A\u4ED6\u81EA\u5DF1\u4E5F\u4E0D\u60F3\u597D\u3002";s787.id="111039881";s787.mainCommentCount=0;s787.moodType=1;s787.moveFrom="";s787.publishTime=1231329494484;s787.synchMiniBlog=-1;s787.userAvatar=0;s787.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s787.userId=186541395;s787.userName="ni_chen";s787.userNickname="Niya";
s788.commentCount=0;s788.content="\u4E0D\u8981\u8FFD\u6C42\u4EC0\u4E48\u7ED3\u679C\uFF0C\u6BCF\u4E2A\u4EBA\u7ED3\u679C\u90FD\u4E00\u6837\uFF0C\u5C31\u662F\u6B7B\u4EA1\u3002";s788.id="111039882";s788.mainCommentCount=0;s788.moodType=1;s788.moveFrom="";s788.publishTime=1231158155439;s788.synchMiniBlog=-1;s788.userAvatar=0;s788.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s788.userId=186541395;s788.userName="ni_chen";s788.userNickname="Niya";
dwr.engine._remoteHandleCallback('1','0',[s0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23,s24,s25,s26,s27,s28,s29,s30,s31,s32,s33,s34,s35,s36,s37,s38,s39,s40,s41,s42,s43,s44,s45,s46,s47,s48,s49,s50,s51,s52,s53,s54,s55,s56,s57,s58,s59,s60,s61,s62,s63,s64,s65,s66,s67,s68,s69,s70,s71,s72,s73,s74,s75,s76,s77,s78,s79,s80,s81,s82,s83,s84,s85,s86,s87,s88,s89,s90,s91,s92,s93,s94,s95,s96,s97,s98,s99,s100,s101,s102,s103,s104,s105,s106,s107,s108,s109,s110,s111,s112,s113,s114,s115,s116,s117,s118,s119,s120,s121,s122,s123,s124,s125,s126,s127,s128,s129,s130,s131,s132,s133,s134,s135,s136,s137,s138,s139,s140,s141,s142,s143,s144,s145,s146,s147,s148,s149,s150,s151,s152,s153,s154,s155,s156,s157,s158,s159,s160,s161,s162,s163,s164,s165,s166,s167,s168,s169,s170,s171,s172,s173,s174,s175,s176,s177,s178,s179,s180,s181,s182,s183,s184,s185,s186,s187,s188,s189,s190,s191,s192,s193,s194,s195,s196,s197,s198,s199,s200,s201,s202,s203,s204,s205,s206,s207,s208,s209,s210,s211,s212,s213,s214,s215,s216,s217,s218,s219,s220,s221,s222,s223,s224,s225,s226,s227,s228,s229,s230,s231,s232,s233,s234,s235,s236,s237,s238,s239,s240,s241,s242,s243,s244,s245,s246,s247,s248,s249,s250,s251,s252,s253,s254,s255,s256,s257,s258,s259,s260,s261,s262,s263,s264,s265,s266,s267,s268,s269,s270,s271,s272,s273,s274,s275,s276,s277,s278,s279,s280,s281,s282,s283,s284,s285,s286,s287,s288,s289,s290,s291,s292,s293,s294,s295,s296,s297,s298,s299,s300,s301,s302,s303,s304,s305,s306,s307,s308,s309,s310,s311,s312,s313,s314,s315,s316,s317,s318,s319,s320,s321,s322,s323,s324,s325,s326,s327,s328,s329,s330,s331,s332,s333,s334,s335,s336,s337,s338,s339,s340,s341,s342,s343,s344,s345,s346,s347,s348,s349,s350,s351,s352,s353,s354,s355,s356,s357,s358,s359,s360,s361,s362,s363,s364,s365,s366,s367,s368,s369,s370,s371,s372,s373,s374,s375,s376,s377,s378,s379,s380,s381,s382,s383,s384,s385,s386,s387,s388,s389,s390,s391,s392,s393,s394,s395,s396,s397,s398,s399,s400,s401,s402,s403,s404,s405,s406,s407,s408,s409,s410,s411,s412,s41
3,s414,s415,s416,s417,s418,s419,s420,s421,s422,s423,s424,s425,s426,s427,s428,s429,s430,s431,s432,s433,s434,s435,s436,s437,s438,s439,s440,s441,s442,s443,s444,s445,s446,s447,s448,s449,s450,s451,s452,s453,s454,s455,s456,s457,s458,s459,s460,s461,s462,s463,s464,s465,s466,s467,s468,s469,s470,s471,s472,s473,s474,s475,s476,s477,s478,s479,s480,s481,s482,s483,s484,s485,s486,s487,s488,s489,s490,s491,s492,s493,s494,s495,s496,s497,s498,s499,s500,s501,s502,s503,s504,s505,s506,s507,s508,s509,s510,s511,s512,s513,s514,s515,s516,s517,s518,s519,s520,s521,s522,s523,s524,s525,s526,s527,s528,s529,s530,s531,s532,s533,s534,s535,s536,s537,s538,s539,s540,s541,s542,s543,s544,s545,s546,s547,s548,s549,s550,s551,s552,s553,s554,s555,s556,s557,s558,s559,s560,s561,s562,s563,s564,s565,s566,s567,s568,s569,s570,s571,s572,s573,s574,s575,s576,s577,s578,s579,s580,s581,s582,s583,s584,s585,s586,s587,s588,s589,s590,s591,s592,s593,s594,s595,s596,s597,s598,s599,s600,s601,s602,s603,s604,s605,s606,s607,s608,s609,s610,s611,s612,s613,s614,s615,s616,s617,s618,s619,s620,s621,s622,s623,s624,s625,s626,s627,s628,s629,s630,s631,s632,s633,s634,s635,s636,s637,s638,s639,s640,s641,s642,s643,s644,s645,s646,s647,s648,s649,s650,s651,s652,s653,s654,s655,s656,s657,s658,s659,s660,s661,s662,s663,s664,s665,s666,s667,s668,s669,s670,s671,s672,s673,s674,s675,s676,s677,s678,s679,s680,s681,s682,s683,s684,s685,s686,s687,s688,s689,s690,s691,s692,s693,s694,s695,s696,s697,s698,s699,s700,s701,s702,s703,s704,s705,s706,s707,s708,s709,s710,s711,s712,s713,s714,s715,s716,s717,s718,s719,s720,s721,s722,s723,s724,s725,s726,s727,s728,s729,s730,s731,s732,s733,s734,s735,s736,s737,s738,s739,s740,s741,s742,s743,s744,s745,s746,s747,s748,s749,s750,s751,s752,s753,s754,s755,s756,s757,s758,s759,s760,s761,s762,s763,s764,s765,s766,s767,s768,s769,s770,s771,s772,s773,s774,s775,s776,s777,s778,s779,s780,s781,s782,s783,s784,s785,s786,s787,s788]);【折腾过程】
1.针对第一个获得的数据:
| \u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86 |
去分析了一下,结果是:
>>> print '\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86'.decode('unicode-escape')
终于有iphone版的了
即,对应着,第一个评论数据:
所以,接下来,就是针对于这样的数据:
s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa"; s1.commentCount=1;s1.content="\u4E3B\u9875\u7EC8\u4E8E\u7545\u901A\u4E86\uFF0C\u4FFA\u53C8\u8981\u632A\u7A9D\u4E86[P]\u5FAE\u7B11[/P]";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null;s1.publishTime=1350461318140;s1.synchMiniBlog=-1;s1.userAvatar=0;s1.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s1.userId=186541395;s1.userName="ni_chen";s1.userNickname="Neysa"; s2.commentCount=0;s2.content="BK\u529E\u516C\u5BA4\u5927\u5988\u771F\u662F\u540D\u4E0D\u865A\u4F20\uFF0C\u5976\u5976\u7684\u4ECA\u5929\u8FD8\u4E0D\u7ED3\u675F\u6211\u8981\u6297\u8BAE\u4E86";s2.id="134892431";s2.mainCommentCount=0;s2.moodType=1;s2.moveFrom=null;s2.publishTime=1350454487895;s2.synchMiniBlog=-1;s2.userAvatar=0;s2.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s2.userId=186541395;s2.userName="ni_chen";s2.userNickname="Neysa";
如何去用python一点点解析出来,成为单个的评论。
2.所以,就是去写代码去解析评论了。
代码如下:
def parseRespDwrToCmtList(respDwrReplyStr):
    """
    Parse response DWR-REPLY string, into comment list

    respDwrReplyStr is the raw text of the DWR reply: a run of javascript
    assignments (sN.commentCount=...;sN.content="...";...;sN.userNickname="...";),
    one group per comment.

    Return the list of raw per-comment strings (one item per sN group).
    Every comment is additionally parsed into a dict and each field is logged.

    NOTE(review): the parsed dicts are only logged, not returned; the return
    value is kept as the raw string list so existing callers are unaffected.
    """
    # example of a single comment string:
    #s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";

    # the named back-reference makes sure a comment starts and ends with the
    # same sN, so a truncated comment can not swallow the following one
    commentPattern = re.compile(r'(?P<strSn>s\d+)\.commentCount=.+?(?P=strSn)\.userNickname=".*?";')
    commentStrList = [foundCmt.group(0) for foundCmt in commentPattern.finditer(respDwrReplyStr)]
    logging.info("len(commentStrList)=%d", len(commentStrList))

    for eachCommentStr in commentStrList:
        #parse each comment string into comment dict (also logs every field)
        _parseSingleCmtStr(eachCommentStr)

    return commentStrList


def _parseSingleCmtStr(eachCommentStr):
    """
    Parse the sN.xxx=...; assignments of one comment string into a comment dict

    A field that can not be found keeps its default value and a warning is logged.
    """
    singleCmtDict = {
        'cmtIdx': 0,
        'commentCount': "",
        'content': "",
        'id': "",
        'mainCommentCount': "",
        'moodType': "",
        'moveFrom': "",
        'publishTime': "",
        'synchMiniBlog': "",
        'userAvatar': "",
        'userAvatarUrl': "",
        'userId': "",
        'userName': "",
        'userNickname': "",
    }

    #first get the comment index
    foundCmtIdx = re.search(r'(?P<strSn>s(?P<cmtIdx>\d+))\.commentCount=', eachCommentStr)
    if not foundCmtIdx:
        logging.warning("can not find comment index in comment string: %s", eachCommentStr)
        return singleCmtDict
    cmtIdx = int(foundCmtIdx.group("cmtIdx"))
    logging.info("cmtIdx=%d", cmtIdx)
    singleCmtDict['cmtIdx'] = cmtIdx

    #every field of this comment is prefixed with the same "sN."
    strSn = foundCmtIdx.group("strSn")
    snPrefix = re.escape(strSn) + r'\.'

    # (field name, regex for the text following "sN.<field name>=")
    fieldPatternList = [
        #s0.commentCount=0;
        ('commentCount', r'(?P<value>\d+);'),
        #s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="
        #quotes are matched outside of the group, so they are not part of the value;
        #the following sN.id= anchors the end, for content itself may contain ";"
        ('content', r'"(?P<value>.*?)";' + snPrefix + r'id="'),
        #s0.id="148749270";
        ('id', r'"(?P<value>\d+)";'),
        #s0.mainCommentCount=0;
        ('mainCommentCount', r'(?P<value>\d+);'),
        #s0.moodType=0;
        ('moodType', r'(?P<value>\d+);'),
        #s0.moveFrom="iphone";
        #s2.moveFrom=null;
        #s8.moveFrom="wap";
        #s699.moveFrom="";
        ('moveFrom', r'"?(?P<value>[^"]*?)"?;'),
        #s0.publishTime=1374626867596;
        ('publishTime', r'(?P<value>\d+);'),
        #s0.synchMiniBlog=-1;
        ('synchMiniBlog', r'(?P<value>.+?);'),
        #s0.userAvatar=0;
        ('userAvatar', r'(?P<value>\d+);'),
        #s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";
        ('userAvatarUrl', r'"?(?P<value>http://.+?)"?;'),
        #s0.userId=186541395;
        ('userId', r'(?P<value>\d+);'),
        #s0.userName="ni_chen";
        #.*? (not .+?) so that an empty "" gives an empty value
        ('userName', r'"?(?P<value>.*?)"?;'),
        #s0.userNickname="Neysa";
        ('userNickname', r'"?(?P<value>.*?)"?;'),
    ]

    for fieldName, valuePattern in fieldPatternList:
        foundField = re.search(snPrefix + fieldName + "=" + valuePattern, eachCommentStr)
        if not foundField:
            logging.warning("can not find %s for comment %s", fieldName, strSn)
            continue
        fieldValue = foundField.group("value")
        if fieldName == 'content':
            try:
                fieldValue = _decodeUnicodeEscape(fieldValue)
            except UnicodeError:
                #malformed escape sequence: keep the raw text instead of aborting
                logging.warning("can not decode content of comment %s, keep raw string", strSn)
        singleCmtDict[fieldName] = fieldValue
        logging.info("%s=%s", fieldName, fieldValue)

    return singleCmtDict


def _decodeUnicodeEscape(escapedStr):
    """
    Decode backslash-u style escape sequences in escapedStr into real characters
    """
    if isinstance(escapedStr, bytes):
        #byte string (Python 2 str / Python 3 bytes): decode directly
        return escapedStr.decode("unicode-escape")
    #text string (Python 3 str / Python 2 unicode): go through bytes first,
    #backslashreplace turns any non-ASCII char into an escape that decodes back
    return escapedStr.encode("ascii", "backslashreplace").decode("unicode-escape")
mainCommentCount不是0
比如:
mainCommentCount=1
的评论,说明其下是有对应的子评论的,所以还要想办法抓取出来。
经过分析,对应的发送的post data和response data分别是:
然后就去将其中的逻辑,用代码,再模拟出来,获得对应的数据,再去分析出来子评论的内容。
4.最后,完整的相关解析部分的代码为:
def getPlaincallRespDwrStr(c0ScriptName, c0MethodName, c0Param0, c0Param1, c0Param2):
    """
    Send one DWR "plaincall" request to the 163 blog API, return the response DWR string

    c0ScriptName : DWR script (bean) name, e.g. BlogBeanNew / FeelingsBeanNew
    c0MethodName : method of that script, e.g. getComments / getRecentFeelingsComment / getRecentFeelingCards
    c0Param0..2  : already type-prefixed DWR parameters, e.g. "string:134875456", "number:1"

    The blog user in the URL comes from gVal['blogUser'], the API host from gConst['blogApi163'].

    Observed requests this function imitates:

    type1 (comments of a blog post):
        url for http://againinput4.blog.163.com/blog/static/172799491201010159650483/ is
        http://api.blog.163.com/againinput4/dwr/call/plaincall/BlogBeanNew.getComments.dwr
            callCount=1
            scriptSessionId=${scriptSessionId}187
            c0-scriptName=BlogBeanNew
            c0-methodName=getComments
            c0-id=0
            c0-param0=string:fks_094067082083086070082083080095085081083068093095082074085
            c0-param1=number:1
            c0-param2=number:0
            batchId=728048
        same as single url:
        http://api.blog.163.com/againinput4/dwr/call/plaincall/BlogBeanNew.getComments.dwr?&callCount=1&scriptSessionId=${scriptSessionId}187&c0-scriptName=BlogBeanNew&c0-methodName=getComments&c0-id=0&c0-param0=string:fks_094067082083086070082083080095085081083068093095082074085&c0-param1=number:1&c0-param2=number:0&batchId=728048

    type2 (sub comments of a feeling card):
        http://api.blog.163.com/ni_chen/dwr/call/plaincall/FeelingsBeanNew.getRecentFeelingsComment.dwr
            callCount=1
            scriptSessionId=${scriptSessionId}187
            c0-scriptName=FeelingsBeanNew
            c0-methodName=getRecentFeelingsComment
            c0-id=0
            c0-param0=string:134875456
            c0-param1=number:1
            c0-param2=number:0
            batchId=705438

    type3 (feeling cards of a user):
        http://api.blog.163.com/ni_chen/dwr/call/plaincall/FeelingsBeanNew.getRecentFeelingCards.dwr
            callCount=1
            scriptSessionId=${scriptSessionId}187
            c0-scriptName=FeelingsBeanNew
            c0-methodName=getRecentFeelingCards
            c0-id=0
            c0-param0=number:186541395
            c0-param1=number:0
            c0-param2=number:20
            batchId=292545
    """
    logging.debug("get FeelingsBeanNew reponse DWR string for c0MethodName=%s, c0Param0=%s, c0Param1=%s, c0Param2=%s", c0MethodName, c0Param0, c0Param1, c0Param2)

    # request body fields, same names as in the observed requests
    dwrRequestFields = {}
    dwrRequestFields['callCount'] = '1'
    dwrRequestFields['scriptSessionId'] = '${scriptSessionId}187'
    dwrRequestFields['c0-scriptName'] = c0ScriptName
    dwrRequestFields['c0-methodName'] = c0MethodName
    dwrRequestFields['c0-id'] = '0'
    dwrRequestFields['c0-param0'] = c0Param0
    dwrRequestFields['c0-param1'] = c0Param1
    dwrRequestFields['c0-param2'] = c0Param2
    # fixed value here, while the observed requests carry varying numbers
    # NOTE(review): should random generate number?
    dwrRequestFields['batchId'] = '1'

    # <api host>/<blog user>/dwr/call/plaincall/<script>.<method>.dwr
    userApiRoot = gConst['blogApi163'] + '/' + gVal['blogUser']
    plaincallDwrUrl = userApiRoot + '/' + "dwr/call/plaincall/" + c0ScriptName + "." + c0MethodName + ".dwr"
    logging.debug("plaincallDwrUrl=%s", plaincallDwrUrl)

    dwrRequestHeaders = {}
    dwrRequestHeaders['Referer'] = "http://api.blog.163.com/crossdomain.html?t=20100205"
    dwrRequestHeaders['Content-Type'] = "text/plain"

    # presumably postDataDelimiter makes crifanLib put each field on its own
    # line, like the plain text bodies listed above -- TODO confirm in crifanLib
    plaincallRespDwrStr = crifanLib.getUrlRespHtml(
        plaincallDwrUrl,
        postDict=dwrRequestFields,
        headerDict=dwrRequestHeaders,
        postDataDelimiter='\r\n')
    logging.debug("plaincallRespDwrStr=%s", plaincallRespDwrStr)

    return plaincallRespDwrStr
def fetchComments_feelingCard():
    """
    Fetch all feeling cards of the current user, plus the replies to each
    card, and return them as one flat list of comment dicts.

    The feeling cards themselves are treated as "main comments"; the replies
    to a card are treated as "sub comments".  The returned list holds all
    main comment dicts first, followed by all sub comment dicts.  Every dict
    carries 'curCmtIdx'/'curCmtNum' (its position/number in the export) and
    'parentCmtNum' (0 when it has no parent).

    Fetching is best effort: if anything fails, the failure is logged at
    debug level and whatever was already collected in the result list is
    returned (possibly an empty list).

    Reads the module-level gVal['userId'].
    """
    totalCmtDictList = []
    totalMainCmtDictList = []
    totalSubCmtDictList = []

    startIdx = 0
    onceGetNum = 1000 # ask for up to 1000 cards per request

    try:
        while True:
            # POST body sent by the browser for this call looks like:
            #   callCount=1
            #   scriptSessionId=${scriptSessionId}187
            #   c0-scriptName=FeelingsBeanNew
            #   c0-methodName=getRecentFeelingCards
            #   c0-id=0
            #   c0-param0=number:186541395
            #   c0-param1=number:0
            #   c0-param2=number:20
            #   batchId=292545
            getRecentFeelingCardsRespDwrStr = getPlaincallRespDwrStr(
                "FeelingsBeanNew",
                "getRecentFeelingCards",
                "number:" + str(gVal['userId']),
                "number:" + str(startIdx),
                "number:" + str(onceGetNum))
            logging.debug("getRecentFeelingCardsRespDwrStr=%s", getRecentFeelingCardsRespDwrStr)

            curMainCmtDictList = parseMainCmtDwrStrToMainCmtDictList(getRecentFeelingCardsRespDwrStr)

            # The parser derives curCmtIdx from the sN variable name in the
            # reply.  NOTE(review): presumably sN restarts at s0 for every
            # request - shift by the request offset so that indices stay
            # unique across pages (a no-op for the first page).
            for eachMainCmtDict in curMainCmtDictList:
                eachMainCmtDict['curCmtIdx'] += startIdx
                eachMainCmtDict['curCmtNum'] = eachMainCmtDict['curCmtIdx'] + 1
            totalMainCmtDictList.extend(curMainCmtDictList)

            curGotMainCmtNum = len(curMainCmtDictList)
            if curGotMainCmtNum < onceGetNum:
                # a short page means there is nothing left to fetch
                logging.debug("Request %d comments, but only response %d comments, so no more comments, has got all comments", onceGetNum, curGotMainCmtNum)
                break

            # Full page: move on to the next one.  Without this the same page
            # would be requested (and appended) forever.
            # NOTE(review): assumes c0-param1 is an item offset, as the
            # variable name startIdx implies - confirm against the service.
            startIdx += onceGetNum

        # add main comment dict list into total comment dict list
        logging.debug("Total got %d main comments dict", len(totalMainCmtDictList))
        totalCmtDictList.extend(totalMainCmtDictList)
        logging.debug("Total comments %d", len(totalCmtDictList))

        # after all main comments are known, fetch the replies of every card
        # that reports at least one
        for eachMainCmtDict in totalMainCmtDictList:
            mainCommentCountInt = int(eachMainCmtDict['mainCommentCount'])
            if mainCommentCountInt > 0:
                logging.debug("[%d] main comment has sub %d comments", eachMainCmtDict['curCmtIdx'], mainCommentCountInt)
                # 1. get sub comment dwr string
                subCmtDwrStr = getFeelingCardSubCmtDwrStr(eachMainCmtDict['id'])
                # 2. parse sub comment dwr string to sub comment dicts
                totalSubCmtDictList.extend(parseSubCmtDwrStrToSubCmtDictList(subCmtDwrStr))

        logging.debug("Total got %d sub comment dict", len(totalSubCmtDictList))
        if totalSubCmtDictList:
            # sub comments are numbered right after the main comments
            subCmtStartIdx = len(totalMainCmtDictList)
            logging.debug("subCmtStartIdx=%d", subCmtStartIdx)
            for idx, eachSubCmtDict in enumerate(totalSubCmtDictList):
                eachSubCmtDict['curCmtIdx'] = subCmtStartIdx + idx
                eachSubCmtDict['curCmtNum'] = eachSubCmtDict['curCmtIdx'] + 1
            logging.debug("done for update sub comment index")

            # id -> curCmtNum lookup tables (built once instead of rescanning
            # both lists for every sub comment)
            mainCmtNumById = dict((eachDict['id'], eachDict['curCmtNum']) for eachDict in totalMainCmtDictList)
            subCmtNumById = dict((eachDict['id'], eachDict['curCmtNum']) for eachDict in totalSubCmtDictList)

            for eachSubCmtDict in totalSubCmtDictList:
                # default parent: the card this reply belongs to
                subCmtParentId = eachSubCmtDict['cardId']
                if subCmtParentId in mainCmtNumById:
                    logging.debug("sub cmt id=%s 's parent's id=%s, parent curCmtNum=%d", eachSubCmtDict['id'], subCmtParentId, mainCmtNumById[subCmtParentId])
                    eachSubCmtDict['parentCmtNum'] = mainCmtNumById[subCmtParentId]

                # a reply to another reply points at that reply instead, e.g.
                #   s0.replyComId="-1";        -> replies to the card itself
                #   s3.replyComId="72175292"   -> replies to sub comment 72175292
                curSubCmtReplyComId = eachSubCmtDict['replyComId']
                if curSubCmtReplyComId in subCmtNumById:
                    logging.debug("sub cmt id=%s 's replyComId=%s, find correspoinding parent (sub) comment, whose curCmtNum=%d", eachSubCmtDict['id'], curSubCmtReplyComId, subCmtNumById[curSubCmtReplyComId])
                    eachSubCmtDict['parentCmtNum'] = subCmtNumById[curSubCmtReplyComId]
            logging.debug("done for update sub comment's parent relation")

            totalCmtDictList.extend(totalSubCmtDictList)
    except Exception:
        # Best effort: keep what has been collected so far.  The original
        # message also formatted a name `url` that does not exist in this
        # function, which made the handler itself fail.
        logging.debug("Fail for fetch the feeling card (index=[%d-%d])", startIdx, startIdx + onceGetNum - 1, exc_info=True)

    return totalCmtDictList
def getFeelingCardSubCmtDwrStr(subCmtId):
    """
    Request the replies ("sub comments") for one id and return the raw
    DWR reply string.

    The given id is sent as the first parameter of
    FeelingsBeanNew.getRecentFeelingsComment; fetchComments_feelingCard
    passes the 'id' of a main comment (feeling card) here.
    """
    # POST body sent by the browser for this call looks like:
    #   callCount=1
    #   scriptSessionId=${scriptSessionId}187
    #   c0-scriptName=FeelingsBeanNew
    #   c0-methodName=getRecentFeelingsComment
    #   c0-id=0
    #   c0-param0=string:134875456
    #   c0-param1=number:1
    #   c0-param2=number:0
    #   batchId=705438
    logging.debug("get sub comment for %s", subCmtId)

    dwrCallParams = ("string:" + str(subCmtId), "number:1", "number:0")
    respDwrStr = getPlaincallRespDwrStr("FeelingsBeanNew", "getRecentFeelingsComment", *dwrCallParams)

    logging.debug("getRecentFeelingsCommentRespDwrStr=%s", respDwrStr)
    return respDwrStr
# Value patterns for one "sN.<field>=<value>;" assignment inside a DWR reply.
# Each pattern has exactly one capture group: the value to store.
_DWR_VAL_DIGITS = r'(\d+)'                      # 1374626867596
_DWR_VAL_BARE = r'(.+?)'                        # null / false / -1 / s1
_DWR_VAL_QUOTED_DIGITS = r'"(\d+)"'             # "148749270"
_DWR_VAL_MAYBE_QUOTED = r'"?(.*?)"?'            # "ni_chen" / "" / null
_DWR_VAL_MAYBE_QUOTED_NONEMPTY = r'"?(.+?)"?'   # "Neysa"
_DWR_VAL_MAYBE_QUOTED_NO_QUOTE = r'"?([^"]*?)"?'  # "iphone" / "" / null
_DWR_VAL_MAYBE_QUOTED_HTTP_URL = r'"?(http://.+?)"?'

# (field name, value pattern) - fields present in main AND sub comments.
# 'content' is handled separately because it is anchored on the following
# "sN.id=" assignment.
_DWR_COMMON_FIELDS = (
    ('id',              _DWR_VAL_QUOTED_DIGITS),        # s0.id="148749270";
    ('moveFrom',        _DWR_VAL_MAYBE_QUOTED_NO_QUOTE),# s0.moveFrom="iphone"; / =null; / ="";
    ('publishTime',     _DWR_VAL_DIGITS),               # s0.publishTime=1374626867596;
    ('synchMiniBlog',   _DWR_VAL_BARE),                 # s0.synchMiniBlog=-1; / =false;
    ('userId',          _DWR_VAL_DIGITS),               # s0.userId=186541395;
)

# fields only present in a main comment (feeling card), e.g.
# s0.commentCount=0;s0.content="...";s0.id="148749270";s0.mainCommentCount=0;
# s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;
# s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://.../0.jpg";
# s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";
_DWR_MAIN_CMT_FIELDS = (
    ('commentCount',        _DWR_VAL_DIGITS),
    ('mainCommentCount',    _DWR_VAL_DIGITS),
    ('moodType',            _DWR_VAL_DIGITS),
    ('userAvatar',          _DWR_VAL_DIGITS),
    ('userAvatarUrl',       _DWR_VAL_MAYBE_QUOTED_HTTP_URL),
    ('userName',            _DWR_VAL_MAYBE_QUOTED_NONEMPTY),
    ('userNickname',        _DWR_VAL_MAYBE_QUOTED_NONEMPTY),
)

# fields only present in a sub comment (reply to a card), e.g.
# s0.cardId="134875456";s0.content="...";s0.id="73300019";
# s0.ip="203.234.215.66";s0.ipName=null;s0.lastUpdateTime=1351380367156;
# s0.mainComId="-1";s0.moveFrom=null;s0.popup=false;
# s0.publishTime=1351380367155;s0.publisherAvatar=0;
# s0.publisherAvatarUrl="http://.../0.jpg";s0.publisherId=55976067;
# s0.publisherName="chenlin198412@126";s0.publisherNickname="Lynn";
# s0.publisherUrl=null;s0.replyComId="-1";s0.replyToUserId=186541395;
# s0.replyToUserName="ni_chen";s0.replyToUserNick="Neysa";s0.spam=0;
# s0.subComments=s1;s0.synchMiniBlog=false;s0.userId=186541395;s0.valid=0;
_DWR_SUB_CMT_FIELDS = (
    ('cardId',              _DWR_VAL_MAYBE_QUOTED),     # id of the parent card
    ('ip',                  _DWR_VAL_MAYBE_QUOTED),     # "203.234.215.66" / null
    ('ipName',              _DWR_VAL_BARE),
    ('lastUpdateTime',      _DWR_VAL_DIGITS),
    ('mainComId',           _DWR_VAL_MAYBE_QUOTED),
    ('popup',               _DWR_VAL_BARE),
    ('publisherAvatar',     _DWR_VAL_DIGITS),
    ('publisherAvatarUrl',  _DWR_VAL_MAYBE_QUOTED),     # "http://..." / null
    ('publisherId',         _DWR_VAL_DIGITS),
    ('publisherName',       _DWR_VAL_MAYBE_QUOTED),
    ('publisherNickname',   _DWR_VAL_MAYBE_QUOTED),
    ('publisherUrl',        _DWR_VAL_MAYBE_QUOTED),
    ('replyComId',          _DWR_VAL_MAYBE_QUOTED),     # "-1" / "72175292"
    ('replyToUserId',       _DWR_VAL_DIGITS),
    ('replyToUserName',     _DWR_VAL_MAYBE_QUOTED),     # "ni_chen" / "" / null
    ('replyToUserNick',     _DWR_VAL_MAYBE_QUOTED),
    ('spam',                _DWR_VAL_DIGITS),
    ('subComments',         _DWR_VAL_BARE),             # s0.subComments=s1;
    ('valid',               _DWR_VAL_DIGITS),
)


def _decodeDwrEscapes(rawStr):
    """Turn the \\uXXXX / \\" escapes of a DWR string value into real characters."""
    if isinstance(rawStr, bytes):
        return rawStr.decode("unicode-escape")
    # already a text string: go through bytes so that both kinds of input work
    return rawStr.encode("ascii", "backslashreplace").decode("unicode-escape")


def _searchDwrOrFail(pattern, singleCmtDwrStr, what):
    """re.search that raises AttributeError (with a readable message) on no match."""
    found = re.search(pattern, singleCmtDwrStr)
    if found is None:
        # same exception type the original `None.group(...)` failure produced
        raise AttributeError("can not find %s in DWR string: %s" % (what, singleCmtDwrStr))
    return found


def parseSingleDwrStrToCmtDict(singleCmtDwrStr):
    """
    Parse the DWR assignments of ONE comment into a comment dict.

    singleCmtDwrStr is a run of "sN.<field>=<value>;" assignments that all
    belong to the same sN variable.  If the string starts with "sN.cardId="
    it is parsed as a sub comment, otherwise as a main comment.

    The returned dict always contains:
        curCmtIdx, curCmtNum, parentCmtNum, isSubComment,
        content, id, moveFrom, publishTime, synchMiniBlog, userId
    plus the fields listed in _DWR_MAIN_CMT_FIELDS or _DWR_SUB_CMT_FIELDS.
    All extracted values are strings exactly as found in the reply (so an
    unquoted null is stored as "null"), except:
        - content, userNickname and publisherNickname have their \\uXXXX
          escapes decoded;
        - content is stored WITHOUT the surrounding double quotes;
        - ip is set to "" when it does not look like a dotted IPv4 address.
    For a main comment curCmtIdx is N (from sN) and curCmtNum is N+1; for a
    sub comment both stay 0 and are expected to be assigned by the caller.

    Raises AttributeError when an expected field is missing.
    """
    logging.debug("singleCmtDwrStr=%s", singleCmtDwrStr)

    #1. check is main comment or sub comment
    isSubComment = re.match(r's\d+\.cardId=', singleCmtDwrStr) is not None
    if isSubComment:
        logging.debug("------- is sub comments")
    else:
        logging.debug("======= is main comments")

    curCmtDict = {
        'curCmtIdx'     : 0,
        'curCmtNum'     : 0,
        'parentCmtNum'  : 0,
        'isSubComment'  : isSubComment,
    }

    #2. find which sN variable this comment uses
    # s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="
    foundCurCmtIdx = _searchDwrOrFail(r's(?P<curCmtIdx>\d+)\.content=".*?";s\1\.id="', singleCmtDwrStr, "comment index")
    curCmtIdx = int(foundCurCmtIdx.group("curCmtIdx"))
    logging.debug("curCmtIdx=%d", curCmtIdx)
    if not isSubComment:
        # only meaningful for main comments; sub comments are renumbered later
        curCmtDict['curCmtIdx'] = curCmtIdx
        curCmtDict['curCmtNum'] = curCmtIdx + 1
    strSn = "s" + str(curCmtIdx)

    #3. content: everything between the quotes, up to the following sN.id=
    foundContent = _searchDwrOrFail(strSn + r'\.content="(?P<content>.*?)";' + strSn + r'\.id="', singleCmtDwrStr, "content")
    curCmtDict['content'] = _decodeDwrEscapes(foundContent.group("content"))
    logging.debug("content=%s", curCmtDict['content'])

    #4. all the plain "sN.field=value;" assignments
    if isSubComment:
        fieldList = _DWR_COMMON_FIELDS + _DWR_SUB_CMT_FIELDS
    else:
        fieldList = _DWR_COMMON_FIELDS + _DWR_MAIN_CMT_FIELDS
    for fieldName, valuePattern in fieldList:
        foundField = _searchDwrOrFail(strSn + r'\.' + fieldName + '=' + valuePattern + ';', singleCmtDwrStr, fieldName)
        curCmtDict[fieldName] = foundField.group(1)
        logging.debug("%s=%s", fieldName, curCmtDict[fieldName])

    #5. per-field post processing
    if isSubComment:
        # s0.ip=null; -> no usable address
        if not re.search(r"\d+\.\d+\.\d+\.\d+", curCmtDict['ip']):
            curCmtDict['ip'] = ""
        curCmtDict['publisherNickname'] = _decodeDwrEscapes(curCmtDict['publisherNickname'])
    else:
        # nicknames may be \uXXXX escaped too, same as publisherNickname
        curCmtDict['userNickname'] = _decodeDwrEscapes(curCmtDict['userNickname'])

    return curCmtDict
def parseSubCmtDwrStrToSubCmtDictList(subCmtDwrStr):
    """
    Convert a sub comment DWR reply into a list of sub comment dicts.

    The reply is cut into one chunk per comment - from "sN.cardId=" up to
    and including "sN.valid=<digits>;" plus the whitespace character that
    follows it - and each chunk is handed to parseSingleDwrStrToCmtDict.
    Returns an empty list when the reply contains no such chunk.
    """
    # sample reply:
    # //#DWR-INSERT
    # //#DWR-REPLY
    # var s0={};var s1=[];s0.cardId="134875456";s0.content="\u81EA\u5DF1\u4E70\u70B9\u6C34\u679C\u5403\u3002";s0.id="73300019";s0.ip="203.234.215.66"; ... ;s0.synchMiniBlog=false;s0.userId=186541395;s0.valid=0;
    # dwr.engine._remoteHandleCallback('1','0',[s0]);
    singleSubCmtDwrStrList = re.findall(r's\d+\.cardId=.+?s\d+\.valid=\d+;(?:\s)', subCmtDwrStr)
    logging.debug("len(subCmtStrList)=%d", len(singleSubCmtDwrStrList))

    return [parseSingleDwrStrToCmtDict(eachDwrStr) for eachDwrStr in singleSubCmtDwrStrList]
def parseMainCmtDwrStrToMainCmtDictList(respDwrReplyStr):
    """
    Convert a main comment (feeling card) DWR-REPLY string into a list of
    main comment dicts.

    The reply is cut into one chunk per card - from "sN.commentCount=" up to
    and including sN.userNickname="...";  plus the whitespace character that
    follows it - and each chunk is handed to parseSingleDwrStrToCmtDict.
    Returns an empty list when the reply contains no such chunk.
    """
    # sample chunks:
    # s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";
    # s1.commentCount=1;s1.content="...[P]\u5FAE\u7B11[/P]";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null; ... ;s1.userNickname="Neysa";
    singleMainCmtDwrStrList = re.findall(r's\d+\.commentCount=.+?s\d+\.userNickname=".+?";(?:\s)', respDwrReplyStr)
    logging.debug("len(mainCmtDwrStrList)=%d", len(singleMainCmtDwrStrList))

    return [parseSingleDwrStrToCmtDict(eachDwrStr) for eachDwrStr in singleMainCmtDwrStrList]
#------------------------------------------------------------------------------
def fillComments_fellingCard(destCmtDict, srcCmtDict):
    """
    Copy one parsed feeling card comment into the export comment dict.

    srcCmtDict is either a main comment dict or a sub comment dict, told
    apart by srcCmtDict['isSubComment'].  destCmtDict is filled in place with
    the keys id, author, author_email, author_url, author_IP, date, date_gmt,
    content, approved, type, parent and user_id, and is also returned.

    Sub comments take author/email/url/IP from their publisher* and ip
    fields; main comments use userNickname, an empty email and IP, and
    gVal['blogEntryUrl'] as the author url.
    """
    logging.debug("--------- source comment: idx=%d, num=%d ---------", srcCmtDict['curCmtIdx'], srcCmtDict['curCmtNum'])

    destCmtDict['id'] = srcCmtDict['curCmtNum']

    if srcCmtDict['isSubComment']:
        destCmtDict['author'] = srcCmtDict['publisherNickname']
        # e.g. s0.publisherName="chenlin198412@126";
        destCmtDict['author_email'] = srcCmtDict['publisherName']
        destCmtDict['author_url'] = saxutils.escape(genNeteaseUserUrl(srcCmtDict['publisherName']))
        destCmtDict['author_IP'] = srcCmtDict['ip']
    else:
        destCmtDict['author'] = srcCmtDict['userNickname']
        destCmtDict['author_email'] = ""
        destCmtDict['author_url'] = saxutils.escape(gVal['blogEntryUrl'])
        destCmtDict['author_IP'] = ""

    # publishTime is in milliseconds, e.g. s0.publishTime=1374626867596;
    # it is reduced to seconds and passed on as a string
    publishTimeSec = int(srcCmtDict['publishTime']) / 1000
    localTime = crifanLib.timestampToDatetime(str(publishTimeSec))
    gmtTime = crifanLib.convertLocalToGmt(localTime)
    timeFormat = "%Y-%m-%d %H:%M:%S"
    destCmtDict['date'] = localTime.strftime(timeFormat)
    destCmtDict['date_gmt'] = gmtTime.strftime(timeFormat)

    # content is used exactly as delivered by the parser
    destCmtDict['content'] = srcCmtDict['content']

    destCmtDict['approved'] = 1
    destCmtDict['type'] = ''
    destCmtDict['parent'] = srcCmtDict['parentCmtNum']
    destCmtDict['user_id'] = 0

    for loggedKey in ('author', 'author_email', 'author_IP', 'author_url', 'date', 'date_gmt', 'content', 'parent'):
        logging.debug(loggedKey + "=%s", destCmtDict[loggedKey])

    return destCmtDict
#------------------------------------------------------------------------------
# fetch and parse comments
# return the parsed dict value
def fetchAndParseComments(url, html):
    """
    Fetch the comments that belong to url and return them as a list of
    filled comment dicts.

    When url equals gVal['special']['feelingCard']['url'] the comments come
    from fetchComments_feelingCard and are filled by fillComments_fellingCard
    (html is not used).  Otherwise html is turned into a soup, the comments
    come from fetchComments(url, soup) and are filled by fillComments.
    Returns an empty list when nothing was fetched.
    """
    isFeelingCard = (url == gVal['special']['feelingCard']['url'])

    if isFeelingCard:
        rawCmtDictList = fetchComments_feelingCard()
    else:
        # extract comments if exist
        rawCmtDictList = fetchComments(url, htmlToSoup(html))

    if not rawCmtDictList:
        return []

    # got valid comments: fill every field of a fresh dict per comment
    fillSingleComment = fillComments_fellingCard if isFeelingCard else fillComments
    return [fillSingleComment({}, eachRawCmtDict) for eachRawCmtDict in rawCmtDictList]
【总结】
其实,分析逻辑、获取评论数据本身并不是很难。
麻烦的是写代码去提取评论数据:字段多、比较琐碎,是个体力活……
关于抓取网易163博客的FeelingCard心情随笔的完整的代码,可以去看:
转载请注明:在路上 » 【记录】用Python解析网易163博客的心情随笔FeelingCard返回的DWR-REPLY数据