最新消息:20210917 已从crifan.com换到crifan.org

【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之后

CodeShare crifan 1782浏览 0评论

【背景】

和:

【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之前

类似,但是到了现在(2013-09-12),

http://fiverr.com

网站已经改版,所以重新写了代码去爬取该网站。

【FiverComScraper 代码】

1.截图:

【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之前

一样,就不贴了。

2.完整项目代码下载:

FiverrComScraper_2013-09-12_afterWebsiteChange.7z

 

3.源码分享:

(1)frmFiverrComScraper.cs

/*
 * [File]
 * frmFiverrComScraper.cs
 * 
 * [Function]
 * scrape fiverr.com
 * 
 * [Note]
 * 
 * [update]
 * 2013-09-12
 * 
 * [Author]
 * Crifan Li
 * 
 * [Contact]
 * https://www.crifan.org/contact_me/
 * 
 * [History]
 */

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

using System.Xml;
using System.IO;
using System.Web;
using Excel = Microsoft.Office.Interop.Excel;
using Microsoft.Office.Interop.Excel;

using HtmlAgilityPack;


/*
 * icons:
 * 
 * search/find
 * http://www.easyicon.cn/icondetail/106/
 * 
 * stop
 * http://www.easyicon.cn/icondetail/568811/
 * 
 * excel
 * http://www.easyicon.cn/icondetail/1087666/
 * 
 * csv
 * http://www.easyicon.cn/icondetail/558199/
 * 
 * help
 * http://www.easyicon.cn/icondetail/12270/
 */

namespace FiverComScraper
{
    public partial class frmFiverrComScraper : Form
    {
        // Scheme + host that is prepended to the relative gig url of each search result.
        const string fiverrComDomain = "http://fiverr.com";
        // Helper library instance; created in the constructor.
        public crifanLib crifanLib;
        // Button column holding the gig url; created in the constructor and
        // appended to dgvSearchResult in frmFiverrComScraper_Load.
        public DataGridViewButtonColumn gigUrlColumn = null;
        // Column index used to address the gig url button cell (the grid has 12 text columns before it).
        public static int girUrlColumnIdx = 12;

        // Whether more gigs should still be scraped; set to false by the Pause and Stop handlers.
        bool needGetMore = true;

        // State of the search; updateUI() enables/disables the buttons according to it.
        enum search_status
        {
            // no search running: only the Search button is enabled
            SEARCH_STATUS_STOPPED,
            // search running: Pause and Stop are enabled
            SEARCH_STATUS_SEARCHING,
            // search interrupted by Pause: "Continue Search" and Stop are enabled
            SEARCH_STATUS_PAUSED
        };
        // Current state; the form starts in the stopped state.
        search_status curSearchStatus = search_status.SEARCH_STATUS_STOPPED;

        // Progress of the current search, kept in a field so that a paused search can be continued.
        public struct search_info
        {
            // number of the result page to request (a new search starts at 1)
            public int pageNum;
            // url that was requested last
            public string searchUrl;
            // not assigned anywhere in this file
            public string searchRespHtml;
            // not assigned anywhere in this file
            public HtmlAgilityPack.HtmlDocument htmlDoc;

            //public XmlNamespaceManager m;
            //public HtmlNodeCollection gigDataList;
            //public int nodeIdx;

            // entries of the "gigs" json array of the page fetched last
            public Object[] itemObjList;
            // index into itemObjList used by the item loops in btnSearch_Click
            public int curItemIdx;
        };
        // Progress of the search that is currently running or paused.
        search_info curSearchInfo = new search_info();

        // One entry of the "gigs" json array returned by the search request;
        // the field names equal the json keys read in processSingleGigSearchItemObject.
        public struct gigSearchItemInfo
        {
            // sample entry:
            //{"title":"be your SEO teacher","title_full":"be your SEO teacher","duration":1,"price":"$5","rating":10,"rating_count":7,"is_featured":false,"gig_id":1082012,"gig_url":"/trickyguy/be-your-seo-teacher","img_medium":"<img src=\\"http://cdn3.fiverrcdn.com/photos/1082012/v2_162/seo-advice.jpg?1349431594\\"  alt=\\"be your SEO teacher\\"   >","video_thumb":false,"seller_name":"trickyguy","seller_created_at":"12 months","seller_country_name":"India","seller_country":"in","seller_url":"/trickyguy","seller_level":null,"gig_image":"<img src=\\"http://cdn3.fiverrcdn.com/photos/1082012/v2_162/seo-advice.jpg?1349431594\\"  alt=\\"be your SEO teacher\\"   >"},
            public string title;
            // shown in the "Title" column
            public string title_full;
            // shown as "<duration> days" in the "Estimated Delivery" column
            public int duration;
            public string price;
            // multiplied by 10 for the "Gig Rating" column
            public int rating;
            public int rating_count;
            // shown in the "Is Express Gig" column
            public bool is_featured;
            public int gig_id;
            // relative url, e.g. "/trickyguy/be-your-seo-teacher"
            public string gig_url;
            public string img_medium;
            // the json value is either a bool or an "<img ...>" html string;
            // a bool is stored as empty string, a non-empty string means the gig has a video
            public string video_thumb;
            public string seller_name;
            public string seller_created_at;
            // e.g. "India"; shown in the "Country Flag" column
            public string seller_country_name;
            // e.g. "in"
            public string seller_country;
            public string seller_url;
            public string seller_level; //null/"level_two_seller"/"top_rated_seller"/
            public string gig_image;
        };

        // Creates the form: hooks the resolver for the embedded dlls, builds the designer
        // controls, then creates the helper library and the gig url button column.
        public frmFiverrComScraper()
        {
            // resolver is registered before the designer controls are created
            AppDomain.CurrentDomain.AssemblyResolve += CurrentDomain_AssemblyResolve;

            InitializeComponent();

            this.crifanLib = new crifanLib();
            this.gigUrlColumn = new DataGridViewButtonColumn();
        }

        //for load embedded dll
        // Resolver for dlls that are embedded in this assembly's Properties.Resources.
        //
        // args.Name is either a full assembly display name ("Name, Version=..., ...") or a
        // plain (file) name; the simple name with every '.' replaced by '_' is used as the
        // resource key.
        //
        // Returns the assembly loaded from the embedded bytes, or null when this handler
        // can not provide the assembly (satellite "*.resources" request, or no byte[]
        // resource stored under that key). Returning null instead of throwing lets the
        // runtime report the normal "could not load assembly" failure.
        System.Reflection.Assembly CurrentDomain_AssemblyResolve(object sender, ResolveEventArgs args)
        {
            string requestedName = args.Name;
            int commaPos = requestedName.IndexOf(',');

            string dllName = (commaPos >= 0)
                ? requestedName.Substring(0, commaPos)
                : requestedName.Replace(".dll", "");

            dllName = dllName.Replace(".", "_");

            // satellite resource assemblies are never embedded
            if (dllName.EndsWith("_resources"))
            {
                return null;
            }

            System.Resources.ResourceManager rm = new System.Resources.ResourceManager(GetType().Namespace + ".Properties.Resources", System.Reflection.Assembly.GetExecutingAssembly());

            // GetObject returns null for an unknown key, and the resource could be of another
            // type: 'as' maps both cases to null instead of throwing
            byte[] bytes = rm.GetObject(dllName) as byte[];
            if (null == bytes)
            {
                return null;
            }

            return System.Reflection.Assembly.Load(bytes);
        }

        // Form load handler: configures the result grid (12 text columns plus the
        // "Gig Url" button column) and brings the buttons in line with the current status.
        private void frmFiverrComScraper_Load(object sender, EventArgs e)
        {
            // header text and width of the text columns, in display order
            string[] headerTexts =
            {
                "Title",                //(1)title
                "Seller Rating",        //(2)seller rating ( based on 1-100% format )
                "Estimated Delivery",   //(3)estimated delivery ( based on 24 hours - 7days format )
                "Gig Rating",           //(4)gig rating ( based on 1-100% )
                "Orders in Queue",      //(5)orders in queue ( based on 0-9999 format )
                "Seller Level",         //(6)level of the seller ( 1-3 )
                "Has Video",            //(7)has video ( yes or no )
                "Is Express Gig",       //(8)express gigs ( yes or no )
                "Country Flag",         //(9)country flag
                "Positive Reviews",     //(10)positive reviews ( based on 1-9999 )
                "Negative Reviews",     //(10)negative reviews ( based on 1-9999 )
                "Is Top Rated Seller"   //(11)top rated seller ( yes or no )
            };
            int[] columnWidths = { 100, 49, 66, 47, 54, 47, 42, 55, 106, 57, 60, 50 };

            //DataGridView init
            dgvSearchResult.ColumnCount = headerTexts.Length;

            dgvSearchResult.RowHeadersWidth = 60;
            dgvSearchResult.RowHeadersDefaultCellStyle.Alignment = DataGridViewContentAlignment.MiddleCenter;
            dgvSearchResult.RowHeadersWidthSizeMode = DataGridViewRowHeadersWidthSizeMode.DisableResizing;

            dgvSearchResult.AutoSizeColumnsMode = DataGridViewAutoSizeColumnsMode.None;
            dgvSearchResult.AutoSizeRowsMode = DataGridViewAutoSizeRowsMode.AllCellsExceptHeaders;

            // the title column takes the remaining width
            dgvSearchResult.Columns[0].AutoSizeMode = DataGridViewAutoSizeColumnMode.Fill;

            for (int colIdx = 0; colIdx < headerTexts.Length; colIdx++)
            {
                DataGridViewColumn curColumn = dgvSearchResult.Columns[colIdx];
                curColumn.HeaderText = headerTexts[colIdx];
                curColumn.Width = columnWidths[colIdx];
            }

            //(12)gig url: shown as a button column appended after the text columns
            gigUrlColumn.HeaderText = "Gig Url";
            gigUrlColumn.Text = "Buy Now";
            gigUrlColumn.Width = 106;
            dgvSearchResult.Columns.Add(gigUrlColumn);

            updateUI();
        }

        //update UI according current status
        // Sets caption/enabled state of the Search, Pause and Stop buttons for the current status.
        private void updateUI()
        {
            string searchCaption;
            bool canSearch;
            bool canPause;
            bool canStop;

            switch (curSearchStatus)
            {
                case search_status.SEARCH_STATUS_STOPPED:
                    searchCaption = "Search";
                    canSearch = true;
                    canPause = false;
                    canStop = false;
                    break;
                case search_status.SEARCH_STATUS_PAUSED:
                    searchCaption = "Continue Search";
                    canSearch = true;
                    canPause = false;
                    canStop = true;
                    break;
                case search_status.SEARCH_STATUS_SEARCHING:
                    searchCaption = "Searching";
                    canSearch = false;
                    canPause = true;
                    canStop = true;
                    break;
                default:
                    // unknown status: leave the buttons untouched
                    return;
            }

            btnSearch.Enabled = canSearch;
            btnSearch.Text = searchCaption;

            btnPause.Enabled = canPause;
            btnStop.Enabled = canStop;
        }

        // Builds the grid row for one search result: fetches the gig page, combines the values
        // scraped from it with the values of the search-result json item, appends the row
        // via storeGigInfo() and lets the UI process pending events.
        //
        // gigItemInfo: the json item of the gig as filled by processSingleGigSearchItemObject.
        //
        // Values that can not be found on (or parsed from) the gig page are stored as 0,
        // so one unexpected page does not abort the whole search.
        private void processEachGitItemInfo(gigSearchItemInfo gigItemInfo)
        {
            gigInfo singleGigInfo = new gigInfo();

            //(12)gig url
            singleGigInfo.gigUrl = fiverrComDomain + gigItemInfo.gig_url;

            string respGigHtml = crifanLib.getUrlRespHtml_multiTry(singleGigInfo.gigUrl);
            HtmlNode rootNode = crifanLib.htmlToHtmlDoc(respGigHtml).DocumentNode;

            //(1)title
            singleGigInfo.title = gigItemInfo.title_full;

            //(2)seller rating ( based on 1-100% format )
            //http://fiverr.com/betaomicronalph/write-seo-articles-for-you
            //<div class="stats-row stats-row-ratings cf">
            //    <div class="stat ">91%<small>positive rating</small></div>
            //    <div class="stat-sml">by</div>
            //    <div class="stat">590<small>votes</small></div>
            //</div>
            singleGigInfo.sellerRating = 0;
            HtmlNode positiveRaingNode = rootNode.SelectSingleNode("//div[@class='stats-row stats-row-ratings cf']/div[@class='stat ']");
            if (null != positiveRaingNode)
            {
                //"100%positive rating"
                string ratingPercentStr = "";
                if (crifanLib.extractSingleStr(@"(\d+)%", positiveRaingNode.InnerText, out ratingPercentStr))
                {
                    singleGigInfo.sellerRating = parseScrapedInt(ratingPercentStr);
                }
            }

            //(3)estimated delivery ( based on 24 hours - 7days format )
            singleGigInfo.estimatedDeliveryStr = gigItemInfo.duration.ToString() + " days";

            //(4)gig rating ( based on 1-100% )
            singleGigInfo.gigRating = gigItemInfo.rating * 10;

            //(5)orders in queue ( based on 0-9999 format )
            //http://fiverr.com/trickyguy/be-your-seo-teacher
            //<div class="stats-row stats-row-locked icn-orders cf">
            //    <div class="stat">0</div>
            //    <div class="stat"><small>orders<br>in queue</small></div>
            //</div>
            HtmlNode icnOrdersCfStatNode = rootNode.SelectSingleNode("//div[@class='stats-row stats-row-locked icn-orders cf']/div[@class='stat']");
            singleGigInfo.ordersInQueue = parseScrapedInt(nodeText(icnOrdersCfStatNode)); //"0"

            //(6)level of the seller ( 1-3 )
            //(11)top rated seller ( yes or no )
            singleGigInfo.isTopRatedSeller = false;
            switch (gigItemInfo.seller_level)
            {
                case "level_one_seller":
                    singleGigInfo.sellerLevel = 1;
                    break;
                case "level_two_seller":
                    singleGigInfo.sellerLevel = 2;
                    break;
                case "top_rated_seller":
                    singleGigInfo.sellerLevel = 3;
                    singleGigInfo.isTopRatedSeller = true;
                    break;
                default:
                    // null/empty/unknown level
                    singleGigInfo.sellerLevel = 0;
                    break;
            }

            //(7)has video ( yes or no )
            singleGigInfo.hasVideo = (!string.IsNullOrEmpty(gigItemInfo.video_thumb));

            //(8)express gigs (yes or no )
            singleGigInfo.isExpressGig = gigItemInfo.is_featured;

            //(9)country flag
            singleGigInfo.coutryFlag = gigItemInfo.seller_country_name; //"India"

            //(10)positive and negative reviews ( based on 1-9999 )
            //http://fiverr.com/betaomicronalph/write-seo-articles-for-you
            //<div class="reviews-summary" itemprop="aggregateRating" itemscope itemtype="http://schema.org/AggregateRating">
            //    <span class="summary summary-pos" itemprop="ratingValuePositive" content="4.6">537</span>
            //    <span class="summary summary-neg" itemprop="reviewCount" content="53">53</span>
            //</div>
            HtmlNode summaryPosNode = rootNode.SelectSingleNode("//div[@class='reviews-summary' and @itemprop='aggregateRating']/span[@class='summary summary-pos']");
            singleGigInfo.positiveReviews = parseScrapedInt(nodeText(summaryPosNode)); //"537"

            HtmlNode summaryNegNode = rootNode.SelectSingleNode("//div[@class='reviews-summary' and @itemprop='aggregateRating']/span[@class='summary summary-neg']");
            singleGigInfo.negativeReviews = parseScrapedInt(nodeText(summaryNegNode)); //"53"

            storeGigInfo(singleGigInfo);

            //update UI
            System.Windows.Forms.Application.DoEvents();
        }

        // Inner text of an html node, or null when the node was not found.
        private static string nodeText(HtmlNode node)
        {
            return (null == node) ? null : node.InnerText;
        }

        // Converts scraped text such as "53", " 7 " or "1,234" to an int; returns 0 when the
        // text is null or not a number (Int32.Parse would throw and abort the search).
        private static int parseScrapedInt(string numberText)
        {
            if (string.IsNullOrEmpty(numberText))
            {
                return 0;
            }

            int parsedValue;
            bool parsedOk = Int32.TryParse(
                numberText.Trim(),
                System.Globalization.NumberStyles.Integer | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture,
                out parsedValue);

            return parsedOk ? parsedValue : 0;
        }

        // One row of the result grid, as filled by processEachGitItemInfo
        // and written to dgvSearchResult by storeGigInfo.
        public struct gigInfo
        {
            // full title of the gig
            public string title;
            // number in front of '%' in the "positive rating" stat of the gig page
            public int sellerRating;
            // "<duration> days"
            public string estimatedDeliveryStr;
            // rating of the search-result item multiplied by 10
            public int gigRating;
            // "orders in queue" stat of the gig page
            public int ordersInQueue;
            // 1, 2 or 3 (top rated); 0 when the seller has no known level
            public int sellerLevel;
            // true when the search-result item has a non-empty video thumb
            public bool hasVideo;
            // is_featured of the search-result item
            public bool isExpressGig;
            // country name of the seller, e.g. "India"
            public string coutryFlag;
            // review counters of the gig page
            public int positiveReviews;
            public int negativeReviews;
            // true when the seller level is "top_rated_seller"
            public bool isTopRatedSeller;
            // absolute url of the gig page
            public string gigUrl;
        };
        

        // Converts one entry of the "gigs" json array into a gigSearchItemInfo and hands it
        // to processEachGitItemInfo().
        //
        // singleGigItemObject: the json object of the gig, expected to be a
        //                      Dictionary<string, Object>; anything else is skipped.
        //
        // A key that is missing or whose value is null yields "" / 0 / false for that field
        // (calling ToString() on the missing value would throw NullReferenceException).
        private void processSingleGigSearchItemObject(Object singleGigItemObject)
        {
            Dictionary<string, Object> itemDict = singleGigItemObject as Dictionary<string, Object>;
            if (null == itemDict)
            {
                // not a json object -> nothing that could be processed
                return;
            }

            gigSearchItemInfo itemInfo = new gigSearchItemInfo();

            itemInfo.title = getItemStr(itemDict, "title");
            itemInfo.title_full = getItemStr(itemDict, "title_full");
            itemInfo.duration = getItemInt(itemDict, "duration");
            itemInfo.price = getItemStr(itemDict, "price");
            itemInfo.rating = getItemInt(itemDict, "rating");
            itemInfo.rating_count = getItemInt(itemDict, "rating_count");
            itemInfo.is_featured = getItemBool(itemDict, "is_featured");
            itemInfo.gig_id = getItemInt(itemDict, "gig_id");
            itemInfo.gig_url = getItemStr(itemDict, "gig_url");
            itemInfo.img_medium = getItemStr(itemDict, "img_medium");

            //normal:
            //"video_thumb":false
            //special:
            //"video_thumb":"<img src=\\"http://static.dmcloud.net/4e5bf73e94a6f629c900461b/5172ce9c06361d76ae000218/thumb-162x121-f.jpeg\\"
            // -> only a string value is kept; a bool (false, or the unexpected true) becomes ""
            Object objItemVideoThumb;
            itemDict.TryGetValue("video_thumb", out objItemVideoThumb);
            string strVideoThumb = objItemVideoThumb as string;
            itemInfo.video_thumb = (null != strVideoThumb) ? strVideoThumb : string.Empty;

            itemInfo.seller_name = getItemStr(itemDict, "seller_name");
            itemInfo.seller_created_at = getItemStr(itemDict, "seller_created_at");
            itemInfo.seller_country_name = getItemStr(itemDict, "seller_country_name");
            itemInfo.seller_country = getItemStr(itemDict, "seller_country");
            itemInfo.seller_url = getItemStr(itemDict, "seller_url");
            //may be null in the json -> ""
            itemInfo.seller_level = getItemStr(itemDict, "seller_level");
            itemInfo.gig_image = getItemStr(itemDict, "gig_image");

            processEachGitItemInfo(itemInfo);
        }

        // String form of the value stored under key; "" when the key is missing or the value is null.
        private static string getItemStr(Dictionary<string, Object> itemDict, string key)
        {
            Object itemValue;
            if (itemDict.TryGetValue(key, out itemValue) && (null != itemValue))
            {
                return itemValue.ToString();
            }

            return string.Empty;
        }

        // Integer stored under key; 0 when the key is missing, the value is null or not an integer.
        private static int getItemInt(Dictionary<string, Object> itemDict, string key)
        {
            Object itemValue;
            if (!itemDict.TryGetValue(key, out itemValue) || (null == itemValue))
            {
                return 0;
            }

            if (itemValue is int)
            {
                return (int)itemValue;
            }

            int parsedValue;
            bool parsedOk = Int32.TryParse(
                itemValue.ToString(),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out parsedValue);

            return parsedOk ? parsedValue : 0;
        }

        // Boolean stored under key; false when the key is missing, the value is null or not a boolean.
        private static bool getItemBool(Dictionary<string, Object> itemDict, string key)
        {
            Object itemValue;
            if (!itemDict.TryGetValue(key, out itemValue) || (null == itemValue))
            {
                return false;
            }

            if (itemValue is bool)
            {
                return (bool)itemValue;
            }

            bool parsedValue;
            return Boolean.TryParse(itemValue.ToString(), out parsedValue) && parsedValue;
        }

        // True while the response fetched last reported ("next_page") that further result pages
        // exist; kept as a field so that a paused search still knows it when it is continued.
        private bool hasMoreSearchPages = true;

        // Click handler of the Search / "Continue Search" button.
        //
        // Stopped -> starts a new search for the keyword in txbKeyword (previous results are cleared).
        // Paused  -> continues the interrupted search with the first item that was not processed yet.
        // Any other status -> nothing is searched.
        //
        // The handler returns when all result pages are processed (the status is then set back
        // to stopped) or when the Pause/Stop handler cleared needGetMore; those handlers can
        // run in between because processing an item pumps the message queue.
        private void btnSearch_Click(object sender, EventArgs e)
        {
            Dictionary<string, string> headerDict;

            //STEP1: access main page
            string fiverMainUrl = "http://fiverr.com/";
            string respHtml = crifanLib.getUrlRespHtml_multiTry(fiverMainUrl);
            HtmlNode rootNode = crifanLib.htmlToHtmlDoc(respHtml).DocumentNode;

            //1.extract category_id
            //<div class="gig-carousel gallery loading cf carousel-noaction" data-json-path="/gigs/endless_page_as_json?host=homepage&amp;type=endless&amp;category_id=99912&amp;limit=30" ... >
            string categoryId = "";
            HtmlNode datajsonPathNode = rootNode.SelectSingleNode("//div[@class and contains(@data-json-path, 'category_id=')]");
            if (null != datajsonPathNode)
            {
                string dataJsonPathValue = datajsonPathNode.Attributes["data-json-path"].Value;
                if (!crifanLib.extractSingleStr(@"category_id=(\d+)", dataJsonPathValue, out categoryId))
                {
                    // not found: search without a category id
                    categoryId = "";
                }
            }

            //2. extract X-CSRF-Token value
            //<meta content="6j77ymABhWzqVarvSOXSIl4MwW3KrEESH8rofrLem4w=" name="csrf-token" />
            string csrfTokenValue = "";
            HtmlNode csrfTokenNode = rootNode.SelectSingleNode("//meta[@content and @name='csrf-token']");
            if (null != csrfTokenNode)
            {
                csrfTokenValue = csrfTokenNode.Attributes["content"].Value;
            }

            //STEP2: access search url
            //http://fiverr.com/gigs/search?utf8=%E2%9C%93&search_in=everywhere&query=seo&x=12&y=12
            curSearchInfo.searchUrl = "http://fiverr.com/gigs/search?utf8=%E2%9C%93"
                + "&search_in=everywhere"
                + "&query=" + HttpUtility.UrlEncode(txbKeyword.Text)
                + "&x=12"
                + "&y=12";
            headerDict = new Dictionary<string, string>();
            headerDict.Add("Referer", fiverMainUrl);
            // the returned html is not used below; the results are read from the json request in STEP3
            crifanLib.getUrlRespHtml_multiTry(curSearchInfo.searchUrl, headerDict: headerDict);

            if (curSearchStatus == search_status.SEARCH_STATUS_PAUSED)
            {
                //continue search
                needGetMore = true;
                curSearchStatus = search_status.SEARCH_STATUS_SEARCHING;
                updateUI();

                // finish the page that was interrupted by the pause
                processPendingSearchItems();
            }
            else if (curSearchStatus == search_status.SEARCH_STATUS_STOPPED)
            {
                // new search -> clear previously searched result
                crifanLib.dgvClearContent(dgvSearchResult);

                curSearchStatus = search_status.SEARCH_STATUS_SEARCHING;
                updateUI();

                curSearchInfo = new search_info();
                curSearchInfo.pageNum = 1;
                curSearchInfo.curItemIdx = 0;

                hasMoreSearchPages = true;
                needGetMore = true;
            }
            else
            {
                //unexpected status
                return;
            }

            //STEP3: search each page, got json, parse search result to list
            const int numPerPage = 50;
            while (needGetMore && hasMoreSearchPages)
            {
                //page 1:
                //http://fiverr.com/gigs/gigs_as_json?host=search&type=best_match&query_string=seo&search_filter=auto&category_id=99912&limit=50&page=1
                //page 2:
                //http://fiverr.com/gigs/gigs_as_json?host=search&type=best_match&query_string=seo&search_filter=auto&category_id=99912&limit=50&page=2
                curSearchInfo.searchUrl = "http://fiverr.com/gigs/gigs_as_json?"
                    + "host=search"
                    + "&type=best_match"
                    + "&query_string=" + HttpUtility.UrlEncode(txbKeyword.Text)
                    + "&search_filter=auto"
                    + "&category_id=" + categoryId
                    + "&limit=" + numPerPage.ToString()
                    + "&page=" + curSearchInfo.pageNum.ToString();
                headerDict = new Dictionary<string, string>();
                headerDict.Add("X-CSRF-Token", csrfTokenValue);
                headerDict.Add("X-Requested-With", "XMLHttpRequest");

                string respGigJson = crifanLib.getUrlRespHtml(curSearchInfo.searchUrl, headerDict: headerDict);

                //got json string like this:
                //{"gigs":[
                //{"title":"be your SEO teacher", ... ,"gig_url":"/trickyguy/be-your-seo-teacher", ... ,"seller_level":null, ... },
                //....
                //],"total_results":519,"next_page":true}
                Dictionary<string, Object> gigsObjDict = crifanLib.jsonToDict(respGigJson) as Dictionary<string, Object>;
                if (null == gigsObjDict)
                {
                    //some error ?
                    hasMoreSearchPages = false;
                    break;
                }

                // only decides whether ANOTHER page is requested afterwards;
                // the items of this page are processed in any case
                Object objNextPage;
                hasMoreSearchPages = gigsObjDict.TryGetValue("next_page", out objNextPage)
                    && (objNextPage is bool)
                    && (bool)objNextPage;

                Object itemDictListObj;
                gigsObjDict.TryGetValue("gigs", out itemDictListObj);
                Object[] gigItemList = itemDictListObj as Object[];
                if (null == gigItemList)
                {
                    //some error ?
                    hasMoreSearchPages = false;
                    break;
                }

                curSearchInfo.itemObjList = gigItemList;
                curSearchInfo.curItemIdx = 0;
                processPendingSearchItems();
            }

            if (curSearchStatus == search_status.SEARCH_STATUS_SEARCHING)
            {
                // neither paused nor stopped by the user -> the search ran to its end
                curSearchStatus = search_status.SEARCH_STATUS_STOPPED;
                updateUI();
            }
        }

        // Processes the not yet processed items of the current result page, starting at
        // curSearchInfo.curItemIdx, until the page is done or needGetMore got cleared.
        // Only when the page is completely processed the page number is advanced and the item
        // list is dropped, so a page interrupted by Pause is neither skipped nor counted twice.
        private void processPendingSearchItems()
        {
            while (needGetMore
                && (null != curSearchInfo.itemObjList)
                && (curSearchInfo.curItemIdx < curSearchInfo.itemObjList.Length))
            {
                processSingleGigSearchItemObject(curSearchInfo.itemObjList[curSearchInfo.curItemIdx]);
                // incremented after processing: on pause the index refers to the next unprocessed item
                curSearchInfo.curItemIdx++;
            }

            if ((null != curSearchInfo.itemObjList)
                && (curSearchInfo.curItemIdx >= curSearchInfo.itemObjList.Length))
            {
                //update for next page
                curSearchInfo.pageNum++;
                curSearchInfo.itemObjList = null;
                curSearchInfo.curItemIdx = 0;
            }
        }

        // Click handler of the Pause button: switches a running search to the paused state
        // and asks the search loop to stop fetching more gigs. Ignored in any other state.
        private void btnPause_Click(object sender, EventArgs e)
        {
            bool isSearching = (curSearchStatus == search_status.SEARCH_STATUS_SEARCHING);
            if (!isSearching)
            {
                return;
            }

            curSearchStatus = search_status.SEARCH_STATUS_PAUSED;
            updateUI();

            // progress is kept in curSearchInfo
            needGetMore = false;
        }

        // Click handler of the Stop button: ends a running or paused search.
        // Ignored when no search is active.
        private void btnStopSearching_Click(object sender, EventArgs e)
        {
            switch (curSearchStatus)
            {
                case search_status.SEARCH_STATUS_SEARCHING:
                case search_status.SEARCH_STATUS_PAUSED:
                    curSearchStatus = search_status.SEARCH_STATUS_STOPPED;
                    updateUI();

                    needGetMore = false;
                    break;
                default:
                    // already stopped
                    break;
            }
        }

        // Appends one gig as a new row to the result grid, selects the row and scrolls to it.
        //
        // singleGigInfo: the values of the row; gigUrl is not shown as text but stored in the
        //                Tag of the "Buy Now" button cell (column girUrlColumnIdx).
        //
        // The row is addressed through the index returned by Rows.Add(): the last row of the
        // grid is not the added row when the grid shows the "new row" placeholder.
        void storeGigInfo(gigInfo singleGigInfo)
        {
            int addedRowIdx = dgvSearchResult.Rows.Add(
                singleGigInfo.title,
                singleGigInfo.sellerRating,
                singleGigInfo.estimatedDeliveryStr,
                singleGigInfo.gigRating,
                singleGigInfo.ordersInQueue,
                singleGigInfo.sellerLevel,
                singleGigInfo.hasVideo ? "yes" : "no",
                singleGigInfo.isExpressGig,
                singleGigInfo.coutryFlag,
                singleGigInfo.positiveReviews,
                singleGigInfo.negativeReviews,
                singleGigInfo.isTopRatedSeller);

            DataGridViewRow addedRow = dgvSearchResult.Rows[addedRowIdx];

            // button caption is the cell value, the url travels in the tag
            DataGridViewCell gigUrlCell = addedRow.Cells[girUrlColumnIdx];
            gigUrlCell.Value = "Buy Now";
            gigUrlCell.Tag = singleGigInfo.gigUrl;

            // make the newest row visible
            addedRow.Selected = true;
            dgvSearchResult.FirstDisplayedScrollingRowIndex = addedRowIdx;
            crifanLib.dgvDrawHeaderNum(dgvSearchResult);
        }

        // Click handler of the grid cells: a click on a "Buy Now" button cell opens the gig
        // url stored in the cell's Tag with the program registered for it.
        // Clicks on the header row, on other columns or on a cell without url are ignored
        // (a cell whose Tag was never set would otherwise cause a NullReferenceException).
        private void dgvSearchResult_CellContentClick(object sender, DataGridViewCellEventArgs e)
        {
            if ((e.RowIndex < 0) || (e.ColumnIndex != girUrlColumnIdx))
            {
                return;
            }

            object gigUrlTag = dgvSearchResult.Rows[e.RowIndex].Cells[e.ColumnIndex].Tag;
            string gigUrl = (null == gigUrlTag) ? null : gigUrlTag.ToString();
            if (string.IsNullOrEmpty(gigUrl))
            {
                // no url stored for this row
                return;
            }

            System.Diagnostics.Process.Start(gigUrl);
        }
        
        // Click handler of the "save all" button: exports the result grid to
        // "fiverrComScrapedResult.xls" in the folder returned by crifanLib.getSaveFolder()
        // and then shows the file via crifanLib.openFolderAndSelectFile().
        private void btnSaveAll_Click(object sender, EventArgs e)
        {
            string saveFolder = crifanLib.getSaveFolder(fbdSaveFolder);
            string fullFilename = Path.Combine(saveFolder, "fiverrComScrapedResult" + ".xls");

            // the gig url column is exported with the tag (the url) instead of the button caption
            List<int> useTagColumnIdxList = new List<int> { girUrlColumnIdx };
            crifanLib.dgvExportToExcel(dgvSearchResult, fullFilename, useTagColumnIdxList: useTagColumnIdxList);

            crifanLib.openFolderAndSelectFile(fullFilename);
        }

        // Opens Windows Explorer with the given file selected.
        //
        // fullFilename: full path of the file to select.
        //
        // The path is wrapped in double quotes so that a path containing spaces or commas
        // is still passed to Explorer as one argument.
        private void openFolderAndSelectFile(string fullFilename)
        {
            System.Diagnostics.Process.Start("Explorer.exe", "/select,\"" + fullFilename + "\"");
        }

        // Click handler of the "export to csv" button: exports the result grid, comma separated,
        // to "fiverrComScrapedResult.csv" in the folder returned by crifanLib.getSaveFolder()
        // and then shows the file via crifanLib.openFolderAndSelectFile().
        private void btnExportToCsv_Click(object sender, EventArgs e)
        {
            string saveFolder = crifanLib.getSaveFolder(fbdSaveFolder);
            string fullFilename = Path.Combine(saveFolder, "fiverrComScrapedResult.csv");

            // the gig url column is exported with the tag (the url) instead of the button caption
            List<int> useTagColumnIdxList = new List<int> { girUrlColumnIdx };
            crifanLib.dgvExportToCsv(dgvSearchResult, fullFilename, delimiter: ",", useTagColumnIdxList: useTagColumnIdxList);

            crifanLib.openFolderAndSelectFile(fullFilename);
        }
        
        // Click handler of the "clear all" button: hands the result grid to
        // crifanLib.dgvClearContent().
        private void btnClearAll_Click(object sender, EventArgs e)
        {
            this.crifanLib.dgvClearContent(this.dgvSearchResult);
        }

        // Click handler of the help button: opens the help page with the program
        // registered for the url.
        private void btnHelp_Click(object sender, EventArgs e)
        {
            System.Diagnostics.Process.Start("http://giggladiator.com/help");
        }

        // Click handler with an empty body: currently does nothing.
        private void btnCreateAlert_Click(object sender, EventArgs e)
        {

        }

        // Click handler with an empty body: currently does nothing.
        private void btnExpReaderToExcel_Click(object sender, EventArgs e)
        {

        }

        // Click handler with an empty body: currently does nothing.
        private void btnExpReaderToCsv_Click(object sender, EventArgs e)
        {

        }

    }

}

(2)

 

【总结】

转载请注明:在路上 » 【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之后

发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址
88 queries in 0.193 seconds, using 20.67MB memory