【背景】
和:
【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之前
类似;不同之处在于,截至 2013-09-12,
网站已经改版,因此重新编写了代码,用来爬取改版之后的网站。
【FiverComScraper 代码】
1.截图:
【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之前
一样,就不贴了。
2.完整项目代码下载:
FiverrComScraper_2013-09-12_afterWebsiteChange.7z
3.源码分享:
(1)frmFiverrComScraper.cs
/*
 * [File]
 * frmFiverrComScraper.cs
 *
 * [Function]
 * scrape fiverr.com
 *
 * [Note]
 *
 * [update]
 * 2013-09-12
 *
 * [Author]
 * Crifan Li
 *
 * [Contact]
 * https://www.crifan.org/contact_me/
 *
 * [History]
 */

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Xml;
using System.IO;
using System.Web;

using Excel = Microsoft.Office.Interop.Excel;
using Microsoft.Office.Interop.Excel;

using HtmlAgilityPack;

/*
 * icons:
 *
 * search/find
 * http://www.easyicon.cn/icondetail/106/
 *
 * stop
 * http://www.easyicon.cn/icondetail/568811/
 *
 * excel
 * http://www.easyicon.cn/icondetail/1087666/
 *
 * csv
 * http://www.easyicon.cn/icondetail/558199/
 *
 * help
 * http://www.easyicon.cn/icondetail/12270/
 */
namespace FiverComScraper
{
    /// <summary>
    /// Main form: searches fiverr.com for a keyword, visits every gig found,
    /// lists the scraped values in a DataGridView and exports them to Excel/CSV.
    /// </summary>
    public partial class frmFiverrComScraper : Form
    {
        const string fiverrComDomain = "http://fiverr.com";

        public crifanLib crifanLib;

        public DataGridViewButtonColumn gigUrlColumn = null;

        // index of the "Gig Url" button column; it is appended right after the text columns
        public static int girUrlColumnIdx = 12;

        // false once the user pressed Pause/Stop: the running search loop must exit
        bool needGetMore = true;

        // value of "next_page" in the most recently loaded result page
        bool hasNextPage = false;

        enum search_status
        {
            SEARCH_STATUS_STOPPED,
            SEARCH_STATUS_SEARCHING,
            SEARCH_STATUS_PAUSED
        };

        search_status curSearchStatus = search_status.SEARCH_STATUS_STOPPED;

        /// <summary>Progress of the current search, kept so a paused search can be continued.</summary>
        public struct search_info
        {
            public int pageNum;
            public string searchUrl;
            public string searchRespHtml;
            public HtmlAgilityPack.HtmlDocument htmlDoc;
            // items of the page being processed; null when no page is in progress
            public Object[] itemObjList;
            // index of the next item in itemObjList that still has to be processed
            public int curItemIdx;
        };

        search_info curSearchInfo = new search_info();

        /// <summary>One entry of the "gigs" array in the search result json.</summary>
        public struct gigSearchItemInfo
        {
            //{"title":"be your SEO teacher","title_full":"be your SEO teacher","duration":1,"price":"$5","rating":10,"rating_count":7,"is_featured":false,"gig_id":1082012,"gig_url":"/trickyguy/be-your-seo-teacher","img_medium":"<img src=\\"http://cdn3.fiverrcdn.com/photos/1082012/v2_162/seo-advice.jpg?1349431594\\" alt=\\"be your SEO teacher\\" >","video_thumb":false,"seller_name":"trickyguy","seller_created_at":"12 months","seller_country_name":"India","seller_country":"in","seller_url":"/trickyguy","seller_level":null,"gig_image":"<img src=\\"http://cdn3.fiverrcdn.com/photos/1082012/v2_162/seo-advice.jpg?1349431594\\" alt=\\"be your SEO teacher\\" >"},
            public string title;
            public string title_full;
            public int duration;
            public string price;
            public int rating;
            public int rating_count;
            public bool is_featured;
            public int gig_id;
            public string gig_url;
            public string img_medium;
            public string video_thumb;
            public string seller_name;
            public string seller_created_at;
            public string seller_country_name;
            public string seller_country;
            public string seller_url;
            public string seller_level; //null/"level_two_seller"/"top_rated_seller"/
            public string gig_image;
        };

        /// <summary>Values shown in one row of the result grid.</summary>
        public struct gigInfo
        {
            public string title;
            public int sellerRating;
            public string estimatedDeliveryStr;
            public int gigRating;
            public int ordersInQueue;
            public int sellerLevel;
            public bool hasVideo;
            public bool isExpressGig;
            public string coutryFlag;
            public int positiveReviews;
            public int negativeReviews;
            public bool isTopRatedSeller;
            public string gigUrl;
        };

        public frmFiverrComScraper()
        {
            // must be registered before anything touches a type from an embedded dll
            AppDomain.CurrentDomain.AssemblyResolve += new ResolveEventHandler(CurrentDomain_AssemblyResolve);

            InitializeComponent();

            crifanLib = new crifanLib();
            gigUrlColumn = new DataGridViewButtonColumn();
        }

        /// <summary>
        /// Loads an assembly from this program's embedded resources.
        /// The resource name is the simple assembly name with '.' replaced by '_'.
        /// </summary>
        /// <returns>The loaded assembly, or null when no such resource is embedded.</returns>
        System.Reflection.Assembly CurrentDomain_AssemblyResolve(object sender, ResolveEventArgs args)
        {
            string dllName = args.Name.Contains(",")
                ? args.Name.Substring(0, args.Name.IndexOf(','))
                : args.Name.Replace(".dll", "");
            dllName = dllName.Replace(".", "_");

            if (dllName.EndsWith("_resources", StringComparison.Ordinal))
            {
                return null;
            }

            System.Resources.ResourceManager rm = new System.Resources.ResourceManager(
                GetType().Namespace + ".Properties.Resources",
                System.Reflection.Assembly.GetExecutingAssembly());

            // returning null reports "not resolved" to the runtime; Assembly.Load(null) would throw instead
            byte[] bytes = rm.GetObject(dllName) as byte[];
            if (null == bytes)
            {
                return null;
            }

            return System.Reflection.Assembly.Load(bytes);
        }

        /// <summary>Sets header text and width of one text column of the result grid.</summary>
        private void setupTextColumn(int columnIdx, string headerText, int width)
        {
            dgvSearchResult.Columns[columnIdx].HeaderText = headerText;
            dgvSearchResult.Columns[columnIdx].Width = width;
        }

        private void frmFiverrComScraper_Load(object sender, EventArgs e)
        {
            //DataGridView init
            // text columns occupy 0..girUrlColumnIdx-1, so the button column added below lands on girUrlColumnIdx
            dgvSearchResult.ColumnCount = girUrlColumnIdx;
            dgvSearchResult.RowHeadersWidth = 60;
            dgvSearchResult.RowHeadersDefaultCellStyle.Alignment = DataGridViewContentAlignment.MiddleCenter;
            dgvSearchResult.RowHeadersWidthSizeMode = DataGridViewRowHeadersWidthSizeMode.DisableResizing;
            dgvSearchResult.AutoSizeColumnsMode = DataGridViewAutoSizeColumnsMode.None;
            dgvSearchResult.AutoSizeRowsMode = DataGridViewAutoSizeRowsMode.AllCellsExceptHeaders;

            //(1)title
            dgvSearchResult.Columns[0].AutoSizeMode = DataGridViewAutoSizeColumnMode.Fill;
            setupTextColumn(0, "Title", 100);
            //(2)seller rating ( based on 1-100% format )
            setupTextColumn(1, "Seller Rating", 49);
            //(3)estimated delivery ( based on 24 hours - 7days format )
            setupTextColumn(2, "Estimated Delivery", 66);
            //(4)gig rating ( based on 1-100% )
            setupTextColumn(3, "Gig Rating", 47);
            //(5)orders in que ( based on 0-9999 format )
            setupTextColumn(4, "Orders in Queue", 54);
            //(6)level of the seller ( 1-3 )
            setupTextColumn(5, "Seller Level", 47);
            //(7)haz video ( yes or no )
            setupTextColumn(6, "Has Video", 42);
            //(8)express gigs (yes or no )
            setupTextColumn(7, "Is Express Gig", 55);
            //(9)country flag ( display county flag )
            setupTextColumn(8, "Country Flag", 106);
            //(10)+ve reviews and -ve reviews ( based on 1-9999 )
            setupTextColumn(9, "Positive Reviews", 57);
            setupTextColumn(10, "Negative Reviews", 60);
            //(11)top rated seller ( yes or no )
            setupTextColumn(11, "Is Top Rated Seller", 50);

            //(12)gig url: a button column, the url itself is kept in the cell's Tag
            gigUrlColumn.HeaderText = "Gig Url";
            gigUrlColumn.Text = "Buy Now";
            gigUrlColumn.Width = 106;
            dgvSearchResult.Columns.Add(gigUrlColumn);

            updateUI();
        }

        //update UI according current status
        private void updateUI()
        {
            switch (curSearchStatus)
            {
                case search_status.SEARCH_STATUS_STOPPED:
                    btnSearch.Enabled = true;
                    btnSearch.Text = "Search";
                    btnPause.Enabled = false;
                    btnStop.Enabled = false;
                    break;
                case search_status.SEARCH_STATUS_PAUSED:
                    btnSearch.Enabled = true;
                    btnSearch.Text = "Continue Search";
                    btnPause.Enabled = false;
                    btnStop.Enabled = true;
                    break;
                case search_status.SEARCH_STATUS_SEARCHING:
                    btnSearch.Enabled = false;
                    btnSearch.Text = "Searching";
                    btnPause.Enabled = true;
                    btnStop.Enabled = true;
                    break;
            }
        }

        /// <summary>
        /// Parses an integer (surrounding white space and thousands separators allowed).
        /// </summary>
        /// <returns>The parsed value, or defaultValue when text is null or not an integer.</returns>
        private static int parseIntOrDefault(string text, int defaultValue)
        {
            int parsedValue;
            if ((null != text)
                && Int32.TryParse(
                        text.Trim(),
                        System.Globalization.NumberStyles.Integer | System.Globalization.NumberStyles.AllowThousands,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out parsedValue))
            {
                return parsedValue;
            }

            return defaultValue;
        }

        /// <summary>Returns dict[key] as string; empty string when the key is missing or its value is null.</summary>
        private static string getDictString(Dictionary<string, Object> dict, string key)
        {
            Object value;
            if (dict.TryGetValue(key, out value) && (null != value))
            {
                return value.ToString();
            }

            return string.Empty;
        }

        /// <summary>Returns dict[key] as int; 0 when the key is missing, null or not an integer.</summary>
        private static int getDictInt(Dictionary<string, Object> dict, string key)
        {
            Object value;
            if (!dict.TryGetValue(key, out value) || (null == value))
            {
                return 0;
            }

            if (value is int)
            {
                return (int)value;
            }

            // NOTE(review): a non-integer json number (e.g. 9.5) ends up as 0 here - confirm the site never sends one
            return parseIntOrDefault(value.ToString(), 0);
        }

        /// <summary>Returns dict[key] as bool; false when the key is missing, null or not a boolean.</summary>
        private static bool getDictBool(Dictionary<string, Object> dict, string key)
        {
            Object value;
            if (!dict.TryGetValue(key, out value) || (null == value))
            {
                return false;
            }

            if (value is bool)
            {
                return (bool)value;
            }

            bool parsedValue;
            return Boolean.TryParse(value.ToString(), out parsedValue) && parsedValue;
        }

        /// <summary>
        /// Returns the integer text of the first node matching xpath,
        /// or defaultValue when there is no document, no such node, or the text is not an integer.
        /// </summary>
        private static int getNodeInt(HtmlNode rootNode, string xpath, int defaultValue)
        {
            if (null == rootNode)
            {
                return defaultValue;
            }

            HtmlNode foundNode = rootNode.SelectSingleNode(xpath);
            if (null == foundNode)
            {
                return defaultValue;
            }

            return parseIntOrDefault(foundNode.InnerText, defaultValue);
        }

        /// <summary>
        /// Builds the grid row for one search result: part of the values come from the
        /// search json item, the rest is scraped from the gig's own page. Adds the row to the grid.
        /// </summary>
        private void processEachGitItemInfo(gigSearchItemInfo gigItemInfo)
        {
            gigInfo singleGigInfo = new gigInfo();

            //(12)gig url
            singleGigInfo.gigUrl = fiverrComDomain + gigItemInfo.gig_url;

            string respGigHtml = crifanLib.getUrlRespHtml_multiTry(singleGigInfo.gigUrl);
            // when the gig page could not be fetched, the page-based values below keep their defaults
            HtmlNode rootNode = null;
            if (!string.IsNullOrEmpty(respGigHtml))
            {
                rootNode = crifanLib.htmlToHtmlDoc(respGigHtml).DocumentNode;
            }

            //(1)title
            singleGigInfo.title = gigItemInfo.title_full;

            //(2)seller rating ( based on 1-100% format )
            //http://fiverr.com/betaomicronalph/write-seo-articles-for-you
            //<div class="stats-row stats-row-ratings cf">
            //    <div class="stat ">91%<small>positive rating</small></div>
            //    <div class="stat-sml">by</div>
            //    <div class="stat">590<small>votes</small></div>
            //</div>
            HtmlNode positiveRaingNode = (null == rootNode)
                ? null
                : rootNode.SelectSingleNode("//div[@class='stats-row stats-row-ratings cf']/div[@class='stat ']");
            if (null != positiveRaingNode)
            {
                //"100%positive rating"
                string ratingPercentStr = "";
                if (crifanLib.extractSingleStr(@"(\d+)%", positiveRaingNode.InnerText, out ratingPercentStr))
                {
                    singleGigInfo.sellerRating = parseIntOrDefault(ratingPercentStr, 0);
                }
            }

            //(3)estimated delivery ( based on 24 hours - 7days format )
            singleGigInfo.estimatedDeliveryStr = gigItemInfo.duration.ToString() + " days";

            //(4)gig rating ( based on 1-100% )
            singleGigInfo.gigRating = gigItemInfo.rating * 10;

            //(5)orders in que ( based on 0-9999 format )
            //http://fiverr.com/trickyguy/be-your-seo-teacher
            //<div class="stats-row stats-row-locked icn-orders cf">
            //    <div class="stat">0</div>
            //    <div class="stat"><small>orders<br>in queue</small></div>
            //</div>
            singleGigInfo.ordersInQueue = getNodeInt(
                rootNode,
                "//div[@class='stats-row stats-row-locked icn-orders cf']/div[@class='stat']",
                0);

            //(6)level of the seller ( 1-3 )
            //(11)top rated seller ( yes or no )
            singleGigInfo.isTopRatedSeller = false;
            switch (gigItemInfo.seller_level)
            {
                case "level_one_seller":
                    singleGigInfo.sellerLevel = 1;
                    break;
                case "level_two_seller":
                    singleGigInfo.sellerLevel = 2;
                    break;
                case "top_rated_seller":
                    singleGigInfo.sellerLevel = 3;
                    singleGigInfo.isTopRatedSeller = true;
                    break;
                default:
                    singleGigInfo.sellerLevel = 0;
                    break;
            }

            //(7)haz video ( yes or no )
            singleGigInfo.hasVideo = (!string.IsNullOrEmpty(gigItemInfo.video_thumb));

            //(8)express gigs (yes or no )
            singleGigInfo.isExpressGig = gigItemInfo.is_featured;

            //(9)country flag ( display county flag )
            singleGigInfo.coutryFlag = gigItemInfo.seller_country_name; //"India"

            //(10)+ve reviews and -ve reviews ( based on 1-9999 )
            //http://fiverr.com/betaomicronalph/write-seo-articles-for-you
            //<div class="reviews-summary" itemprop="aggregateRating" itemscope itemtype="http://schema.org/AggregateRating">
            //    <span class="summary summary-pos" itemprop="ratingValuePositive" content="4.6">537</span>
            //    <span class="summary summary-neg" itemprop="reviewCount" content="53">53</span>
            //</div>
            singleGigInfo.positiveReviews = getNodeInt(
                rootNode,
                "//div[@class='reviews-summary' and @itemprop='aggregateRating']/span[@class='summary summary-pos']",
                0);
            singleGigInfo.negativeReviews = getNodeInt(
                rootNode,
                "//div[@class='reviews-summary' and @itemprop='aggregateRating']/span[@class='summary summary-neg']",
                0);

            storeGigInfo(singleGigInfo);

            //update UI; this is also where a click on Pause/Stop gets handled while searching
            System.Windows.Forms.Application.DoEvents();
        }

        /// <summary>
        /// Converts one element of the json "gigs" array into a gigSearchItemInfo and processes it.
        /// An element that is not a string-keyed dictionary is ignored.
        /// </summary>
        private void processSingleGigSearchItemObject(Object singleGigItemObject)
        {
            Dictionary<string, Object> itemDict = singleGigItemObject as Dictionary<string, Object>;
            if (null == itemDict)
            {
                return;
            }

            gigSearchItemInfo itemInfo = new gigSearchItemInfo();

            itemInfo.title = getDictString(itemDict, "title");
            itemInfo.title_full = getDictString(itemDict, "title_full");
            itemInfo.duration = getDictInt(itemDict, "duration");
            itemInfo.price = getDictString(itemDict, "price");
            itemInfo.rating = getDictInt(itemDict, "rating");
            itemInfo.rating_count = getDictInt(itemDict, "rating_count");
            itemInfo.is_featured = getDictBool(itemDict, "is_featured");
            itemInfo.gig_id = getDictInt(itemDict, "gig_id");
            itemInfo.gig_url = getDictString(itemDict, "gig_url");
            itemInfo.img_medium = getDictString(itemDict, "img_medium");

            //normal:  "video_thumb":false
            //special: "video_thumb":"<img src=\\"http://static.dmcloud.net/4e5bf73e94a6f629c900461b/5172ce9c06361d76ae000218/thumb-162x121-f.jpeg\\"
            // -> only the string form is kept; everything else counts as "no video"
            Object objItemVideoThumb;
            itemDict.TryGetValue("video_thumb", out objItemVideoThumb);
            itemInfo.video_thumb = (objItemVideoThumb as string) ?? string.Empty;

            itemInfo.seller_name = getDictString(itemDict, "seller_name");
            itemInfo.seller_created_at = getDictString(itemDict, "seller_created_at");
            itemInfo.seller_country_name = getDictString(itemDict, "seller_country_name");
            itemInfo.seller_country = getDictString(itemDict, "seller_country");
            itemInfo.seller_url = getDictString(itemDict, "seller_url");
            //may be null in the json
            itemInfo.seller_level = getDictString(itemDict, "seller_level");
            itemInfo.gig_image = getDictString(itemDict, "gig_image");

            processEachGitItemInfo(itemInfo);
        }

        /// <summary>
        /// Requests result page curSearchInfo.pageNum as json and stores its items in
        /// curSearchInfo (curItemIdx reset to 0); also updates hasNextPage.
        /// </summary>
        /// <returns>false when the response contains no usable "gigs" array.</returns>
        private bool loadSearchPage(string categoryId, string csrfTokenValue)
        {
            const int numPerPage = 50;

            hasNextPage = false;

            //page 1:
            //http://fiverr.com/gigs/gigs_as_json?host=search&type=best_match&query_string=seo&search_filter=auto&category_id=99912&limit=50&page=1
            //page 2:
            //http://fiverr.com/gigs/gigs_as_json?host=search&type=best_match&query_string=seo&search_filter=auto&category_id=99912&limit=50&page=2
            curSearchInfo.searchUrl = "http://fiverr.com/gigs/gigs_as_json?"
                                    + "host=search"
                                    + "&type=best_match"
                                    + "&query_string=" + HttpUtility.UrlEncode(txbKeyword.Text)
                                    + "&search_filter=auto"
                                    + "&category_id=" + categoryId
                                    + "&limit=" + numPerPage.ToString()
                                    + "&page=" + curSearchInfo.pageNum.ToString();

            Dictionary<string, string> headerDict = new Dictionary<string, string>();
            headerDict.Add("X-CSRF-Token", csrfTokenValue);
            headerDict.Add("X-Requested-With", "XMLHttpRequest");
            string respGigJson = crifanLib.getUrlRespHtml(curSearchInfo.searchUrl, headerDict: headerDict);
            if (string.IsNullOrEmpty(respGigJson))
            {
                return false;
            }

            //got json string like this:
            //{"gigs":[
            //{"title":"be your SEO teacher", ... ,"seller_level":null, ...},
            //{"title":"write SEO articles for you", ... ,"seller_level":"level_two_seller", ...},
            //....
            //],"total_results":519,"next_page":true}
            Dictionary<string, Object> gigsObjDict = crifanLib.jsonToDict(respGigJson) as Dictionary<string, Object>;
            if (null == gigsObjDict)
            {
                return false;
            }

            hasNextPage = getDictBool(gigsObjDict, "next_page");

            Object itemDictListObj;
            if (!gigsObjDict.TryGetValue("gigs", out itemDictListObj))
            {
                return false;
            }

            Object[] itemObjList = itemDictListObj as Object[];
            if (null == itemObjList)
            {
                return false;
            }

            curSearchInfo.itemObjList = itemObjList;
            curSearchInfo.curItemIdx = 0;

            return true;
        }

        /// <summary>
        /// Processes the not yet processed items of the page held in curSearchInfo.
        /// When the page is finished, it is released and pageNum moves to the next page.
        /// </summary>
        /// <returns>
        /// false when needGetMore became false before the page was finished; the position
        /// stays in curSearchInfo so a later call continues with the next item.
        /// </returns>
        private bool processPendingPageItems()
        {
            if (null == curSearchInfo.itemObjList)
            {
                //no page in progress
                return true;
            }

            while (curSearchInfo.curItemIdx < curSearchInfo.itemObjList.Length)
            {
                if (!needGetMore)
                {
                    return false;
                }

                processSingleGigSearchItemObject(curSearchInfo.itemObjList[curSearchInfo.curItemIdx]);
                curSearchInfo.curItemIdx++;
            }

            // the page counter advances only here, exactly once per finished page,
            // so pausing and continuing can neither skip nor repeat a page
            curSearchInfo.itemObjList = null;
            curSearchInfo.curItemIdx = 0;
            curSearchInfo.pageNum++;

            return true;
        }

        /// <summary>
        /// Starts a new search (status stopped) or continues a paused one, then walks the
        /// result pages until there is no next page, an error occurs, or Pause/Stop is pressed.
        /// </summary>
        private void btnSearch_Click(object sender, EventArgs e)
        {
            bool isResume;
            if (curSearchStatus == search_status.SEARCH_STATUS_PAUSED)
            {
                isResume = true;
            }
            else if (curSearchStatus == search_status.SEARCH_STATUS_STOPPED)
            {
                isResume = false;
            }
            else
            {
                //unexpected status
                return;
            }

            //STEP1: access main page
            string fiverMainUrl = fiverrComDomain + "/";
            string respHtml = crifanLib.getUrlRespHtml_multiTry(fiverMainUrl);

            string categoryId = "";
            string csrfTokenValue = "";
            if (!string.IsNullOrEmpty(respHtml))
            {
                HtmlNode rootNode = crifanLib.htmlToHtmlDoc(respHtml).DocumentNode;

                //1.extract category_id
                //<div class="gig-carousel gallery loading cf carousel-noaction" data-json-path="/gigs/endless_page_as_json?host=homepage&type=endless&category_id=99912&limit=30" data-load-more="true" data-hide-empty="false" data-gigs-shown="30" data-do-special="false" data-do-endless="true" data-box-id="hp99912_1" >
                HtmlNode datajsonPathNode = rootNode.SelectSingleNode("//div[@class and contains(@data-json-path, 'category_id=')]");
                if (null != datajsonPathNode)
                {
                    string dataJsonPathValue = datajsonPathNode.Attributes["data-json-path"].Value;
                    if (!crifanLib.extractSingleStr(@"category_id=(\d+)", dataJsonPathValue, out categoryId)
                        || (null == categoryId))
                    {
                        categoryId = "";
                    }
                    //else: got category_id: 99912
                }

                //2. extract X-CSRF-Token value
                //<meta content="6j77ymABhWzqVarvSOXSIl4MwW3KrEESH8rofrLem4w=" name="csrf-token" />
                HtmlNode csrfTokenNode = rootNode.SelectSingleNode("//meta[@content and @name='csrf-token']");
                if (null != csrfTokenNode)
                {
                    csrfTokenValue = csrfTokenNode.Attributes["content"].Value; //"K4Q+6uaGEvepuVLy5EwEDTddxbgzS3BJQ2zHIDdBFpk="
                }
            }

            //STEP2: access search url
            //http://fiverr.com/gigs/search?utf8=%E2%9C%93&search_in=everywhere&query=seo&x=12&y=12
            // the returned html is not used
            // NOTE(review): presumably requested only so the site sees a normal browsing sequence - confirm
            string searchPageUrl = "http://fiverr.com/gigs/search?utf8=%E2%9C%93"
                                 + "&search_in=everywhere"
                                 + "&query=" + HttpUtility.UrlEncode(txbKeyword.Text)
                                 + "&x=12"
                                 + "&y=12";
            Dictionary<string, string> headerDict = new Dictionary<string, string>();
            headerDict.Add("Referer", fiverMainUrl);
            crifanLib.getUrlRespHtml_multiTry(searchPageUrl, headerDict: headerDict);

            if (isResume)
            {
                //continue search: first finish the page that was in progress when paused
                needGetMore = true;
                curSearchStatus = search_status.SEARCH_STATUS_SEARCHING;
                updateUI();

                processPendingPageItems();
            }
            else
            {
                // new search -> clear previously searched result
                crifanLib.dgvClearContent(dgvSearchResult);

                curSearchStatus = search_status.SEARCH_STATUS_SEARCHING;
                updateUI();

                curSearchInfo = new search_info();
                curSearchInfo.pageNum = 1;
                curSearchInfo.curItemIdx = 0;

                needGetMore = true;
                //nothing loaded yet: assume there is a first page
                hasNextPage = true;
            }

            //STEP3: search each page, got json, parse search result to list
            // "user wants more" (needGetMore) and "site has more" (hasNextPage) are separate
            // conditions, so the items of the last page are still processed
            while (needGetMore && hasNextPage)
            {
                if (!loadSearchPage(categoryId, csrfTokenValue))
                {
                    //some error ?
                    break;
                }

                processPendingPageItems();
            }

            // still "searching" here means no Pause/Stop was pressed: the search ended by
            // itself (no more pages, or an error), so give the Search button back
            if (curSearchStatus == search_status.SEARCH_STATUS_SEARCHING)
            {
                needGetMore = false;
                curSearchStatus = search_status.SEARCH_STATUS_STOPPED;
                updateUI();
            }
        }

        private void btnPause_Click(object sender, EventArgs e)
        {
            if (curSearchStatus == search_status.SEARCH_STATUS_SEARCHING)
            {
                curSearchStatus = search_status.SEARCH_STATUS_PAUSED;
                updateUI();

                //progress stays in curSearchInfo, the search loop exits before its next item
                needGetMore = false;
            }
        }

        private void btnStopSearching_Click(object sender, EventArgs e)
        {
            if ((curSearchStatus == search_status.SEARCH_STATUS_SEARCHING)
                || (curSearchStatus == search_status.SEARCH_STATUS_PAUSED))
            {
                curSearchStatus = search_status.SEARCH_STATUS_STOPPED;
                updateUI();

                needGetMore = false;
            }
        }

        /// <summary>Appends one gig as a new grid row, selects it and scrolls it into view.</summary>
        void storeGigInfo(gigInfo singleGigInfo)
        {
            // use the index returned by Add: Rows.Count - 1 is the empty placeholder row
            // whenever the grid allows the user to add rows
            int newRowIdx = dgvSearchResult.Rows.Add(
                singleGigInfo.title,
                singleGigInfo.sellerRating,
                singleGigInfo.estimatedDeliveryStr,
                singleGigInfo.gigRating,
                singleGigInfo.ordersInQueue,
                singleGigInfo.sellerLevel,
                singleGigInfo.hasVideo ? "yes" : "no",
                singleGigInfo.isExpressGig,
                singleGigInfo.coutryFlag,
                singleGigInfo.positiveReviews,
                singleGigInfo.negativeReviews,
                singleGigInfo.isTopRatedSeller);

            //button shows a fixed caption, the real url travels in Tag (used by click handler and export)
            DataGridViewCell gigUrlCell = dgvSearchResult.Rows[newRowIdx].Cells[girUrlColumnIdx];
            gigUrlCell.Value = "Buy Now";
            gigUrlCell.Tag = singleGigInfo.gigUrl;

            dgvSearchResult.Rows[newRowIdx].Selected = true;
            dgvSearchResult.FirstDisplayedScrollingRowIndex = newRowIdx;

            crifanLib.dgvDrawHeaderNum(dgvSearchResult);
        }

        /// <summary>Opens the gig url stored in the Tag of a clicked "Gig Url" button cell.</summary>
        private void dgvSearchResult_CellContentClick(object sender, DataGridViewCellEventArgs e)
        {
            if ((e.RowIndex < 0) || (e.ColumnIndex != girUrlColumnIdx))
            {
                return;
            }

            DataGridViewButtonCell clickedButtonCell =
                dgvSearchResult.Rows[e.RowIndex].Cells[e.ColumnIndex] as DataGridViewButtonCell;
            if ((null == clickedButtonCell) || (null == clickedButtonCell.Tag))
            {
                //e.g. a row without stored url
                return;
            }

            string gigUrl = clickedButtonCell.Tag.ToString();
            if (!string.IsNullOrEmpty(gigUrl))
            {
                System.Diagnostics.Process.Start(gigUrl);
            }
        }

        /// <summary>Exports the grid to fiverrComScrapedResult.xls in the chosen folder and shows the file.</summary>
        private void btnSaveAll_Click(object sender, EventArgs e)
        {
            string saveFolder = crifanLib.getSaveFolder(fbdSaveFolder);
            if (string.IsNullOrEmpty(saveFolder))
            {
                //no folder to save into
                return;
            }

            string outputFilename = "fiverrComScrapedResult" + ".xls";
            string fullFilename = Path.Combine(saveFolder, outputFilename);

            //columns that use tag as value
            List<int> useTagColumnIdxList = new List<int>();
            useTagColumnIdxList.Add(girUrlColumnIdx);
            crifanLib.dgvExportToExcel(dgvSearchResult, fullFilename, useTagColumnIdxList: useTagColumnIdxList);

            crifanLib.openFolderAndSelectFile(fullFilename);
        }

        /// <summary>Opens Windows Explorer with the given file selected.</summary>
        private void openFolderAndSelectFile(string fullFilename)
        {
            // quoted, so a path containing spaces or commas is still taken as one argument
            System.Diagnostics.Process.Start("Explorer.exe", "/select,\"" + fullFilename + "\"");
        }

        /// <summary>Exports the grid to fiverrComScrapedResult.csv in the chosen folder and shows the file.</summary>
        private void btnExportToCsv_Click(object sender, EventArgs e)
        {
            string saveFolder = crifanLib.getSaveFolder(fbdSaveFolder);
            if (string.IsNullOrEmpty(saveFolder))
            {
                //no folder to save into
                return;
            }

            string outputFilename = "fiverrComScrapedResult.csv";
            string fullFilename = Path.Combine(saveFolder, outputFilename);

            //columns that use tag as value
            List<int> useTagColumnIdxList = new List<int>();
            useTagColumnIdxList.Add(girUrlColumnIdx);
            crifanLib.dgvExportToCsv(dgvSearchResult, fullFilename, delimiter: ",", useTagColumnIdxList: useTagColumnIdxList);

            crifanLib.openFolderAndSelectFile(fullFilename);
        }

        private void btnClearAll_Click(object sender, EventArgs e)
        {
            crifanLib.dgvClearContent(dgvSearchResult);
        }

        private void btnHelp_Click(object sender, EventArgs e)
        {
            string helpUrl = "http://giggladiator.com/help";
            System.Diagnostics.Process.Start(helpUrl);
        }

        //not implemented yet
        private void btnCreateAlert_Click(object sender, EventArgs e)
        {
        }

        //not implemented yet
        private void btnExpReaderToExcel_Click(object sender, EventArgs e)
        {
        }

        //not implemented yet
        private void btnExpReaderToCsv_Click(object sender, EventArgs e)
        {
        }
    }
}
(2)
【总结】
转载请注明:在路上 » 【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之后