SoFunction
Updated on 2025-03-01

Go colly Crawler Implementation Example

text

Here is the Go source code for a crawler, built on colly, for a certain "CC" site. Given a browser cookie and the relevant Excel column letters and row numbers, it reads company names from an Excel file, queries each company's legal representative and phone number, and writes the results back to the specified cells in the Excel file.

// Command main is an interactive crawler: it reads company names from an
// Excel workbook, looks each one up with colly, and writes the contact name
// and phone number it finds back into the workbook.
package main

import (
	"bufio"
	"fmt"
	"net/url"
	"os"
	"runtime"
	"strconv"
	"time"

	"github.com/gocolly/colly"
	"github.com/gocolly/colly/debug"
	"github.com/gocolly/colly/extensions"
	"github.com/xuri/excelize/v2"
)

// State shared between main and the collector callbacks. The callbacks only
// receive colly's own arguments, so the row currently being processed is
// passed to them through these package-level variables.
var (
	// cookies is the raw cookie string entered by the user; it is sent as
	// the Cookie header of every request.
	cookies string
	// tempUrl is the value sent as the referer header of every request.
	tempUrl string
	// tempGongSiName is the company name of the row currently being crawled.
	tempGongSiName string
	// tempI is the Excel row number currently being crawled; results are
	// written back to this row.
	tempI int
)
func main() {
   //The full name of the file to be processed   var fileName string
   //The name of the column   var namelie string
   //Start line number   var startNum int
   //End line number   var endNum int
   var personLie string
   var phoneLie string
   ("Please enter your browsercookies In the browser Developer ModeF12,Find the console under the circumstances(consol) enter(Notice,CookieIf there is HttpOnlyThe need to be in the development toolHttpOnlyCancel,Then execute the following command):  Just,Then copy it out! Right-click,Copy string content")
   //(&cookies) //This line will encounter spaces. The default input is completed, so it cannot be used.   reader := ()
   res, _, err := ()
   if nil == err {
      cookies=string(res)
   }else{
      ("Read cookie error:", err)
      return
   }
   //("The entered cookie is: "+cookies)   ("Please enter the full path of the file: (string type)")
   (&fileName)
   ("Please enter the letter (capsular) of the company name column in Excel:")
   (&namelie)
   ("Please enter the first row number (numeric type) of the column specified by Excel:")
   (&startNum)
   ("Please enter the last row number (numeric type) of the column specified by Excel:")
   (&endNum)
   ("Please enter the letter (capsulated letters) of the column in which Excel contact is located:")
   (&personLie)
   ("Please enter the letter (capsular) of the column where Excel contact number is located:")
   (&phoneLie)
   //Output all input information and verify that it is correct   //(fileName,namelie,startNum,endNum,personLie,phoneLie)
   f, err := (fileName)
   if err!=nil {
      (err)
      return
   }
   c:=initCollector(f,personLie,phoneLie)
   //Remember to close the workbook opened above.   defer func() {
      // Close the workbook      if err := (); err != nil {
         (err)
      }
   }()
   for i:=startNum;i<=endNum;i++{
      // Get the value of the specified cell in the worksheet      cell, err := ("Sheet1", namelie+(i))
      if err != nil {
         ("Read the number"+(i)+"There's an error!")
         return
      }else{
         ("Start crawling:"+cell+"  data")
         tempGongSiName = cell
         tempI = i
         visitUrl(c)
         (1*)
      }
   }
   ("------------------------------------------------------------------------------------------------------------------------------)
}
///Initialize the collectorfunc initCollector(f *,personLie string,phoneLie string,) * {
   c := ((1), (&{}))
   (c)                              // Using a random UserAgent, it is best to use a proxy.  This will not be easily banned   ("socks5://127.0.0.1:7890")
   (func(response *, err error) {
      ("--->onError -------------------------------------------------------------------------------------------------------------------------+())
      ()
   })
   (func(response *) {
      ("---->onResponse")
   })
   ("table", func(element *) {
      ("---->onXML")
   })
   (func(r *) {
      ("Cookie",cookies)
      ("referer", tempUrl)
      ("sec-fetch-mode", "cors")
      ("sec-fetch-site", "same-origin")
      ("accept", "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01")
      ("accept-encoding", "gzip, deflate, br")
      ("accept-language", "en,zh-CN;q=0.9,zh;q=0.8")
      ("X-Requested-With", "XMLHttpRequest")
   })
   ("tr:first-child", func(e *) {//Get the first piece of data in the query.      ("---->onHtml---Get successful!")
      //Get the first main information of the company.      //("---->"+(".relate-info").Text())
      sellectEle := (".relate-info")
      //The person who found it finally      name:=("div:nth-child(1)").Find("div>span").First().Find("a").Text()
      //The final number of inquiry      phone:=("div:nth-child(2)").Find("div>span").First().Find("span>span").Find(":nth-child(2)").Text()
      //("--->>>"+name)
      //("--->>>"+phone)
      ("Sheet1", personLie+(tempI), name)
      ("Will"+tempGongSiName+"Name of person ("+name+") Write  "+personLie+(tempI))
      ("Sheet1", phoneLie+(tempI), phone)
      ("Will"+tempGongSiName+"Telephone ("+phone+") Write  "+phoneLie+(tempI))
      ()
   })
   (func(response *) {
      ("onScraped")
   })
   return c
}
//Access the given namefunc visitUrl(c *){
   tempUrl:="/web/search?key="+(tempGongSiName)
   (tempUrl)
}

That concludes this Go colly crawler implementation example. For more on Go colly crawlers, please see my other related articles!