text
This post shares the Go source code of a crawler for a certain "CC" company-information site, built on colly. Given a browser cookie and the relevant Excel column letters and row numbers, it reads each company name from an Excel file, queries that company's legal person and phone number, and writes them back to the specified row in Excel.
// Command main is a colly-based crawler. It reads company names from one
// column of an Excel workbook, searches for each name using a browser cookie
// typed in by the user, and writes the person name and phone number found in
// the first result row back into the same row of the workbook.
package main

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"net/url"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/gocolly/colly"
	"github.com/gocolly/colly/debug"
	"github.com/gocolly/colly/extensions"
	"github.com/xuri/excelize/v2"
)

// searchURLPrefix is prepended to the query-escaped company name.
//
// NOTE(review): this literal carries no scheme or host; presumably the target
// site's base URL has to be put in front of it before a request can succeed —
// confirm the intended host.
const searchURLPrefix = "/web/search?key="

// State shared between the main loop and the collector callbacks. The
// callbacks are registered once, so the row currently being processed is
// handed to them through these package-level variables.
var (
	cookies        string // raw Cookie header value entered by the user
	tempUrl        string // URL of the search being issued; also sent as the referer
	tempGongSiName string // company name of the row being processed
	tempI          int    // worksheet row number being processed
)

// main collects the cookie, workbook path, column letters and row range from
// standard input, then crawls one row at a time, pausing one second between
// requests.
func main() {
	var (
		fileName  string // full path of the workbook to process
		namelie   string // column letter holding the company names
		startNum  int    // first row to process
		endNum    int    // last row to process (inclusive)
		personLie string // column letter that receives the person name
		phoneLie  string // column letter that receives the phone number
	)

	// A single buffered reader serves every prompt so that nothing it has
	// already buffered is lost to a later read from the raw stdin.
	reader := bufio.NewReader(os.Stdin)

	fmt.Println("Please enter your browsercookies In the browser Developer ModeF12,Find the console under the circumstances(consol) enter(Notice,CookieIf there is HttpOnlyThe need to be in the development toolHttpOnlyCancel,Then execute the following command): Just,Then copy it out! Right-click,Copy string content")
	// A cookie contains spaces, so the whole line is taken rather than one
	// token. ReadString has no length limit, so long cookies are not cut off.
	line, err := reader.ReadString('\n')
	if err != nil && !(errors.Is(err, io.EOF) && line != "") {
		fmt.Println("Read cookie error:", err)
		return
	}
	cookies = strings.TrimRight(line, "\r\n")

	questions := []struct {
		text string
		dst  interface{}
	}{
		{"Please enter the full path of the file: (string type)", &fileName},
		{"Please enter the letter (capsular) of the company name column in Excel:", &namelie},
		{"Please enter the first row number (numeric type) of the column specified by Excel:", &startNum},
		{"Please enter the last row number (numeric type) of the column specified by Excel:", &endNum},
		{"Please enter the letter (capsulated letters) of the column in which Excel contact is located:", &personLie},
		{"Please enter the letter (capsular) of the column where Excel contact number is located:", &phoneLie},
	}
	for _, q := range questions {
		if err := ask(reader, q.text, q.dst); err != nil {
			fmt.Println(err)
			return
		}
	}

	f, err := excelize.OpenFile(fileName)
	if err != nil {
		fmt.Println(err)
		return
	}
	// Close the workbook on every exit path below.
	defer func() {
		if err := f.Close(); err != nil {
			fmt.Println(err)
		}
	}()

	c := initCollector(f, personLie, phoneLie)

	for i := startNum; i <= endNum; i++ {
		// Read the company name from the requested column of this row.
		cell, err := f.GetCellValue("Sheet1", namelie+strconv.Itoa(i))
		if err != nil {
			fmt.Println("Read the number" + strconv.Itoa(i) + "There's an error!")
			return
		}
		fmt.Println("Start crawling:" + cell + " data")
		tempGongSiName = cell
		tempI = i
		visitUrl(c)
		time.Sleep(1 * time.Second)
	}
	fmt.Println("------------------------------------------------------------------------------------------------------------------------------")
}

// ask prints prompt and scans a single whitespace-delimited answer from in
// into dst, which must be a pointer accepted by fmt.Fscanln.
func ask(in *bufio.Reader, prompt string, dst interface{}) error {
	fmt.Println(prompt)
	if _, err := fmt.Fscanln(in, dst); err != nil {
		return fmt.Errorf("reading answer to %q: %w", prompt, err)
	}
	return nil
}

// initCollector builds the collector and registers its callbacks.
//
// f is the open workbook that results are written to; personLie and phoneLie
// are the column letters receiving the person name and the phone number. The
// row number and company name come from the package-level tempI and
// tempGongSiName, which the caller sets before each visit.
func initCollector(f *excelize.File, personLie string, phoneLie string) *colly.Collector {
	c := colly.NewCollector(colly.MaxDepth(1), colly.Debugger(&debug.LogDebugger{}))

	// A random User-Agent plus a proxy makes the crawler harder to ban.
	extensions.RandomUserAgent(c)
	if err := c.SetProxy("socks5://127.0.0.1:7890"); err != nil {
		fmt.Println("set proxy:", err)
	}

	c.OnError(func(response *colly.Response, err error) {
		// Report and carry on: the main loop moves to the next row instead of
		// the whole run being torn down from inside a callback.
		fmt.Println("--->onError -------------------------------------------------------------------------------------------------------------------------" + err.Error())
	})

	c.OnResponse(func(response *colly.Response) {
		fmt.Println("---->onResponse")
	})

	c.OnXML("table", func(element *colly.XMLElement) {
		fmt.Println("---->onXML")
	})

	c.OnRequest(func(r *colly.Request) {
		r.Headers.Set("Cookie", cookies)
		r.Headers.Set("referer", tempUrl)
		r.Headers.Set("sec-fetch-mode", "cors")
		r.Headers.Set("sec-fetch-site", "same-origin")
		r.Headers.Set("accept", "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01")
		// NOTE(review): advertising "br" assumes the HTTP stack can decode
		// Brotli responses — confirm.
		r.Headers.Set("accept-encoding", "gzip, deflate, br")
		r.Headers.Set("accept-language", "en,zh-CN;q=0.9,zh;q=0.8")
		r.Headers.Set("X-Requested-With", "XMLHttpRequest")
	})

	// Only the first row of the result table is used.
	c.OnHTML("tr:first-child", func(e *colly.HTMLElement) {
		fmt.Println("---->onHtml---Get successful!")

		row := strconv.Itoa(tempI)
		// Main information block of the first company in the result list.
		sellectEle := e.DOM.Find(".relate-info")
		// Person name found for the company.
		name := sellectEle.Find("div:nth-child(1)").Find("div>span").First().Find("a").Text()
		// Phone number found for the company.
		phone := sellectEle.Find("div:nth-child(2)").Find("div>span").First().Find("span>span").Find(":nth-child(2)").Text()

		if err := f.SetCellValue("Sheet1", personLie+row, name); err != nil {
			fmt.Println("write "+personLie+row+":", err)
			return
		}
		fmt.Println("Will" + tempGongSiName + "Name of person (" + name + ") Write " + personLie + row)

		if err := f.SetCellValue("Sheet1", phoneLie+row, phone); err != nil {
			fmt.Println("write "+phoneLie+row+":", err)
			return
		}
		fmt.Println("Will" + tempGongSiName + "Telephone (" + phone + ") Write " + phoneLie + row)

		// Save after every row so finished rows survive a later failure.
		if err := f.Save(); err != nil {
			fmt.Println("save workbook:", err)
		}
	})

	c.OnScraped(func(response *colly.Response) {
		fmt.Println("onScraped")
	})

	return c
}

// visitUrl issues the search request for the company named in tempGongSiName.
//
// It assigns the package-level tempUrl (rather than declaring a local of the
// same name) so that the request callback sends the current search URL as the
// referer header.
func visitUrl(c *colly.Collector) {
	tempUrl = searchURLPrefix + url.QueryEscape(tempGongSiName)
	if err := c.Visit(tempUrl); err != nil {
		fmt.Println("visit "+tempUrl+":", err)
	}
}
That concludes this Go colly crawler implementation example. For more information about Go colly crawlers, please see my other related articles!