Taking advantage of Go's goroutine-based concurrency, web pages can be crawled very quickly: the news titles from 100 pages of the cnblogs (博客园) news site can be scraped in about one second.
package main

import (
	"bytes"
	"fmt"
	"log"
	"net/http"
	"runtime"
	"strconv"
	"sync"

	"github.com/PuerkitoBio/goquery"
)

func Scraper(page string) string {
	// Request the HTML page. The host was lost in extraction; news.cnblogs.com
	// matches the /n/page/ path and the "blog park" (cnblogs) site named above.
	scrapeURL := "https://news.cnblogs.com/n/page/" + page
	client := &http.Client{}
	request, _ := http.NewRequest("GET", scrapeURL, nil)
	request.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
	request.Header.Set("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3")
	//request.Header.Set("Accept-Encoding", "gzip,deflate,sdch")
	request.Header.Set("Accept-Language", "zh-CN,zh;q=0.8")
	request.Header.Set("Cache-Control", "max-age=0")
	request.Header.Set("Connection", "keep-alive")
	request.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36")

	res, err := client.Do(request)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()
	if res.StatusCode != 200 {
		log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
	}

	// Load the HTML document.
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		log.Fatal(err)
	}

	// Find the news entries; for each item, get the title and link.
	var buffer bytes.Buffer
	buffer.WriteString("**********Scraped page " + page + "**********\n")
	doc.Find(".content .news_entry").Each(func(i int, s *goquery.Selection) {
		title := s.Find("a").Text()
		url, _ := s.Find("a").Attr("href")
		// The href is site-relative, so prepend the (assumed) host.
		buffer.WriteString("Review " + strconv.Itoa(i) + ": " + title + "\nhttps://news.cnblogs.com" + url + "\n")
	})
	return buffer.String()
}

func main() {
	runtime.GOMAXPROCS(runtime.NumCPU())
	ch := make(chan string, 100)
	wg := &sync.WaitGroup{}
	for i := 1; i <= 100; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// A goroutine-local variable avoids the data race of
			// all goroutines writing one shared string.
			page := strconv.Itoa(i)
			fmt.Printf("Scraping page %s...\n", page)
			ch <- Scraper(page)
		}(i)
	}
	wg.Wait()

	// Print the results. Exactly 100 values were sent, so read
	// exactly 100; one more receive would deadlock.
	for i := 0; i < 100; i++ {
		fmt.Println(<-ch)
	}
}
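Launching all 100 goroutines at once works here because the channel buffer matches the page count, but it also fires 100 simultaneous requests at the server. A gentler variant bounds concurrency with a small worker pool. The sketch below is one way to do it, reusing the Scraper function defined above; the pool size of 10 is an arbitrary assumption, not part of the original program.

// A minimal worker-pool sketch, assuming the Scraper function above.
func runWithPool() {
	const workers = 10 // assumed pool size; tune to the target server
	pages := make(chan int, 100)     // work queue of page numbers
	results := make(chan string, 100)

	var wg sync.WaitGroup
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Each worker pulls page numbers until the queue is closed.
			for p := range pages {
				results <- Scraper(strconv.Itoa(p))
			}
		}()
	}

	// Enqueue all pages, then close so the workers' range loops end.
	for i := 1; i <= 100; i++ {
		pages <- i
	}
	close(pages)

	wg.Wait()
	close(results)

	for r := range results {
		fmt.Println(r)
	}
}

Closing the pages channel is what lets each worker's range loop terminate cleanly, so no extra signaling is needed; only the queue depth, not the goroutine count, grows with the number of pages.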
Summary
That is the entire content of this article. I hope it is of some reference value for your study or work, and thank you for your support. If you want to learn more, please see the following links.