SoFunction
Updated on 2025-03-05

Crawling 100 pages of online news with Go

Thanks to Go's goroutine-based concurrency, crawling web pages is very fast: the news titles from 100 pages of Blog Park (cnblogs) can be scraped in about one second.

package main

import (
	"bytes"
	"fmt"
	"log"
	"net/http"
	"runtime"
	"strconv"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
)

func Scraper(page string) string {
 // Request the HTML page.
 ScrapeURL := "/n/page/" + page
 client := &{}
 reqest, _ := ("GET", ScrapeURL, nil)
 ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
 ("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3")
 //("Accept-Encoding", "gzip,deflate,sdch")
 ("Accept-Language", "zh-CN,zh;q=0.8")
 ("Cache-Control", "max-age=0")
 ("Connection", "keep-alive")
 ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36")
 res, err := (reqest)
 if err != nil {
  (err)
 }
 defer ()
 if  != 200 {
  ("status code error: %d %s", , )
 }

 // Load the HTML document
 doc, err := ()
 if err != nil {
  (err)
 }

 // Find the review items
 var buffer 
 ("**********Scraped page " + page + "**********\n")
 (".content .news_entry").Each(func(i int, s *) {
  // For each item found, get the band and title
  title := ("a").Text()
  url, _ := ("a").Attr("href")
  ("Review " + (i) + ": " + title + "\nhttps://" + url + "\n")
 })
 return ()
}

func main() {
 (())
 ch := make(chan string, 100)
 wg := &{}
 var page string
 for i := 1; i < 101; i++ {
  (1)
  go func(i int) {
   page = (i)
   ("Scraping page %s...\n", page)
   ch <- Scraper(page)
   ()
  }(i)
 }
 ()

 //print result
 for i := 0; i < 101; i++ {
  (<-ch)
 }
}

Summary
That is all for this article. I hope it serves as a useful reference for your study or work. Thank you for your support. If you want to learn more, please see the links below.