
Example of a Python implementation of crawling movie download links

This article presents an example of a Python implementation of crawling movie download links, shared here for your reference as follows:

#!/usr/bin/python
#coding=UTF-8
import sys
import urllib2
import os
import chardet
from bs4 import BeautifulSoup
reload(sys)
("utf-8")
# Get video download address from movie html page
def get_movie_download_url(html):
  soup=BeautifulSoup(html,'html.parser')
  fixed_html=soup.prettify()
  td=soup.find('td',attrs={'style':'WORD-WRAP: break-word'})
  url_a=td.find('a')
  url_a=url_a.string
  return url_a
# Get movie title from movie html page
def get_movie_title(html):
  soup=BeautifulSoup(html,'html.parser')
  fixed_html=soup.prettify()
  title=soup.find('h1')
  title=title.string
  return title
# Visit the url and return the html page
def get_html(url):
  req=urllib2.Request(url)
  req.add_header('User-Agent','Mozilla/5.0')
  response=urllib2.urlopen(req)
  html=response.read()
  return html
# From the movie list page, get each movie's url, splice it with the host, save it to a list and return it
def get_movie_list(url):
  m_list = []
  html = get_html(url)
  soup=BeautifulSoup(html,'html.parser')
  fixed_html=soup.prettify()
  a_urls=soup.find_all('a',attrs={'class':'ulink'})
  host = "http://"
  for a_url in a_urls:
    m_url=a_url.get('href')
    m_list.append(host+m_url)
  return m_list
# Append the string to a txt file
def file_edit(wr_str):
  f1 = open(r'e:\down_load_url.txt','a')
  f1.write(wr_str)
  f1.close()
# Pass in a list collection of movie url's, get the download address, and write to file
def write_to_txt(a_urls):
  for a_url in a_urls:
    html=get_html(a_url)
    html=html.decode('GBK')
    write_title=get_movie_title(html)
    write_url=get_movie_download_url(html)
    file_edit(write_title+"\n")
    file_edit(write_url+"\n")
    file_edit("\n")
# Pass in the number of pages and return a list of urls for those pages.
def get_pages_url(num):
  urls_list = []
  url="http:///html/gndy/dyzz/list_23_"
  for n in range(1,num+1):
    new_url = url+str(n)+".html"
    urls_list.append(new_url)
  return urls_list
if __name__=='__main__':
  pages = 2 # how many pages of the movie list to crawl
  p_url = get_pages_url(pages)
  for i in p_url:
    write_to_txt(get_movie_list(i))# Execute the write
  print "done"

Readers interested in more Python-related content may check out the following topics on this site: "Python Socket Programming Tips Summary", "Python Data Structures and Algorithms Tutorial", "Summary of Python Function Usage Tips", "Summary of Python String Manipulation Techniques", "Python Introductory and Advanced Classic Tutorials" and "Summary of Python File and Directory Manipulation Techniques".

I hope this article is helpful to you in your Python programming.