This article presents an example Python implementation of a crawler that collects movie download links. It is shared here for your reference:
#!/usr/bin/python
# coding=UTF-8
"""Crawl movie list pages and save each movie's title and download link.

The script walks a number of list pages, follows every movie link found on
them, extracts the title and download address from each movie page, and
appends both to a text file.

It runs on Python 2 (the original target) and on Python 3.

NOTE(review): the published listing had lost several call targets and the
site's host name.  The calls below were restored with the conventional
urllib2 / BeautifulSoup equivalents; the host name still has to be filled in
(see SITE_HOST) before the crawler can reach anything.
"""
import io
import os
import sys
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 offers the same Request/urlopen API under urllib.request.
    import urllib.request as urllib2

import chardet
from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8.
    # reload() is needed because site.py removes sys.setdefaultencoding.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# NOTE(review): the listing shows only the scheme here; the host name is
# missing.  Append the real host (e.g. "http://www.example.com") before use.
SITE_HOST = "http://"

# Prefix of the paginated list pages; the page number and ".html" are appended.
LIST_URL_PREFIX = SITE_HOST + "/html/gndy/dyzz/list_23_"

# File that collected titles and download links are appended to.
OUTPUT_PATH = r'e:\down_load_url.txt'

# NOTE(review): the parser argument was empty in the listing; the stdlib
# parser is presumably what was intended - confirm.
HTML_PARSER = 'html.parser'


def _parse(html):
    """Return a BeautifulSoup tree for the given HTML text or bytes."""
    return BeautifulSoup(html, HTML_PARSER)


def get_movie_download_url(html):
    """Return the download address found in a movie page.

    The address is the text of the first ``<a>`` inside the ``<td>`` whose
    style attribute is ``WORD-WRAP: break-word``.

    Returns None when the cell or the link is missing, or when the link has
    no single text child.
    """
    cell = _parse(html).find('td', attrs={'style': 'WORD-WRAP: break-word'})
    link = cell.find('a') if cell is not None else None
    return link.string if link is not None else None


def get_movie_title(html):
    """Return the text of the first ``<h1>`` in a movie page, or None."""
    heading = _parse(html).find('h1')
    return heading.string if heading is not None else None


def get_html(url):
    """Fetch ``url`` and return the raw (undecoded) response body."""
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0')
    # Open the Request object, not the bare URL, so the User-Agent header is
    # actually sent.  closing() releases the connection on both Python 2 and 3.
    with closing(urllib2.urlopen(request)) as response:
        return response.read()


def get_movie_list(url):
    """Return the absolute URLs of all movies linked from one list page.

    Movie links are the ``<a class="ulink">`` elements; each ``href`` is
    appended to SITE_HOST.  Anchors without an ``href`` are skipped.
    """
    anchors = _parse(get_html(url)).find_all('a', attrs={'class': 'ulink'})
    return [SITE_HOST + a.get('href') for a in anchors if a.get('href')]


def file_edit(wr_str, path=OUTPUT_PATH):
    """Append ``wr_str`` to the text file at ``path`` as UTF-8.

    ``path`` defaults to OUTPUT_PATH, so existing one-argument calls keep
    writing to the same file.
    """
    if isinstance(wr_str, bytes):
        # io.open() in text mode only accepts text; on Python 2 a plain str
        # is bytes, so decode it first.
        wr_str = wr_str.decode('utf-8')
    with io.open(path, 'a', encoding='utf-8') as out:
        out.write(wr_str)


def write_to_txt(a_urls):
    """Write the title and download address of every movie URL to the file.

    Each movie produces a title line, a link line and a blank separator
    line.  Pages whose title or link cannot be found are skipped rather
    than aborting the whole run.
    """
    for a_url in a_urls:
        # Movie pages are decoded as GBK before parsing.
        html = get_html(a_url).decode('GBK')
        title = get_movie_title(html)
        link = get_movie_download_url(html)
        if title is None or link is None:
            continue
        file_edit(title + "\n" + link + "\n" + "\n")


def get_pages_url(num):
    """Return the URLs of list pages 1 through ``num`` (inclusive).

    Returns an empty list when ``num`` is less than 1.
    """
    return [LIST_URL_PREFIX + str(n) + ".html" for n in range(1, num + 1)]


def main():
    """Crawl the first few list pages and record every movie found."""
    pages = 2  # number of list pages to crawl
    for page_url in get_pages_url(pages):
        write_to_txt(get_movie_list(page_url))
    print("done")


if __name__ == '__main__':
    main()
For more Python-related content, see this site's topic collections: "Python Socket Programming Tips Summary", "Python Data Structures and Algorithms Tutorial", "Summary of Python Function Usage Tips", "Summary of Python String Manipulation Techniques", "Python Introductory and Advanced Classic Tutorials", and "Summary of Python File and Directory Manipulation Techniques".
I hope this article helps you with your Python programming.