宅男福利 用Python爬取美女圖片

嘿嘿嘿

今天帶大家爬點有意思的東西

這次用 requests 抓取網頁,再用 lxml 的 XPath 來解析

獲取網頁和解析網頁的函式

def get_tag(response, tag):
    """Evaluate an XPath expression against an HTML document.

    Args:
        response: HTML source text to parse.
        tag: XPath expression to evaluate.

    Returns:
        The result of ``xpath(tag)`` on the parsed document. The callers
        in this article index it with ``[0]`` or iterate over it.
    """
    html = etree.HTML(response)
    return html.xpath(tag)


def parse_url(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded response body (``response.text``).
    """
    # ``headers`` is the module-level dict defined in the complete script.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

獲取網頁url

def url_find(url):
    """Download every album linked from one listing page.

    Args:
        url: Address of a listing page whose ``#pins`` list holds the
            album links.
    """
    r = parse_url(url)
    url_list = get_tag(r, '//*[@id="pins"]/li/span[1]/a/@href')
    titles = get_tag(r, '//*[@id="pins"]/li/span[1]/a/text()')
    # Walk links and titles in lockstep instead of indexing by position.
    for album_url, album_title in zip(url_list, titles):
        url_jpg_find(album_url, album_title)
        # Report the album that just finished, not the whole title list.
        print(album_title, '儲存完畢')

獲取圖片的url

def url_jpg_find(url, title):
    """Visit every page of one album and save the image on each page.

    Args:
        url: Address of the album's first page.
        title: Album title; also used as the name of the output directory.

    Raises:
        IndexError: If the page-count element is not found on the page.
        ValueError: If the page-count text is not an integer.
    """
    global page
    page = 0  # restart the saved-image counter for this album

    r = parse_url(url)
    # The text read here is used as the number of pages in the album.
    url_last = int(get_tag(r, '/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')[0])
    # Page 1 is the album URL itself; later pages are "<url>/<n>".
    url_list = [url] + [url + '/' + str(i) for i in range(2, url_last + 1)]

    # exist_ok avoids the separate exists() check and its race window.
    os.makedirs(title, exist_ok=True)

    for page_url in url_list:
        content_find(page_url, title)

獲取圖片的資訊

def content_find(url, title):
    """Extract the image name and image URL from one page and save it.

    Args:
        url: Address of a single page inside an album.
        title: Album title, passed through to ``save`` as the directory.

    Raises:
        IndexError: If the heading or the main image is not found.
    """
    r = parse_url(url)
    name = get_tag(r, '/html/body/div[2]/div[1]/h2/text()')[0]
    url_jpg = get_tag(r, '//div[@class="main-image"]//a/img/@src')[0]
    # Short pause between requests.
    time.sleep(0.2)
    save(name, url_jpg, title)

儲存圖片

def save(name, url_jpg, title):
    """Download one image and write it to ``<cwd>/<title>/<name>.jpg``.

    Args:
        name: File name for the image, without extension.
        url_jpg: Address of the image to download.
        title: Name of the directory (under the current working
            directory) that receives the file; it must already exist.
    """
    global page
    # A timeout keeps one stalled download from hanging the whole crawl.
    r = requests.get(url_jpg, headers=headers, timeout=10)
    path = os.path.join(os.getcwd(), title, name + '.jpg')
    # The with-block closes the file; no explicit close() is needed.
    with open(path, 'wb') as j:
        j.write(r.content)
    page += 1
    print(page)

import os
import time

import requests
from lxml import etree

# Request headers sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    "Referer": "https://www.mzitu.com",
}

# Seconds to wait for the server; without it requests.get may block forever.
REQUEST_TIMEOUT = 10

# Number of images saved for the album currently being downloaded.
page = 0


def get_tag(response, tag):
    """Evaluate an XPath expression against an HTML document.

    Args:
        response: HTML source text to parse.
        tag: XPath expression to evaluate.

    Returns:
        The result of ``xpath(tag)`` on the parsed document. Callers
        index it with ``[0]`` or iterate over it.
    """
    html = etree.HTML(response)
    return html.xpath(tag)


def parse_url(url, timeout=REQUEST_TIMEOUT):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body (``response.text``).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text


def url_find(url):
    """Download every album linked from one listing page.

    Args:
        url: Address of a listing page whose ``#pins`` list holds the
            album links.
    """
    r = parse_url(url)
    url_list = get_tag(r, '//*[@id="pins"]/li/span[1]/a/@href')
    titles = get_tag(r, '//*[@id="pins"]/li/span[1]/a/text()')
    # Walk links and titles in lockstep instead of indexing by position.
    for album_url, album_title in zip(url_list, titles):
        url_jpg_find(album_url, album_title)
        # Report the album that just finished, not the whole title list.
        print(album_title, '儲存完畢')


def url_jpg_find(url, title):
    """Visit every page of one album and save the image on each page.

    Args:
        url: Address of the album's first page.
        title: Album title; also used as the name of the output directory.

    Raises:
        IndexError: If the page-count element is not found on the page.
        ValueError: If the page-count text is not an integer.
    """
    global page
    page = 0  # restart the saved-image counter for this album

    r = parse_url(url)
    # The text read here is used as the number of pages in the album.
    url_last = int(get_tag(r, '/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')[0])
    # Page 1 is the album URL itself; later pages are "<url>/<n>".
    url_list = [url] + [url + '/' + str(i) for i in range(2, url_last + 1)]

    # exist_ok avoids the separate exists() check and its race window.
    os.makedirs(title, exist_ok=True)

    for page_url in url_list:
        content_find(page_url, title)


def content_find(url, title):
    """Extract the image name and image URL from one page and save it.

    Args:
        url: Address of a single page inside an album.
        title: Album title, passed through to ``save`` as the directory.

    Raises:
        IndexError: If the heading or the main image is not found.
    """
    r = parse_url(url)
    name = get_tag(r, '/html/body/div[2]/div[1]/h2/text()')[0]
    url_jpg = get_tag(r, '//div[@class="main-image"]//a/img/@src')[0]
    # Short pause between requests.
    time.sleep(0.2)
    save(name, url_jpg, title)


def save(name, url_jpg, title):
    """Download one image and write it to ``<cwd>/<title>/<name>.jpg``.

    Args:
        name: File name for the image, without extension.
        url_jpg: Address of the image to download.
        title: Name of the directory (under the current working
            directory) that receives the file; it must already exist.
    """
    global page
    r = requests.get(url_jpg, headers=headers, timeout=REQUEST_TIMEOUT)
    path = os.path.join(os.getcwd(), title, name + '.jpg')
    # The with-block closes the file; no explicit close() is needed.
    with open(path, 'wb') as j:
        j.write(r.content)
    page += 1
    print(page)


def main():
    """Crawl every listing page of the site and download each album."""
    start_url = 'https://www.mzitu.com'
    r = parse_url(start_url)
    # The text read here is used as the number of listing pages.
    url_last = int(get_tag(r, '/html/body/div[2]/div[1]/div[3]/div/a[4]/text()')[0])
    page_prefix = 'https://www.mzitu.com/page/'
    # Listing page 1 is the home page; later ones are ".../page/<n>".
    url_list = ['https://www.mzitu.com'] + [
        page_prefix + str(i) for i in range(2, url_last + 1)
    ]
    for listing_url in url_list:
        url_find(listing_url)


if __name__ == '__main__':
    main()

效果圖就不放了

咳咳 太誘人 會被封掉

請大家自行腦補一下

小編是一名Python開發工程師,自己整理了一套最新的Python系統學習教程,包括從基礎的Python指令碼到Web開發、爬蟲、資料分析、資料視覺化、機器學習等。想要這些資料的可以關注小編,並在後臺私信小編:“01”即可領取。