import os
import re
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Article scraped when main() is called without an explicit URL.
DEFAULT_ARTICLE_URL = 'https://www.ptt.cc/bbs/Beauty/M.1686997472.A.FDA.html'
# File extensions (lower-case, without the dot) that are treated as images.
ALLOWED_EXTENSIONS = frozenset({"jpg", "jpeg", "png", "gif"})
# Seconds to wait for a server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def _safe_dir_name(title):
    """Return ``title`` made safe for use as a single directory name."""
    # Path separators would create nested directories, and the other
    # characters are rejected by common file systems.
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
    return cleaned or 'untitled'


def download_img(url, save_path, timeout=REQUEST_TIMEOUT):
    """Download ``url`` and write the response body to ``save_path``.

    Args:
        url: Absolute URL of the file to fetch.
        save_path: Destination path; its parent directory must exist.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the file was written, False if the server answered with a
        text document (for example an HTML page) instead of binary data.

    Raises:
        requests.RequestException: On network failure or an HTTP error status.
    """
    print(f'正在下载图片……{url}')
    response = requests.get(url, timeout=timeout)
    # Do not write error pages (404, 429, ...) to disk as if they were images.
    response.raise_for_status()

    content_type = response.headers.get('Content-Type', '')
    saved = not content_type.lower().startswith('text/')
    if saved:
        with open(save_path, 'wb') as f:
            f.write(response.content)
    else:
        print(f'skipped, not an image (Content-Type: {content_type}): {url}')
    print('-'*30)
    return saved


def main(url=DEFAULT_ARTICLE_URL):
    """Download every image linked from the article at ``url``.

    Images are stored under ``images/<article title>/``. A link counts as an
    image when the path part of its URL ends in one of ALLOWED_EXTENSIONS.

    Args:
        url: Address of the article page to scan.

    Raises:
        requests.RequestException: If the article page itself cannot be fetched.
    """
    # The cookie answers the site's over-18 confirmation in advance.
    headers = {"Cookie": "over18=1"}
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The third "article-meta-value" span is used as the title; fall back to
    # a placeholder when the page does not have that many.
    spans = soup.find_all('span', class_='article-meta-value')
    title = spans[2].text if len(spans) > 2 else ''
    dir_name = os.path.join('images', _safe_dir_name(title))
    os.makedirs(dir_name, exist_ok=True)

    # Find every image link on the page.
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue

        # Resolve relative links, then look only at the path so a query
        # string or fragment cannot hide the file extension.
        image_url = urljoin(url, href)
        file_name = urlsplit(image_url).path.rsplit('/', 1)[-1]
        extension = os.path.splitext(file_name)[1].lstrip('.').lower()
        if extension not in ALLOWED_EXTENSIONS:
            continue

        print(f"图片类型:{extension} ")
        print(f'url:{image_url}')
        try:
            # No trailing space in the path: it would become part of the
            # file name and hide the extension from image viewers.
            download_img(image_url, os.path.join(dir_name, file_name))
        except requests.RequestException as exc:
            # One broken link should not abort the remaining downloads.
            print(f'download failed: {image_url} ({exc})')


if __name__ == '__main__':
    main()
这个程序下载下来的图片无法打开,其实还没有找到真正的图片地址,后期有时间再修改代码。
继续阅读
评论