爬取吉林敖东财报
python3
直接上代码吧
from bs4 import BeautifulSoup
import urllib.request
import os
# Fetch the Jilin Aodong financial-report index page and save every absolute
# link found inside its "#tabh" element as "<link text>.html" in the current
# working directory.
url = r'http://gg.cfi.cn/cbgg/212/000623.html'

# Use the response as a context manager so the connection is always closed,
# even if reading or decoding fails.
with urllib.request.urlopen(url) as res:
    html = res.read().decode('utf-8')

soup = BeautifulSoup(html, 'html.parser')

# Candidate links are the anchors inside the element with id "tabh".
tabh = soup.select('#tabh')
if not tabh:
    # Fail with an explicit message instead of a bare IndexError on tabh[0].
    raise RuntimeError('no element with id "tabh" found at ' + url)

u_list = []  # absolute URLs to download
name = []    # destination file path for each URL, same order as u_list
for link in tabh[0].find_all('a'):
    # Anchors without an href would raise KeyError on attrs['href']; treat a
    # missing href like any other non-absolute target and skip it.
    href = link.attrs.get('href', '')
    if not href.startswith('http'):
        continue
    # The link text becomes the file name: strip surrounding whitespace and
    # replace path separators so the file always lands directly in "./".
    title = link.text.strip().replace('/', '_').replace('\\', '_')
    u_list.append(href)
    name.append('./' + title + '.html')

for u, n in zip(u_list, name):
    urllib.request.urlretrieve(u, n)