# Crawl the "nowlist" index page and, for every linked project detail page,
# extract the title, type, province/city, publish time and detail HTML.
#
# NOTE(review): the original indentation of this fragment was lost. The loop
# body is assumed to extend to the end of the fragment because every later
# statement depends on the per-item detail page (`soup2`) -- confirm.
from urllib.parse import urljoin

base_url = "http://www.xxx.cn"
url = "http://www.xxx.cn/nowlist"
# Without a timeout, requests waits forever on a stalled connection.
request_timeout = 10  # seconds
# Compiled once; the publish timestamp looks like "2020-01-31 08:15:00".
timestamp_re = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")

r = requests.get(url, timeout=request_timeout)
soup = BeautifulSoup(r.text, "html.parser")
divlist = soup.find("div", class_="list")
# A missing list container means there is nothing to crawl (previously this
# raised AttributeError on None).
ahrefs = divlist.find_all("a", class_="item") if divlist is not None else []

# Every item on this list belongs to the same top-level category.
f_tp1 = "拟在建项目"

for ah in ahrefs:
    href = ah.get("href")
    if not href:
        # Anchor without a target: nothing to fetch.
        continue
    # urljoin handles "/path", "path" and already-absolute hrefs alike.
    ahurl = urljoin(base_url, href)
    r2 = requests.get(ahurl, timeout=request_timeout)
    soup2 = BeautifulSoup(r2.text, "html.parser")

    title_div = soup2.find("div", class_="pd-top-title")
    date_div = soup2.find("div", class_="top-date-left")
    f_detils = soup2.find("div", class_="pd-contact")
    if title_div is None or date_div is None or f_detils is None:
        # Page does not have the expected layout (e.g. an error page);
        # skip it instead of aborting the whole crawl.
        continue

    f_PName = str(title_div.get_text())
    f_tp2 = self.getType2FromTit(f_PName)

    # Location text is "<province> <city>"; fall back to the nationwide
    # placeholder when either part is missing.
    city_div = soup2.find("div", class_="city")
    city = city_div.get_text() if city_div is not None else ""
    # split() without arguments ignores leading/trailing/repeated whitespace,
    # which would otherwise produce empty province/city tokens.
    cityr = city.split()
    f_ProvinceName = "全国"
    f_city = "全国"
    if len(cityr) > 1:
        f_ProvinceName = cityr[0]
        f_city = cityr[1]

    created_match = timestamp_re.search(date_div.get_text())
    if created_match is None:
        # No recognizable publish timestamp on the page.
        continue
    f_CreateTime = created_match.group()

    # Strip the footer block from the detail container before it is
    # serialized; the footer is optional.
    del_e = f_detils.find("div", class_="pd-bottm")
    if del_e is not None:
        del_e.decompose()

    # NOTE(review): the upload call below is disabled in the original, so the
    # extracted fields are currently computed but not sent anywhere.
    #print(f_tp1,f_tp2,f_ProvinceName,f_city,f_PName,f_CreateTime)
    #self.main_upload_Interface(f_tp1,f_tp2,f_ProvinceName,f_city,f_PName,str(f_detils),f_CreateTime)