def page_crawl(results):
    """Collect price, title and product code for the items on the current page.

    Uses the module-level ``driver`` to read the product list under the
    ``J_goodsList`` element, then walks the prices by index and writes the
    matching product code, title and price into one reused ``pd.Series``.

    Args:
        results: Not referenced in the portion of the function shown here.
            NOTE(review): presumably the collection each record is added
            to -- confirm against the rest of the function / its callers.

    Raises:
        ValueError: if a price element's text cannot be parsed as a float.
        AttributeError: if a product link's href contains no digits
            (``re.search`` returns ``None``).
        IndexError: if fewer titles or codes than prices were found.
    """
    # Holds the fields of a single product; dtype is given explicitly because
    # the record mixes str and float values (and an empty Series without a
    # dtype triggers a warning on some pandas versions).
    res = pd.Series(dtype=object)

    # NOTE(review): the find_elements_by_* helpers are deprecated in
    # Selenium 4 -- confirm which Selenium version this file targets.
    prices = driver.find_elements_by_xpath(
        '//*[@id="J_goodsList"]/ul/li/div/div[2]/strong/i')
    prices = [float(price.text) for price in prices]

    goods = driver.find_elements_by_xpath(
        '//*[@id="J_goodsList"]/ul/li/div/div[3]/a/em')
    goods = [good.text.replace('\n', '') for good in goods]

    # Find the product links on the current page.
    links = driver.find_elements_by_xpath(
        '//*[@id="J_goodsList"]/ul/li/div/div[3]/a')
    urls = [l.get_attribute('href') for l in links]
    # The first run of digits in each link is used as the product code.
    # Raw string so that \d is passed to the regex engine unchanged.
    codes = [re.search(r'\d+', url).group() for url in urls]

    # prices, goods and codes are indexed in lockstep; this assumes the three
    # XPath queries returned the same number of elements -- TODO confirm.
    for ii in range(len(prices)):
        res.name = codes[ii]  # ``name`` here is the name of the Series itself
        res['good_name'] = goods[ii]
        res['good_price'] = prices[ii]