from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Explicit wait used while the main page opens (10 s limit, polled every 0.5 s).
wait = WebDriverWait(driver, 10, 0.5)
# Switch into the matching iframe; elements inside it cannot be operated otherwise.
# A locator tuple is passed (not an already-resolved element) so the wait itself
# polls until the iframe exists instead of failing immediately when it is absent.
wait.until(
    EC.frame_to_be_available_and_switch_to_it(
        (By.XPATH, '//iframe[contains(@id,"x-URS-iframe")]')))
# Look for a page element that indicates a successful login.
# The element is located uniquely by attribute plus text content.
try:
    # wait.until either returns the (truthy) element or raises, so reaching
    # the next line means the element was found.
    element_recy_email = wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//span[@class="oz0" and contains(text(),"收 信")]')))
    result = True
except Exception:
    # Any failure while waiting (typically a timeout) is reported as a failed login.
    result = False
print("邮箱登陆成功" if result else "邮箱登录失败")
if __name__ == '__main__':
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for file objects passed to csv.reader.
    with open('cities.csv', 'r', encoding='utf-8', newline='') as csvfile:
        # Take the first column of every row; csv.reader yields [] for blank
        # lines, which would otherwise raise IndexError on row[0].
        cities = [row[0] for row in csv.reader(csvfile) if row]
    # Run the same per-city search once for each brand keyword.
    for brand in ('瑞幸咖啡', '星巴克咖啡'):
        loc = LocationSearch(brand)
        loc.get_cities_data(cities)
# Proceed only when the response reports code '100000'.
if resp['code'] == '100000':
    # Parse the HTML fragment carried in the response payload.
    tree = etree.HTML(resp['data']['html'])
    for comment_list in tree.xpath('//div[@node-type="comment_list"]'):
        # Avatar image addresses
        pic_url = comment_list.xpath('.//div[@class="WB_face W_fl"]/a/img/@src')
        # Commenter nicknames
        nick_name = comment_list.xpath('.//div[@class="WB_text"]/a[1]/text()')
        # Comment bodies, flattened to their full string value
        text_nodes = comment_list.xpath('.//div[@class="WB_text"]')
        text = [node.xpath('string(.)') for node in text_nodes]
        print(len(nick_name), len(text), len(pic_url))
        stripped = [body.strip() for body in text]
        write_comment(stripped, pic_url, nick_name)
其中写入文件的函数和下载图片的函数如下:
# Download an image
def download_pic(url, nick_name, timeout=10):
    """Download the image at *url* and save it as ``<nick_name>.jpg``.

    The file is written into the directory named by the module-level
    ``pic_file_path`` (defined elsewhere in this file), which is created
    if it does not exist. Nothing is written unless the response status
    is 200.

    Args:
        url: Address of the image; a falsy value makes the call a no-op.
        nick_name: Used as the base name of the saved file.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    if not url:
        return
    # makedirs(exist_ok=True) creates missing parents and does not fail if the
    # directory already exists, avoiding the check-then-create race.
    os.makedirs(pic_file_path, exist_ok=True)
    resp = requests.get(url, timeout=timeout)
    if resp.status_code == 200:
        # The name is interpolated into a path; replace path separators so it
        # cannot point outside (or into a missing subdirectory of) the target.
        safe_name = f'{nick_name}'.replace('/', '_').replace('\\', '_')
        with open(os.path.join(pic_file_path, f'{safe_name}.jpg'), 'wb') as f:
            f.write(resp.content)
# Write comment content
def write_comment(comment, pic_url, nick_name):
    """Append top-level comments to ``comment.txt`` and download their avatars.

    Entries that contain ':' or '回复', or that are empty, are skipped.
    For every kept entry the cleaned text is written on its own line and
    ``download_pic`` is called with the avatar URL and nickname found at
    the same index.

    Args:
        comment: Sequence of comment strings.
        pic_url: Avatar URLs, indexed in parallel with ``comment``.
        nick_name: Nicknames, indexed in parallel with ``comment``.
    """
    # The context manager guarantees the file is flushed and closed, even if
    # a download raises part-way through.
    with open('comment.txt', 'a', encoding='utf-8') as f:
        for index, i in enumerate(comment):
            # Skip replies to other comments and empty entries.
            if ':' in i or '回复' in i or i == '':
                continue
            # NOTE(review): the guard above already excludes ':', so this
            # replace(':', '') looks redundant — confirm whether a different
            # (e.g. full-width) colon was intended.
            w_comment = i.strip().replace(':', '').replace('\n', '')
            # Write the comment
            f.write(w_comment.replace('等人', '').replace('图片评论', '') + '\n')
            # Fetch the avatar; the parallel lists may be shorter than
            # `comment`, in which case there is nothing to download.
            if index < len(pic_url) and index < len(nick_name):
                download_pic(pic_url[index], nick_name[index])