Python XPath抓取小说《三国演义》
时间:2020-01-19 19:25:22
收藏:0
阅读:86
from lxml import etree
import requests

# Index page of the novel (chapter titles and links).
CONTENTS_URL = "https://www.kanunu8.com/files/old/2011/2447.html"
# Page of a single chapter.
CHAPTER_URL = "https://www.kanunu8.com/files/old/2011/2447/71775.html"

# The response body is decoded with this encoding before parsing.
PAGE_ENCODING = "gb2312"
# Seconds to wait for the server; without a timeout requests.get can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# XPath expressions are tied to the table layout of the target pages.
_CHAPTER_LINKS_XPATH = (
    '//table[9]//tr[1]//td[2]//table[4]//tr[1]//td[1]//table[1]//a'
)
_CHAPTER_TEXT_XPATH = '//table[5]//tr[1]//td[2]//text()'


def _fetch_tree(url):
    """Download *url* and return it parsed as an lxml HTML tree.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of running XPath queries against an error page.
    response.raise_for_status()
    response.encoding = PAGE_ENCODING
    return etree.HTML(response.text)


def getContents(url=CONTENTS_URL):
    """Get the chapter list and the chapter addresses.

    Prints every chapter title, then every chapter link, one per line.

    Args:
        url: Address of the index page; defaults to the page used by the
            original script.

    Returns:
        A ``(titles, urls)`` tuple holding the two lists that were printed.
    """
    bookdata = _fetch_tree(url)
    table_list = bookdata.xpath(_CHAPTER_LINKS_XPATH + '//text()')
    table_url = bookdata.xpath(_CHAPTER_LINKS_XPATH + '//@href')
    for title in table_list:
        print(title)
    for u in table_url:
        print(u)
    return table_list, table_url


def getContent(url=CHAPTER_URL):
    """Get the text of one chapter of the novel.

    Prints the list of text nodes matched by the chapter XPath.

    Args:
        url: Address of the chapter page; defaults to the page used by the
            original script.

    Returns:
        The list of text nodes that was printed.
    """
    bookdata = _fetch_tree(url)
    table_list = bookdata.xpath(_CHAPTER_TEXT_XPATH)
    print(table_list)
    return table_list


if __name__ == '__main__':
    getContents()
原文:https://www.cnblogs.com/dangzhengtao/p/12215219.html
评论(0)