运行如下代码后,XPath 查询返回的是一个空列表:
# Simulate a logged-in user on CSDN Academy (edu.csdn.net) and fetch the page
# listing the courses that user has purchased, so that the names of the
# purchased courses can be extracted from it.
#
# Steps:
#   1. Analyse the request URL of the purchased-courses page as seen by a
#      logged-in user.
#   2. Build a Request for that URL and send it to the site.
#   3. Read the response body (bytes) and decode it into a Python str.
#   4. Work on that str locally and store it on disk as a static HTML file.
import urllib.request as ur
import user_agent
import lxml.etree as le

# Request object describing the HTTP call: target URL plus the headers that
# make the server treat the call as coming from a logged-in browser session.
# NOTE(review): the cookie below embeds session tokens (UserToken, csrfToken,
# ...) directly in the source; consider loading it from a private location
# instead of publishing it.
# NOTE(review): the UserNick value contains non-ASCII characters; confirm the
# header can be encoded when the request is actually sent.
request = ur.Request(
    url='https://edu.csdn.net/mycollege',
    headers={
        'User-Agent': user_agent.get_user_agent_pc(),
        'cookie': 'uuid_tt_dd=10_18988990250-1599141745408-197704; dc_session_id=10_1599141745408.519592; UserName=weixin_42961082; UserInfo=6f39b8f164ec4bb093b2000edaa9fb30; UserToken=6f39b8f164ec4bb093b2000edaa9fb30; UserNick=小玉姐姐脸小的你揍并; AU=FD6; UN=weixin_42961082; BT=1599141825209; p_uid=U010000; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_18988990250-1599141745408-197704!5744*1*weixin_42961082; Hm_up_6bcd52f51e9b3dce32bec4a3997715ac={"uid_":{"value":"weixin_42961082","scope":1},"islogin":{"value":"1","scope":1},"isonline":{"value":"1","scope":1},"isvip":{"value":"0","scope":1}}; Hm_lvt_e5ef47b9f471504959267fd614d579cd=1601435011; __gads=ID=467b88bb4ed232ab:T=1601503151:S=ALNI_Ma6iZXwh9VVy0ezgcoZAn7eRZXW3A; searchHistoryArray=%5B%22python%22%2C%22java%22%2C%22java_%22%5D; dc_sid=c5d2789026b870acf232a353fdc27844; c_first_ref=default; c_first_page=https://blog.csdn.net/; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1600171828,1601303543,1601597928,1601708310; c_segment=8; log_Id_click=15; log_Id_pv=20; log_Id_view=82; csrfToken=xCwgsmTUyL9c-Kco1Hp3HiKF; announcement=%7B%22isLogin%22%3Atrue%2C%22announcementUrl%22%3A%22https%3A%2F%2Flive.csdn.net%2Froom%2Fyzkskaka%2F5n5O4pRs%3Futm_source%3D1598583200%22%2C%22announcementCount%22%3A0%7D; TY_SESSION_ID=9619dba5-0fb6-4f20-b5b5-3d1233093ef4; c_ref=https://edu.csdn.net/; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1601812723; c_page_id=default; dc_tos=qhoelv',
    },
)

# urlopen() yields a response object whose read() returns bytes; decoding
# those bytes as UTF-8 gives a str, so `response` is the page's HTML text.
# The `with` block closes the HTTP response once the body has been read.
with ur.urlopen(request) as http_response:
    response = http_response.read().decode('utf-8')
# print(response)
with open('s.html', 'w',encoding='utf-8') as f:
    f.write(response)  # must be str, not bytes
html_x = le.HTML('s.html')
# print(html_x)
html_x_s = html_x.xpath('//li[@class="item_box"]//h1/a/text()')
print(html_x_s)

运行结果:

D:\Python3.8.5\python.exe D:/pythonProject6/urllib_.py
[]
Process finished with exit code 0

原因:le.HTML() 接收的参数是 HTML 文本内容,而不是文件名。le.HTML('s.html') 解析的只是字符串 's.html' 本身,并没有读取文件内容,所以 XPath 匹配不到任何节点。解决办法是修改传给 le.HTML() 的参数,也就是修改进行 XPath 操作的对象 html_x 的来源:

原先: html_x = le.HTML('s.html')
更改: html_x = le.HTML(response)

重新运行,能够获取到课程名称列表:
D:\Python3.8.5\python.exe D:/pythonProject6/urllib_.py
['Python 闯关特训营', '职场微技能 Word中的锦囊妙计', 'Word从小白变小能手 ']
Process finished with exit code 0