使用教程
源码方式 1). 打开Main.py,运行程序。 2). 按照提示输入存储路径和漫画名称。路径用"/"、"\\"、"\"做分隔符都行,代码中都会将"\"替换成"/"。
path = input("请输入存储路径:\n").replace("\\","/")
3). 选择要下载的漫画,然后等待下载就行。
4). 下载结果显示,程序是从最新章节开始下载的,下载完成程序会自动关闭。
可执行文件方式 1). 打开Main.exe,按照提示操作。
注意事项
本项目仅用于学习与交流,请勿用于其他用途。会出现单个章节下载不全的情况,这是下载源的问题。可执行文件在win7 x64和win10 x64上测试可以正常运行。项目没有使用多线程,所以下载速度较慢。
下载地址
源码下载地址 1). gitee: https://gitee.com/zy888888/spider 2). 文末。可执行文件下载地址 1). 百度云链接:https://pan.baidu.com/s/1rqTcuPrWem__Io0_pstjbQ 提取码:mp0u 2). 在线下载: https://zhouyun.site/Main.exe
import os
import re

import lxml.html
import requests
class Api:
    """URL and path templates shared by the scraper.

    ``FILE_PATH`` is overwritten at startup by ``__main__`` with the
    user-chosen storage directory; the default here must stay compatible
    with ``Cartoon.download``, which formats it with exactly two values
    (book name, chapter name). The previous default carried a third
    placeholder and raised IndexError when used unpatched.
    """

    # Search endpoint; {0} is the url-encoded comic name.
    SEARCH_URL = "https://m.wuxiamh.com/search/?keywords={0}"
    # Storage template: {0} = book name, {1} = chapter name.
    FILE_PATH = "{0}/{1}"
class Book:
    """One comic search result.

    Identifying fields (name, author, detail-page URL) are supplied at
    construction; metadata (``type``, ``date``, ``description``) is assigned
    by callers afterwards. All fields are plain attributes — the previous
    passthrough property boilerplate added nothing, and its unset backing
    fields made ``__str__`` raise AttributeError on a fresh instance.
    """

    def __init__(self, book_name, author, url):
        self.book_name = book_name
        self.author = author
        self.url = url
        # Filled in by callers after construction; default to empty strings
        # so __str__ is always safe to call.
        self.type = ""
        self.date = ""
        self.description = ""

    def __str__(self) -> str:
        return "{书名: %s , 作者: %s , 链接: %s, 类型: %s, 日期: %s, 简介: %s}" % (
            self.book_name,
            self.author,
            self.url,
            self.type,
            self.date,
            self.description,
        )
class Cartoon:
    """Scraper for m.wuxiamh.com: search a comic, list chapters, download images."""

    def get_html(self, url):
        """
        Fetch the HTML source of a page.

        :param url: URL to fetch; must be a non-empty string
        :return: page HTML text, or None when the server does not answer 200
                 (preserved from the original contract, but now reported)
        :raises Exception: when url is None or empty
        """
        if url is None or url == "":
            raise Exception("请输入正确的链接")
        # timeout prevents the program from hanging forever on a dead server.
        response = requests.get(url, timeout=60)
        if response.status_code == 200:
            return response.text
        print("请求失败, 状态码: {0}, 链接: {1}".format(response.status_code, url))
        return None

    def search(self, keywords):
        """
        Search the site for a comic.

        :param keywords: comic name to search for
        :return: HTML of the search-result page
        """
        return self.get_html(Api.SEARCH_URL.format(keywords))

    def search_result(self, html):
        """
        Parse the search-result page and let the user pick one result.

        :param html: search-result page HTML
        :return: the chosen Book, or None after 10 invalid inputs
        :raises Exception: when the search matched nothing
        """
        selector = lxml.html.fromstring(html)
        info = selector.xpath("//div[@class='Sub_H2 classify']/span/text()")
        result_num = int(re.findall(r"搜索结果,共(\d*)条", info[0], flags=0)[0])
        print("搜索结果,共{0}条".format(result_num))
        if result_num == 0:
            raise Exception("搜索结果为0,请重新搜索")
        update_list = selector.xpath(
            "//div[@id='update_list']/div/div/div[@class='itemTxt']"
        )
        book_list = []
        for item in update_list:
            book = Book(
                item.xpath("a/text()")[0],
                item.xpath("p[1]/text()")[0],
                item.xpath("a/@href")[0],
            )
            book.type = item.xpath("p[2]/span[2]/text()")[0]
            book.date = item.xpath("p[3]/span[2]/text()")[0]
            book.description = ""
            book_list.append(book)
        # The page can list fewer entries than the reported total, so only
        # offer what was actually parsed (the original indexed by result_num
        # and could raise IndexError here).
        for index, book in enumerate(book_list):
            print("结果", index + 1, ": ", book)
        for _ in range(10):
            try:
                choice = int(input("请选择结果:\n"))
            except ValueError:
                # Non-numeric input used to crash the program.
                print("请输入正确的结果")
                continue
            if 0 < choice <= len(book_list):
                return book_list[choice - 1]
            print("请输入正确的结果")

    def chapeter(self, book):
        """
        Collect every chapter of a book. (Method name keeps the original
        typo for caller compatibility.)

        :param book: Book whose detail page is fetched
        :return: dict of chapter name -> verified first-image URL
        """
        selector = lxml.html.fromstring(self.get_html(book.url))
        book.description = selector.xpath(
            "//div[@class='comic-view clearfix']/p/text()"
        )[0]
        print(book)
        chapter_dict = {}
        for item in selector.xpath("//div[@class='list']/ul/li"):
            link = "https://m.wuxiamh.com" + item.xpath("a/@href")[0]
            chapter_dict[item.xpath("a/span/text()")[0]] = link
        return self.filter(book.book_name, chapter_dict)

    def filter(self, book_name, chapter_dict):
        """
        Drop chapters whose first image is missing or unreachable.

        :param book_name: comic name (used in the copyright notice)
        :param chapter_dict: chapter name -> chapter page URL
        :return: chapter name -> first-image URL, verified reachable
        :raises Exception: when the site removed the comic for copyright reasons
        """
        new_chapter_dict = {}
        for chapter, page_url in chapter_dict.items():
            selector = lxml.html.fromstring(self.get_html(page_url))
            mip_img = selector.xpath(
                "//div[@class='UnderPage']/div[@class='UnderPage']/mip-link/mip-img[1]/@src"
            )
            if not mip_img:
                # No image element means the comic was taken down.
                print("尊敬的各位喜爱{0}漫画的用户,本站应《{0}》版权方要求现已屏蔽删除本漫画所有章节链接,只保留作品文字信息简介以及章节目录,请喜欢{0}的漫友购买杂志或到官网付费欣赏。为此给各位漫友带来的不便,敬请谅解!".format(book_name))
                raise Exception("没有版权,无法下载")
            webp = mip_img[0]
            response = requests.get(webp, timeout=60)
            if response.status_code == 200:
                new_chapter_dict[chapter] = webp
            else:
                print("异常链接: " + webp)
        return new_chapter_dict

    def download(self, book_name, chapter_dict):
        """
        Download every chapter into FILE_PATH-based directories.

        Images are probed sequentially as <base><i>.webp until the server
        answers 404, which marks the end of the chapter.

        :param book_name: comic name (first FILE_PATH placeholder)
        :param chapter_dict: chapter name -> first-image URL
        :return: None
        """
        for chapter, first_image in chapter_dict.items():
            file_name = Api.FILE_PATH.format(book_name, chapter)
            # exist_ok avoids the exists()/makedirs() race of the original.
            os.makedirs(file_name, exist_ok=True)
            # Strip the trailing "0.webp" to get the image base URL.
            base_url = first_image[0:-6]
            i = 0
            while True:
                image_url = base_url + "{0}.webp".format(i)
                print("download url: " + image_url)
                response = requests.get(image_url, timeout=60)
                if response.status_code == 200:
                    with open(file_name + "/{0}.jpg".format(i), "wb") as f:
                        f.write(response.content)
                elif response.status_code == 404:
                    break
                else:
                    # Other statuses were skipped silently before, hiding
                    # persistent server errors; report and keep probing.
                    print("异常状态码: {0}, 链接: {1}".format(response.status_code, image_url))
                i += 1
if __name__ == "__main__":
    # Interactive entry point: ask for a storage directory and a comic name,
    # then search, let the user pick a result, and download every chapter.
    spider = Cartoon()
    store_dir = input("请输入存储路径:\n").replace("\\", "/")
    Api.FILE_PATH = store_dir + "/{0}/{1}"
    query = input("请输入要搜索的漫画名称:\n")
    try:
        result_page = spider.search(query)
        chosen = spider.search_result(result_page)
        chapters = spider.chapeter(chosen)
        spider.download(chosen.book_name, chapters)
    except Exception as err:
        # Top-level boundary: report any failure instead of a traceback.
        print(err)