"""Download the novel "吞噬星空之虫群主宰" from 69shuba into ``novel.txt``.

Reconstructed from a whitespace-collapsed ``git format-patch`` e-mail:

* commit 5ebcb6a933ab51f0f12689b184f8c46fd71305cb
* author sleepwithoutbz, Mon, 2 Jun 2025 13:37:21 +0800
* subject "feat: 第二本书的下载脚本" (download script for the second book)
* the commit creates ``second.py`` (this module) and adds ``Novels/`` to
  ``.gitignore``

The script starts at a fixed chapter URL, follows each page's "下一章"
(next chapter) link, and appends every chapter's text to ``novel.txt``.
"""

import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup, Tag

# Request headers sent with every chapter request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Referer": "https://www.69shuba.com/",
    "Accept-Language": "zh-CN,zh;q=0.9",
}


def fetch_and_parse(url: str):
    """Fetch one chapter page and extract its text and next-chapter link.

    Args:
        url: Absolute URL of the chapter page to download.

    Returns:
        A 3-tuple ``(ok, text, next_url)``:

        * on success: ``(True, chapter_text, next_chapter_url)``, where
          ``next_chapter_url`` is resolved against ``url`` so relative links
          become absolute;
        * on failure: ``(False, error_message, "")``.

        The function does not raise; every exception is converted into the
        failure tuple.
    """
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        # Decode the body as GBK instead of relying on the detected charset.
        response.encoding = "gbk"

        soup = BeautifulSoup(response.text, "html.parser")

        # Chapter body lives in <div class="txtnav">.
        chapter_element = soup.select_one("div.txtnav")
        if chapter_element is None:
            return False, "未找到元素'div.txtnav'", ""
        chapter_text = chapter_element.get_text()

        # The link whose text is exactly "下一章" points to the next page.
        next_link = soup.find("a", string="下一章")
        if not isinstance(next_link, Tag):
            return False, "未找到元素'<a>下一章</a>'", ""

        href = next_link.get("href")
        if not isinstance(href, str) or not href:
            # An anchor without a usable href cannot be followed.
            return False, "元素'<a>下一章</a>'缺少href属性", ""

        # Absolute hrefs pass through unchanged; relative ones are resolved.
        return True, chapter_text, urljoin(url, href)

    except requests.exceptions.RequestException as e:
        return False, f"网络请求失败: {e}", ""
    except Exception as e:
        # Boundary handler: report anything unexpected through the tuple so
        # the caller's loop can stop cleanly.
        return False, f"发生错误: {e}", ""


def main():
    """Walk the chapter chain from the first chapter and save it to novel.txt."""
    url = "https://www.69shuba.com/txt/57193/37129923"
    # A "next chapter" link equal to this URL marks the end of the crawl
    # (it appears to be the book's index page).
    end_url = "https://www.69shuba.com/book/57193.htm"

    # The context manager guarantees the file is flushed and closed even if
    # the loop exits early or an exception escapes.
    with open("novel.txt", "w", encoding="utf-8") as output_file:
        while True:
            ok, text, url = fetch_and_parse(url)
            if not ok:
                print(text)
                break
            # Write before testing for the end marker; otherwise the text of
            # the final chapter would be fetched and then thrown away.
            output_file.write(text)
            if url == end_url:
                break
            # Pause between requests to avoid hammering the server.
            time.sleep(1)


if __name__ == "__main__":
    main()