From 26c9f00dea54b81427816681f12baf10d6ae9d87 Mon Sep 17 00:00:00 2001
From: sleepwithoutbz
Date: Mon, 26 May 2025 22:00:08 +0800
Subject: [PATCH] Init commit

---
 .gitignore |  2 ++
 first.py   | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 first.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6986862
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+NovelDown/
+*.txt
diff --git a/first.py b/first.py
new file mode 100644
index 0000000..98b4d4b
--- /dev/null
+++ b/first.py
@@ -0,0 +1,66 @@
+import time
+import requests
+from bs4 import BeautifulSoup, Tag
+
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
+    "Referer": "https://www.69shuba.com/",
+    "Accept-Language": "zh-CN,zh;q=0.9",
+}
+
+
+def fetch_and_parse(url):
+    try:
+        # Send the HTTP request
+        response = requests.get(url, headers=headers, timeout=10)
+        response.encoding = "gbk"  # Force GBK decoding (the site serves GBK-encoded pages)
+
+        # Raise an exception for HTTP error status codes
+        response.raise_for_status()
+
+        # Parse the HTML with BeautifulSoup
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        # Extract the chapter body text
+        chapter_element = soup.select_one("div.txtnav")
+        if chapter_element:
+            chapter_text = chapter_element.get_text(strip=False)
+        else:
+            return False, "Element 'div.txtnav' not found", ""
+
+        # Extract the next-chapter URL
+        next_url_element = soup.find("a", string="下一章")
+        if next_url_element and isinstance(next_url_element, Tag):
+            next_url_text = next_url_element.get("href")
+        else:
+            return False, "Next-chapter link '<a>下一章</a>' not found", ""
+
+        return True, chapter_text, next_url_text
+
+    except requests.exceptions.RequestException as e:
+        return False, f"Network request failed: {e}", ""
+    except Exception as e:
+        return False, f"Unexpected error: {e}", ""
+
+
+if __name__ == "__main__":
+    url = "https://www.69shuba.com/txt/47926/31767698"
+    end_url = "https://www.69shuba.com/book/47926.htm"
+
+    # A context manager guarantees the file is closed even if the loop exits early
+    with open("novel.txt", "w", encoding="utf-8") as output_file:
+        while True:
+            flag, text, url = fetch_and_parse(url)
+            if not flag:
+                print(text)
+                break
+            # The last chapter's "next" link points back to the book's index page
+            if url == end_url:
+                break
+            output_file.write(text)
+            time.sleep(1)  # Throttle requests to stay polite to the server
+
+    # print("=" * 40)
+    # print(f"{text}")
+    # print(f"{url}")
+    # print("=" * 40)
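
A follow-up note on the href extraction: the patch feeds .get("href") straight
back into requests.get(), which only works if the anchor carries an absolute
URL. If the site ever emits a relative path, a small resolver would make the
loop robust. A minimal sketch, assuming the helper name resolve_next_url and
the empty-string fallback (neither appears in the original code):

    from urllib.parse import urljoin

    def resolve_next_url(page_url, href):
        # Hypothetical helper: turn a possibly-relative href into an
        # absolute URL. urljoin leaves already-absolute URLs untouched,
        # so it is safe to apply unconditionally.
        if not href:
            return ""
        return urljoin(page_url, href)

    # Usage, replacing the bare .get("href") in fetch_and_parse:
    #     next_url_text = resolve_next_url(url, next_url_element.get("href"))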