# Scrapes the novel "吞噬星空之主宰" from 69shuba.com, one chapter per request,
# and writes the chapter text to novel.txt.
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup, Tag

# Request headers sent with every page fetch.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Referer": "https://www.69shuba.com/",
    "Accept-Language": "zh-CN,zh;q=0.9",
}


def fetch_and_parse(url):
    """Fetch one chapter page and extract its text and the next-chapter URL.

    Args:
        url: Absolute URL of the chapter page to download.

    Returns:
        A 3-tuple ``(ok, text, next_url)``:

        * on success: ``(True, chapter_text, next_url)`` where ``next_url`` is
          the "下一章" link resolved against ``url``;
        * on failure: ``(False, error_message, "")``.

    No exception escapes this function; request errors and any other
    exception are converted into the failure tuple.
    """
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Force GBK decoding of the body instead of the detected encoding.
        response.encoding = "gbk"
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Chapter body lives in <div class="txtnav">.
        chapter_element = soup.select_one("div.txtnav")
        if chapter_element is None:
            return False, "未找到元素'div.txtnav'", ""
        chapter_text = chapter_element.get_text(strip=False)

        # The next-chapter link is the <a> whose text is exactly "下一章".
        next_url_element = soup.find("a", string="下一章")
        if not isinstance(next_url_element, Tag):
            return False, "未找到元素'<a>下一章</a>'", ""

        href = next_url_element.get("href")
        if not isinstance(href, str) or not href:
            # An anchor without a usable href gives us nowhere to go next.
            return False, "未找到元素'<a>下一章</a>'", ""

        # Resolve relative links against the current page; absolute links
        # pass through unchanged.
        next_url_text = urljoin(url, href)

        return True, chapter_text, next_url_text

    except requests.exceptions.RequestException as e:
        return False, f"网络请求失败: {e}", ""
    except Exception as e:
        # Top-level boundary for the scraper: report instead of crashing.
        return False, f"发生错误: {e}", ""


if __name__ == "__main__":
    url = "https://www.69shuba.com/txt/47926/31767698"
    # When a chapter's next link equals this URL, that chapter is the last.
    end_url = "https://www.69shuba.com/book/47926.htm"

    # The context manager guarantees the file is flushed and closed even if
    # the loop is interrupted.
    with open("novel.txt", "w", encoding="utf-8") as output_file:
        while True:
            flag, text, url = fetch_and_parse(url)
            if not flag:
                print(text)
                break

            # Write before testing for the end so the final chapter is kept.
            output_file.write(text)
            if url == end_url:
                break

            # Pause between requests.
            time.sleep(1)