🤔将获取rss的函数改为多线程(dev)测试中

This commit is contained in:
2024-11-23 00:43:43 +08:00
parent 55d88561b2
commit aa14d1b706
4 changed files with 95 additions and 43 deletions

40
run.py
View File

@@ -4,24 +4,30 @@ from friend_circle_lite.get_conf import load_config
from rss_subscribe.push_article_update import get_latest_articles_from_link, extract_emails_from_issues
from push_rss_update.send_email import send_emails
import logging
import json
import sys
import os
# 日志记录
logging.basicConfig(level=logging.INFO, format='😋%(levelname)s: %(message)s')
# 爬虫部分内容
config = load_config("./conf.yaml")
if config["spider_settings"]["enable"]:
print("爬虫已启用")
logging.info("爬虫已启用")
json_url = config['spider_settings']['json_url']
article_count = config['spider_settings']['article_count']
specific_RSS = config['specific_RSS']
print("正在从 {json_url} 中获取,每个博客获取 {article_count} 篇文章".format(json_url=json_url, article_count=article_count))
logging.info("正在从 {json_url} 中获取,每个博客获取 {article_count} 篇文章".format(json_url=json_url, article_count=article_count))
result, lost_friends = fetch_and_process_data(json_url=json_url, specific_RSS=specific_RSS, count=article_count)
if config["spider_settings"]["merge_result"]["enable"]:
marge_json_url = config['spider_settings']["merge_result"]['merge_json_url']
print("合并数据功能开启,从 {marge_json_url} 中获取境外数据并合并".format(marge_json_url=marge_json_url + "/all.json"))
logging.info("合并数据功能开启,从 {marge_json_url} 中获取境外数据并合并".format(marge_json_url=marge_json_url + "/all.json"))
result = marge_data_from_json_url(result, marge_json_url + "/all.json")
lost_friends = marge_errors_from_json_url(lost_friends, marge_json_url + "/errors.json")
logging.info("数据获取完毕,目前共有 {count} 位好友的动态,正在处理数据".format(count=len(result.get("article_data", []))))
result = deal_with_large_data(result)
with open("all.json", "w", encoding="utf-8") as f:
@@ -30,20 +36,22 @@ if config["spider_settings"]["enable"]:
json.dump(lost_friends, f, ensure_ascii=False, indent=2)
if config["email_push"]["enable"] or config["rss_subscribe"]["enable"]:
print("获取smtp配置信息")
logging.info("推送功能已启用,正在准备推送,获取配置信息")
email_settings = config["smtp"]
email = email_settings["email"]
server = email_settings["server"]
port = email_settings["port"]
use_tls = email_settings["use_tls"]
password = os.getenv("SMTP_PWD")
print("密码检测是否存在:", password[:2], "****", password[-2:])
logging.info("SMTP 服务器信息:{server}:{port}".format(server=server, port=port))
logging.info("密码:{pwd}************".format(pwd=password[:3]))
if config["email_push"]["enable"]:
print("邮件推送已启用")
logging.info("邮件推送已启用")
logging.info("抱歉,目前暂未实现功能")
if config["rss_subscribe"]["enable"]:
print("RSS通过issue订阅已启用")
logging.info("RSS 订阅推送已启用")
# 获取并强制转换为字符串
# 尝试从环境变量获取 FCL_REPO
fcl_repo = os.getenv('FCL_REPO')
@@ -51,13 +59,13 @@ if config["rss_subscribe"]["enable"]:
# 提取 github_username 和 github_repo
if fcl_repo:
github_username, github_repo = fcl_repo.split('/')
print(f"从环境变量获取到的 GitHub Username: {github_username}")
print(f"从环境变量获取到的 GitHub Repo: {github_repo}")
else:
github_username = str(config["rss_subscribe"]["github_username"]).strip()
github_repo = str(config["rss_subscribe"]["github_repo"]).strip()
print(f"从配置文件获取到的 GitHub Username: {github_username}")
print(f"从配置文件获取到的 GitHub Repo: {github_repo}")
# 输出 github_username 和 github_repo
logging.info("github_username: {github_username}".format(github_username=github_username))
logging.info("github_repo: {github_repo}".format(github_repo=github_repo))
your_blog_url = config["rss_subscribe"]["your_blog_url"]
email_template = config["rss_subscribe"]["email_template"]
@@ -69,18 +77,18 @@ if config["rss_subscribe"]["enable"]:
count=5,
last_articles_path="./rss_subscribe/last_articles.json"
)
print("最新文章为:", latest_articles)
logging.info("获取到的最新文章为:{latest_articles}".format(latest_articles=latest_articles))
if latest_articles == None:
print("没有新文章")
logging.info("无未进行推送的新文章")
else:
github_api_url = "https://api.github.com/repos/" + github_username + "/" + github_repo + "/issues" + "?state=closed&label=subscribed&per_page=200"
print("正在从 {github_api_url} 中获取订阅信息".format(github_api_url=github_api_url))
logging.info("正在从 {github_api_url} 中获取订阅信息".format(github_api_url=github_api_url))
email_list = extract_emails_from_issues(github_api_url)
if email_list == None:
print("无邮箱列表")
logging.info("无邮箱列表,请检查您的订阅列表是否有订阅者或订阅格式是否正确")
sys.exit(0)
else:
print("获取到的邮箱列表为:", email_list)
logging.info("获取到的邮箱列表为:{email_list}".format(email_list=email_list))
# 循环latest_articles发送邮件
for article in latest_articles:
template_data = {