From a4bfd5972cdd7d27956fec1c8588a16fef575c29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9F=B3=E7=A5=9E?= <3162475700@qq.com>
Date: Sun, 21 Jul 2024 22:02:25 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=AB=97=E5=B0=9D=E8=AF=95=E8=87=AA?=
 =?UTF-8?q?=E9=83=A8=E7=BD=B2=E5=88=B0=E6=9C=8D=E5=8A=A1=E5=99=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 run.sh    |  2 ++
 server.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 run.sh
 create mode 100644 server.py

diff --git a/run.sh b/run.sh
new file mode 100644
index 0000000..4105e35
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+nohup python server.py >> grab.log 2>&1 &
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..f497f56
--- /dev/null
+++ b/server.py
@@ -0,0 +1,75 @@
+from flask import Flask, jsonify
+from flask_apscheduler import APScheduler
+from threading import Lock
+import logging
+import os
+
+from friend_circle_lite.get_info import fetch_and_process_data, sort_articles_by_time
+from friend_circle_lite.get_conf import load_config
+
+app = Flask(__name__)
+
+# Configure APScheduler
+class Config:
+    # Flask-APScheduler setting; per its docs this enables the scheduler's REST API
+    SCHEDULER_API_ENABLED = True
+
+app.config.from_object(Config())
+scheduler = APScheduler()
+scheduler.init_app(app)
+scheduler.start()
+
+# Configure logging
+log_file = "grab.log"
+logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# Shared state: the latest crawl result, read and written under data_lock
+articles_data = []
+data_lock = Lock()
+
+def fetch_articles():
+    """Crawl the configured blogs and publish the sorted result.
+
+    Reads ./conf.yaml on every call and returns early when
+    spider_settings.enable is falsy. A failed crawl is logged with its
+    traceback and the previously published articles_data is kept.
+    """
+    global articles_data
+    logging.info("开始抓取文章...")
+    spider_settings = load_config("./conf.yaml")["spider_settings"]
+    if not spider_settings["enable"]:
+        return
+
+    json_url = spider_settings['json_url']
+    article_count = spider_settings['article_count']
+    logging.info(f"正在从 {json_url} 中获取,每个博客获取 {article_count} 篇文章")
+    try:
+        result = fetch_and_process_data(json_url=json_url, count=article_count)
+        sorted_result = sort_articles_by_time(result)
+    except Exception as e:
+        # logging.exception records the traceback in addition to the message
+        logging.exception(f"抓取文章时出错: {e}")
+        return
+
+    # Publish the new list while holding the same lock the /all handler takes
+    with data_lock:
+        articles_data = sorted_result
+    logging.info("文章抓取成功")
+
+# Re-crawl the articles every four hours
+scheduler.add_job(id='Fetch_Articles_Job', func=fetch_articles, trigger='interval', hours=4)
+
+@app.route('/all', methods=['GET'])
+def get_all_articles():
+    """Return the most recently crawled articles as a JSON response."""
+    with data_lock:
+        return jsonify(articles_data)
+
+if __name__ == '__main__':
+    # Empty the log file left over from a previous run
+    if os.path.exists(log_file):
+        with open(log_file, 'w'):
+            pass
+
+    fetch_articles()  # Crawl once immediately at startup
+    app.run(port=1223)