From 02bbaaa78856559e3bd95486fe2daba1a7203d7f Mon Sep 17 00:00:00 2001 From: zhouzhongping Date: Fri, 3 Nov 2023 16:58:54 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84=EF=BC=9A=E5=9F=BA=E7=A1=80?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 44 ++++---- src/__init__.py | 0 src/analyzer.py | 145 ++++++++++++++------------ src/const.py | 49 +++++++++ src/generator.py | 199 ++++++++++++++++++++---------------- src/models.py | 80 +++++++++++++++ src/scraper.py | 221 +++++++++++++++++++++++++--------------- src/tools.py | 39 ++++++- templates/painting.html | 34 ++++--- 9 files changed, 536 insertions(+), 275 deletions(-) delete mode 100644 src/__init__.py create mode 100644 src/const.py create mode 100644 src/models.py diff --git a/main.py b/main.py index f8918fc..853935f 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,10 @@ from flask import Flask, render_template, redirect, url_for from loguru import logger -from src.config import Config -from src.generator import build_data +import const +import models +import tools +from generator import Generator app = Flask(__name__) logger.add("endofyear.log") @@ -10,28 +12,36 @@ logger.add("endofyear.log") @app.route('/') def home(): - # 默认主题 painting + # 重定向 painting return redirect(url_for('painting')) @app.route('/painting') def painting(): - if Config("config.ini").web_status: - # web 服务 - # 如果数据存在,直接返回 - if blog_data := Config("config.ini").blog_data: - return render_template('painting.html', data=blog_data, web_status=1) + # 站点数据 + site = models.Site( + service=const.SITE_SERVICE, + title=const.SITE_NAME + ).to_dict() - # 如果数据不存在,需要生成,并写入配置 - return render_template('painting.html', data=build_data(), web_status=1) - else: - # Github 静态 - # 数据需要生成,并写入静态文件 - html_data = render_template('painting.html', data=build_data(), web_status=0) - with open("static/index.html", "w") as f: - f.write(html_data) + # 自定义数据 + custom = models.Custom( + yiyan=tools.get_yiyan() + ).to_dict() - return 'OK' + # 初始化数据生成器 + generator = Generator("https://blog.7wate.com/rss.xml") + + # 渲染模板 + return render_template('painting.html', + site=site, + blog=generator.blog(), + special_post=generator.special_post(), + sentiment_post=generator.sentiment_post(), + long_post=generator.long_post(), + short_post=generator.short_post(), + custom=custom + ) if __name__ == '__main__': diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/analyzer.py b/src/analyzer.py index 3006d34..8c501e6 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -1,5 +1,3 @@ -from typing import Any - import jieba.analyse import pytz from dateutil.parser import parse @@ -7,31 +5,52 @@ from loguru import logger from lunardate import LunarDate from snownlp import SnowNLP +import const + # 计算文本内容情感分数 -def analyze_sentiment(text): +def analyze_sentiment(keys): """ - 博客文章情感分计算(有点问题,酌情使用) - :param text:文章文本 + 博客文章情感分计算 + + :param keys:文章关键字 :return:分数 """ - s = SnowNLP(text) - return round(s.sentiments * 100) + score_lists = [SnowNLP(key).sentiments for key in keys] + all_score = sum(score_lists) + + if len(score_lists) > 10: + max_score = max(score_lists) + min_score = min(score_lists) + average_score = (all_score - max_score - min_score) / (len(keys) - 2) + return int(average_score * 1000) + elif 10 > len(score_lists) > 6: + average_score = all_score / len(keys) + return int(average_score * 900) + elif 6 > len(score_lists) > 3: + average_score = all_score / len(keys) + return int(average_score * 800) + elif 3 > len(score_lists) > 0: + average_score = all_score / len(keys) + return int(average_score * 500) + else: + return 0 -def classify_and_extract_keywords(text: str, topK: int, stopwords: str, - tech_terms_file: str) -> tuple[None, list[Any]] | tuple[int, Any]: +def extract_keywords(text, + topK, + stopwords): """ - 博客文章关键字提取 + 文章关键字提取 :param text:文章文本 - :param topK:关键字数量,建议20个 - :param stopwords:停词文本,去掉无意义词组 - :param tech_terms_file:专业词语,区分文章类目 + :param topK:关键字数量 + :param stopwords:停词文本(去掉无意义词组) :return: """ try: jieba.analyse.set_stop_words(stopwords) keywords = jieba.analyse.extract_tags(text, topK=topK) + return keywords except ValueError as e: logger.error(f"关键词提取出错:{e}") return None, [] @@ -39,72 +58,52 @@ def classify_and_extract_keywords(text: str, topK: int, stopwords: str, logger.error(f"关键词提取出错:{e}") return None, [] + +def check_category(tech_terms_file, keywords): + """ + 文章分类判断 + :param keywords: 文章关键词 + :param tech_terms_file: 分类词典文件 + :return: 分类常量 + """ with open(tech_terms_file, 'r', encoding='utf-8') as f: - tech_terms_set = {line.strip().lower() for line in f} + tech_terms_set = {line.strip().lower() for line in f} # 读取分类词典文件,将其转化为小写并创建集合 for keyword in keywords: - if keyword.lower() in tech_terms_set: - return 1, keywords + if keyword.lower() in tech_terms_set: # 判断关键词是否在分类词典集合中 + return const.BLOG_POST_CATEGORY_TECH # 若关键词存在,则返回技术类分类常量 - return 2, keywords + return const.BLOG_POST_CATEGORY_LIFE # 若关键词不存在,则返回生活类分类常量 -def calculate_weight(time_str: str): +def calculate_weight(time_str: str) -> int: """ - 博客文章特殊日期权重分数计算。 - - 传统节假日 +10 - - 节假日 +7 - - 凌晨 +5 - - 早上 +4 - - 下午 +3 - - 晚上 +2 + 计算文章特殊日期的权重分数。 + - 传统节假日 +10 + - 节假日 +7 + - 凌晨 +5 + - 早上 +4 + - 下午 +3 + - 晚上 +2 + :param time_str: 时间字符串 - :return:总分数,特殊日期 + :return: 总分数(整数) """ dt = parse(time_str) - dt = dt.astimezone(pytz.timezone('Asia/Shanghai')) + dt = dt.astimezone(pytz.timezone(const.TIME_ZONE)) weight = 0 - date_str = "" - - # 农历节日权重计算 - LUNAR_HOLIDAYS = { - (1, 1): '春节', - (1, 15): '元宵节', - (2, 2): '龙抬头', - (5, 5): '端午节', - (7, 7): '七夕节', - (7, 15): '中元节', - (8, 15): '中秋节', - (9, 9): '重阳节', - (12, 8): '腊八节', - (12, 23): '小年', - (12, 30): '除夕' - } + # 计算农历节假日的权重 lunar_date = LunarDate.fromSolarDate(dt.year, dt.month, dt.day) - if (lunar_date.month, lunar_date.day) in LUNAR_HOLIDAYS: + if (lunar_date.month, lunar_date.day) in const.LUNAR_HOLIDAYS: weight += 10 - date_str = LUNAR_HOLIDAYS[(lunar_date.month, lunar_date.day)] - # 公历节日权重计算 - SOLAR_HOLIDAYS = { - (1, 1): '元旦', - (2, 14): '情人节', - (3, 8): '国际妇女节', - (4, 4): '清明节', - (5, 1): '国际劳动节', - (10, 1): '国庆节', - (12, 13): '南京大屠杀纪念日', - (9, 18): '九一八事变纪念日', - (12, 7): '南京保卫战胜利纪念日', - (8, 15): '抗日战争胜利纪念日' - } - - if (dt.month, dt.day) in SOLAR_HOLIDAYS: + # 计算公历节假日的权重 + if (dt.month, dt.day) in const.SOLAR_HOLIDAYS: weight += 7 - date_str = SOLAR_HOLIDAYS[(dt.month, dt.day)] + # 计算时间节点的权重 if 22 <= dt.hour or dt.hour < 7: weight += 5 elif 7 <= dt.hour < 12: @@ -116,7 +115,25 @@ def calculate_weight(time_str: str): else: weight += 0 - if not date_str: - date_str = f"{dt.month}月{dt.day}日" + return weight - return weight, date_str + +def special_date_calculation(time_str): + """ + 特殊日期计算。 + :param time_str: 时间字符串 + :return:总分数 + """ + dt = parse(time_str) + dt = dt.astimezone(pytz.timezone(const.TIME_ZONE)) + + # 农历节假日计算 + lunar_date = LunarDate.fromSolarDate(dt.year, dt.month, dt.day) + if (lunar_date.month, lunar_date.day) in const.LUNAR_HOLIDAYS: + return const.LUNAR_HOLIDAYS[(lunar_date.month, lunar_date.day)] + + # 公历节假日计算 + if (dt.month, dt.day) in const.SOLAR_HOLIDAYS: + return const.SOLAR_HOLIDAYS[(dt.month, dt.day)] + + return f"{dt.month}月{dt.day}日" diff --git a/src/const.py b/src/const.py new file mode 100644 index 0000000..c1fb727 --- /dev/null +++ b/src/const.py @@ -0,0 +1,49 @@ +# 时区 +TIME_ZONE = "Asia/Shanghai" + +# 时间格式 +FORMAT_TIME = "%Y-%m-%d %H:%M:%S" + +# 站点服务模式 +SITE_SERVICE = 1 + +# 站点标题 +SITE_NAME = "EndOfYear" + +# 博客文章分类-生活 +BLOG_POST_CATEGORY_LIFE = 1 + +# 博客文章分类-技术 +BLOG_POST_CATEGORY_TECH = 2 + +# 博客文章关键字数量 +BLOG_MAX_KEYS = 7 + +# 农历节假日 +LUNAR_HOLIDAYS = { + (1, 1): '春节', + (1, 15): '元宵节', + (2, 2): '龙抬头', + (5, 5): '端午节', + (7, 7): '七夕节', + (7, 15): '中元节', + (8, 15): '中秋节', + (9, 9): '重阳节', + (12, 8): '腊八节', + (12, 23): '小年', + (12, 30): '除夕' +} + +# 公历节假日 +SOLAR_HOLIDAYS = { + (1, 1): '元旦', + (2, 14): '情人节', + (3, 8): '妇女节', + (4, 4): '清明节', + (5, 1): '劳动节', + (10, 1): '国庆节', + (12, 13): '南京大屠杀纪念日', + (9, 18): '九一八事变纪念日', + (12, 7): '南京保卫战胜利纪念日', + (8, 15): '抗日战争胜利纪念日' +} diff --git a/src/generator.py b/src/generator.py index 4fed53b..3f36c51 100644 --- a/src/generator.py +++ b/src/generator.py @@ -1,97 +1,116 @@ -from collections import Counter - from loguru import logger -from .analyzer import analyze_sentiment, calculate_weight, classify_and_extract_keywords -from .config import Config -from .scraper import Blog -from .tools import get_yiyan +import models +import scraper -def build_data(): - """ - 目前只有一个主题,构建数据部分后期会再进行重构拆分 - :return: 网页渲染数据 - """ - # 读取配置 - config = Config("config.ini") +class Generator: - # 创建博客对象 - try: - my_blog = Blog(config.rss_url) - except Exception as e: - logger.error(f"Feed 无法创建博客对象: {str(e)}") + def __init__(self, rss): + """ + 初始化Generator类 + :param rss: RSS链接 + """ + try: + self._my_blog = scraper.Blog(rss) + logger.debug(self._my_blog) + for i, post in enumerate(self._my_blog.post_lists, 1): + logger.info(f"Post #{i}:") + logger.info(post) + except Exception as e: + logger.error(f"Generator 无法创建 Blog 对象: {str(e)}") + + def blog(self): + """ + 获取博客信息 + :return: Blog字典 + """ + return models.Blog( + name=self._my_blog.title, + link=self._my_blog.link, + life=self._my_blog.life, + article_count=self._my_blog.article_count, + article_word_count=self._my_blog.article_word_count, + top_post_keys=self._my_blog.keys, + category=self._my_blog.category + ).to_dict() + + def special_post(self): + """ + 获取特殊日期的文章 + :return: Post字典 + """ + max_item_special_date = self._get_post_with_max("special_date_score") + return models.Post( + title=max_item_special_date.title, + content=max_item_special_date.content, + keys=max_item_special_date.keys, + time=max_item_special_date.time, + date=max_item_special_date.date + ).to_dict() + + def sentiment_post(self): + """ + 获取情感最优文章 + :return: Post字典 + """ + max_item_sentiment = self._get_post_with_max("sentiment_score") + return models.Post( + title=max_item_sentiment.title, + content=max_item_sentiment.content, + keys=max_item_sentiment.keys, + time=max_item_sentiment.time, + date=max_item_sentiment.date + ).to_dict() + + def long_post(self): + """ + 获取最长文章数据 + :return: Post字典 + """ + max_item_long = self._get_post_with_max("word_count") + return models.Post( + title=max_item_long.title, + content=max_item_long.content, + keys=max_item_long.keys, + time=max_item_long.time, + date=max_item_long.date, + ).to_dict() + + def short_post(self): + """ + 获取最短文章数据 + :return: Post字典 + """ + max_item_short = self._get_post_with_min("word_count") + return models.Post( + title=max_item_short.title, + content=max_item_short.content, + keys=max_item_short.keys, + time=max_item_short.time, + date=max_item_short.date, + ).to_dict() + + def _get_post_with_max(self, score_attr): + """ + 获取具有最大属性值的文章 + :param score_attr: 属性 + :return: + """ + max_score = max(getattr(post, score_attr) for post in self._my_blog.post_lists) + max_posts = [post for post in self._my_blog.post_lists if getattr(post, score_attr) == max_score] + if max_posts: + return max_posts[0] return None - logger.debug(my_blog) - - # 构建博客基本数据 - data = { - "blog_name": my_blog.title, - "blog_link": my_blog.link, - "blog_article_count": my_blog.article_count, - "blog_article_word_count": my_blog.article_word_count, - "blog_end_yiyan": get_yiyan() - } - - if my_blog.life is None: - data.update({ - "blog_life": 0 - }) - else: - data.update({ - "blog_life_year": my_blog.life // 365, - "blog_life_day": my_blog.life % 365, - }) - - # 博客文章处理 - for i, post in enumerate(my_blog.post_lists(), 1): - # 情感分 - post.score = analyze_sentiment(post.content) - # 分类, 关键字 - post.category, post.keys = classify_and_extract_keywords(text=post.content, topK=21, - stopwords='data/stop_words.txt', - tech_terms_file='data/tech_terms.txt') - # 权重, 日子计算 - post.weight, post.date = calculate_weight(post.time) - - logger.info(f"Post #{i}:") - logger.info(post) - - # 博客文章权重计算 - weights = [post.weight for post in my_blog.post_lists()] - max_weight = max(weights) - max_item = [post for post in my_blog.post_lists() if post.weight == max_weight][0] - - data.update({ - "blog_title": max_item.title, - "blog_content": max_item.content[0:50], - "blog_content_date": max_item.date, - }) - - # 暂时只有一个主题 - # 博客关键词计算 5 个 - all_keys = [] - for post in my_blog.post_lists(): - all_keys.extend(post.keys) - - keyword_counts = Counter(all_keys) - top_keywords = keyword_counts.most_common(5) - data.update({ - "blog_top_keywords": top_keywords - }) - - # 博客分类计算 - categories = [post.category for post in my_blog.post_lists()] - cat_counts = Counter(categories) - most_common_cat = cat_counts.most_common(1)[0][0] - - data.update({ - "blog_category": "技术" if most_common_cat == 1 else "生活" - }) - - # 输出 - logger.debug(data) - # 写入 config.ini 避免重复计算 - config.blog_data = data - return data + def _get_post_with_min(self, score_attr): + """ + 获取具有最小属性值的文章 + :param score_attr: + :return: + """ + min_score = min(getattr(post, score_attr) for post in self._my_blog.post_lists) + min_posts = [post for post in self._my_blog.post_lists if getattr(post, score_attr) == min_score] + if min_posts: + return min_posts[0] + return None diff --git a/src/models.py b/src/models.py new file mode 100644 index 0000000..768d3dc --- /dev/null +++ b/src/models.py @@ -0,0 +1,80 @@ +from dataclasses import dataclass +from enum import Enum +from typing import List + +@dataclass +class Site: + """ + 站点数据模型 + - service: 服务模式 + - title: 站点标题 + """ + service: int + title: str + + def to_dict(self) -> dict: + """ + 将Site对象转换为字典形式 + """ + return {k: v if not isinstance(v, Enum) else v.value for k, v in vars(self).items()} + +@dataclass +class Blog: + """ + 博客数据模型 + - name:名称 + - link:链接 + - life:域名注册天数 + - article_count:博客文章总和 + - article_word_count:博客文章字数总和 + - top_post_keys:博客关键字 + - category:博客分类 + """ + name: str + link: str + life: int + article_count: int + article_word_count: int + top_post_keys: List[str] + category: int + + def to_dict(self) -> dict: + """ + 将Blog对象转换为字典形式 + """ + return {k: v if not isinstance(v, Enum) else v.value for k, v in vars(self).items()} + +@dataclass +class Post: + """ + 文章数据模型 + - title:标题 + - content:内容 + - keys:关键字列表 + - date:日期字符串 + """ + title: str + content: str + keys: List[str] + time: str + date: str + + def to_dict(self) -> dict: + """ + 将Post对象转换为字典形式 + """ + return {k: v if not isinstance(v, Enum) else v.value for k, v in vars(self).items()} + +@dataclass +class Custom: + """ + 自定义数据模型 + - yiyan:一言 + """ + yiyan: str + + def to_dict(self) -> dict: + """ + 将Custom对象转换为字典形式 + """ + return vars(self) diff --git a/src/scraper.py b/src/scraper.py index f00bbe5..5a186bc 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -1,85 +1,139 @@ +import re +from collections import Counter + import feedparser from loguru import logger -from . import tools +import analyzer +import const +import tools class Blog: - def __init__(self, url): + def __init__(self, rss): try: - self.feed = feedparser.parse(url) + # 解析RSS feed + self._feed = feedparser.parse(rss) + # 解析feed中的所有文章 + self._posts = [Post(entry) for entry in self._feed.entries] except Exception as e: - logger.error(f'解析 RSS feed 时发生错误: {str(e)}') + logger.error(f'Feedparser 解析 RSS feed 时发生错误: {str(e)}') raise - self.posts = [Post(entry) for entry in self.feed.entries] def _get_feed_field(self, field): - """ - 从 RSS feed 中获取特定字段 - """ - field_value = self.feed.feed.get(field) - if field_value is None: - logger.warning(f'{field} 字段不存在!') - return field_value + if field_value := self._feed.feed.get(field): + return field_value + logger.warning(f'Feedparser {field} 字段不存在!') + return "" @property def title(self): - return self._get_feed_field('title') + # 获取RSS feed的标题 + return self._feed.feed.get('title') @property def link(self): - return self._get_feed_field('link') + # 获取RSS feed的链接 + return self._feed.feed.get('link') @property def life(self): - domain = tools.get_domain(self.link) - return tools.get_domain_life(domain) + # 获取RSS feed链接的域名存活时间 + return tools.get_domain_life(self.link) @property def article_count(self): - return len(self.posts) + # 获取文章数量 + return len(self._posts) if self._posts else 0 @property def article_word_count(self): - return sum(post.word_count for post in self.posts) + # 获取文章总字数 + return sum(post.word_count for post in self._posts) if self._posts else 0 + @property + def keys(self): + if self._posts: + # 提取所有关键字 + all_keys = [key for post in self._posts for key in post.keys] + + # 过滤出中文关键字 + chinese_keys = [key for key in all_keys if re.search(r'[\u4e00-\u9fff]+', key)] + + # 计算关键字出现的次数 + keyword_counts = Counter(chinese_keys) + + # 提取出现次数最多的关键字 + top_keywords = keyword_counts.most_common(const.BLOG_MAX_KEYS) + + return top_keywords + + return [] + + @property + def category(self): + # 获取博客的分类 + if self._posts: + # 如果博客有帖子 + categories = [post.category for post in self._posts] + # 获取所有帖子的分类 + cat_counts = Counter(categories) + # 统计每个分类的个数 + most_common_cat = cat_counts.most_common(1)[0][0] + # 获取出现次数最多的分类 + return most_common_cat + # 如果博客没有帖子 + return const.BLOG_POST_CATEGORY_LIFE + + @property def post_lists(self): - return self.posts + # 获取文章列表 + return self._posts if self._posts else [] def __str__(self): - return f"Blog: {self.title}, Life:{self.life}, Count{self.article_count}. Word count:{self.article_word_count}" + return f""" + 博客: {self.title} + 链接: {self.link} + 时间: {self.life} 天 + 文章: {self.article_count} 篇 + 字数: {self.article_word_count} 个 + 分类: {self.category} + 关键字: {self.keys} + """ class Post: def __init__(self, entry): - # 日期权重 - self._weight = None - # 日子 - self._date = None - # 情感分 - self._score = None - # 关键字 - self._keys = None - # 分类 - self._category = None self.entry = entry + # 文章内容 + self._content = self._get_content() + # 文章时间 + self._time = tools.format_datetime(self._get_entry_field('published')) + # 文章日期 + self._date = analyzer.special_date_calculation(self._time) + # 特殊日期分 + self._special_date_score = analyzer.calculate_weight(self._get_entry_field('published')) + # 关键字 + self._keys = analyzer.extract_keywords(text=self._content, + topK=tools.get_multiple_of_100(self._content), + stopwords='data/stop_words.txt') + # 文章情感分 + self._sentiment_score = analyzer.analyze_sentiment(self._keys) + # 分类 + self._category = analyzer.check_category(tech_terms_file='data/tech_terms.txt', keywords=self._keys) def _get_entry_field(self, field): - """ - 从 RSS entry 中获取特定字段 - """ - field_value = self.entry.get(field) - if field_value is None: - pass - # logger.warning(f'{field} 字段不存在!') - return field_value + return self.entry.get(field) - @property - def title(self): - return self._get_entry_field('title') - - @property - def content(self): + def _get_content(self): + """ + 获取文章内容。 + :return: 文章的描述或内容,根据以下规则: + - 如果'content'字段存在,那么返回'content'字段的值。 + - 如果'description'字段的长度小于128,并且'content'字段存在,那么返回'content'字段的值。 + - 否则,返回'description'字段的值。 + - 如果'description'和'content'字段都不存在,返回空字符串。 + """ description = self._get_entry_field('description') content = self._get_entry_field('content') if content: @@ -94,60 +148,61 @@ class Post: return description @property - def time(self): - return self._get_entry_field('published') + def title(self): + # 获取文章标题 + return self._get_entry_field('title') @property - def link(self): - return self._get_entry_field('link') + def content(self): + # 获取文章内容 + return self._content @property def word_count(self): + # 获取文章字数 return len(self.content) if self.content else 0 @property - def keys(self): - return self._keys - - @keys.setter - def keys(self, value): - self._keys = value - - @property - def score(self): - return self._score - - @score.setter - def score(self, value): - self._score = value - - @property - def category(self): - return self._category - - @category.setter - def category(self, value): - self._category = value + def time(self): + # 获取文章时间 + return self._time @property def date(self): + # 获取日期分 return self._date - @date.setter - def date(self, value): - self._date = value + @property + def link(self): + # 获取文章链接 + return self._get_entry_field('link') @property - def weight(self): - return self._weight + def keys(self): + # 获取文章关键字 + return self._keys - @weight.setter - def weight(self, value): - self._weight = value + @property + def category(self): + # 获取文章分类 + return self._category + + @property + def special_date_score(self): + # 获取特殊日期分 + return self._special_date_score + + @property + def sentiment_score(self): + # 获取文章情感分 + return self._sentiment_score def __str__(self): - return (f"Post title={self.title[:20]}..., " - f" content={self.content[:20]}..., " - f" time={self.time}, " - f" link={self.link}, " - f" word_count={self.word_count}") + return (f" 标题:{self.title}, " + f" 内容:{self.content[:20]}..., " + f" 时间:{self.time}, " + f" 链接:{self.link}, " + f" 日期分:{self.special_date_score}" + f" 情感分:{self.sentiment_score}" + f" 类目:{self.category}" + f" 关键字:{self.keys}") diff --git a/src/tools.py b/src/tools.py index b3fc5bb..2fbfd9a 100644 --- a/src/tools.py +++ b/src/tools.py @@ -1,10 +1,14 @@ from datetime import datetime from urllib.parse import urlparse +import pytz import requests from bs4 import BeautifulSoup +from dateutil.parser import parse from loguru import logger +import const + def check_website_status(url): """ @@ -54,10 +58,10 @@ def get_domain_life(url): headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" } - domain_url = f"https://rdap.verisign.com/com/v1/domain/{url}" + domain = get_domain(url) try: - response = requests.get(domain_url, headers=headers, timeout=30) + response = requests.get(f"https://rdap.verisign.com/com/v1/domain/{domain}", headers=headers, timeout=30) response.raise_for_status() # Raises stored HTTPError, if one occurred. registration_date = response.json().get('events')[0].get('eventDate') @@ -87,7 +91,7 @@ def get_domain_life(url): except Exception as err: logger.error(f"未预期的错误: {err}") - return None + return 0 def remove_html_tags(text): @@ -105,7 +109,8 @@ def get_yiyan(): :return:一言 """ try: - response = requests.get("https://v1.hitokoto.cn/?c=d&min_length=12&encode=text", timeout=30) # Set timeout to 5 seconds + response = requests.get("https://v1.hitokoto.cn/?c=d&min_length=18&max_length=24&encode=text", + timeout=30) # Set timeout to 5 seconds if response.status_code == 200: return response.text else: @@ -122,4 +127,28 @@ def get_yiyan(): return False except Exception as e: logger.error(f"一言未知错误,错误:{e}") - return False \ No newline at end of file + return False + + +def get_multiple_of_100(string): + """ + 获取文章长度 100 的整除 + :return:建议关键字数量 + """ + length = len(string) + multiple = length // 100 + if multiple < 1: + multiple = 1 + return multiple + + +def format_datetime(dt_str): + """ + 格式化时间字符串为指定格式 + :param dt_str:时间字符串 + :return:指定格式 + """ + dt = parse(dt_str) + tz = pytz.timezone(const.TIME_ZONE) + formatted_dt = dt.astimezone(tz).strftime(const.FORMAT_TIME) + return formatted_dt diff --git a/templates/painting.html b/templates/painting.html index 210c20f..6b1cfdc 100644 --- a/templates/painting.html +++ b/templates/painting.html @@ -3,8 +3,8 @@ - EndOfYear - {% if web_status == 1 %} + {{ site.title }} + {% if site.service == 1 %} @@ -19,7 +19,7 @@