From 4d1879aa0d6e5f960ad4d01a693ef6dfbc616441 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 16 Apr 2026 05:42:56 +0000 Subject: [PATCH 01/29] docs: update customization for log level configuration (#f69d105) Co-authored-by: Colin-XKL <49122401+Colin-XKL@users.noreply.github.com> --- README.md | 2 ++ doc-site/src/content/docs/en/guides/advanced/customization.md | 1 + .../src/content/docs/zh-tw/guides/advanced/customization.md | 1 + doc-site/src/content/docs/zh/guides/advanced/customization.md | 1 + 4 files changed, 5 insertions(+) diff --git a/README.md b/README.md index 8b15665e..68c912f5 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ services: FC_LLM_API_MODEL: gemini-pro/chatgpt-3.5/... # 默认使用的模型 FC_LLM_API_TYPE: openai # openai 或 ollama FC_DEFAULT_TARGET_LANG: zh-CN # (Optional) LLM 处理任务的默认目标语言,如 zh-CN, en-US + LOG_LEVEL: info # (Optional) 日志级别,如 info, debug, trace ``` 你也可以直接在一个 compose 文件中把 redis 等附加组件也一起部署好: @@ -129,6 +130,7 @@ services: FC_LLM_API_MODEL: gemini-pro/chatgpt-3.5/... # 默认使用的模型 FC_LLM_API_TYPE: openai # openai 或 ollama FC_DEFAULT_TARGET_LANG: zh-CN # (Optional) LLM 处理任务的默认目标语言 + LOG_LEVEL: info # (Optional) 日志级别,如 info, debug, trace service.redis: image: redis:6-alpine container_name: feedcraft_redis diff --git a/doc-site/src/content/docs/en/guides/advanced/customization.md b/doc-site/src/content/docs/en/guides/advanced/customization.md index 625e87f0..858a2ece 100644 --- a/doc-site/src/content/docs/en/guides/advanced/customization.md +++ b/doc-site/src/content/docs/en/guides/advanced/customization.md @@ -71,6 +71,7 @@ You can configure FeedCraft using environment variables in `docker-compose.yml`. - **FC_LLM_API_TYPE**: (Optional) `openai` (default) or `ollama`. - **FC_LLM_MAX_CONCURRENCY**: (Optional) Global maximum concurrency for LLM requests (default: `3`). Limits concurrent API calls to prevent rate limits. 
- **FC_DOMAIN_MAX_CONCURRENCY**: (Optional) Maximum concurrent requests per target domain during web scraping like fulltext extraction (default: `3`). Prevents overwhelming target servers. +- **LOG_LEVEL**: (Optional) Log level for the backend application (e.g., `info`, `debug`, `trace`). Overrides the default level set by `ENV`. ### External Services diff --git a/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md b/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md index 2d656a43..a69632f9 100644 --- a/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md +++ b/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md @@ -71,6 +71,7 @@ sidebar: - **FC_LLM_API_TYPE**: (可選) `openai` (預設) 或 `ollama`. - **FC_LLM_MAX_CONCURRENCY**: (可選) 全局最大 LLM 併發請求數(預設: `3`)。用於限制併發請求數量以防止觸發 API 速率限制。 - **FC_DOMAIN_MAX_CONCURRENCY**: (可選) 網頁抓取(如全文提取)時每個目標域名的最大併發數(預設: `3`)。防止抓取目標伺服器負載過高。 +- **LOG_LEVEL**: (可選) 後端應用的日誌級別 (例如 `info`, `debug`, `trace`)。覆蓋 `ENV` 設定的預設級別。 ### 外部服務 diff --git a/doc-site/src/content/docs/zh/guides/advanced/customization.md b/doc-site/src/content/docs/zh/guides/advanced/customization.md index f86aad0c..ef83b76c 100644 --- a/doc-site/src/content/docs/zh/guides/advanced/customization.md +++ b/doc-site/src/content/docs/zh/guides/advanced/customization.md @@ -71,6 +71,7 @@ sidebar: - **FC_LLM_API_TYPE**: (可选) `openai` (默认) 或 `ollama`. 
- **FC_LLM_MAX_CONCURRENCY**: (可选) 全局最大 LLM 并发请求数(默认: `3`)。用于限制并发请求数量以防止触发 API 速率限制。 - **FC_DOMAIN_MAX_CONCURRENCY**: (可选) 网页抓取(如全文提取)时每个目标域名的最大并发数(默认: `3`)。防止抓取目标服务器负载过高。 +- **LOG_LEVEL**: (可选) 后端应用的日志级别 (例如 `info`, `debug`, `trace`)。覆盖 `ENV` 设置的默认级别。 ### 外部服务 From 1bc5c73804d01ad824c1b39b41a9585f858e5aca Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:51:31 +0000 Subject: [PATCH 02/29] fix(html-to-rss): use descendant combinator in generated CSS selectors The frontend visual picker previously generated CSS selectors using the direct child combinator (`>`). This caused matching failures when the backend `goquery` parser processed raw HTML, as the frontend DOM often contains structural differences due to client-side hydration (e.g., Astro custom elements) or automatic tag insertion. Changed the combinator in `getCssSelector` from `>` to a space (descendant combinator) to make the generated `item_selector` robust and fault-tolerant against these DOM differences. 
Co-authored-by: Colin-XKL <49122401+Colin-XKL@users.noreply.github.com> --- web/admin/src/views/dashboard/html_to_rss/utils/selector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/admin/src/views/dashboard/html_to_rss/utils/selector.ts b/web/admin/src/views/dashboard/html_to_rss/utils/selector.ts index d0c0cf84..162bbca4 100644 --- a/web/admin/src/views/dashboard/html_to_rss/utils/selector.ts +++ b/web/admin/src/views/dashboard/html_to_rss/utils/selector.ts @@ -75,5 +75,5 @@ export const getCssSelector = ( if (!currentEl || currentEl === body || currentEl === html) break; } - return path.join(' > '); + return path.join(' '); }; From 2eb19460389571c5827dc2febd73e3d1219bc0a7 Mon Sep 17 00:00:00 2001 From: Colin Date: Thu, 16 Apr 2026 23:28:47 +0800 Subject: [PATCH 03/29] docs: update System Tools for recent observability changes (#68c1987) (#666) * docs: add System Runtime observability tool documentation Co-authored-by: Colin-XKL <49122401+Colin-XKL@users.noreply.github.com> * docs: add System Runtime observability tool documentation Co-authored-by: Colin-XKL <49122401+Colin-XKL@users.noreply.github.com> --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../src/content/docs/en/guides/advanced/tools.md | 15 +++++++++++++++ .../content/docs/zh-tw/guides/advanced/tools.md | 15 +++++++++++++++ .../src/content/docs/zh/guides/advanced/tools.md | 15 +++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/doc-site/src/content/docs/en/guides/advanced/tools.md b/doc-site/src/content/docs/en/guides/advanced/tools.md index 9f3c114e..037b9392 100644 --- a/doc-site/src/content/docs/en/guides/advanced/tools.md +++ b/doc-site/src/content/docs/en/guides/advanced/tools.md @@ -48,6 +48,21 @@ The **Craft Dependencies** (System Health) tool visualizes the internal relation If you encounter errors like "Craft not found", use this tool to trace the broken link in your configuration. 
::: +## System Runtime + +The **System Runtime** (Observability) tool provides a comprehensive dashboard for monitoring the health and execution status of your resources. + +- **Usage**: + 1. Navigate to **Tools > System Runtime**. +- **Features**: + - **Resource Health**: View the current status (Healthy, Degraded, Paused) of Recipes and other components, including consecutive failures. + - **Execution Logs**: Track detailed execution history, success rates, and specific error types (e.g., Timeout, Network, Parse) across all runs. + - **System Notifications**: Review automated alerts regarding resource state transitions (e.g., when a Recipe becomes degraded). You can also subscribe to these alerts via the built-in RSS feed at `/system/notifications/rss`. + +:::tip +If a Recipe fails repeatedly and becomes "Paused", you can use the System Runtime dashboard to manually "Resume" it after fixing the underlying issue. +::: + ## Debug Tools ### LLM Debug diff --git a/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md b/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md index 7a7dcf03..ca4cdead 100644 --- a/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md +++ b/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md @@ -48,6 +48,21 @@ FeedCraft 提供了一些內建工具來幫助您除錯 RSS 來源並監控系 如果遇到 "Craft not found" 等錯誤,可以使用此工具追蹤配置中的斷鏈。 ::: +## 系統運行狀態 (System Runtime) + +**系統運行狀態** (Observability) 工具提供了一個全面的儀表板,用於監控資源的健康狀況和執行狀態。 + +- **使用方法**: + 1. 
導航至 **工具 > 系統運行狀態**。 +- **功能**: + - **資源健康 (Resource Health)**: 查看配方及其他組件的當前狀態(健康、降級、暫停),包括連續失敗次數。 + - **執行日誌 (Execution Logs)**: 追蹤詳細的執行歷史、成功率以及每次運行的具體錯誤類型(例如:超時、網路錯誤、解析錯誤)。 + - **系統通知 (System Notifications)**: 查看關於資源狀態轉換的自動警報(例如當配方降級時)。您還可以透過內建的 RSS 來源 `/system/notifications/rss` 訂閱這些警報。 + +:::tip +如果配方反覆失敗並變為「暫停 (Paused)」狀態,您可以在解決根本問題後,透過系統運行狀態儀表板手動將其「恢復 (Resume)」。 +::: + ## 除錯工具 (Debug Tools) ### LLM 除錯 (LLM Debug) diff --git a/doc-site/src/content/docs/zh/guides/advanced/tools.md b/doc-site/src/content/docs/zh/guides/advanced/tools.md index b09e5291..fb07d63c 100644 --- a/doc-site/src/content/docs/zh/guides/advanced/tools.md +++ b/doc-site/src/content/docs/zh/guides/advanced/tools.md @@ -48,6 +48,21 @@ FeedCraft 提供了一些内置工具来帮助您调试 RSS 源并监控系统 如果遇到 "Craft not found" 等错误,可以使用此工具追踪配置中的断链。 ::: +## 系统运行状态 (System Runtime) + +**系统运行状态** (Observability) 工具提供了一个全面的仪表板,用于监控资源的健康状况和执行状态。 + +- **使用方法**: + 1. 导航至 **工具 > 系统运行状态**。 +- **功能**: + - **资源健康 (Resource Health)**: 查看配方及其他组件的当前状态(健康、降级、暂停),包括连续失败次数。 + - **执行日志 (Execution Logs)**: 跟踪详细的执行历史、成功率以及每次运行的具体错误类型(例如:超时、网络错误、解析错误)。 + - **系统通知 (System Notifications)**: 查看关于资源状态转换的自动警报(例如当配方降级时)。您还可以通过内置的 RSS 源 `/system/notifications/rss` 订阅这些警报。 + +:::tip +如果配方反复失败并变为“暂停 (Paused)”状态,您可以在解决根本问题后,通过系统运行状态仪表板手动将其“恢复 (Resume)”。 +::: + ## 调试工具 (Debug Tools) ### LLM 调试 (LLM Debug) From ae8fb3317b9bd7dea5d68014db6df61109e8dd6e Mon Sep 17 00:00:00 2001 From: Colin Date: Thu, 16 Apr 2026 22:54:37 +0800 Subject: [PATCH 04/29] docs: add inbox source design and html token optimization document --- proposal/future/html_token_optimization.md | 331 +++++++++++++++++++++ proposal/inbox_source_design.md | 67 +++-- 2 files changed, 373 insertions(+), 25 deletions(-) create mode 100644 proposal/future/html_token_optimization.md diff --git a/proposal/future/html_token_optimization.md b/proposal/future/html_token_optimization.md new file mode 100644 index 00000000..c60da800 --- /dev/null +++ b/proposal/future/html_token_optimization.md @@ -0,0 
+1,331 @@ +# HTML Token Optimization for LLM Input + +> 状态:规划中 + +## 1. 需求背景 + +当前 FeedCraft 已经有统一的 LLM 调用入口与内容预处理链路,但 HTML 进入 LLM 前的清洗仍然比较粗糙。很多页面会带上大量对 RSS 场景无价值、但会显著消耗 token 的内容,例如: + +- `script` / `style` / `noscript` 等无关节点 +- `class` / `style` / `id` / `aria-*` / `data-*` 等低价值属性 +- 很长的 `href` / `src` / `srcset` +- base64 `data:` 图片 +- 过多空白、缩进与样式噪音 + +这些内容会带来两个问题: + +1. **增加 LLM 成本**:同样的正文语义会占用更多 token。 +2. **干扰模型理解**:无关 HTML 噪音会稀释正文、图片、链接等真正重要的信息。 + +因此需要在现有架构中增加一层简洁、稳定、可配置的 HTML 优化逻辑,在不破坏主要语义的前提下,尽量缩小送入 LLM 的内容体积。 + +## 2. 目标 + +本方案希望实现一套面向 LLM 输入的 HTML 优化机制,满足以下目标: + +- 尽量复用现有 `ProcessContent` 预处理入口,不重新发明新链路 +- 通过 DOM 级处理删除无意义 HTML 内容,而不是只靠正则替换 +- 支持不同 Craft 使用不同优化等级 +- 保持配置模型足够小,但能表达“保留多少原始内容”这种关键差异 +- 让 tag / attr 规则集中定义,方便后续维护 +- 为 placeholder 替换与恢复提供简单、局部的机制 +- 补充单独测试,确保优化结果稳定、可预期 + +## 3. 适用场景与等级差异 + +不同 LLM 场景对 HTML 保真度的要求并不一样,因此这里不适合只用一个 `bool` 开关。 + +### 3.1 更激进的场景 + +例如: + +- summary +- llm filter +- 条件判断类 craft + +这类场景重点是提取正文语义或做分类判断,不需要保留太多原始结构。对于它们,可以更激进地: + +- 删除更多无关节点 +- 移除大部分低价值属性 +- 对链接和图片做更强压缩或直接去除 + +### 3.2 中等保留的场景 + +例如: + +- 常规 translate +- beautify + +这些场景仍然希望模型理解原文结构,且输出内容最好保留链接、图片等信息,因此应该: + +- 保留主要结构标签 +- 保留关键资源属性 +- 仅去掉明显无意义的噪音 +- 仅替换超长 URL / `data:` URI + +### 3.3 最保守的场景 + +例如: + +- immersive-translate + +这种场景强调尽量保留原始格式、链接、图片以及更多内容组织形式,因此优化应该尽量保守,只做: + +- 明确无意义节点移除 +- 低价值属性清理 +- 极端长字段压缩 +- 空白压缩 + +## 4. 设计原则 + +### 4.1 单一入口 + +优先把 HTML 优化纳入现有的 `internal/util/content_processor.go`,作为 `ProcessContent` 的一部分,这样: + +- 大部分 LLM 调用链无需额外重构 +- 现有 `RemoveLinks` / `RemoveImage` / `ConvertToMd` 能继续复用 +- 逻辑集中,后续更容易维护 + +### 4.2 配置驱动,而不是 craft 分支硬编码 + +不希望在代码里到处写: + +- `if summary { ... }` +- `if immersiveTranslate { ... 
}` + +更合适的方式是: + +- `ContentProcessOption` 增加一个 HTML optimize config struct +- caller 只声明“我想要哪种保留等级” +- 具体 tag/attr/placeholder 规则由 optimizer 内部统一决定 + +### 4.3 规则集中、可维护 + +attr 和 tag 规则应该集中定义,不应散落在 DOM 遍历逻辑里。后续新增规则时,最好只修改一处规则表或 helper。 + +### 4.4 保持 v1 简洁 + +这次优化是为了减少 token,不是为了做完整 HTML sanitizer,也不是为了构建一个高度可配置的通用清洗框架。v1 只处理最有收益、最确定的部分。 + +## 5. 推荐的数据结构 + +建议在 `internal/util/` 中增加一个小型配置模型,例如: + +- `ContentProcessOption` 增加 `OptimizeHTML *HTMLOptimizeConfig` +- `HTMLOptimizeConfig` 只表达少数关键维度,例如: + - 优化等级 / preservation profile + - 是否保留链接 + - 是否保留图片 + - 长 URL / `data:` URI 的替换阈值(可内部默认) + +更推荐的做法是: + +- 对外暴露少量 profile 或 level +- 内部再映射成实际规则集 + +这样可以同时满足: + +- 外部调用简洁 +- 内部实现可演进 +- 不会让调用方承担太多细节决策 + +## 6. HTML 优化的核心步骤 + +建议优化器按以下顺序工作: + +1. 判断输入是否像 HTML;非 HTML 内容直接跳过 +2. 用 DOM 解析 HTML +3. 删除无意义节点 +4. 清理低价值属性 +5. 根据配置决定是否保留图片、链接等结构 +6. 对超长属性值做 placeholder 替换 +7. 序列化 HTML +8. 压缩空白 + +### 6.1 建议直接删除的节点 + +v1 可先覆盖这些明显无价值的元素: + +- `script` +- `style` +- `noscript` +- `template` +- `iframe` + +这些元素通常不会帮助 RSS 抽取、摘要、翻译或分类,保留它们只会浪费 token。 + +### 6.2 建议清理的属性 + +低价值属性建议集中通过规则清理,例如: + +- `class` +- `id` +- `style` +- `aria-*` +- `data-*` +- `on*` 事件属性 + +同时保留可能有语义价值的属性,具体是否保留也可受 profile 影响,例如: + +- `href` +- `src` +- `srcset` +- `alt` +- `title` + +### 6.3 超长字段替换 + +对以下字段做可逆 placeholder 压缩: + +- 很长的 `href` +- 很长的 `src` +- 很长的 `srcset` +- base64 `data:` URI + +例如替换为: + +- `__FC_PH_URL_0001__` +- `__FC_PH_DATA_0002__` + +并维护 request-scoped map。这样可以避免: + +- 把极长字符串直接发给 LLM +- 使用全局共享状态 +- 后续恢复时依赖不透明上下文 + +## 7. placeholder 恢复策略 + +v1 不需要把恢复逻辑强行塞进所有 LLM 流程里。 + +推荐做法: + +- 优化器返回优化后的 HTML 和 placeholder map +- 只有确实需要恢复的流程才使用 restore helper +- placeholder map 生命周期限定在单次调用上下文内 + +这样能保持实现足够简单,同时给像 `beautify` 这种对原始 URL 保真度更敏感的流程留下扩展空间。 + +## 8. 
与现有代码的集成点 + +### 8.1 统一入口 + +优先复用: + +- `internal/adapter/llm.go` +- `internal/util/content_processor.go` + +也就是继续通过 `ProcessContent` 统一处理大部分 LLM 输入。 + +### 8.2 主要调用方 + +后续实现时,至少应考虑这些现有路径: + +- `internal/craft/common_llm_logic.go` + - 使用更激进的 profile + - 适合 filter / condition / summary 一类语义型任务 + +- `internal/craft/translate.go` + - 使用更保守的 profile + - 常规翻译需要保留更多结构与资源 + +- immersive translate + - 使用最保守的 profile + - 尽量保留链接、图片和格式 + +- `internal/craft/beautify.go` + - 当前会直接把原始 HTML 拼进 prompt + - 需要在 prompt 构造前引入同一套优化逻辑 + +## 9. 规则维护方式 + +为了让代码保持简洁、清晰、优雅,建议使用以下形式管理规则: + +- 一组集中定义的 removable tags +- 一组集中定义的 removable attrs +- 一组 prefix-based removable attr rules +- 一组 profile-aware preserved attrs + +DOM 遍历时只调用这些 helper,不在遍历逻辑里堆积大量 if/else。 + +这样做的优点: + +- 可读性更好 +- 修改规则时影响范围小 +- 容易为不同 profile 扩展行为 +- 更容易测试每条规则的预期 + +## 10. v1 明确不做的事情 + +为了保持范围收敛,这个方案暂时不处理: + +- 完整 HTML 安全清洗 +- 复杂 CSS 可见性推断 +- 语义重排或正文重写 +- 过度细粒度的用户可配置规则 +- 所有 URL 一律替换 +- 对所有 LLM 返回结果做全局 placeholder 自动恢复 + +## 11. 测试策略 + +这部分必须单独补充测试,不能只依赖集成路径顺带覆盖。 + +### 11.1 独立 optimizer 单测 + +建议增加单独测试文件,例如: + +- `internal/util/content_processor_test.go` +- 或 `internal/util/html_optimize_test.go` + +重点验证: + +- 无意义节点是否被移除 +- 低价值属性是否被清理 +- 关键属性是否按 profile 保留 +- whitespace 是否被正确压缩 +- 长 URL / `data:` URI 是否被替换为 placeholder +- placeholder 恢复是否正确 +- 非 HTML 输入是否安全透传 + +### 11.2 profile 差异测试 + +需要专门验证不同优化等级的行为差异,而不是只测单一输出。 + +例如: + +- aggressive profile 是否删除更多噪音 +- preserve profile 是否保留图片和链接 +- immersive profile 是否比 preserve profile 保留更多原始结构 + +### 11.3 `ProcessContent` 组合测试 + +继续验证它和现有逻辑的组合行为: + +- optimize + `ConvertToMd` +- optimize + `RemoveImage` +- optimize + `RemoveLinks` +- 多个步骤叠加时顺序是否稳定 + +### 11.4 调用路径 smoke test + +对主要 craft 调用路径补充轻量 smoke coverage,确保它们确实选用了正确 profile。 + +## 12. 预期收益 + +如果实现得当,这套机制会带来以下收益: + +- 减少 LLM 输入 token 消耗 +- 提高 prompt 中有效语义密度 +- 让 summary / filter 等场景更稳定 +- 让 translate / immersive-translate 在保真和成本之间更可控 +- 为后续更细致的 HTML 内容优化打下可扩展基础 + +## 13. 
总结 + +这项工作的关键不在于“删得越多越好”,而在于: + +- 用统一入口做预处理 +- 用小而清晰的 config 表达不同保留等级 +- 用集中规则保持 tag / attr 逻辑可维护 +- 用独立测试确保优化结果稳定 + +如果这几个点把握好,FeedCraft 就能在不明显增加架构复杂度的前提下,让 HTML 进入 LLM 前变得更轻、更干净、更适配不同 craft 的需求。 diff --git a/proposal/inbox_source_design.md b/proposal/inbox_source_design.md index 37505940..cf93edce 100644 --- a/proposal/inbox_source_design.md +++ b/proposal/inbox_source_design.md @@ -61,6 +61,12 @@ type InboxItem struct { } ``` +其中: + +- `Content` 存储文章正文内容 +- `URL` 优先保存调用方提供的原文链接;若 POST 写入时该字段为空,则服务端自动回填为 FeedCraft 内部内容访问地址:`/:inbox_id/:article_id` +- 上述路由中的 `article_id` 对应 `InboxItem.ItemID` + `(InboxID, ItemID)` 建联合唯一索引,用于去重(见 5.6)。 ### 4.3 InboxToken @@ -109,41 +115,44 @@ Header: Authorization: Bearer [{ "title": "hello" }] ``` +如果 `url` 为空或省略,服务端会在写入时自动回填:`/:inbox_id/:article_id`。 + ### 5.3 字段说明 -| 字段 | 类型 | 必填 | 说明 | -| ----------- | ------ | ------ | ----------------------------------------------------------- | -| `title` | string | **是** | 标题 | -| `url` | string | 否 | 原文链接 | -| `content` | string | 否 | 正文内容(支持 HTML) | -| `summary` | string | 否 | 摘要文本,不填则自动截取 content 前 200 字符 | -| `id` | string | 否 | 唯一标识,用于去重。不填则服务端自动生成 UUID(不参与去重) | -| `author` | string | 否 | 作者名 | -| `timestamp` | number | 否 | Unix 秒级时间戳,不填则使用服务端当前时间 | +| 字段 | 类型 | 必填 | 说明 | +| ----------- | ------ | ------ | ----------------------------------------------------------------------- | +| `title` | string | **是** | 标题 | +| `url` | string | 否 | 原文链接;若为空或省略,则服务端自动填充为 Inbox 文章内容访问地址 | +| `content` | string | 否 | 正文内容(支持 HTML) | +| `summary` | string | 否 | 摘要文本,不填则自动截取 content 前 200 字符 | +| `id` | string | 否 | 唯一标识,用于去重。不填则服务端自动生成 UUID(不参与去重) | +| `author` | string | 否 | 作者名 | +| `timestamp` | number | 否 | Unix 秒级时间戳,不填则使用服务端当前时间 | ### 5.4 POST 请求体与 DB 模型的映射 -| POST 字段 | DB 字段 (InboxItem) | 默认值逻辑 | -| ------------ | ------------------- | --------------------------------------------- | -| _(URL 路径)_ | `InboxID` | 从 URL 路径 `/api/inbox/:inbox_id/items` 提取 | -| `title` | `Title` | 直接存储 | -| `url` | 
`URL` | 直接存储 | -| `content` | `Content` | 直接存储 | -| `summary` | `Summary` | 不填则截取 Content 前 200 字符 | -| `id` | `ItemID` | 不填则服务端生成 UUID | -| `author` | `Author` | 直接存储 | -| `timestamp` | `PublishedAt` | 不填则 = `CreatedAt` | -| _(无)_ | `ID` | DB 自增主键 | -| _(无)_ | `CreatedAt` | GORM 自动填充为入库时间 | +| POST 字段 | DB 字段 (InboxItem) | 默认值逻辑 | +| ------------ | ------------------- | --------------------------------------------------------------- | +| _(URL 路径)_ | `InboxID` | 从 URL 路径 `/api/inbox/:inbox_id/items` 提取 | +| `title` | `Title` | 直接存储 | +| `url` | `URL` | 优先使用请求值;若为空则自动填充为 `/:inbox_id/:article_id` | +| `content` | `Content` | 直接存储 | +| `summary` | `Summary` | 不填则截取 Content 前 200 字符 | +| `id` | `ItemID` | 不填则服务端生成 UUID | +| `author` | `Author` | 直接存储 | +| `timestamp` | `PublishedAt` | 不填则 = `CreatedAt` | +| _(无)_ | `ID` | DB 自增主键 | +| _(无)_ | `CreatedAt` | GORM 自动填充为入库时间 | ### 5.5 写入行为 1. 验证 token 有效性(查 `inbox_tokens` 表) 2. 验证 `inbox_id` 对应的 inbox 存在 3. 校验所有条目(每条必须有 `title`,总数不超过 100)。任一条校验失败则整批拒绝,返回 400 -4. 填充默认值(`id` → UUID, `timestamp` → 当前时间, `summary` → 截取 content) -5. 批量 upsert `InboxItem` -6. 滚动清理:查询该 inbox 当前总条数,若超过 `max_items`,删除最旧的记录使总数 = `max_items` +4. 填充默认值(`id` → UUID, `timestamp` → 当前时间, `summary` → 截取 content 前 200 字符) +5. 若 `url` 为空或省略,则服务端根据 `/:inbox_id/:article_id` 自动生成内容访问地址,其中 `article_id = ItemID` +6. 批量 upsert `InboxItem` +7. 
滚动清理:查询该 inbox 当前总条数,若超过 `max_items`,删除最旧的记录使总数 = `max_items` ### 5.6 去重机制 @@ -287,7 +296,15 @@ Inbox 可以通过 `feedcraft://recipe/:id` 间接被 Topic 引用:先建一 | -------- | ---------------------------- | -------------------------------------- | | **POST** | `/api/inbox/:inbox_id/items` | 批量写入数据(JSON 数组,上限 100 条) | -### 9.2 管理后台(JWT 鉴权) +### 9.2 内容读取(公开访问) + +| 方法 | 路径 | 说明 | +| ------- | ------------------------------------------------- | --------------------------------- | +| **GET** | `/:inbox_id/:article_id` | 返回对应文章的 `content` 字段内容 | + +该路由中的 `article_id` 对应 `InboxItem.ItemID`。如果某条数据在 POST 写入时未提供 `url`,系统会自动将 `url` 回填为这个内容访问地址。 + +### 9.3 管理后台(JWT 鉴权) | 方法 | 路径 | 说明 | | ------ | ----------------------------- | ------------------ | From df86ae2959f79026b66a953add102a198570775f Mon Sep 17 00:00:00 2001 From: Colin Date: Thu, 16 Apr 2026 23:54:09 +0800 Subject: [PATCH 05/29] feat(custom_recipe): add copy link support - Introduced `currentLink` ref to hold the generated recipe feed URL - Created a dedicated clipboard instance for copying the link - Updated `handleCopyLink` to set the link and perform the copy action - Simplified the copy configuration logic with separate clipboard instances --- .../dashboard/custom_recipe/custom_recipe.vue | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/web/admin/src/views/dashboard/custom_recipe/custom_recipe.vue b/web/admin/src/views/dashboard/custom_recipe/custom_recipe.vue index 54e8bdd8..c0ed6850 100644 --- a/web/admin/src/views/dashboard/custom_recipe/custom_recipe.vue +++ b/web/admin/src/views/dashboard/custom_recipe/custom_recipe.vue @@ -283,6 +283,7 @@ const showModal = ref(false); const showConfigModal = ref(false); const currentConfig = ref(''); + const currentLink = ref(''); const quickCreate = ref(false); const rssUrl = ref(''); @@ -306,13 +307,22 @@ }, }); - const { copy, copied } = useClipboard(); + const { copy: copyConfig, copied } = useClipboard({ + source: currentConfig, + legacy: true, 
+ copiedDuring: 1500, + }); + const { copy: copyLink } = useClipboard({ + source: currentLink, + legacy: true, + copiedDuring: 1500, + }); const buildRecipeFeedUrl = (id?: string) => buildPublicFeedUrl(`/recipe/${id || ''}`); const handleCopyConfig = async () => { try { - await copy(currentConfig.value); + await copyConfig(); Message.success(t('customRecipe.copied')); } catch (e: any) { Message.error(t('customRecipe.copyFailed', { msg: e.message || e })); @@ -321,7 +331,8 @@ const handleCopyLink = async (id: string) => { try { - await copy(buildRecipeFeedUrl(id)); + currentLink.value = buildRecipeFeedUrl(id); + await copyLink(); Message.success(t('customRecipe.copied')); } catch (e: any) { Message.error(t('customRecipe.copyFailed', { msg: e.message || e })); From 0ba68e8fb57bc2a193c99e6b68543d0848a9b1a5 Mon Sep 17 00:00:00 2001 From: Colin Date: Fri, 17 Apr 2026 00:09:25 +0800 Subject: [PATCH 06/29] refactor: hide topic_feed routes and UI - Hide TopicFeed routes and UI until feature is ready to prevent navigation to incomplete pages. 
--- .../src/router/routes/modules/worktable.ts | 39 ++++++++++--------- .../views/dashboard/observability/index.vue | 4 +- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/web/admin/src/router/routes/modules/worktable.ts b/web/admin/src/router/routes/modules/worktable.ts index 76dc5b59..6e3d7bcc 100644 --- a/web/admin/src/router/routes/modules/worktable.ts +++ b/web/admin/src/router/routes/modules/worktable.ts @@ -12,25 +12,26 @@ const WORKTABLE: AppRouteRecordRaw = { order: 1, }, children: [ - { - path: 'topic_feed', - name: 'TopicFeed', - component: () => import('@/views/dashboard/topic_feed/topic_feed.vue'), - meta: { - requiresAuth: true, - locale: 'menu.topicFeed', - }, - }, - { - path: 'topic_feed/:id', - name: 'TopicFeedDetail', - component: () => import('@/views/dashboard/topic_feed/detail.vue'), - meta: { - requiresAuth: true, - locale: 'menu.topicFeed', - hideInMenu: true, - }, - }, + // TopicFeed 功能当前仍在开发完善中,先隐藏入口;待功能 ready 后再重新开放。 + // { + // path: 'topic_feed', + // name: 'TopicFeed', + // component: () => import('@/views/dashboard/topic_feed/topic_feed.vue'), + // meta: { + // requiresAuth: true, + // locale: 'menu.topicFeed', + // }, + // }, + // { + // path: 'topic_feed/:id', + // name: 'TopicFeedDetail', + // component: () => import('@/views/dashboard/topic_feed/detail.vue'), + // meta: { + // requiresAuth: true, + // locale: 'menu.topicFeed', + // hideInMenu: true, + // }, + // }, { path: 'custom_recipe', name: 'CustomRecipe', diff --git a/web/admin/src/views/dashboard/observability/index.vue b/web/admin/src/views/dashboard/observability/index.vue index 886e829d..9ace523d 100644 --- a/web/admin/src/views/dashboard/observability/index.vue +++ b/web/admin/src/views/dashboard/observability/index.vue @@ -93,8 +93,9 @@ > {{ t('observability.link') }} + { router.push({ name: 'TopicFeedDetail', params: { id } }); }; From fb80f5dd9466fc8fc8ad685185d42a1b44731dfa Mon Sep 17 00:00:00 2001 From: Colin Date: Fri, 17 Apr 2026 00:15:22 +0800 
Subject: [PATCH 07/29] feat: optimize topic feed timestamp logic - Remove hardcoded `time.Now()` for Updated and Created in TopicFeed.Fetch - Add `applyTopicFeedTimestamps` helper to set feed timestamps based on article metadata - Apply helper after merging feeds and after processing feeds Ensures topic feed metadata accurately reflects the most recent article timestamps, improving consistency across feeds. --- internal/engine/topic.go | 32 ++++++++++++++++++++++++++++++-- internal/engine/topic_test.go | 30 ++++++++++++++++++++++++------ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/internal/engine/topic.go b/internal/engine/topic.go index f306612a..c6f036e6 100644 --- a/internal/engine/topic.go +++ b/internal/engine/topic.go @@ -74,10 +74,9 @@ func (t *TopicFeed) Fetch(ctx context.Context) (*model.CraftFeed, error) { Title: t.Title, Description: t.Description, Link: t.Link, - Updated: time.Now(), - Created: time.Now(), Articles: allArticles, } + applyTopicFeedTimestamps(mergedFeed) // If there's an aggregator pipeline (e.g., deduplicate -> sort -> limit), run it. 
if t.Aggregator != nil { @@ -99,6 +98,7 @@ func (t *TopicFeed) Fetch(ctx context.Context) (*model.CraftFeed, error) { }) return nil, err } + applyTopicFeedTimestamps(processedFeed) if len(processedFeed.Articles) == 0 && len(failedInputs) > 0 { reportTopicResult(ctx, t, processedFeed, failedInputs, startedAt) return nil, errors.New("topic failed because all upstream providers failed or produced no items") @@ -116,6 +116,34 @@ func (t *TopicFeed) Fetch(ctx context.Context) (*model.CraftFeed, error) { return mergedFeed, nil } +func applyTopicFeedTimestamps(feed *model.CraftFeed) { + if feed == nil { + return + } + + var latestUpdated time.Time + var latestCreated time.Time + + for _, article := range feed.Articles { + if article == nil { + continue + } + if article.Updated.After(latestUpdated) { + latestUpdated = article.Updated + } + if article.Created.After(latestCreated) { + latestCreated = article.Created + } + } + + if !latestUpdated.IsZero() { + feed.Updated = latestUpdated + } + if !latestCreated.IsZero() { + feed.Created = latestCreated + } +} + func reportTopicResult(ctx context.Context, topic *TopicFeed, feed *model.CraftFeed, failedInputs []map[string]any, startedAt time.Time) { status := dao.ExecutionStatusSuccess errorKind := "" diff --git a/internal/engine/topic_test.go b/internal/engine/topic_test.go index d543c806..9782aedd 100644 --- a/internal/engine/topic_test.go +++ b/internal/engine/topic_test.go @@ -25,18 +25,25 @@ func (m *MockProvider) Fetch(ctx context.Context) (*model.CraftFeed, error) { } func TestTopicFeed_Fetch_Success(t *testing.T) { + updated1 := time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC) + created1 := time.Date(2026, 4, 1, 9, 0, 0, 0, time.UTC) + updated2 := time.Date(2026, 4, 2, 11, 0, 0, 0, time.UTC) + created2 := time.Date(2026, 4, 2, 8, 0, 0, 0, time.UTC) + updated3 := time.Date(2026, 4, 3, 12, 0, 0, 0, time.UTC) + created3 := time.Date(2026, 4, 3, 7, 0, 0, 0, time.UTC) + provider1 := &MockProvider{ Feed: &model.CraftFeed{ 
Articles: []*model.CraftArticle{ - {Id: "1", Title: "Article 1"}, - {Id: "2", Title: "Article 2"}, + {Id: "1", Title: "Article 1", Updated: updated1, Created: created1}, + {Id: "2", Title: "Article 2", Updated: updated2, Created: created2}, }, }, } provider2 := &MockProvider{ Feed: &model.CraftFeed{ Articles: []*model.CraftArticle{ - {Id: "3", Title: "Article 3"}, + {Id: "3", Title: "Article 3", Updated: updated3, Created: created3}, }, }, } @@ -53,6 +60,8 @@ func TestTopicFeed_Fetch_Success(t *testing.T) { // Should contain 3 articles total assert.Len(t, result.Articles, 3) + assert.True(t, result.Updated.Equal(updated3)) + assert.True(t, result.Created.Equal(created3)) } func TestTopicFeed_Fetch_PartialFailure(t *testing.T) { @@ -83,12 +92,19 @@ func TestTopicFeed_Fetch_PartialFailure(t *testing.T) { } func TestTopicFeed_Fetch_WithAggregator(t *testing.T) { + updated1 := time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC) + updated2 := time.Date(2026, 4, 2, 10, 0, 0, 0, time.UTC) + updated3 := time.Date(2026, 4, 3, 10, 0, 0, 0, time.UTC) + created1 := time.Date(2026, 4, 1, 9, 0, 0, 0, time.UTC) + created2 := time.Date(2026, 4, 2, 9, 0, 0, 0, time.UTC) + created3 := time.Date(2026, 4, 3, 9, 0, 0, 0, time.UTC) + provider := &MockProvider{ Feed: &model.CraftFeed{ Articles: []*model.CraftArticle{ - {Id: "1", Title: "A"}, - {Id: "2", Title: "B"}, - {Id: "3", Title: "C"}, + {Id: "1", Title: "A", Updated: updated1, Created: created1}, + {Id: "2", Title: "B", Updated: updated2, Created: created2}, + {Id: "3", Title: "C", Updated: updated3, Created: created3}, }, }, } @@ -105,6 +121,8 @@ func TestTopicFeed_Fetch_WithAggregator(t *testing.T) { assert.NoError(t, err) assert.NotNil(t, result) assert.Len(t, result.Articles, 2) + assert.True(t, result.Updated.Equal(updated2)) + assert.True(t, result.Created.Equal(created2)) } func TestTopicFeed_Fetch_AllInputsFailed(t *testing.T) { From a8033a132e4abf0367e118fc33f67b6e084fa3a1 Mon Sep 17 00:00:00 2001 From: Colin Date: Fri, 17 Apr 
2026 00:30:52 +0800 Subject: [PATCH 08/29] feat: better ux for feed preview tool - Add frontend API wrapper for feed preview - Add better error handling in UI --- internal/controller/feed_viewer.go | 210 ++++++++++++++++++ internal/router/registry.go | 1 + web/admin/src/api/feed_viewer.ts | 37 +++ .../feed_viewer/feed_view_container.vue | 38 ++-- .../dashboard/feed_viewer/feed_viewer.vue | 33 ++- 5 files changed, 287 insertions(+), 32 deletions(-) create mode 100644 internal/controller/feed_viewer.go create mode 100644 web/admin/src/api/feed_viewer.ts diff --git a/internal/controller/feed_viewer.go b/internal/controller/feed_viewer.go new file mode 100644 index 00000000..ed986f5d --- /dev/null +++ b/internal/controller/feed_viewer.go @@ -0,0 +1,210 @@ +package controller + +import ( + "FeedCraft/internal/config" + "FeedCraft/internal/constant" + "FeedCraft/internal/model" + "FeedCraft/internal/source" + "FeedCraft/internal/util" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "strings" + "time" + + "github.com/gin-gonic/gin" +) + +type FeedViewerPreviewReq struct { + InputURL string `json:"input_url" form:"input_url" binding:"required"` +} + +type FeedViewerPreview struct { + Title string `json:"title"` + Description string `json:"description"` + Link string `json:"link"` + FeedURL string `json:"feedUrl"` + Copyright string `json:"copyright"` + Image *FeedViewerPreviewImage `json:"image,omitempty"` + Items []FeedViewerPreviewItem `json:"items"` +} + +type FeedViewerPreviewImage struct { + URL string `json:"url"` + Title string `json:"title"` +} + +type FeedViewerPreviewItem struct { + GUID string `json:"guid"` + Title string `json:"title"` + Link string `json:"link"` + PubDate string `json:"pubDate"` + IsoDate string `json:"isoDate"` + Content string `json:"content"` + ContentSnippet string `json:"contentSnippet"` +} + +func PreviewFeedViewer(c *gin.Context) { + var req FeedViewerPreviewReq + if err := c.ShouldBindQuery(&req); err != nil { + 
c.JSON(http.StatusBadRequest, util.APIResponse[any]{StatusCode: -1, Msg: "Please enter a valid http(s) feed URL"}) + return + } + + if err := validateFeedViewerURL(req.InputURL); err != nil { + c.JSON(http.StatusBadRequest, util.APIResponse[any]{StatusCode: -1, Msg: err.Error()}) + return + } + + feed, err := loadFeedViewerPreview(c, req.InputURL) + if err != nil { + status, msg := classifyFeedViewerError(err) + c.JSON(status, util.APIResponse[any]{StatusCode: -1, Msg: msg}) + return + } + + c.JSON(http.StatusOK, util.APIResponse[FeedViewerPreview]{ + StatusCode: 0, + Data: buildFeedViewerPreview(feed, req.InputURL), + }) +} + +func loadFeedViewerPreview(c *gin.Context, inputURL string) (*model.CraftFeed, error) { + cfg := &config.SourceConfig{ + Type: constant.SourceRSS, + HttpFetcher: &config.HttpFetcherConfig{ + URL: inputURL, + }, + } + + factory, err := source.Get(constant.SourceRSS) + if err != nil { + return nil, fmt.Errorf("factory not found: %w", err) + } + + src, err := factory(cfg) + if err != nil { + return nil, fmt.Errorf("failed to create source: %w", err) + } + + feed, err := src.Fetch(c.Request.Context()) + if err != nil { + return nil, err + } + + return feed, nil +} + +func buildFeedViewerPreview(feed *model.CraftFeed, inputURL string) FeedViewerPreview { + preview := FeedViewerPreview{ + Title: feed.Title, + Description: feed.Description, + Link: feed.Link, + FeedURL: inputURL, + Copyright: feed.Copyright, + Items: make([]FeedViewerPreviewItem, 0, len(feed.Articles)), + } + + if feed.ImageURL != "" || feed.ImageTitle != "" { + preview.Image = &FeedViewerPreviewImage{ + URL: feed.ImageURL, + Title: feed.ImageTitle, + } + } + + for _, article := range feed.Articles { + if article == nil { + continue + } + + contentSnippet := article.Description + if contentSnippet == "" { + contentSnippet = article.Content + } + + preview.Items = append(preview.Items, FeedViewerPreviewItem{ + GUID: article.Id, + Title: article.Title, + Link: article.Link, + 
PubDate: formatFeedViewerTime(article.Created, article.Updated), + IsoDate: formatFeedViewerISOTime(article.Created, article.Updated), + Content: article.Content, + ContentSnippet: contentSnippet, + }) + } + + return preview +} + +func formatFeedViewerTime(primary, fallback time.Time) string { + if !primary.IsZero() { + return primary.Format(time.RFC1123Z) + } + if !fallback.IsZero() { + return fallback.Format(time.RFC1123Z) + } + return "" +} + +func formatFeedViewerISOTime(primary, fallback time.Time) string { + if !primary.IsZero() { + return primary.UTC().Format(time.RFC3339) + } + if !fallback.IsZero() { + return fallback.UTC().Format(time.RFC3339) + } + return "" +} + +func validateFeedViewerURL(rawURL string) error { + parsedURL, err := url.Parse(rawURL) + if err != nil || parsedURL == nil { + return errors.New("Please enter a valid http(s) feed URL") + } + if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { + return errors.New("Please enter a valid http(s) feed URL") + } + if parsedURL.Hostname() == "" { + return errors.New("Please enter a valid http(s) feed URL") + } + + ips, err := net.LookupIP(parsedURL.Hostname()) + if err != nil { + return fmt.Errorf("Unable to resolve this URL: %w", err) + } + for _, ip := range ips { + if ip.IsLoopback() || ip.IsPrivate() { + return fmt.Errorf("Access to private IP %s is forbidden", ip.String()) + } + } + + return nil +} + +func classifyFeedViewerError(err error) (int, string) { + msg := err.Error() + + switch { + case strings.Contains(msg, "http status not ok:"): + return http.StatusOK, humanizeFeedViewerHTTPStatus(msg) + case strings.Contains(msg, "http get failed:"), strings.Contains(msg, "browserless fetch failed:"), strings.Contains(msg, "failed to read response body:"): + return http.StatusOK, "Unable to fetch this URL. Please check the address and try again." 
+ case strings.Contains(msg, "parse failed:"): + return http.StatusOK, "The URL is reachable, but it does not appear to be a valid RSS or Atom feed." + default: + return http.StatusInternalServerError, "Failed to preview this feed due to an internal error." + } +} + +func humanizeFeedViewerHTTPStatus(msg string) string { + status := strings.TrimSpace(strings.TrimPrefix(msg, "fetch failed: http status not ok:")) + if status == "" { + status = strings.TrimSpace(strings.TrimPrefix(msg, "http status not ok:")) + } + if status == "" { + return "Unable to fetch this URL. Please check the address and try again." + } + return fmt.Sprintf("The source returned %s, so the feed could not be loaded.", status) +} diff --git a/internal/router/registry.go b/internal/router/registry.go index 38ae0e92..94eb9afd 100644 --- a/internal/router/registry.go +++ b/internal/router/registry.go @@ -123,6 +123,7 @@ func RegisterRouters(router *gin.Engine) { adminApi.POST("/tools/fetch", controller.HtmlFetch) adminApi.POST("/tools/parse", controller.HtmlParse) + adminApi.GET("/tools/feed/preview", controller.PreviewFeedViewer) adminApi.POST("/tools/json/fetch", controller.CurlFetch) adminApi.POST("/tools/json/parse", controller.CurlParse) diff --git a/web/admin/src/api/feed_viewer.ts b/web/admin/src/api/feed_viewer.ts new file mode 100644 index 00000000..0a3631fe --- /dev/null +++ b/web/admin/src/api/feed_viewer.ts @@ -0,0 +1,37 @@ +import axios from 'axios'; +import { APIResponse } from '@/api/types'; + +export interface FeedViewerPreviewImage { + url: string; + title: string; +} + +export interface FeedViewerPreviewItem { + guid: string; + title: string; + link: string; + pubDate: string; + isoDate: string; + content: string; + contentSnippet: string; +} + +export interface FeedViewerPreview { + title: string; + description: string; + link: string; + feedUrl: string; + copyright: string; + image?: FeedViewerPreviewImage; + items: FeedViewerPreviewItem[]; +} + +export function previewFeed( + 
 inputUrl: string +): Promise<APIResponse<FeedViewerPreview>> { + return axios + .get<APIResponse<FeedViewerPreview>>('/api/admin/tools/feed/preview', { + params: { input_url: inputUrl }, + }) + .then((res) => res.data); +} diff --git a/web/admin/src/views/dashboard/feed_viewer/feed_view_container.vue b/web/admin/src/views/dashboard/feed_viewer/feed_view_container.vue index 4fd04b44..24f48acc 100644 --- a/web/admin/src/views/dashboard/feed_viewer/feed_view_container.vue +++ b/web/admin/src/views/dashboard/feed_viewer/feed_view_container.vue @@ -15,7 +15,7 @@