From c520fe4329c0f0ee748ec37292c69f76b3f8e939 Mon Sep 17 00:00:00 2001
From: hankl <your.email@example.com>
Date: Sat, 9 May 2026 19:12:54 +0800
Subject: [PATCH 01/15] init commit

---
 docs/nova-pdf-refactor-zhipu.md               |  565 ++++++++
 docs/nova-pdf-technical-design.md             | 1175 +++++++++++++++++
 docs/spec.md                                  |   31 +
 packages/nova-pdf/README.md                   |  166 +++
 packages/nova-pdf/pyproject.toml              |   74 ++
 packages/nova-pdf/src/nova_pdf/__about__.py   |    1 +
 packages/nova-pdf/src/nova_pdf/__init__.py    |   13 +
 packages/nova-pdf/src/nova_pdf/_ai_service.py |  202 +++
 packages/nova-pdf/src/nova_pdf/_config.py     |  101 ++
 packages/nova-pdf/src/nova_pdf/_converter.py  |  251 ++++
 .../nova-pdf/src/nova_pdf/_page_analyzer.py   |  117 ++
 .../nova-pdf/src/nova_pdf/_page_renderer.py   |   32 +
 packages/nova-pdf/src/nova_pdf/_plugin.py     |   56 +
 packages/nova-pdf/tests/__init__.py           |    1 +
 packages/nova-pdf/tests/test_ai_service.py    |  103 ++
 packages/nova-pdf/tests/test_analyzer.py      |  131 ++
 packages/nova-pdf/tests/test_converter.py     |  181 +++
 scripts/load_secrets.sh                       |   13 +
 18 files changed, 3213 insertions(+)
 create mode 100644 docs/nova-pdf-refactor-zhipu.md
 create mode 100644 docs/nova-pdf-technical-design.md
 create mode 100644 docs/spec.md
 create mode 100644 packages/nova-pdf/README.md
 create mode 100644 packages/nova-pdf/pyproject.toml
 create mode 100644 packages/nova-pdf/src/nova_pdf/__about__.py
 create mode 100644 packages/nova-pdf/src/nova_pdf/__init__.py
 create mode 100644 packages/nova-pdf/src/nova_pdf/_ai_service.py
 create mode 100644 packages/nova-pdf/src/nova_pdf/_config.py
 create mode 100644 packages/nova-pdf/src/nova_pdf/_converter.py
 create mode 100644 packages/nova-pdf/src/nova_pdf/_page_analyzer.py
 create mode 100644 packages/nova-pdf/src/nova_pdf/_page_renderer.py
 create mode 100644 packages/nova-pdf/src/nova_pdf/_plugin.py
 create mode 100644 packages/nova-pdf/tests/__init__.py
 create mode 100644 packages/nova-pdf/tests/test_ai_service.py
 create mode 100644 packages/nova-pdf/tests/test_analyzer.py
 create mode 100644 packages/nova-pdf/tests/test_converter.py
 create mode 100755 scripts/load_secrets.sh

diff --git a/docs/nova-pdf-refactor-zhipu.md b/docs/nova-pdf-refactor-zhipu.md
new file mode 100644
index 000000000..cf6b2b5ff
--- /dev/null
+++ b/docs/nova-pdf-refactor-zhipu.md
@@ -0,0 +1,565 @@
+# Nova-PDF 重构方案：使用 zai-sdk + glm-ocr
+
+## 1. 重构目标
+
+将现有的自定义 AI 服务替换为 zai-sdk + glm-ocr，简化代码并提升 OCR 能力。
+
+## 2. 技术对比
+
+| 项目 | 原方案 | 新方案 |
+|------|--------|--------|
+| SDK | requests (手动调用) | zai-sdk (官方 SDK) |
+| 模型 | 自定义 Workflow | glm-ocr |
+| 接口 | 两步上传（上传+调用） | 直接调用 layout_parsing |
+| 认证 | 双 token (upload + workflow) | 单 API key |
+| 配置 | 环境变量 | 配置文件 + 环境变量 |
+
+## 3. 接口分析
+
+### 3.1 glm-ocr API
+
+```python
+from zai import ZhipuAiClient
+
+client = ZhipuAiClient(api_key="your-api-key")
+
+# 支持图片 URL
+response = client.layout_parsing.create(
+    model="glm-ocr",
+    file="https://example.com/image.png"
+)
+
+# 支持本地文件路径
+response = client.layout_parsing.create(
+    model="glm-ocr",
+    file="/path/to/image.png"
+)
+
+# 返回结果（包含 Markdown 格式的内容）
+print(response)
+```
+
+### 3.2 响应结构
+
+```python
+# response 包含解析后的结构化内容
+# 具体字段需查看实际返回，通常包括：
+# - 文本内容
+# - 布局信息
+# - 表格识别结果
+# - Markdown 格式输出
+```
+
+## 4. 架构设计
+
+### 4.1 组件变更
+
+```
+原架构：
+┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
+│  Page Renderer  │────►│  File Uploader  │────►│  Workflow API   │
+│  (截图)          │     │  (上传获取URL)   │     │  (自定义接口)    │
+└─────────────────┘     └─────────────────┘     └─────────────────┘
+
+新架构：
+┌─────────────────┐     ┌─────────────────┐
+│  Page Renderer  │────►│   glm-ocr API   │
+│  (截图→临时文件) │     │  (layout_parsing)│
+└─────────────────┘     └─────────────────┘
+```
+
+### 4.2 文件变更清单
+
+| 文件 | 变更类型 | 说明 |
+|------|----------|------|
+| `_ai_service.py` | **重写** | 使用 zai-sdk + glm-ocr |
+| `_converter.py` | 微调 | 适配新 AIService 接口 |
+| `_plugin.py` | 微调 | 简化配置参数 |
+| `pyproject.toml` | 更新 | 添加 zai-sdk 依赖 |
+| `_config.py` | **新增** | 配置文件读取 |
+| `README.md` | 更新 | 新的使用说明 |
+
+## 5. 详细设计
+
+### 5.1 配置模块 (_config.py)
+
+```python
+"""Configuration management for nova-pdf."""
+
+import os
+from pathlib import Path
+from typing import Optional
+from dataclasses import dataclass
+
+try:
+    import tomllib  # Python 3.11+
+except ImportError:
+    import tomli as tomllib
+
+
+@dataclass
+class NovaPdfConfig:
+    """nova-pdf configuration."""
+    
+    # API 配置
+    zhipu_api_key: str = ""
+    
+    # OCR 配置
+    model: str = "glm-ocr"
+    dpi: int = 150
+    timeout: int = 120
+    
+    # 处理策略
+    force_ai: bool = False
+    
+    @classmethod
+    def load(cls, config_path: Optional[str] = None) -> "NovaPdfConfig":
+        """
+        从多个来源加载配置（优先级从高到低）：
+        1. 环境变量
+        2. 配置文件 (pyproject.toml 或 nova-pdf.toml)
+        3. 默认值
+        """
+        config = cls()
+        
+        # 1. 从配置文件加载
+        config._load_from_file(config_path)
+        
+        # 2. 环境变量覆盖
+        config._load_from_env()
+        
+        return config
+    
+    def _load_from_file(self, config_path: Optional[str] = None):
+        """从配置文件加载"""
+        # 查找配置文件
+        search_paths = []
+        
+        if config_path:
+            search_paths.append(Path(config_path))
+        
+        # 当前目录的 pyproject.toml
+        search_paths.append(Path("pyproject.toml"))
+        
+        # 当前目录的 nova-pdf.toml
+        search_paths.append(Path("nova-pdf.toml"))
+        
+        # 用户目录
+        search_paths.append(Path.home() / ".config" / "nova-pdf" / "config.toml")
+        
+        for path in search_paths:
+            if path.exists():
+                try:
+                    with open(path, "rb") as f:
+                        data = tomllib.load(f)
+                    
+                    # 读取 [tool.nova-pdf] 配置段
+                    if "tool" in data and "nova-pdf" in data["tool"]:
+                        self._apply_config(data["tool"]["nova-pdf"])
+                    elif "nova-pdf" in data:
+                        self._apply_config(data["nova-pdf"])
+                    
+                    break
+                except Exception:
+                    pass
+    
+    def _apply_config(self, data: dict):
+        """应用配置"""
+        if "api_key" in data:
+            self.zhipu_api_key = data["api_key"]
+        if "model" in data:
+            self.model = data["model"]
+        if "dpi" in data:
+            self.dpi = data["dpi"]
+        if "timeout" in data:
+            self.timeout = data["timeout"]
+        if "force_ai" in data:
+            self.force_ai = data["force_ai"]
+    
+    def _load_from_env(self):
+        """从环境变量加载（优先级最高）"""
+        if os.environ.get("NOVA_ZHIPU_API_KEY"):
+            self.zhipu_api_key = os.environ["NOVA_ZHIPU_API_KEY"]
+        if os.environ.get("NOVA_MODEL"):
+            self.model = os.environ["NOVA_MODEL"]
+        if os.environ.get("NOVA_DPI"):
+            self.dpi = int(os.environ["NOVA_DPI"])
+        if os.environ.get("NOVA_TIMEOUT"):
+            self.timeout = int(os.environ["NOVA_TIMEOUT"])
+        if os.environ.get("NOVA_FORCE_AI"):
+            self.force_ai = os.environ["NOVA_FORCE_AI"].lower() in ("true", "1", "yes")
+```
+
+### 5.2 AI 服务模块 (_ai_service.py)
+
+```python
+"""AI service using zai-sdk and glm-ocr."""
+
+import io
+import os
+import tempfile
+from dataclasses import dataclass
+from typing import BinaryIO, Optional
+
+try:
+    from zai import ZhipuAiClient
+except ImportError:
+    ZhipuAiClient = None
+
+from ._config import NovaPdfConfig
+
+
+@dataclass
+class AIResult:
+    """Result from AI conversion."""
+    text: str
+    success: bool = True
+    error: Optional[str] = None
+
+
+class AIService:
+    """
+    AI 服务 - 使用 zai-sdk + glm-ocr
+    
+    特点：
+    - 直接调用 glm-ocr 的 layout_parsing API
+    - 支持本地文件路径或图片 URL
+    - 自动处理图片格式转换
+    """
+    
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: str = "glm-ocr",
+        timeout: int = 120,
+        config: Optional[NovaPdfConfig] = None,
+    ):
+        """
+        初始化 AI 服务
+        
+        Args:
+            api_key: 智谱 API Key，默认从配置读取
+            model: 模型名称，默认 glm-ocr
+            timeout: 请求超时时间（秒）
+            config: 配置对象
+        """
+        if ZhipuAiClient is None:
+            raise ImportError(
+                "zai-sdk is required for AIService. "
+                "Install with: pip install nova-pdf[zhipu]"
+            )
+        
+        # 从配置加载
+        if config:
+            self.api_key = api_key or config.zhipu_api_key
+            self.model = model or config.model
+            self.timeout = timeout or config.timeout
+        else:
+            config = NovaPdfConfig.load()
+            self.api_key = api_key or config.zhipu_api_key
+            self.model = model
+            self.timeout = timeout
+        
+        if not self.api_key:
+            raise ValueError(
+                "API key is required. Set NOVA_ZHIPU_API_KEY environment variable "
+                "or add 'api_key' to [tool.nova-pdf] in pyproject.toml"
+            )
+        
+        # 初始化客户端
+        self.client = ZhipuAiClient(api_key=self.api_key)
+    
+    def image_to_markdown(
+        self,
+        image_stream: BinaryIO,
+        filename: str = "page.png",
+    ) -> AIResult:
+        """
+        将图片转换为 Markdown
+        
+        Args:
+            image_stream: 图片流
+            filename: 文件名（用于临时文件）
+        
+        Returns:
+            AIResult: 转换结果
+        """
+        try:
+            # 方案1：保存为临时文件，传文件路径
+            with tempfile.NamedTemporaryFile(
+                suffix=".png",
+                delete=False
+            ) as tmp:
+                tmp.write(image_stream.read())
+                tmp_path = tmp.name
+            
+            image_stream.seek(0)
+            
+            # 调用 glm-ocr API
+            response = self.client.layout_parsing.create(
+                model=self.model,
+                file=tmp_path
+            )
+            
+            # 清理临时文件
+            try:
+                os.unlink(tmp_path)
+            except Exception:
+                pass
+            
+            # 解析响应
+            # 响应格式可能是字符串或对象，需要适配
+            if hasattr(response, 'content'):
+                text = response.content
+            elif hasattr(response, 'text'):
+                text = response.text
+            elif isinstance(response, str):
+                text = response
+            else:
+                text = str(response)
+            
+            return AIResult(
+                text=text.strip() if text else "",
+                success=True,
+            )
+        
+        except Exception as e:
+            return AIResult(
+                text="",
+                success=False,
+                error=str(e),
+            )
+```
+
+### 5.3 插件注册 (_plugin.py)
+
+```python
+"""Plugin registration for nova-pdf."""
+
+from typing import Any
+from markitdown import MarkItDown
+
+from ._config import NovaPdfConfig
+from ._ai_service import AIService
+from ._converter import NovaPdfConverter
+
+
+__plugin_interface_version__ = 1
+
+
+def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
+    """
+    注册 nova-pdf 转换器
+    
+    配置来源（优先级从高到低）：
+    1. kwargs 参数
+    2. 环境变量
+    3. 配置文件 (pyproject.toml)
+    4. 默认值
+    """
+    # 加载配置
+    config = NovaPdfConfig.load()
+    
+    # kwargs 覆盖配置
+    api_key = kwargs.get("api_key") or kwargs.get("zhipu_api_key") or config.zhipu_api_key
+    model = kwargs.get("model", config.model)
+    dpi = kwargs.get("dpi", config.dpi)
+    force_ai = kwargs.get("force_ai", config.force_ai)
+    timeout = kwargs.get("timeout", config.timeout)
+    
+    # 创建 AI 服务
+    ai_service = None
+    if api_key:
+        try:
+            ai_service = AIService(
+                api_key=api_key,
+                model=model,
+                timeout=timeout,
+            )
+        except Exception:
+            pass
+    
+    # 注册转换器
+    PRIORITY_NOVA_PDF = -1.0
+    
+    markitdown.register_converter(
+        NovaPdfConverter(
+            ai_service=ai_service,
+            dpi=dpi,
+            force_ai=force_ai,
+        ),
+        priority=PRIORITY_NOVA_PDF,
+    )
+```
+
+### 5.4 pyproject.toml 更新
+
+```toml
+[project]
+name = "nova-pdf"
+dependencies = [
+    "markitdown>=0.1.0",
+    "pdfminer.six>=20251230",
+    "pdfplumber>=0.11.9",
+    "Pillow>=9.0.0",
+    "tomli>=2.0.0;python_version<'3.11'",
+]
+
+[project.optional-dependencies]
+zhipu = [
+    "zai-sdk>=0.2.2",
+]
+dev = [
+    "pytest>=7.0.0",
+]
+
+[project.entry-points."markitdown.plugin"]
+nova_pdf = "nova_pdf"
+
+[tool.nova-pdf]
+# API 配置
+api_key = ""
+model = "glm-ocr"
+dpi = 150
+timeout = 120
+force_ai = false
+```
+
+## 6. 配置方式
+
+### 6.1 本地敏感配置文件（推荐）
+
+项目根目录下的 `.secrets.local` 文件存储敏感信息，此文件不会被提交到 Git：
+
+```bash
+# .secrets.local
+NOVA_ZHIPU_API_KEY="your-api-key-here"
+```
+
+使用方式：
+```bash
+# 加载敏感配置
+source .secrets.local
+
+# 或使用脚本
+source scripts/load_secrets.sh
+
+# 然后运行
+markitdown -p document.pdf
+```
+
+### 6.2 配置文件 (pyproject.toml)
+
+```toml
+[tool.nova-pdf]
+# API key 请通过环境变量或 .secrets.local 文件设置，不要硬编码
+api_key = ""
+model = "glm-ocr"
+dpi = 150
+timeout = 120
+```
+
+### 6.3 环境变量（推荐）
+
+```bash
+export NOVA_ZHIPU_API_KEY="your-api-key-here"
+export NOVA_MODEL="glm-ocr"
+export NOVA_DPI="150"
+```
+
+### 6.3 Python API
+
+```python
+from markitdown import MarkItDown
+
+md = MarkItDown(
+    enable_plugins=True,
+    api_key="your-api-key",
+)
+```
+
+### 6.4 命令行
+
+```bash
+export NOVA_ZHIPU_API_KEY="your-api-key"
+markitdown -p document.pdf
+```
+
+## 7. 使用示例
+
+```python
+from markitdown import MarkItDown
+from nova_pdf import AIService, NovaPdfConverter
+
+# 方式1：自动加载配置
+md = MarkItDown(enable_plugins=True)
+result = md.convert("document.pdf")
+
+# 方式2：手动配置
+from nova_pdf import NovaPdfConfig, AIService
+
+config = NovaPdfConfig.load()
+ai_service = AIService(
+    api_key="your-api-key",
+    model="glm-ocr",
+)
+
+converter = NovaPdfConverter(
+    ai_service=ai_service,
+    dpi=150,
+)
+
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+result = md.convert("document.pdf")
+```
+
+## 8. 迁移路径
+
+### 8.1 从旧版本迁移
+
+| 旧配置 | 新配置 |
+|--------|--------|
+| `NOVA_UPLOAD_TOKEN` | `NOVA_ZHIPU_API_KEY` |
+| `NOVA_WORKFLOW_TOKEN` | （删除） |
+| `NOVA_BASE_URL` | （删除） |
+| `NOVA_APP_ID` | （删除） |
+
+### 8.2 API 兼容性
+
+- 旧版 `AIService(upload_token, workflow_token, ...)` → 废弃
+- 新版 `AIService(api_key, ...)` → 推荐
+
+## 9. 实施计划
+
+### ✅ Phase 1: 核心实现（已完成）
+- [x] 设计配置模块
+- [x] 实现 `_config.py`
+- [x] 重写 `_ai_service.py`（使用 zai-sdk + glm-ocr）
+- [x] 更新 `_plugin.py`
+
+### ✅ Phase 2: 集成测试（已完成）
+- [x] 更新 `pyproject.toml`
+- [x] 测试 glm-ocr API
+- [x] 测试插件集成
+
+### Phase 3: 文档更新（进行中）
+- [x] 更新 README.md
+- [ ] 更新技术方案文档
+- [ ] 添加迁移指南
+
+## 10. 风险与缓解
+
+| 风险 | 缓解措施 |
+|------|----------|
+| zai-sdk 接口变化 | 封装适配层，隔离 SDK 细节 |
+| glm-ocr 返回格式不确定 | 做多种格式兼容处理 |
+| 临时文件清理失败 | 使用 try-finally 确保清理 |
+| API key 泄露 | 支持环境变量，避免硬编码 |
+
+## 11. 待确认事项
+
+- [ ] glm-ocr 返回的具体数据结构
+- [ ] 是否支持直接传图片字节流（不保存临时文件）
+- [ ] 超时和重试策略
+- [ ] 并发请求限制
diff --git a/docs/nova-pdf-technical-design.md b/docs/nova-pdf-technical-design.md
new file mode 100644
index 000000000..25128e33a
--- /dev/null
+++ b/docs/nova-pdf-technical-design.md
@@ -0,0 +1,1175 @@
+# Nova-PDF 插件技术方案
+
+## 1. 概述
+
+### 1.1 目标
+开发一个智能 PDF 解析插件 `nova-pdf`，实现：
+- 自动检测 PDF 每页内容类型（纯文本 vs 包含图片/表格）
+- 对纯文本页面使用默认解析能力（pdfminer/pdfplumber）
+- 对包含图片/表格的页面截图后调用 AI 接口转 Markdown
+
+### 1.2 核心价值
+- **提升复杂 PDF 解析质量**：图表、扫描件等传统方法效果差的内容
+- **降低成本**：纯文本页面不调用 AI，节省 API 费用
+- **灵活配置**：支持自定义 AI 模型、分辨率、提示词等
+
+---
+
+## 2. 架构设计
+
+### 2.1 插件结构
+```
+packages/nova-pdf/
+├── src/
+│   └── nova_pdf/
+│       ├── __init__.py           # 导出和版本信息
+│       ├── __about__.py          # 版本号
+│       ├── _plugin.py            # 插件注册入口
+│       ├── _converter.py         # PDF 转换器核心实现
+│       ├── _page_analyzer.py     # 页面内容分析器
+│       ├── _page_renderer.py     # 页面截图渲染器
+│       └── _ai_service.py        # AI 接口封装
+├── tests/
+│   ├── __init__.py
+│   ├── test_converter.py
+│   ├── test_analyzer.py
+│   └── fixtures/
+│       ├── text_only.pdf
+│       ├── with_images.pdf
+│       └── mixed_content.pdf
+├── pyproject.toml
+└── README.md
+```
+
+### 2.2 组件职责
+
+| 组件 | 职责 |
+|------|------|
+| `_plugin.py` | 实现 `register_converters` 入口，注册转换器 |
+| `_converter.py` | 继承 `DocumentConverter`，协调整体流程 |
+| `_page_analyzer.py` | 分析页面是否包含图片/表格 |
+| `_page_renderer.py` | 将 PDF 页面渲染为图片 |
+| `_ai_service.py` | 调用 AI Vision API 转换图片为 Markdown |
+
+### 2.3 流程图
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│                        PDF 文件输入                                │
+└──────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌──────────────────────────────────────────────────────────────────┐
+│                     逐页分析 (PageAnalyzer)                        │
+│  ┌────────────────────────────────────────────────────────────┐  │
+│  │  对每一页:                                                   │  │
+│  │  1. 检测是否包含图片 (images)                                │  │
+│  │  2. 检测是否包含表格 (tables)                                 │  │
+│  │  3. 标记页面类型: PLAIN_TEXT / COMPLEX                       │  │
+│  └────────────────────────────────────────────────────────────┘  │
+└──────────────────────────────────────────────────────────────────┘
+                                │
+          ┌─────────────────────┴─────────────────────┐
+          ▼                                           ▼
+┌─────────────────────┐                    ┌─────────────────────┐
+│   PLAIN_TEXT 页面    │                    │    COMPLEX 页面      │
+│                     │                    │                     │
+│  使用默认解析:        │                    │  1. 截图渲染         │
+│  - pdfplumber 提取   │                    │  2. 调用 AI 接口     │
+│  - pdfminer 备用     │                    │  3. 转换为 Markdown   │
+└─────────────────────┘                    └─────────────────────┘
+          │                                           │
+          └─────────────────────┬─────────────────────┘
+                                │
+                                ▼
+┌──────────────────────────────────────────────────────────────────┐
+│                    合并所有页面结果                                │
+│                    输出完整 Markdown                               │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 3. 核心算法设计
+
+### 3.1 页面内容检测 (PageAnalyzer)
+
+#### 检测策略
+```python
+class PageType(Enum):
+    PLAIN_TEXT = "plain_text"      # 纯文本，使用默认解析
+    HAS_IMAGES = "has_images"      # 包含图片
+    HAS_TABLES = "has_tables"      # 包含表格
+    COMPLEX = "complex"            # 复杂内容（图片+表格+混合）
+```
+
+#### 图片检测方法
+使用 **pdfplumber** 的页面对象检测：
+
+```python
+def detect_images(page) -> bool:
+    """检测页面是否包含图片"""
+    # 方法1: 直接检测 page.images
+    if hasattr(page, 'images') and len(page.images) > 0:
+        return True
+
+    # 方法2: 检测页面对象中的图像资源
+    if hasattr(page, 'objects'):
+        if 'image' in page.objects and len(page.objects['image']) > 0:
+            return True
+        # 检测 XObject (可能包含内嵌图像)
+        if 'xobject' in page.objects and len(page.objects['xobject']) > 0:
+            for obj in page.objects['xobject']:
+                if obj.get('subtype') == 'Image':
+                    return True
+
+    # 方法3: 检测页面资源字典
+    try:
+        if hasattr(page.page, 'get_resources'):
+            resources = page.page.get_resources()
+            if resources and 'XObject' in resources:
+                return True
+    except Exception:
+        pass
+
+    return False
+```
+
+#### 表格检测方法
+```python
+def detect_tables(page) -> bool:
+    """检测页面是否包含表格"""
+    # 方法1: 使用 pdfplumber 的 extract_tables
+    tables = page.extract_tables()
+    if tables and len(tables) > 0:
+        # 过滤空表格
+        for table in tables:
+            if table and any(any(cell for cell in row) for row in table):
+                return True
+
+    # 方法2: 检测表格线（边框线）
+    if hasattr(page, 'objects') and 'line' in page.objects:
+        lines = page.objects['line']
+        if len(lines) > 10:  # 大量线条可能构成表格
+            # 分析线条是否形成网格结构
+            h_lines = [l for l in lines if l.get('height', 1) < 2]
+            v_lines = [l for l in lines if l.get('width', 1) < 2]
+            if len(h_lines) > 2 and len(v_lines) > 2:
+                return True
+
+    return False
+```
+
+#### 综合判断
+```python
+def analyze_page(page) -> PageType:
+    """分析页面类型"""
+    has_images = detect_images(page)
+    has_tables = detect_tables(page)
+
+    if has_images and has_tables:
+        return PageType.COMPLEX
+    elif has_images:
+        return PageType.HAS_IMAGES
+    elif has_tables:
+        return PageType.HAS_TABLES
+    else:
+        return PageType.PLAIN_TEXT
+```
+
+### 3.2 页面截图渲染 (PageRenderer)
+
+#### 技术选型
+
+使用 **pdfplumber.to_image**，理由：
+- 已是项目依赖，无需额外安装
+- 实现简单，代码量少
+- 底层使用 PIL，满足需求
+
+#### 实现方案
+```python
+import io
+
+def render_page_to_image(page, dpi: int = 150) -> io.BytesIO:
+    """
+    将 PDF 页面渲染为图片
+
+    Args:
+        page: pdfplumber 页面对象
+        dpi: 渲染分辨率，默认 150（平衡质量和速度）
+
+    Returns:
+        BytesIO: PNG 图片流
+    """
+    # 使用 pdfplumber 的 to_image 方法
+    page_image = page.to_image(resolution=dpi)
+
+    # 转换为 BytesIO
+    img_stream = io.BytesIO()
+    page_image.original.save(img_stream, format="PNG")
+    img_stream.seek(0)
+
+    return img_stream
+```
+
+#### DPI 推荐值
+```python
+DPI_SETTINGS = {
+    "low": 72,      # 快速预览，文件小
+    "medium": 150,  # 平衡质量和速度（默认）
+    "high": 300,   # 高质量，适合复杂图表
+}
+```
+
+### 3.3 AI 接口调用 (AIService)
+
+#### 复用 markitdown 的 LLM 客户端机制
+```python
+from markitdown.converters._llm_caption import llm_caption
+
+class AIService:
+    """AI Vision 服务封装"""
+
+    def __init__(
+        self,
+        client,                    # OpenAI 兼容客户端
+        model: str = "gpt-4o",     # 模型名称
+        prompt: str | None = None, # 自定义提示词
+    ):
+        self.client = client
+        self.model = model
+        self.prompt = prompt or self._default_prompt()
+
+    def _default_prompt(self) -> str:
+        return """请将这张图片的内容转换为 Markdown 格式。
+
+要求：
+1. 保持原有的文档结构（标题、段落、列表等）
+2. 表格使用 Markdown 表格语法
+3. 图片中的文字清晰转写
+4. 数学公式使用 LaTeX 语法
+5. 如有图表，用文字描述其内容
+6. 不要添加任何额外的解释或评论"""
+
+    def image_to_markdown(
+        self,
+        image_stream: io.BytesIO,
+        stream_info: StreamInfo,
+    ) -> str:
+        """调用 AI 将图片转为 Markdown"""
+        result = llm_caption(
+            image_stream,
+            stream_info,
+            client=self.client,
+            model=self.model,
+            prompt=self.prompt,
+        )
+        return result or ""
+```
+
+---
+
+## 4. 转换器实现 (_converter.py)
+
+### 4.1 核心流程
+```python
+class NovaPdfConverter(DocumentConverter):
+    """智能 PDF 转换器"""
+
+    def __init__(
+        self,
+        ai_service: AIService | None = None,
+        dpi: int = 150,
+        force_ai: bool = False,  # 强制所有页面使用 AI
+    ):
+        self.ai_service = ai_service
+        self.dpi = dpi
+        self.force_ai = force_ai
+
+    def convert(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> DocumentConverterResult:
+        # 读取 PDF
+        pdf_stream = io.BytesIO(file_stream.read())
+
+        markdown_parts = []
+
+        with pdfplumber.open(pdf_stream) as pdf:
+            for page_num, page in enumerate(pdf.pages):
+                # 分析页面类型
+                page_type = analyze_page(page)
+
+                # 根据类型选择处理方式
+                if self.force_ai or page_type != PageType.PLAIN_TEXT:
+                    # 复杂内容：截图 + AI
+                    if self.ai_service:
+                        img = render_page_to_image(page, self.dpi)
+                        md = self.ai_service.image_to_markdown(img, StreamInfo())
+                    else:
+                        # 无 AI 服务，回退到默认解析
+                        md = page.extract_text() or ""
+                else:
+                    # 纯文本：默认解析
+                    md = page.extract_text() or ""
+
+                if md.strip():
+                    markdown_parts.append(f"## Page {page_num + 1}\n\n{md}")
+
+        return DocumentConverterResult(
+            markdown="\n\n".join(markdown_parts),
+        )
+```
+
+---
+
+## 5. 配置选项
+
+### 5.1 初始化参数
+```python
+class NovaPdfConfig:
+    """nova-pdf 配置"""
+
+    # AI 服务配置
+    llm_client: Any = None           # OpenAI 兼容客户端（必需）
+    llm_model: str = "gpt-4o"        # 模型名称
+    llm_prompt: str | None = None   # 自定义提示词
+
+    # 渲染配置
+    dpi: int = 150                   # 截图分辨率
+    image_format: str = "png"       # 图片格式
+
+    # 处理策略
+    force_ai: bool = False          # 强制所有页面使用 AI
+    skip_tables: bool = False       # 跳过表格检测（表格用默认解析）
+    skip_images: bool = False       # 跳过图片检测（图片用默认解析）
+
+    # 性能配置
+    max_concurrent: int = 5          # 并发请求数
+    timeout: int = 60                # 单页 AI 调用超时（秒）
+```
+
+### 5.2 使用示例
+```python
+from openai import OpenAI
+from markitdown import MarkItDown
+
+# 初始化 LLM 客户端
+client = OpenAI(api_key="your-api-key")
+
+# 创建 MarkItDown 实例并启用 nova-pdf 插件
+md = MarkItDown(
+    enable_plugins=True,
+    llm_client=client,
+    llm_model="gpt-4o",
+)
+
+# 转换 PDF
+result = md.convert("complex_document.pdf")
+print(result.markdown)
+```
+
+---
+
+## 6. 依赖管理
+
+### 6.1 pyproject.toml
+```toml
+[project]
+name = "nova-pdf"
+dependencies = [
+    "markitdown>=0.1.0",
+    "pdfminer.six>=20251230",
+    "pdfplumber>=0.11.9",   # 页面解析和截图渲染
+    "Pillow>=9.0.0",        # 图像处理（pdfplumber.to_image 底层依赖）
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+]
+
+# 插件入口点
+[project.entry-points."markitdown.plugin"]
+nova_pdf = "nova_pdf"
+```
+
+---
+
+## 7. 错误处理
+
+### 7.1 降级策略
+```python
+def convert_with_fallback(
+    self,
+    pdf_bytes: bytes,
+    page_num: int,
+    page_type: PageType,
+) -> str:
+    """带降级的转换"""
+
+    # 尝试 AI 转换
+    if self.ai_service and page_type != PageType.PLAIN_TEXT:
+        try:
+            img = render_page_to_image(pdf_bytes, page_num, self.dpi)
+            result = self.ai_service.image_to_markdown(img, StreamInfo())
+            if result.strip():
+                return result
+        except AIServiceError as e:
+            logger.warning(f"AI 转换失败，降级到默认解析: {e}")
+
+    # 降级到默认解析
+    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
+        page = pdf.pages[page_num]
+        text = page.extract_text() or ""
+
+        # 尝试提取表格
+        tables = page.extract_tables()
+        if tables:
+            for table in tables:
+                text += "\n\n" + self._table_to_markdown(table)
+
+        return text
+```
+
+---
+
+## 8. 性能优化
+
+### 8.1 异步处理
+```python
+import asyncio
+from typing import List
+
+async def convert_pages_async(
+    self,
+    pdf_bytes: bytes,
+    pages: List[PageInfo],
+) -> List[str]:
+    """异步并发处理多页"""
+
+    async def process_page(page_info: PageInfo) -> str:
+        if page_info.type == PageType.PLAIN_TEXT:
+            return self._extract_text(pdf_bytes, page_info.num)
+        else:
+            return await self._ai_convert_async(pdf_bytes, page_info.num)
+
+    # 使用信号量限制并发
+    semaphore = asyncio.Semaphore(self.max_concurrent)
+
+    async def limited_process(page_info):
+        async with semaphore:
+            return await process_page(page_info)
+
+    tasks = [limited_process(p) for p in pages]
+    return await asyncio.gather(*tasks)
+```
+
+### 8.2 缓存机制
+```python
+from functools import lru_cache
+import hashlib
+
+class CachedAIService(AIService):
+    """带缓存的 AI 服务"""
+
+    @lru_cache(maxsize=100)
+    def _get_cache_key(self, image_hash: str) -> str | None:
+        """获取缓存结果"""
+        # 可接入 Redis 等
+        pass
+
+    def image_to_markdown(self, image_stream: io.BytesIO, ...) -> str:
+        # 计算图片哈希
+        image_hash = hashlib.md5(image_stream.read()).hexdigest()
+        image_stream.seek(0)
+
+        # 检查缓存
+        cached = self._get_cache_key(image_hash)
+        if cached:
+            return cached
+
+        # 调用 AI
+        result = super().image_to_markdown(image_stream, ...)
+
+        # 存入缓存
+        self._cache_result(image_hash, result)
+        return result
+```
+
+---
+
+## 9. 测试策略
+
+### 9.1 测试用例设计
+```python
+class TestNovaPdfConverter:
+    """nova-pdf 转换器测试"""
+
+    def test_plain_text_pdf(self):
+        """纯文本 PDF 应使用默认解析"""
+        pass
+
+    def test_pdf_with_images(self):
+        """包含图片的 PDF 应调用 AI"""
+        pass
+
+    def test_pdf_with_tables(self):
+        """包含表格的 PDF 应调用 AI"""
+        pass
+
+    def test_mixed_content_pdf(self):
+        """混合内容应正确区分处理"""
+        pass
+
+    def test_ai_service_fallback(self):
+        """AI 服务失败时应降级"""
+        pass
+
+    def test_dpi_settings(self):
+        """不同 DPI 设置的渲染质量"""
+        pass
+
+    def test_concurrent_processing(self):
+        """并发处理性能测试"""
+        pass
+```
+
+---
+
+## 10. 扩展性设计
+
+### 10.1 自定义页面分析器
+```python
+class PageAnalyzerPlugin(ABC):
+    """页面分析器插件接口"""
+
+    @abstractmethod
+    def analyze(self, page) -> PageType:
+        """分析页面类型"""
+        pass
+
+# 允许用户注入自定义分析器
+class NovaPdfConverter(DocumentConverter):
+    def __init__(
+        self,
+        page_analyzer: PageAnalyzerPlugin | None = None,
+        ...
+    ):
+        self.page_analyzer = page_analyzer or DefaultPageAnalyzer()
+```
+
+### 10.2 自定义 AI 提示词模板
+```python
+PROMPT_TEMPLATES = {
+    "default": "...",
+    "academic": "学术论文模板...",
+    "financial": "财务报表模板...",
+    "legal": "法律文档模板...",
+}
+
+class AIService:
+    def __init__(self, prompt_template: str = "default", ...):
+        self.prompt = PROMPT_TEMPLATES.get(prompt_template, PROMPT_TEMPLATES["default"])
+```
+
+---
+
+## 11. 风险与缓解措施
+
+| 风险 | 影响 | 缓解措施 |
+|------|------|----------|
+| AI API 调用失败 | 转换中断 | 实现降级策略，回退到默认解析 |
+| 大文件内存溢出 | 程序崩溃 | 分页处理，控制内存占用 |
+| AI 响应慢 | 用户体验差 | 异步处理、进度反馈、超时控制 |
+| 解析质量不稳定 | 输出错误 | 多模型对比、人工审核机制 |
+| API 费用过高 | 成本失控 | 智能跳过纯文本页面、缓存机制 |
+
+---
+
+## 12. 实施计划
+
+### ✅ Phase 1: 基础框架（已完成）
+- [x] 创建项目结构
+- [x] 实现插件注册入口
+- [x] 实现基础转换器框架
+
+### ✅ Phase 2: 核心功能（已完成）
+- [x] 实现页面内容检测 (`_page_analyzer.py`)
+- [x] 实现页面截图渲染 (`_page_renderer.py`)
+- [x] 实现 AI 服务接口 (`_ai_service.py`)
+- [x] 实现完整转换流程 (`_converter.py`)
+
+### ⏳ Phase 3: 测试与优化（待进行）
+- [ ] 运行单元测试
+- [ ] 添加测试 PDF 样本
+- [ ] 性能测试和优化
+
+### ⏳ Phase 4: 文档与发布（待进行）
+- [x] 编写 README 和使用文档
+- [x] 准备示例代码
+- [ ] 打包发布
+
+---
+
+## 代码结构
+
+```
+packages/nova-pdf/
+├── src/nova_pdf/
+│   ├── __about__.py          # 版本号 (0.1.0)
+│   ├── __init__.py           # 导出 register_converters
+│   ├── _plugin.py            # 插件注册入口
+│   ├── _converter.py         # PDF 转换器核心
+│   ├── _page_analyzer.py     # 图片/表格检测
+│   ├── _page_renderer.py     # 页面截图 (pdfplumber.to_image)
+│   └── _ai_service.py        # AI 接口封装（两步上传）
+├── tests/
+│   ├── test_analyzer.py      # 分析器测试
+│   ├── test_converter.py     # 转换器测试
+│   └── test_ai_service.py    # AI 服务测试
+├── pyproject.toml            # 项目配置 + nova-pdf 配置
+└── README.md                 # 使用文档
+```
+
+**语法验证**: ✓ 所有 Python 文件通过语法检查
+
+---
+
+## 15. 改造完成总结
+
+### 15.1 主要变更
+
+| 文件 | 变更内容 |
+|------|----------|
+| `_ai_service.py` | 重写为两步调用：上传 → Workflow |
+| `_plugin.py` | 适配新 AIService 初始化参数 |
+| `_converter.py` | 传递文件名给 AI 服务 |
+| `pyproject.toml` | 添加 `[tool.nova-pdf]` 配置段 |
+| `README.md` | 更新环境变量和配置说明 |
+| `tests/test_ai_service.py` | 新增 AI 服务测试（13 个用例）|
+
+### 15.2 环境变量
+
+```bash
+export NOVA_UPLOAD_TOKEN="your-fastgpt-token"      # 必需
+export NOVA_WORKFLOW_TOKEN="your-workflow-token"  # 必需
+export NOVA_BASE_URL="https://xny-test.glodon.com/jsf-ai"  # 可选
+export NOVA_APP_ID="69fc37113fedac1eaaf65c82"     # 可选
+```
+
+### 15.3 快速开始
+
+```python
+from markitdown import MarkItDown
+
+# 启用插件
+md = MarkItDown(enable_plugins=True)
+
+# 转换 PDF（复杂页面自动调用 AI）
+result = md.convert("document.pdf")
+print(result.markdown)
+```
+
+### 15.4 实测结果
+
+**测试图片**: `数位顺序表.png` (22KB)
+
+**测试结果**: ✓ 成功转换
+
+```markdown
+|  | 整数部分 | | | | | | | 小数部分 | | | | |
+|:---:|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
+| 数位 | ...... | 万位 | 千位 | 百位 | 十位 | 个位 | . | 十分位 | 百分位 | 千分位 | 万分位 | ...... |
+| 单位 | ...... | 万 | 千 | 百 | 十 | 个 | | 十分之一 0.1 | 百分之一 0.01 | 千分之一 0.001 | 万分之一 0.0001 | ...... |
+```
+
+**关键修正**:
+1. 上传接口返回 `code: 200`（不是 0）
+2. Workflow 接口需要 `messages` 字段（OpenAI 兼容格式）
+3. SSL 验证跳过（`verify=False`）以适配内部 API
+
+---
+
+## 13. 附录
+
+### 13.1 参考实现
+- `markitdown-ocr`: 已有的 OCR 插件，可参考架构
+- `markitdown-sample-plugin`: 官方插件示例
+- `_pdf_converter.py`: 默认 PDF 转换器实现
+
+### 13.2 关键代码参考
+```python
+# 参考 markitdown-ocr 的插件注册方式
+def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
+    PRIORITY_NOVA_PDF = -1.0  # 优先于默认 PDF 转换器
+
+    llm_client = kwargs.get("llm_client")
+    llm_model = kwargs.get("llm_model", "gpt-4o")
+
+    ai_service = None
+    if llm_client:
+        ai_service = AIService(client=llm_client, model=llm_model)
+
+    markitdown.register_converter(
+        NovaPdfConverter(ai_service=ai_service),
+        priority=PRIORITY_NOVA_PDF,
+    )
+
+# 页面截图渲染（简化版）
+def render_page_to_image(page, dpi: int = 150) -> io.BytesIO:
+    """使用 pdfplumber.to_image 渲染页面"""
+    page_image = page.to_image(resolution=dpi)
+    img_stream = io.BytesIO()
+    page_image.original.save(img_stream, format="PNG")
+    img_stream.seek(0)
+    return img_stream
+```
+
+---
+
+## 14. AI 接口改造方案（自定义两步调用）
+
+### 14.1 背景
+
+原方案使用 OpenAI 兼容的 base64 图片上传方式，现需改造为自定义两步流程：
+1. 上传图片到文件服务，获取 URL
+2. 调用 Workflow 接口处理图片
+
+### 14.2 接口分析
+
+#### Step 1: 文件上传接口
+
+**请求**
+```
+POST https://xny-test.glodon.com/jsf-ai/api/common/file/upload
+Content-Type: multipart/form-data
+Cookie: fastgpt_token=<token>
+```
+
+**表单参数**
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| metadata | string | ✓ | JSON 字符串，如 `{"chatId":"<uuid>"}`，每次动态生成 |
+| bucketName | string | ✓ | 固定值 `chat` |
+| file | binary | ✓ | 图片文件（PNG/JPEG） |
+| data | string | ✓ | JSON 字符串，如 `{"appId":"69fc37113fedac1eaaf65c82"}` |
+
+**响应示例**
+```json
+{
+  "code": 200,
+  "data": {
+    "previewUrl": "https://xny-test.glodon.com/jsf-ai/api/common/file/read/xxx.png?token=...",
+    "fileId": "69fc42e024457b47b7e22b4a"
+  }
+}
+```
+
+> 注意：接口返回 `code: 200` 表示成功（不是 0）
+
+#### Step 2: Workflow 调用接口
+
+**请求**
+```
+POST https://xny-test.glodon.com/jsf-ai/api/v1/chat/completions
+Content-Type: application/json
+Authorization: Bearer <workflow_image2markdown_key>
+```
+
+**请求体**（OpenAI 兼容格式）
+```json
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {"type": "text", "text": "请将这张图片的内容转换为 Markdown 格式。"},
+        {"type": "image_url", "image_url": {"url": "https://...previewUrl..."}}
+      ]
+    }
+  ]
+}
+```
+
+**响应示例**（OpenAI 兼容格式）
+```json
+{
+  "id": "",
+  "model": "",
+  "choices": [
+    {
+      "message": {
+        "role": "assistant",
+        "content": "| 数位顺序表 |\n|---|"
+      }
+    }
+  ]
+}
+```
+
+> 注意：Workflow 接口使用 OpenAI 兼容的消息格式，需要 `messages` 字段
+
+### 14.3 改造后的 AIService
+
+```python
+"""AI service with custom two-step API calls."""
+
+import io
+import json
+import requests
+from dataclasses import dataclass
+from typing import Any, BinaryIO, Optional
+
+
+@dataclass
+class AIResult:
+    """Result from AI conversion."""
+    text: str
+    success: bool = True
+    error: Optional[str] = None
+
+
+class AIService:
+    """
+    AI 服务 - 自定义两步调用方式
+
+    流程：
+    1. 上传图片到文件服务，获取 previewUrl
+    2. 调用 Workflow 接口，传入 fileUrls 参数
+    """
+
+    def __init__(
+        self,
+        base_url: str = "https://xny-test.glodon.com/jsf-ai",
+        upload_token: str = "",           # fastgpt_token (Cookie)
+        workflow_token: str = "",         # workflow_image2markdown_key (Authorization)
+        chat_id: str = "",                # 用于上传接口的 chatId
+        app_id: str = "",                 # 用于上传接口的 appId
+        timeout: int = 60,
+    ):
+        """
+        初始化 AI 服务
+
+        Args:
+            base_url: API 基础地址
+            upload_token: 文件上传认证 token（fastgpt_token）
+            workflow_token: Workflow 接口认证 token
+            chat_id: 会话 ID
+            app_id: 应用 ID
+            timeout: 请求超时时间（秒）
+        """
+        self.base_url = base_url.rstrip("/")
+        self.upload_token = upload_token
+        self.workflow_token = workflow_token
+        self.chat_id = chat_id
+        self.app_id = app_id
+        self.timeout = timeout
+
+    def image_to_markdown(
+        self,
+        image_stream: BinaryIO,
+        filename: str = "page.png",
+    ) -> AIResult:
+        """
+        将图片转换为 Markdown（两步调用）
+
+        Args:
+            image_stream: 图片流
+            filename: 文件名
+
+        Returns:
+            AIResult: 转换结果
+        """
+        try:
+            # Step 1: 上传图片
+            upload_result = self._upload_file(image_stream, filename)
+            if not upload_result["success"]:
+                return AIResult(
+                    text="",
+                    success=False,
+                    error=f"Upload failed: {upload_result.get('error')}"
+                )
+
+            file_url = upload_result["preview_url"]
+
+            # Step 2: 调用 Workflow
+            workflow_result = self._call_workflow(file_url)
+            if not workflow_result["success"]:
+                return AIResult(
+                    text="",
+                    success=False,
+                    error=f"Workflow failed: {workflow_result.get('error')}"
+                )
+
+            return AIResult(
+                text=workflow_result["text"],
+                success=True,
+            )
+
+        except Exception as e:
+            return AIResult(
+                text="",
+                success=False,
+                error=str(e),
+            )
+
+    def _upload_file(
+        self,
+        image_stream: BinaryIO,
+        filename: str,
+    ) -> dict:
+        """
+        上传文件到文件服务
+
+        Args:
+            image_stream: 图片流
+            filename: 文件名
+
+        Returns:
+            dict: {"success": bool, "preview_url": str, "error": str}
+        """
+        url = f"{self.base_url}/api/common/file/upload"
+
+        # 准备 multipart/form-data
+        files = {
+            "file": (filename, image_stream, "image/png")
+        }
+
+        data = {
+            "metadata": json.dumps({"chatId": self.chat_id}),
+            "bucketName": "chat",
+            "data": json.dumps({"appId": self.app_id}),
+        }
+
+        headers = {
+            "Cookie": f"fastgpt_token={self.upload_token}",
+        }
+
+        try:
+            response = requests.post(
+                url,
+                files=files,
+                data=data,
+                headers=headers,
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+
+            result = response.json()
+
+            if result.get("code") == 0 and result.get("data", {}).get("previewUrl"):
+                return {
+                    "success": True,
+                    "preview_url": result["data"]["previewUrl"],
+                }
+            else:
+                return {
+                    "success": False,
+                    "error": result.get("message", "Unknown error"),
+                }
+
+        except requests.RequestException as e:
+            return {
+                "success": False,
+                "error": str(e),
+            }
+
+    def _call_workflow(self, file_url: str) -> dict:
+        """
+        调用 Workflow 接口处理图片
+
+        Args:
+            file_url: 文件 URL
+
+        Returns:
+            dict: {"success": bool, "text": str, "error": str}
+        """
+        url = f"{self.base_url}/api/v1/chat/completions"
+
+        headers = {
+            "Authorization": f"Bearer {self.workflow_token}",
+            "Content-Type": "application/json",
+        }
+
+        payload = {
+            "fileUrls": [file_url],
+        }
+
+        try:
+            response = requests.post(
+                url,
+                json=payload,
+                headers=headers,
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+
+            result = response.json()
+
+            # 解析 OpenAI 兼容响应格式
+            choices = result.get("choices", [])
+            if choices:
+                content = choices[0].get("message", {}).get("content", "")
+                return {
+                    "success": True,
+                    "text": content.strip(),
+                }
+            else:
+                return {
+                    "success": False,
+                    "error": "No response content",
+                }
+
+        except requests.RequestException as e:
+            return {
+                "success": False,
+                "error": str(e),
+            }
+```
+
+### 14.4 使用示例
+
+```python
+from markitdown import MarkItDown
+from nova_pdf import AIService, NovaPdfConverter
+
+# 创建自定义 AI 服务
+ai_service = AIService(
+    base_url="https://xny-test.glodon.com/jsf-ai",
+    upload_token="<your-fastgpt-token>",  # fastgpt_token
+    workflow_token="your-workflow-token",
+    chat_id="tv1cyJFTt4wEKLqTKEx1KPEN",
+    app_id="69fc37113fedac1eaaf65c82",
+    timeout=120,
+)
+
+# 创建转换器
+converter = NovaPdfConverter(
+    ai_service=ai_service,
+    dpi=150,
+)
+
+# 手动注册
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+
+# 转换 PDF
+result = md.convert("document.pdf")
+print(result.markdown)
+```
+
+### 14.5 配置参数说明
+
+| 参数 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `base_url` | str | ✓ | API 基础地址 |
+| `upload_token` | str | ✓ | 文件上传认证 token（fastgpt_token） |
+| `workflow_token` | str | ✓ | Workflow 接口认证 token |
+| `chat_id` | str | ✓ | 会话 ID（用于上传接口） |
+| `app_id` | str | ✓ | 应用 ID（用于上传接口） |
+| `timeout` | int | | 超时时间，默认 60 秒 |
+
+### 14.6 错误处理
+
+```python
+def image_to_markdown(self, image_stream, filename="page.png") -> AIResult:
+    """带完善错误处理的转换"""
+    try:
+        # Step 1: 上传
+        upload_result = self._upload_file(image_stream, filename)
+        if not upload_result["success"]:
+            # 上传失败，返回详细错误
+            return AIResult(
+                text="",
+                success=False,
+                error=f"上传失败: {upload_result.get('error')}"
+            )
+
+        # Step 2: Workflow
+        workflow_result = self._call_workflow(upload_result["preview_url"])
+        if not workflow_result["success"]:
+            # Workflow 失败，返回详细错误
+            return AIResult(
+                text="",
+                success=False,
+                error=f"AI 处理失败: {workflow_result.get('error')}"
+            )
+
+        return AIResult(
+            text=workflow_result["text"],
+            success=True,
+        )
+
+    except requests.Timeout:
+        return AIResult(
+            text="",
+            success=False,
+            error="请求超时，请检查网络或增加 timeout 设置"
+        )
+    except requests.ConnectionError:
+        return AIResult(
+            text="",
+            success=False,
+            error="网络连接失败，请检查网络设置"
+        )
+    except json.JSONDecodeError:
+        return AIResult(
+            text="",
+            success=False,
+            error="响应解析失败，接口返回非 JSON 格式"
+        )
+    except Exception as e:
+        return AIResult(
+            text="",
+            success=False,
+            error=f"未知错误: {str(e)}"
+        )
+```
+
+### 14.7 与原方案的对比
+
+| 对比项 | 原方案（base64） | 新方案（两步上传） |
+|--------|-----------------|-------------------|
+| 图片传输 | base64 内嵌 | URL 引用 |
+| 请求大小 | 大（含图片数据） | 小（仅 URL） |
+| 适用场景 | 小图片 | 大图片、多图片 |
+| 依赖 | OpenAI SDK | requests |
+| 认证方式 | API Key | Token + Cookie |
+| 接口格式 | OpenAI 标准 | 自定义 |
+
+### 14.8 配置确认
+
+- [x] ~~`chat_id` 是否需要每次动态生成？~~ **是的，每次生成 UUID**
+- [x] ~~`app_id` 是否固定？~~ **是的，固定值**
+- [x] ~~`workflow_image2markdown_key` 如何获取？~~ **在 pyproject.toml 中配置**
+- [x] ~~是否需要支持并发上传？~~ **否**
+
+### 14.9 配置文件设计
+
+**pyproject.toml 新增配置项**
+```toml
+[project.optional-dependencies]
+nova-api = [
+  "requests>=2.28.0",
+]
+
+[tool.nova-pdf]
+# AI 服务配置
+base_url = "https://xny-test.glodon.com/jsf-ai"
+app_id = "69fc37113fedac1eaaf65c82"
+timeout = 120
+
+# 认证配置（建议通过环境变量覆盖）
+# upload_token = ""    # 环境变量: NOVA_UPLOAD_TOKEN
+# workflow_token = ""  # 环境变量: NOVA_WORKFLOW_TOKEN
+```
+
+**环境变量**
+- `NOVA_UPLOAD_TOKEN`: 上传接口认证 token (fastgpt_token)
+- `NOVA_WORKFLOW_TOKEN`: Workflow 接口认证 token
+- `NOVA_BASE_URL`: API 基础地址（可选，覆盖配置文件）
+- `NOVA_APP_ID`: 应用 ID（可选，覆盖配置文件）
diff --git a/docs/spec.md b/docs/spec.md
new file mode 100644
index 000000000..660e90a15
--- /dev/null
+++ b/docs/spec.md
@@ -0,0 +1,31 @@
+# 目标
+重构调用ai接口解析PDF的功能：对包含图片/表格的页面截图后调用 AI 接口转 Markdown
+
+# 技术要求
+使用glm-ocr能力，zai-sdk，如下
+
+# 关键信息：api key：528b833ddafd74f7ce6d32f6d1e3b39e.yLrspX8jiUwh5BGd 需要从配置文件读取
+
+# 安装最新版本
+pip install zai-sdk
+# 或指定版本
+pip install zai-sdk==0.2.2
+from zai import ZhipuAiClient
+
+# 初始化客户端
+client = ZhipuAiClient(api_key="your-api-key")
+
+image_url = "https://cdn.bigmodel.cn/static/logo/introduction.png"
+
+# 调用布局解析 API
+response = client.layout_parsing.create(
+    model="glm-ocr",
+    file=image_url
+)
+
+# 输出结果
+print(response)
+
+详细文档：https://docs.bigmodel.cn/cn/guide/models/vlm/glm-ocr#python
+
+先设计重构方案
\ No newline at end of file
diff --git a/packages/nova-pdf/README.md b/packages/nova-pdf/README.md
new file mode 100644
index 000000000..969e687d5
--- /dev/null
+++ b/packages/nova-pdf/README.md
@@ -0,0 +1,166 @@
+# Nova-PDF
+
+智能 PDF 转 Markdown 插件，使用 glm-ocr AI 驱动的图片和表格提取。
+
+## 特性
+
+- 🔍 **智能检测**：自动识别每页内容类型（纯文本 vs 图片/表格）
+- 📄 **默认解析**：纯文本页面使用 pdfplumber/pdfminer 提取，速度快、成本低
+- 🤖 **AI 增强**：复杂页面（图片、表格）使用 glm-ocr 转换为 Markdown
+- ⚙️ **灵活配置**：支持配置文件、环境变量等多种配置方式
+
+## 安装
+
+```bash
+# 基础安装
+pip install nova-pdf
+
+# 安装 AI 功能
+pip install nova-pdf[zhipu]
+```
+
+## 配置
+
+### 本地敏感配置（推荐）
+
+项目根目录的 `.secrets.local` 文件存储敏感信息，此文件不会被提交到 Git：
+
+```bash
+# 创建 .secrets.local 文件
+echo 'NOVA_ZHIPU_API_KEY="your-api-key"' > .secrets.local
+
+# 加载配置
+source .secrets.local
+```
+
+### 环境变量
+
+```bash
+# 必需
+export NOVA_ZHIPU_API_KEY="your-zhipu-api-key"
+
+# 可选
+export NOVA_MODEL="glm-ocr"
+export NOVA_DPI="150"
+export NOVA_TIMEOUT="120"
+```
+
+### 配置文件
+
+在 `pyproject.toml` 中配置默认值：
+
+```toml
+[tool.nova-pdf]
+model = "glm-ocr"
+dpi = 150
+timeout = 120
+force_ai = false
+```
+
+## 使用方法
+
+### 命令行（推荐）
+
+```bash
+# 1. 加载敏感配置
+source .secrets.local
+
+# 2. 查看已安装插件
+markitdown --list-plugins
+
+# 3. 使用插件转换 PDF
+markitdown -p document.pdf
+
+# 4. 保存到文件
+markitdown -p document.pdf -o output.md
+```
+
+### Python API
+
+```python
+from markitdown import MarkItDown
+
+# 方式1：自动加载配置
+md = MarkItDown(enable_plugins=True)
+result = md.convert("document.pdf")
+print(result.markdown)
+
+# 方式2：手动配置
+from nova_pdf import NovaPdfConfig, AIService, NovaPdfConverter
+
+config = NovaPdfConfig.load()
+ai_service = AIService(
+    api_key="your-api-key",
+    model="glm-ocr",
+)
+
+converter = NovaPdfConverter(
+    ai_service=ai_service,
+    dpi=150,
+)
+
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+result = md.convert("document.pdf")
+```
+
+## 配置选项
+
+### NovaPdfConfig 参数
+
+| 参数 | 类型 | 默认值 | 说明 |
+|------|------|--------|------|
+| `zhipu_api_key` | str | 环境变量 `NOVA_ZHIPU_API_KEY` | 智谱 API Key |
+| `model` | str | "glm-ocr" | 模型名称 |
+| `dpi` | int | 150 | 截图分辨率 |
+| `timeout` | int | 120 | 请求超时（秒） |
+| `force_ai` | bool | False | 强制所有页面使用 AI |
+
+### NovaPdfConverter 参数
+
+| 参数 | 类型 | 默认值 | 说明 |
+|------|------|--------|------|
+| `ai_service` | AIService | None | AI 服务实例 |
+| `dpi` | int | 150 | 截图分辨率 |
+| `force_ai` | bool | False | 强制所有页面使用 AI |
+
+## 工作原理
+
+```
+PDF 输入
+    │
+    ▼
+逐页分析内容类型
+    │
+    ├─ 纯文本页面 ──► pdfplumber 提取文本
+    │
+    └─ 复杂页面（图片/表格）
+          │
+          ├─ 截图渲染 (150 DPI)
+          │
+          ├─ base64 编码
+          │
+          └─ 调用 glm-ocr API 转 Markdown
+    │
+    ▼
+合并输出完整 Markdown
+```
+
+## 技术架构
+
+- **zai-sdk**: 智谱 AI 官方 SDK
+- **glm-ocr**: 智谱 OCR 模型，支持表格、图片识别
+- **pdfplumber**: PDF 页面分析和截图
+- **pdfminer**: 纯文本页面提取
+
+## 依赖
+
+- `markitdown>=0.1.0` - 基础框架
+- `pdfplumber>=0.11.9` - PDF 解析和截图
+- `pdfminer.six>=20251230` - 文本提取备用
+- `Pillow>=9.0.0` - 图像处理
+- `zai-sdk>=0.2.2` - 智谱 AI SDK（可选，AI 功能需要）
+
+## 许可证
+
+MIT
diff --git a/packages/nova-pdf/pyproject.toml b/packages/nova-pdf/pyproject.toml
new file mode 100644
index 000000000..f21aedf0c
--- /dev/null
+++ b/packages/nova-pdf/pyproject.toml
@@ -0,0 +1,74 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "nova-pdf"
+dynamic = ["version"]
+description = "Intelligent PDF to Markdown converter with AI-powered image/table extraction"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+keywords = ["markitdown", "pdf", "ocr", "ai", "llm", "vision", "glm-ocr"]
+authors = [
+  { name = "Contributors", email = "noreply@github.com" },
+]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+]
+
+dependencies = [
+  "markitdown>=0.1.0",
+  "pdfminer.six>=20251230",
+  "pdfplumber>=0.11.9",
+  "Pillow>=9.0.0",
+  "tomli>=2.0.0;python_version<'3.11'",
+]
+
+[project.optional-dependencies]
+zhipu = [
+  "zai-sdk>=0.2.2",
+]
+dev = [
+  "pytest>=7.0.0",
+]
+
+[project.urls]
+Documentation = "https://github.com/microsoft/markitdown#readme"
+Issues = "https://github.com/microsoft/markitdown/issues"
+Source = "https://github.com/microsoft/markitdown"
+
+[tool.hatch.version]
+path = "src/nova_pdf/__about__.py"
+
+# Plugin entry point - MarkItDown will discover this plugin
+[project.entry-points."markitdown.plugin"]
+nova_pdf = "nova_pdf"
+
+[tool.hatch.build.targets.sdist]
+only-include = ["src/nova_pdf"]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/nova_pdf"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+
+# Nova-PDF configuration
+[tool.nova-pdf]
+# API key - set via environment variable NOVA_ZHIPU_API_KEY
+api_key = ""
+model = "glm-ocr"
+dpi = 150
+timeout = 120
+force_ai = false
+
+# Legacy config (deprecated, will be removed)
+workflow_image2markdown_key = "<your-workflow-token>"
+fastgpt_token = "<your-fastgpt-token>"
diff --git a/packages/nova-pdf/src/nova_pdf/__about__.py b/packages/nova-pdf/src/nova_pdf/__about__.py
new file mode 100644
index 000000000..3dc1f76bc
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/__about__.py
@@ -0,0 +1 @@
+__version__ = "0.1.0"
diff --git a/packages/nova-pdf/src/nova_pdf/__init__.py b/packages/nova-pdf/src/nova_pdf/__init__.py
new file mode 100644
index 000000000..ce059c499
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/__init__.py
@@ -0,0 +1,13 @@
+from ._plugin import register_converters
+from ._config import NovaPdfConfig
+from ._ai_service import AIService, AIResult
+from ._converter import NovaPdfConverter
+
+__plugin_interface_version__ = 1
+__all__ = [
+    "register_converters",
+    "NovaPdfConfig",
+    "AIService",
+    "AIResult",
+    "NovaPdfConverter",
+]
\ No newline at end of file
diff --git a/packages/nova-pdf/src/nova_pdf/_ai_service.py b/packages/nova-pdf/src/nova_pdf/_ai_service.py
new file mode 100644
index 000000000..2451f9955
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/_ai_service.py
@@ -0,0 +1,202 @@
+"""AI service using zai-sdk and glm-ocr."""
+
+import base64
+import re
+from dataclasses import dataclass
+from typing import BinaryIO, Optional
+
+from ._config import NovaPdfConfig
+
+try:
+    from zai import ZhipuAiClient
+except ImportError:
+    ZhipuAiClient = None
+
+
+@dataclass
+class AIResult:
+    """Result from AI conversion."""
+    text: str
+    success: bool = True
+    error: Optional[str] = None
+
+
+class AIService:
+    """
+    AI Service using zai-sdk + glm-ocr.
+    
+    Features:
+    - Direct API call to glm-ocr layout_parsing
+    - Support image bytes via base64 data URI
+    - Return Markdown or HTML format content
+    """
+    
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: str = "glm-ocr",
+        timeout: int = 120,
+        config: Optional[NovaPdfConfig] = None,
+    ):
+        if ZhipuAiClient is None:
+            raise ImportError(
+                "zai-sdk is required. Install with: pip install nova-pdf[zhipu]"
+            )
+        
+        if config:
+            self.api_key = api_key or config.zhipu_api_key
+            self.model = model or config.model
+            self.timeout = timeout or config.timeout
+        else:
+            config = NovaPdfConfig.load()
+            self.api_key = api_key or config.zhipu_api_key
+            self.model = model
+            self.timeout = timeout
+        
+        if not self.api_key:
+            raise ValueError(
+                "API key is required. Set NOVA_ZHIPU_API_KEY environment variable"
+            )
+        
+        self.client = ZhipuAiClient(api_key=self.api_key)
+    
+    def image_to_markdown(
+        self,
+        image_stream: BinaryIO,
+        filename: str = "page.png",
+        keep_html: bool = False,
+    ) -> AIResult:
+        """
+        Convert image to Markdown using glm-ocr.
+        
+        Args:
+            image_stream: Image stream
+            filename: Filename (for content type detection)
+            keep_html: Keep HTML format for complex tables (default: False, convert to MD)
+        
+        Returns:
+            AIResult: Conversion result
+        """
+        try:
+            image_stream.seek(0)
+            image_bytes = image_stream.read()
+            
+            base64_image = base64.b64encode(image_bytes).decode("utf-8")
+            content_type = "image/jpeg" if filename.lower().endswith((".jpg", ".jpeg")) else "image/png"
+            data_uri = f"data:{content_type};base64,{base64_image}"
+            
+            response = self.client.layout_parsing.create(
+                model=self.model,
+                file=data_uri
+            )
+            
+            # Get HTML content
+            html = response.md_results or ""
+            
+            if not html and response.layout_details:
+                parts = []
+                for detail_list in response.layout_details:
+                    for detail in detail_list:
+                        if detail.content:
+                            parts.append(detail.content)
+                html = "\n".join(parts)
+            
+            # Convert to Markdown or keep HTML
+            if keep_html:
+                text = html.strip()
+            else:
+                text = self._html_to_markdown(html.strip()) if html else ""
+            
+            return AIResult(text=text, success=True)
+        
+        except Exception as e:
+            return AIResult(text="", success=False, error=str(e))
+        finally:
+            image_stream.seek(0)
+    
+    def _html_to_markdown(self, html: str) -> str:
+        """Convert HTML to Markdown."""
+        if not html:
+            return ""
+        
+        # Extract titles from <div>
+        titles = []
+        div_pattern = r'<div[^>]*>(.*?)</div>'
+        for match in re.finditer(div_pattern, html, re.DOTALL | re.IGNORECASE):
+            title = re.sub(r'<[^>]+>', '', match.group(1)).strip()
+            if title:
+                titles.append(title)
+        
+        # Remove <div> from HTML
+        html = re.sub(div_pattern, '', html, flags=re.DOTALL | re.IGNORECASE)
+        
+        # Check for table
+        if '<table' in html.lower():
+            table_md = self._convert_html_table(html)
+            if titles:
+                return f"**{' '.join(titles)}**\n\n{table_md}"
+            return table_md
+        
+        # Plain text
+        text = re.sub(r'<[^>]+>', '', html).strip()
+        if titles:
+            return f"**{' '.join(titles)}**\n\n{text}"
+        return text
+    
+    def _convert_html_table(self, html: str) -> str:
+        """Convert HTML table to Markdown table."""
+        # Parse rows
+        rows = []
+        rowspan_cells = {}
+        
+        for row_idx, row_match in enumerate(re.finditer(r'<tr[^>]*>(.*?)</tr>', html, re.DOTALL | re.IGNORECASE)):
+            cells = []
+            col_idx = 0
+            
+            # Fill rowspan cells
+            while (row_idx, col_idx) in rowspan_cells:
+                cells.append(rowspan_cells[(row_idx, col_idx)])
+                col_idx += 1
+            
+            # Parse cells
+            for cell_match in re.finditer(r'<td([^>]*)>(.*?)</td>', row_match.group(1), re.DOTALL | re.IGNORECASE):
+                attrs, content = cell_match.groups()
+                content = re.sub(r'<[^>]+>', '', content).strip().replace('\n', ' ')
+                
+                rowspan = int(r.group(1)) if (r := re.search(r'rowspan\s*=\s*["\']?(\d+)', attrs, re.IGNORECASE)) else 1
+                colspan = int(c.group(1)) if (c := re.search(r'colspan\s*=\s*["\']?(\d+)', attrs, re.IGNORECASE)) else 1
+                
+                cells.append(content)
+                cells.extend([""] * (colspan - 1))
+                
+                if rowspan > 1:
+                    for r in range(1, rowspan):
+                        for c in range(colspan):
+                            rowspan_cells[(row_idx + r, col_idx + c)] = content
+                
+                col_idx += colspan
+            
+            # Fill remaining rowspan
+            while (row_idx, col_idx) in rowspan_cells:
+                cells.append(rowspan_cells[(row_idx, col_idx)])
+                col_idx += 1
+            
+            rows.append(cells)
+        
+        if not rows:
+            return ""
+        
+        # Normalize
+        max_cols = max(len(row) for row in rows)
+        for row in rows:
+            row.extend([""] * (max_cols - len(row)))
+        
+        # Simple output: first row as header
+        md_lines = []
+        for i, row in enumerate(rows):
+            md_row = "| " + " | ".join(c or " " for c in row) + " |"
+            md_lines.append(md_row)
+            if i == 0:
+                md_lines.append("|" + "|".join(["---"] * max_cols) + "|")
+        
+        return "\n".join(md_lines)
diff --git a/packages/nova-pdf/src/nova_pdf/_config.py b/packages/nova-pdf/src/nova_pdf/_config.py
new file mode 100644
index 000000000..305ce1894
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/_config.py
@@ -0,0 +1,101 @@
+"""Configuration management for nova-pdf."""
+
+import os
+from pathlib import Path
+from typing import Optional
+from dataclasses import dataclass
+
+try:
+    import tomllib  # Python 3.11+
+except ImportError:
+    import tomli as tomllib
+
+
+@dataclass
+class NovaPdfConfig:
+    """nova-pdf configuration."""
+    
+    # API 配置
+    zhipu_api_key: str = ""
+    
+    # OCR 配置
+    model: str = "glm-ocr"
+    dpi: int = 150
+    timeout: int = 120
+    
+    # 处理策略
+    force_ai: bool = False
+    
+    @classmethod
+    def load(cls, config_path: Optional[str] = None) -> "NovaPdfConfig":
+        """
+        Load configuration from multiple sources (priority high to low):
+        1. Environment variables
+        2. Config file (pyproject.toml or nova-pdf.toml)
+        3. Default values
+        """
+        config = cls()
+        
+        # 1. Load from config file
+        config._load_from_file(config_path)
+        
+        # 2. Environment variables override
+        config._load_from_env()
+        
+        return config
+    
+    def _load_from_file(self, config_path: Optional[str] = None):
+        """Load from config file."""
+        search_paths = []
+        
+        if config_path:
+            search_paths.append(Path(config_path))
+        
+        # Current directory
+        search_paths.append(Path("pyproject.toml"))
+        search_paths.append(Path("nova-pdf.toml"))
+        
+        # User config directory
+        search_paths.append(Path.home() / ".config" / "nova-pdf" / "config.toml")
+        
+        for path in search_paths:
+            if path.exists():
+                try:
+                    with open(path, "rb") as f:
+                        data = tomllib.load(f)
+                    
+                    # Read [tool.nova-pdf] section
+                    if "tool" in data and "nova-pdf" in data["tool"]:
+                        self._apply_config(data["tool"]["nova-pdf"])
+                    elif "nova-pdf" in data:
+                        self._apply_config(data["nova-pdf"])
+                    
+                    break
+                except Exception:
+                    pass
+    
+    def _apply_config(self, data: dict):
+        """Apply config from dict."""
+        if "api_key" in data:
+            self.zhipu_api_key = data["api_key"]
+        if "model" in data:
+            self.model = data["model"]
+        if "dpi" in data:
+            self.dpi = data["dpi"]
+        if "timeout" in data:
+            self.timeout = data["timeout"]
+        if "force_ai" in data:
+            self.force_ai = data["force_ai"]
+    
+    def _load_from_env(self):
+        """Load from environment variables (highest priority)."""
+        if os.environ.get("NOVA_ZHIPU_API_KEY"):
+            self.zhipu_api_key = os.environ["NOVA_ZHIPU_API_KEY"]
+        if os.environ.get("NOVA_MODEL"):
+            self.model = os.environ["NOVA_MODEL"]
+        if os.environ.get("NOVA_DPI"):
+            self.dpi = int(os.environ["NOVA_DPI"])
+        if os.environ.get("NOVA_TIMEOUT"):
+            self.timeout = int(os.environ["NOVA_TIMEOUT"])
+        if os.environ.get("NOVA_FORCE_AI"):
+            self.force_ai = os.environ["NOVA_FORCE_AI"].lower() in ("true", "1", "yes")
diff --git a/packages/nova-pdf/src/nova_pdf/_converter.py b/packages/nova-pdf/src/nova_pdf/_converter.py
new file mode 100644
index 000000000..7ee1dd320
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/_converter.py
@@ -0,0 +1,251 @@
+"""Nova PDF Converter - Intelligent PDF to Markdown conversion."""
+
+import io
+import sys
+from typing import Any, BinaryIO, Optional
+
+from markitdown import DocumentConverter, DocumentConverterResult, StreamInfo
+from markitdown._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
+
+from ._page_analyzer import PageType, analyze_page
+from ._page_renderer import render_page_to_image
+from ._ai_service import AIService
+
+# Import dependencies
+_dependency_exc_info = None
+try:
+    import pdfminer
+    import pdfminer.high_level
+    import pdfplumber
+except ImportError:
+    _dependency_exc_info = sys.exc_info()
+
+
+ACCEPTED_MIME_TYPE_PREFIXES = [
+    "application/pdf",
+    "application/x-pdf",
+]
+
+ACCEPTED_FILE_EXTENSIONS = [".pdf"]
+
+
+class NovaPdfConverter(DocumentConverter):
+    """
+    智能 PDF 转换器
+    
+    特性：
+    - 自动检测每页内容类型（纯文本 vs 包含图片/表格）
+    - 纯文本页面使用默认解析（pdfplumber/pdfminer）
+    - 复杂页面截图后调用 AI 转换为 Markdown
+    """
+
+    def __init__(
+        self,
+        ai_service: Optional[AIService] = None,
+        dpi: int = 150,
+        force_ai: bool = False,
+    ):
+        """
+        初始化转换器
+
+        Args:
+            ai_service: AI 服务实例
+            dpi: 截图分辨率（默认 150）
+            force_ai: 强制所有页面使用 AI（默认 False）
+        """
+        self.ai_service = ai_service
+        self.dpi = dpi
+        self.force_ai = force_ai
+
+    def accepts(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> bool:
+        mimetype = (stream_info.mimetype or "").lower()
+        extension = (stream_info.extension or "").lower()
+
+        if extension in ACCEPTED_FILE_EXTENSIONS:
+            return True
+
+        for prefix in ACCEPTED_MIME_TYPE_PREFIXES:
+            if mimetype.startswith(prefix):
+                return True
+
+        return False
+
+    def convert(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> DocumentConverterResult:
+        if _dependency_exc_info is not None:
+            raise MissingDependencyException(
+                MISSING_DEPENDENCY_MESSAGE.format(
+                    converter=type(self).__name__,
+                    extension=".pdf",
+                    feature="pdf",
+                )
+            ) from _dependency_exc_info[1].with_traceback(
+                _dependency_exc_info[2]
+            )
+
+        # 获取 AI 服务（从 kwargs 或实例）
+        ai_service = kwargs.get("ai_service") or self.ai_service
+
+        # 读取 PDF
+        pdf_stream = io.BytesIO(file_stream.read())
+        markdown_parts = []
+
+        try:
+            with pdfplumber.open(pdf_stream) as pdf:
+                for page_num, page in enumerate(pdf.pages):
+                    # 分析页面类型
+                    page_type = analyze_page(page)
+
+                    # 根据类型选择处理方式
+                    if self.force_ai or page_type != PageType.PLAIN_TEXT:
+                        # 复杂内容：截图 + AI
+                        if ai_service:
+                            markdown = self._convert_with_ai(
+                                page, page_num, ai_service
+                            )
+                        else:
+                            # 无 AI 服务，回退到默认解析
+                            markdown = self._extract_text_with_tables(page)
+                    else:
+                        # 纯文本：默认解析
+                        markdown = self._extract_text_with_tables(page)
+
+                    if markdown.strip():
+                        markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
+
+                    # 释放页面资源
+                    page.close()
+
+            markdown = "\n\n".join(markdown_parts).strip()
+
+        except Exception:
+            # 异常情况：回退到 pdfminer
+            pdf_stream.seek(0)
+            markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
+
+        # 最终回退
+        if not markdown:
+            pdf_stream.seek(0)
+            markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
+
+        return DocumentConverterResult(markdown=markdown)
+
+    def _convert_with_ai(
+        self,
+        page: Any,
+        page_num: int,
+        ai_service: AIService,
+    ) -> str:
+        """
+        使用 AI 转换页面
+
+        Args:
+            page: pdfplumber 页面对象
+            page_num: 页码
+            ai_service: AI 服务
+
+        Returns:
+            str: Markdown 内容
+        """
+        try:
+            # 截图
+            img_stream = render_page_to_image(page, self.dpi)
+
+            # 调用 AI（文件名使用页码）
+            filename = f"page_{page_num + 1}.png"
+            result = ai_service.image_to_markdown(img_stream, filename=filename)
+
+            if result.success and result.text.strip():
+                return result.text
+            else:
+                # AI 失败，回退到默认解析
+                return self._extract_text_with_tables(page)
+
+        except Exception:
+            # 异常情况，回退到默认解析
+            return self._extract_text_with_tables(page)
+
+    def _extract_text_with_tables(self, page: Any) -> str:
+        """
+        提取文本和表格
+
+        Args:
+            page: pdfplumber 页面对象
+
+        Returns:
+            str: Markdown 内容
+        """
+        parts = []
+
+        # 提取文本
+        text = page.extract_text() or ""
+        if text.strip():
+            parts.append(text.strip())
+
+        # 提取表格
+        try:
+            tables = page.extract_tables()
+            if tables:
+                for table in tables:
+                    if table:
+                        md_table = self._table_to_markdown(table)
+                        if md_table.strip():
+                            parts.append(md_table)
+        except Exception:
+            pass
+
+        return "\n\n".join(parts)
+
+    def _table_to_markdown(self, table: list[list[str]]) -> str:
+        """
+        将表格转换为 Markdown
+
+        Args:
+            table: 2D 列表
+
+        Returns:
+            str: Markdown 表格
+        """
+        if not table:
+            return ""
+
+        # 过滤 None 值
+        table = [[cell if cell is not None else "" for cell in row] for row in table]
+
+        # 过滤空行
+        table = [row for row in table if any(cell.strip() for cell in row)]
+
+        if not table:
+            return ""
+
+        # 计算列宽
+        col_widths = [
+            max(len(str(row[i])) if i < len(row) else 0 for row in table)
+            for i in range(max(len(row) for row in table))
+        ]
+
+        # 格式化表格
+        lines = []
+        for row_idx, row in enumerate(table):
+            # 补齐列数
+            padded_row = row + [""] * (len(col_widths) - len(row))
+            line = "| " + " | ".join(
+                str(cell).ljust(width) for cell, width in zip(padded_row, col_widths)
+            ) + " |"
+            lines.append(line)
+
+            # 添加分隔行
+            if row_idx == 0:
+                sep = "|" + "|".join("-" * (w + 2) for w in col_widths) + "|"
+                lines.append(sep)
+
+        return "\n".join(lines)
diff --git a/packages/nova-pdf/src/nova_pdf/_page_analyzer.py b/packages/nova-pdf/src/nova_pdf/_page_analyzer.py
new file mode 100644
index 000000000..1aa014043
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/_page_analyzer.py
@@ -0,0 +1,117 @@
+"""Page content analyzer for detecting images and tables."""
+
+from enum import Enum
+from typing import Any
+
+
+class PageType(Enum):
+    """Page content type classification."""
+    PLAIN_TEXT = "plain_text"      # 纯文本，使用默认解析
+    HAS_IMAGES = "has_images"      # 包含图片
+    HAS_TABLES = "has_tables"      # 包含表格
+    COMPLEX = "complex"            # 复杂内容（图片+表格+混合）
+
+
+def detect_images(page: Any) -> bool:
+    """
+    检测页面是否包含图片
+
+    Args:
+        page: pdfplumber 页面对象
+
+    Returns:
+        bool: 是否包含图片
+    """
+    # 方法1: 直接检测 page.images
+    if hasattr(page, 'images') and len(page.images) > 0:
+        return True
+
+    # 方法2: 检测页面对象中的图像资源
+    if hasattr(page, 'objects'):
+        objects = page.objects
+        if 'image' in objects and len(objects['image']) > 0:
+            return True
+        # 检测 XObject (可能包含内嵌图像)
+        if 'xobject' in objects and len(objects['xobject']) > 0:
+            for obj in objects['xobject']:
+                if isinstance(obj, dict) and obj.get('subtype') == 'Image':
+                    return True
+
+    # 方法3: 检测页面资源字典
+    try:
+        if hasattr(page, 'page') and hasattr(page.page, 'get_resources'):
+            resources = page.page.get_resources()
+            if resources and 'XObject' in resources:
+                return True
+    except Exception:
+        pass
+
+    return False
+
+
+def detect_tables(page: Any) -> bool:
+    """
+    检测页面是否包含表格
+
+    Args:
+        page: pdfplumber 页面对象
+
+    Returns:
+        bool: 是否包含表格
+    """
+    # 方法1: 使用 pdfplumber 的 extract_tables
+    try:
+        tables = page.extract_tables()
+        if tables and len(tables) > 0:
+            # 过滤空表格
+            for table in tables:
+                if table and any(any(cell for cell in row if cell) for row in table):
+                    return True
+    except Exception:
+        pass
+
+    # 方法2: 检测表格线（边框线）
+    try:
+        if hasattr(page, 'objects') and 'line' in page.objects:
+            lines = page.objects['line']
+            if len(lines) > 10:  # 大量线条可能构成表格
+                # 分析线条是否形成网格结构
+                h_lines = []
+                v_lines = []
+                for line in lines:
+                    # 水平线：高度很小
+                    if abs(line.get('height', 1)) < 2:
+                        h_lines.append(line)
+                    # 垂直线：宽度很小
+                    elif abs(line.get('width', 1)) < 2:
+                        v_lines.append(line)
+
+                if len(h_lines) > 2 and len(v_lines) > 2:
+                    return True
+    except Exception:
+        pass
+
+    return False
+
+
+def analyze_page(page: Any) -> PageType:
+    """
+    分析页面类型
+
+    Args:
+        page: pdfplumber 页面对象
+
+    Returns:
+        PageType: 页面类型
+    """
+    has_images = detect_images(page)
+    has_tables = detect_tables(page)
+
+    if has_images and has_tables:
+        return PageType.COMPLEX
+    elif has_images:
+        return PageType.HAS_IMAGES
+    elif has_tables:
+        return PageType.HAS_TABLES
+    else:
+        return PageType.PLAIN_TEXT
diff --git a/packages/nova-pdf/src/nova_pdf/_page_renderer.py b/packages/nova-pdf/src/nova_pdf/_page_renderer.py
new file mode 100644
index 000000000..d517e8780
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/_page_renderer.py
@@ -0,0 +1,32 @@
+"""Page renderer for converting PDF pages to images."""
+
+import io
+from typing import Any
+
+
+def render_page_to_image(page: Any, dpi: int = 150) -> io.BytesIO:
+    """
+    将 PDF 页面渲染为图片
+
+    Args:
+        page: pdfplumber 页面对象
+        dpi: 渲染分辨率，默认 150（平衡质量和速度）
+
+    Returns:
+        io.BytesIO: PNG 图片流
+    """
+    # 使用 pdfplumber 的 to_image 方法
+    page_image = page.to_image(resolution=dpi)
+
+    # 转换为 BytesIO
+    img_stream = io.BytesIO()
+    page_image.original.save(img_stream, format="PNG")
+    img_stream.seek(0)
+
+    return img_stream
+
+
+# DPI 预设值
+DPI_LOW = 72      # 快速预览，文件小
+DPI_MEDIUM = 150  # 平衡质量和速度（默认）
+DPI_HIGH = 300    # 高质量，适合复杂图表
diff --git a/packages/nova-pdf/src/nova_pdf/_plugin.py b/packages/nova-pdf/src/nova_pdf/_plugin.py
new file mode 100644
index 000000000..08a4fd8a0
--- /dev/null
+++ b/packages/nova-pdf/src/nova_pdf/_plugin.py
@@ -0,0 +1,56 @@
+"""Plugin registration for nova-pdf."""
+
+from typing import Any
+from markitdown import MarkItDown
+
+from ._config import NovaPdfConfig
+from ._ai_service import AIService
+from ._converter import NovaPdfConverter
+
+
+__plugin_interface_version__ = 1
+
+
+def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
+    """
+    Register nova-pdf converter.
+    
+    Config sources (priority high to low):
+    1. kwargs parameters
+    2. Environment variables
+    3. Config file (pyproject.toml)
+    4. Default values
+    """
+    # Load config
+    config = NovaPdfConfig.load()
+    
+    # kwargs override config
+    api_key = kwargs.get("api_key") or kwargs.get("zhipu_api_key") or config.zhipu_api_key
+    model = kwargs.get("model", config.model)
+    dpi = kwargs.get("dpi", config.dpi)
+    force_ai = kwargs.get("force_ai", config.force_ai)
+    timeout = kwargs.get("timeout", config.timeout)
+    
+    # Create AI service
+    ai_service = None
+    if api_key:
+        try:
+            ai_service = AIService(
+                api_key=api_key,
+                model=model,
+                timeout=timeout,
+            )
+        except Exception:
+            pass
+    
+    # Register converter
+    PRIORITY_NOVA_PDF = -1.0
+    
+    markitdown.register_converter(
+        NovaPdfConverter(
+            ai_service=ai_service,
+            dpi=dpi,
+            force_ai=force_ai,
+        ),
+        priority=PRIORITY_NOVA_PDF,
+    )
diff --git a/packages/nova-pdf/tests/__init__.py b/packages/nova-pdf/tests/__init__.py
new file mode 100644
index 000000000..dfa7b4968
--- /dev/null
+++ b/packages/nova-pdf/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for nova-pdf converter."""
\ No newline at end of file
diff --git a/packages/nova-pdf/tests/test_ai_service.py b/packages/nova-pdf/tests/test_ai_service.py
new file mode 100644
index 000000000..7c7636848
--- /dev/null
+++ b/packages/nova-pdf/tests/test_ai_service.py
@@ -0,0 +1,103 @@
+"""Tests for AI service with zai-sdk."""
+
+import io
+import pytest
+from unittest.mock import MagicMock, patch
+
+from nova_pdf._ai_service import AIService, AIResult
+from nova_pdf._config import NovaPdfConfig
+
+
+class TestAIService:
+    """AI Service tests with zai-sdk."""
+
+    def test_missing_zai_sdk_raises_error(self):
+        """Missing zai-sdk raises error."""
+        with patch("nova_pdf._ai_service.ZhipuAiClient", None):
+            with pytest.raises(ImportError, match="zai-sdk is required"):
+                AIService(api_key="test")
+
+    def test_missing_api_key_raises_error(self):
+        """Missing API key raises error."""
+        with patch("nova_pdf._ai_service.ZhipuAiClient", MagicMock()):
+            with pytest.raises(ValueError, match="API key is required"):
+                AIService(api_key="")
+
+    def test_successful_conversion(self):
+        """Successful conversion."""
+        # Mock ZhipuAiClient
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.md_results = "<table><tr><td>Test</td></tr></table>"
+        mock_response.layout_details = []
+        mock_client.layout_parsing.create.return_value = mock_response
+
+        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+            service = AIService(api_key="test-api-key")
+            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
+
+        assert result.success is True
+        assert "Test" in result.text
+
+    def test_html_table_conversion(self):
+        """HTML table to Markdown conversion."""
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.md_results = '<table><tr><td>A</td><td>B</td></tr><tr><td>1</td><td>2</td></tr></table>'
+        mock_response.layout_details = []
+        mock_client.layout_parsing.create.return_value = mock_response
+
+        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+            service = AIService(api_key="test-api-key")
+            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
+
+        assert result.success is True
+        assert "| A | B |" in result.text
+        assert "|---|---|" in result.text
+        assert "| 1 | 2 |" in result.text
+
+    def test_empty_result(self):
+        """Empty result handling."""
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.md_results = ""
+        mock_response.layout_details = []
+        mock_client.layout_parsing.create.return_value = mock_response
+
+        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+            service = AIService(api_key="test-api-key")
+            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
+
+        assert result.success is True
+        assert result.text == ""
+
+    def test_error_handling(self):
+        """Error handling."""
+        mock_client = MagicMock()
+        mock_client.layout_parsing.create.side_effect = Exception("API Error")
+
+        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+            service = AIService(api_key="test-api-key")
+            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
+
+        assert result.success is False
+        assert "API Error" in result.error
+
+    def test_base64_encoding(self):
+        """Test base64 encoding of image."""
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.md_results = "test"
+        mock_response.layout_details = []
+        mock_client.layout_parsing.create.return_value = mock_response
+
+        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+            service = AIService(api_key="test-api-key")
+            result = service.image_to_markdown(io.BytesIO(b"fake-image"), "test.png")
+
+        assert result.success is True
+        
+        # Verify data URI was used
+        call_args = mock_client.layout_parsing.create.call_args
+        file_arg = call_args.kwargs['file']
+        assert file_arg.startswith("data:image/png;base64,")
diff --git a/packages/nova-pdf/tests/test_analyzer.py b/packages/nova-pdf/tests/test_analyzer.py
new file mode 100644
index 000000000..137e486ab
--- /dev/null
+++ b/packages/nova-pdf/tests/test_analyzer.py
@@ -0,0 +1,131 @@
+"""Tests for page analyzer."""
+
+import pytest
+from unittest.mock import MagicMock
+
+from nova_pdf._page_analyzer import (
+    PageType,
+    detect_images,
+    detect_tables,
+    analyze_page,
+)
+
+
+class TestDetectImages:
+    """图片检测测试"""
+
+    def test_no_images(self):
+        """无图片页面"""
+        page = MagicMock()
+        page.images = []
+        page.objects = {}
+
+        assert detect_images(page) is False
+
+    def test_has_images_via_images_attr(self):
+        """通过 page.images 检测图片"""
+        page = MagicMock()
+        page.images = [MagicMock(x0=0, y0=0, x1=100, y1=100)]
+
+        assert detect_images(page) is True
+
+    def test_has_images_via_objects(self):
+        """通过 page.objects 检测图片"""
+        page = MagicMock()
+        page.images = []
+        page.objects = {"image": [MagicMock()]}
+
+        assert detect_images(page) is True
+
+    def test_has_xobject_image(self):
+        """通过 XObject 检测图片"""
+        page = MagicMock()
+        page.images = []
+        page.objects = {
+            "xobject": [{"subtype": "Image"}]
+        }
+
+        assert detect_images(page) is True
+
+
+class TestDetectTables:
+    """表格检测测试"""
+
+    def test_no_tables(self):
+        """无表格页面"""
+        page = MagicMock()
+        page.extract_tables.return_value = []
+
+        assert detect_tables(page) is False
+
+    def test_has_tables_via_extract_tables(self):
+        """通过 extract_tables 检测表格"""
+        page = MagicMock()
+        page.extract_tables.return_value = [
+            [["A", "B", "C"], ["1", "2", "3"]]
+        ]
+
+        assert detect_tables(page) is True
+
+    def test_empty_table_not_detected(self):
+        """空表格不应被检测"""
+        page = MagicMock()
+        page.extract_tables.return_value = [
+            [["", "", ""], ["", "", ""]]
+        ]
+
+        assert detect_tables(page) is False
+
+    def test_has_table_lines(self):
+        """通过线条检测表格"""
+        page = MagicMock()
+        page.extract_tables.return_value = []
+
+        # 模拟网格线条
+        lines = []
+        for i in range(5):
+            # 水平线
+            lines.append({"height": 0.5, "width": 100})
+            # 垂直线
+            lines.append({"height": 100, "width": 0.5})
+
+        page.objects = {"line": lines}
+
+        assert detect_tables(page) is True
+
+
+class TestAnalyzePage:
+    """页面分析测试"""
+
+    def test_plain_text_page(self):
+        """纯文本页面"""
+        page = MagicMock()
+        page.images = []
+        page.objects = {}
+        page.extract_tables.return_value = []
+
+        assert analyze_page(page) == PageType.PLAIN_TEXT
+
+    def test_page_with_images(self):
+        """仅包含图片"""
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_tables.return_value = []
+
+        assert analyze_page(page) == PageType.HAS_IMAGES
+
+    def test_page_with_tables(self):
+        """仅包含表格"""
+        page = MagicMock()
+        page.images = []
+        page.extract_tables.return_value = [[["A", "B"]]]
+
+        assert analyze_page(page) == PageType.HAS_TABLES
+
+    def test_complex_page(self):
+        """同时包含图片和表格"""
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_tables.return_value = [[["A", "B"]]]
+
+        assert analyze_page(page) == PageType.COMPLEX
\ No newline at end of file
diff --git a/packages/nova-pdf/tests/test_converter.py b/packages/nova-pdf/tests/test_converter.py
new file mode 100644
index 000000000..ea13266f7
--- /dev/null
+++ b/packages/nova-pdf/tests/test_converter.py
@@ -0,0 +1,181 @@
+"""Tests for nova-pdf converter."""
+
+import io
+import pytest
+from unittest.mock import MagicMock, patch
+
+from nova_pdf._converter import NovaPdfConverter
+from nova_pdf._ai_service import AIService, AIResult
+from nova_pdf._page_analyzer import PageType
+
+
+class TestNovaPdfConverter:
+    """转换器测试"""
+
+    def test_accepts_pdf_extension(self):
+        """接受 .pdf 扩展名"""
+        converter = NovaPdfConverter()
+        stream = io.BytesIO(b"%PDF-1.4")
+        stream_info = MagicMock(extension=".pdf", mimetype=None)
+
+        assert converter.accepts(stream, stream_info) is True
+
+    def test_accepts_pdf_mimetype(self):
+        """接受 PDF MIME 类型"""
+        converter = NovaPdfConverter()
+        stream = io.BytesIO(b"%PDF-1.4")
+        stream_info = MagicMock(extension=None, mimetype="application/pdf")
+
+        assert converter.accepts(stream, stream_info) is True
+
+    def test_rejects_non_pdf(self):
+        """拒绝非 PDF 文件"""
+        converter = NovaPdfConverter()
+        stream = io.BytesIO(b"not a pdf")
+        stream_info = MagicMock(extension=".txt", mimetype="text/plain")
+
+        assert converter.accepts(stream, stream_info) is False
+
+    def test_table_to_markdown(self):
+        """表格转 Markdown"""
+        converter = NovaPdfConverter()
+        table = [
+            ["Name", "Age", "City"],
+            ["Alice", "25", "Beijing"],
+            ["Bob", "30", "Shanghai"],
+        ]
+
+        result = converter._table_to_markdown(table)
+        
+        assert "|" in result
+        assert "Name" in result
+        assert "Alice" in result
+        assert "---" in result  # 分隔行
+
+    def test_plain_text_page_without_ai(self):
+        """纯文本页面不使用 AI"""
+        converter = NovaPdfConverter()
+
+        # 模拟页面
+        page = MagicMock()
+        page.images = []
+        page.objects = {}
+        page.extract_tables.return_value = []
+        page.extract_text.return_value = "Hello World"
+        page.close = MagicMock()
+
+        # 模拟 PDF
+        mock_pdf = MagicMock()
+        mock_pdf.pages = [page]
+
+        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = mock_pdf
+
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter.convert(stream, MagicMock())
+
+        assert "Hello World" in result.markdown
+
+    def test_complex_page_with_ai(self):
+        """复杂页面使用 AI"""
+        # 模拟 AI 服务
+        ai_service = MagicMock(spec=AIService)
+        ai_service.image_to_markdown.return_value = AIResult(
+            success=True,
+            text="# AI Generated\n\nThis is from AI."
+        )
+
+        converter = NovaPdfConverter(ai_service=ai_service)
+
+        # 模拟页面
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_tables.return_value = []
+        page.extract_text.return_value = "Plain text"
+        page.to_image.return_value.original = MagicMock()
+        page.close = MagicMock()
+
+        # 模拟图片保存
+        img_stream = io.BytesIO()
+        page.to_image.return_value.original.save = lambda s, format: s.write(b"fake")
+
+        # 模拟 PDF
+        mock_pdf = MagicMock()
+        mock_pdf.pages = [page]
+
+        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = mock_pdf
+
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter.convert(stream, MagicMock())
+
+        # 应该调用 AI
+        ai_service.image_to_markdown.assert_called_once()
+        assert "AI Generated" in result.markdown
+
+    def test_force_ai_mode(self):
+        """强制 AI 模式"""
+        ai_service = MagicMock(spec=AIService)
+        ai_service.image_to_markdown.return_value = AIResult(
+            success=True,
+            text="AI result"
+        )
+
+        converter = NovaPdfConverter(ai_service=ai_service, force_ai=True)
+
+        # 即使是纯文本页面
+        page = MagicMock()
+        page.images = []
+        page.objects = {}
+        page.extract_tables.return_value = []
+        page.extract_text.return_value = "Plain text"
+        page.to_image.return_value.original = MagicMock()
+        page.close = MagicMock()
+
+        img_stream = io.BytesIO()
+        page.to_image.return_value.original.save = lambda s, format: s.write(b"fake")
+
+        mock_pdf = MagicMock()
+        mock_pdf.pages = [page]
+
+        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = mock_pdf
+
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter.convert(stream, MagicMock())
+
+        # 应该调用 AI（因为 force_ai=True）
+        ai_service.image_to_markdown.assert_called_once()
+
+    def test_fallback_on_ai_failure(self):
+        """AI 失败时回退到默认解析"""
+        ai_service = MagicMock(spec=AIService)
+        ai_service.image_to_markdown.return_value = AIResult(
+            success=False,
+            text="",
+            error="API error"
+        )
+
+        converter = NovaPdfConverter(ai_service=ai_service)
+
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_tables.return_value = []
+        page.extract_text.return_value = "Fallback text"
+        page.to_image.return_value.original = MagicMock()
+        page.close = MagicMock()
+
+        img_stream = io.BytesIO()
+        page.to_image.return_value.original.save = lambda s, format: s.write(b"fake")
+
+        mock_pdf = MagicMock()
+        mock_pdf.pages = [page]
+
+        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = mock_pdf
+
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter.convert(stream, MagicMock())
+
+        # 应该回退到默认文本
+        assert "Fallback text" in result.markdown
diff --git a/scripts/load_secrets.sh b/scripts/load_secrets.sh
new file mode 100755
index 000000000..ede9291d0
--- /dev/null
+++ b/scripts/load_secrets.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# 加载本地敏感配置
+
+if [ -f ".secrets.local" ]; then
+    echo "Loading secrets from .secrets.local"
+    set -a
+    source .secrets.local
+    set +a
+    echo "✓ Secrets loaded"
+else
+    echo "✗ .secrets.local not found"
+    exit 1
+fi

From 6bd22b487a28be9ee983e75f39394a973adcfb15 Mon Sep 17 00:00:00 2001
From: hankl <your.email@example.com>
Date: Sat, 9 May 2026 19:13:04 +0800
Subject: [PATCH 02/15] Update .gitignore

---
 .gitignore | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitignore b/.gitignore
index 15613ea8a..5a6b7d117 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,3 +166,8 @@ cython_debug/
 src/.DS_Store
 .DS_Store
 .cursorrules
+
+# Local secrets (never commit)
+.secrets.local
+*.secrets
+.env.local

From af938277c616f9912ac3c4486832f3bcfce4ad8c Mon Sep 17 00:00:00 2001
From: hankl <your.email@example.com>
Date: Sat, 9 May 2026 19:34:45 +0800
Subject: [PATCH 03/15] refactor: rename nova-pdf to markitdown-glmocr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename package from nova-pdf to markitdown-glmocr
- Rename module from nova_pdf to markitdown_glmocr
- Rename classes: NovaPdfConfig → GlmOcrConfig, NovaPdfConverter → GlmOcrPdfConverter
- Update environment variable: NOVA_ZHIPU_API_KEY → GLMOCR_API_KEY
- Update config section: [tool.nova-pdf] → [tool.markitdown-glmocr]
- Refactor AI service to use zai-sdk with glm-ocr
- Support base64 image transmission
- Add HTML/Markdown output options for complex tables

Verified with:
- Syntax check passed
- Module imports successful
- Plugin installation successful
- Image conversion tests passed (table1/2/3)
---
 .../{nova-pdf => markitdown-glmocr}/README.md | 30 +++----
 .../pyproject.toml                            | 24 +++--
 .../src/markitdown_glmocr}/__about__.py       |  0
 .../src/markitdown_glmocr}/__init__.py        |  8 +-
 .../src/markitdown_glmocr}/_ai_service.py     | 16 ++--
 .../src/markitdown_glmocr}/_config.py         | 48 +++++-----
 .../src/markitdown_glmocr}/_converter.py      | 90 +++++++++----------
 .../src/markitdown_glmocr}/_page_analyzer.py  |  0
 .../src/markitdown_glmocr}/_page_renderer.py  |  0
 .../src/markitdown_glmocr}/_plugin.py         | 20 ++---
 .../tests/__init__.py                         |  0
 .../tests/test_ai_service.py                  | 20 ++---
 .../tests/test_analyzer.py                    |  2 +-
 .../tests/test_converter.py                   | 76 ++++++++--------
 14 files changed, 165 insertions(+), 169 deletions(-)
 rename packages/{nova-pdf => markitdown-glmocr}/README.md (84%)
 rename packages/{nova-pdf => markitdown-glmocr}/pyproject.toml (73%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/__about__.py (100%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/__init__.py (61%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/_ai_service.py (94%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/_config.py (58%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/_converter.py (75%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/_page_analyzer.py (100%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/_page_renderer.py (100%)
 rename packages/{nova-pdf/src/nova_pdf => markitdown-glmocr/src/markitdown_glmocr}/_plugin.py (74%)
 rename packages/{nova-pdf => markitdown-glmocr}/tests/__init__.py (100%)
 rename packages/{nova-pdf => markitdown-glmocr}/tests/test_ai_service.py (81%)
 rename packages/{nova-pdf => markitdown-glmocr}/tests/test_analyzer.py (98%)
 rename packages/{nova-pdf => markitdown-glmocr}/tests/test_converter.py (72%)

diff --git a/packages/nova-pdf/README.md b/packages/markitdown-glmocr/README.md
similarity index 84%
rename from packages/nova-pdf/README.md
rename to packages/markitdown-glmocr/README.md
index 969e687d5..746f923fe 100644
--- a/packages/nova-pdf/README.md
+++ b/packages/markitdown-glmocr/README.md
@@ -1,4 +1,4 @@
-# Nova-PDF
+# markitdown-glmocr
 
 智能 PDF 转 Markdown 插件，使用 glm-ocr AI 驱动的图片和表格提取。
 
@@ -13,10 +13,10 @@
 
 ```bash
 # 基础安装
-pip install nova-pdf
+pip install markitdown-glmocr
 
 # 安装 AI 功能
-pip install nova-pdf[zhipu]
+pip install markitdown-glmocr[zhipu]
 ```
 
 ## 配置
@@ -27,7 +27,7 @@ pip install nova-pdf[zhipu]
 
 ```bash
 # 创建 .secrets.local 文件
-echo 'NOVA_ZHIPU_API_KEY="your-api-key"' > .secrets.local
+echo 'GLMOCR_API_KEY="your-api-key"' > .secrets.local
 
 # 加载配置
 source .secrets.local
@@ -37,12 +37,12 @@ source .secrets.local
 
 ```bash
 # 必需
-export NOVA_ZHIPU_API_KEY="your-zhipu-api-key"
+export GLMOCR_API_KEY="your-zhipu-api-key"
 
 # 可选
-export NOVA_MODEL="glm-ocr"
-export NOVA_DPI="150"
-export NOVA_TIMEOUT="120"
+export GLMOCR_MODEL="glm-ocr"
+export GLMOCR_DPI="150"
+export GLMOCR_TIMEOUT="120"
 ```
 
 ### 配置文件
@@ -50,7 +50,7 @@ export NOVA_TIMEOUT="120"
 在 `pyproject.toml` 中配置默认值：
 
 ```toml
-[tool.nova-pdf]
+[tool.markitdown-glmocr]
 model = "glm-ocr"
 dpi = 150
 timeout = 120
@@ -86,15 +86,15 @@ result = md.convert("document.pdf")
 print(result.markdown)
 
 # 方式2：手动配置
-from nova_pdf import NovaPdfConfig, AIService, NovaPdfConverter
+from markitdown_glmocr import GlmOcrConfig, AIService, GlmOcrPdfConverter
 
-config = NovaPdfConfig.load()
+config = GlmOcrConfig.load()
 ai_service = AIService(
     api_key="your-api-key",
     model="glm-ocr",
 )
 
-converter = NovaPdfConverter(
+converter = GlmOcrPdfConverter(
     ai_service=ai_service,
     dpi=150,
 )
@@ -106,17 +106,17 @@ result = md.convert("document.pdf")
 
 ## 配置选项
 
-### NovaPdfConfig 参数
+### GlmOcrConfig 参数
 
 | 参数 | 类型 | 默认值 | 说明 |
 |------|------|--------|------|
-| `zhipu_api_key` | str | 环境变量 `NOVA_ZHIPU_API_KEY` | 智谱 API Key |
+| `api_key` | str | 环境变量 `GLMOCR_API_KEY` | 智谱 API Key |
 | `model` | str | "glm-ocr" | 模型名称 |
 | `dpi` | int | 150 | 截图分辨率 |
 | `timeout` | int | 120 | 请求超时（秒） |
 | `force_ai` | bool | False | 强制所有页面使用 AI |
 
-### NovaPdfConverter 参数
+### GlmOcrPdfConverter 参数
 
 | 参数 | 类型 | 默认值 | 说明 |
 |------|------|--------|------|
diff --git a/packages/nova-pdf/pyproject.toml b/packages/markitdown-glmocr/pyproject.toml
similarity index 73%
rename from packages/nova-pdf/pyproject.toml
rename to packages/markitdown-glmocr/pyproject.toml
index f21aedf0c..a9277c272 100644
--- a/packages/nova-pdf/pyproject.toml
+++ b/packages/markitdown-glmocr/pyproject.toml
@@ -3,9 +3,9 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project]
-name = "nova-pdf"
+name = "markitdown-glmocr"
 dynamic = ["version"]
-description = "Intelligent PDF to Markdown converter with AI-powered image/table extraction"
+description = "Intelligent PDF to Markdown converter with glm-ocr AI-powered image/table extraction"
 readme = "README.md"
 requires-python = ">=3.10"
 license = "MIT"
@@ -44,31 +44,27 @@ Issues = "https://github.com/microsoft/markitdown/issues"
 Source = "https://github.com/microsoft/markitdown"
 
 [tool.hatch.version]
-path = "src/nova_pdf/__about__.py"
+path = "src/markitdown_glmocr/__about__.py"
 
 # Plugin entry point - MarkItDown will discover this plugin
 [project.entry-points."markitdown.plugin"]
-nova_pdf = "nova_pdf"
+markitdown_glmocr = "markitdown_glmocr"
 
 [tool.hatch.build.targets.sdist]
-only-include = ["src/nova_pdf"]
+only-include = ["src/markitdown_glmocr"]
 
 [tool.hatch.build.targets.wheel]
-packages = ["src/nova_pdf"]
+packages = ["src/markitdown_glmocr"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = ["test_*.py"]
 
-# Nova-PDF configuration
-[tool.nova-pdf]
-# API key - set via environment variable NOVA_ZHIPU_API_KEY
+# markitdown-glmocr configuration
+[tool.markitdown-glmocr]
+# API key - set via environment variable GLMOCR_API_KEY
 api_key = ""
 model = "glm-ocr"
 dpi = 150
 timeout = 120
-force_ai = false
-
-# Legacy config (deprecated, will be removed)
-workflow_image2markdown_key = "<your-workflow-token>"
-fastgpt_token = "<your-fastgpt-token>"
+force_ai = false
\ No newline at end of file
diff --git a/packages/nova-pdf/src/nova_pdf/__about__.py b/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
similarity index 100%
rename from packages/nova-pdf/src/nova_pdf/__about__.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
diff --git a/packages/nova-pdf/src/nova_pdf/__init__.py b/packages/markitdown-glmocr/src/markitdown_glmocr/__init__.py
similarity index 61%
rename from packages/nova-pdf/src/nova_pdf/__init__.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/__init__.py
index ce059c499..55b9caf87 100644
--- a/packages/nova-pdf/src/nova_pdf/__init__.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/__init__.py
@@ -1,13 +1,13 @@
 from ._plugin import register_converters
-from ._config import NovaPdfConfig
+from ._config import GlmOcrConfig
 from ._ai_service import AIService, AIResult
-from ._converter import NovaPdfConverter
+from ._converter import GlmOcrPdfConverter
 
 __plugin_interface_version__ = 1
 __all__ = [
     "register_converters",
-    "NovaPdfConfig",
+    "GlmOcrConfig",
     "AIService",
     "AIResult",
-    "NovaPdfConverter",
+    "GlmOcrPdfConverter",
 ]
\ No newline at end of file
diff --git a/packages/nova-pdf/src/nova_pdf/_ai_service.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_ai_service.py
similarity index 94%
rename from packages/nova-pdf/src/nova_pdf/_ai_service.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/_ai_service.py
index 2451f9955..6c8e362a4 100644
--- a/packages/nova-pdf/src/nova_pdf/_ai_service.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_ai_service.py
@@ -5,7 +5,7 @@
 from dataclasses import dataclass
 from typing import BinaryIO, Optional
 
-from ._config import NovaPdfConfig
+from ._config import GlmOcrConfig
 
 try:
     from zai import ZhipuAiClient
@@ -36,26 +36,26 @@ def __init__(
         api_key: Optional[str] = None,
         model: str = "glm-ocr",
         timeout: int = 120,
-        config: Optional[NovaPdfConfig] = None,
+        config: Optional[GlmOcrConfig] = None,
     ):
         if ZhipuAiClient is None:
             raise ImportError(
-                "zai-sdk is required. Install with: pip install nova-pdf[zhipu]"
+                "zai-sdk is required. Install with: pip install markitdown-glmocr[zhipu]"
             )
         
         if config:
-            self.api_key = api_key or config.zhipu_api_key
+            self.api_key = api_key or config.api_key
             self.model = model or config.model
             self.timeout = timeout or config.timeout
         else:
-            config = NovaPdfConfig.load()
-            self.api_key = api_key or config.zhipu_api_key
+            config = GlmOcrConfig.load()
+            self.api_key = api_key or config.api_key
             self.model = model
             self.timeout = timeout
         
         if not self.api_key:
             raise ValueError(
-                "API key is required. Set NOVA_ZHIPU_API_KEY environment variable"
+                "API key is required. Set GLMOCR_API_KEY environment variable"
             )
         
         self.client = ZhipuAiClient(api_key=self.api_key)
@@ -199,4 +199,4 @@ def _convert_html_table(self, html: str) -> str:
             if i == 0:
                 md_lines.append("|" + "|".join(["---"] * max_cols) + "|")
         
-        return "\n".join(md_lines)
+        return "\n".join(md_lines)
\ No newline at end of file
diff --git a/packages/nova-pdf/src/nova_pdf/_config.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
similarity index 58%
rename from packages/nova-pdf/src/nova_pdf/_config.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
index 305ce1894..44c72da22 100644
--- a/packages/nova-pdf/src/nova_pdf/_config.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
@@ -1,4 +1,4 @@
-"""Configuration management for nova-pdf."""
+"""Configuration management for markitdown-glmocr."""
 
 import os
 from pathlib import Path
@@ -12,11 +12,11 @@
 
 
 @dataclass
-class NovaPdfConfig:
-    """nova-pdf configuration."""
+class GlmOcrConfig:
+    """markitdown-glmocr configuration."""
     
     # API 配置
-    zhipu_api_key: str = ""
+    api_key: str = ""
     
     # OCR 配置
     model: str = "glm-ocr"
@@ -27,11 +27,11 @@ class NovaPdfConfig:
     force_ai: bool = False
     
     @classmethod
-    def load(cls, config_path: Optional[str] = None) -> "NovaPdfConfig":
+    def load(cls, config_path: Optional[str] = None) -> "GlmOcrConfig":
         """
         Load configuration from multiple sources (priority high to low):
         1. Environment variables
-        2. Config file (pyproject.toml or nova-pdf.toml)
+        2. Config file (pyproject.toml or markitdown-glmocr.toml)
         3. Default values
         """
         config = cls()
@@ -53,10 +53,10 @@ def _load_from_file(self, config_path: Optional[str] = None):
         
         # Current directory
         search_paths.append(Path("pyproject.toml"))
-        search_paths.append(Path("nova-pdf.toml"))
+        search_paths.append(Path("markitdown-glmocr.toml"))
         
         # User config directory
-        search_paths.append(Path.home() / ".config" / "nova-pdf" / "config.toml")
+        search_paths.append(Path.home() / ".config" / "markitdown-glmocr" / "config.toml")
         
         for path in search_paths:
             if path.exists():
@@ -64,11 +64,11 @@ def _load_from_file(self, config_path: Optional[str] = None):
                     with open(path, "rb") as f:
                         data = tomllib.load(f)
                     
-                    # Read [tool.nova-pdf] section
-                    if "tool" in data and "nova-pdf" in data["tool"]:
-                        self._apply_config(data["tool"]["nova-pdf"])
-                    elif "nova-pdf" in data:
-                        self._apply_config(data["nova-pdf"])
+                    # Read [tool.markitdown-glmocr] section
+                    if "tool" in data and "markitdown-glmocr" in data["tool"]:
+                        self._apply_config(data["tool"]["markitdown-glmocr"])
+                    elif "markitdown-glmocr" in data:
+                        self._apply_config(data["markitdown-glmocr"])
                     
                     break
                 except Exception:
@@ -77,7 +77,7 @@ def _load_from_file(self, config_path: Optional[str] = None):
     def _apply_config(self, data: dict):
         """Apply config from dict."""
         if "api_key" in data:
-            self.zhipu_api_key = data["api_key"]
+            self.api_key = data["api_key"]
         if "model" in data:
             self.model = data["model"]
         if "dpi" in data:
@@ -89,13 +89,13 @@ def _apply_config(self, data: dict):
     
     def _load_from_env(self):
         """Load from environment variables (highest priority)."""
-        if os.environ.get("NOVA_ZHIPU_API_KEY"):
-            self.zhipu_api_key = os.environ["NOVA_ZHIPU_API_KEY"]
-        if os.environ.get("NOVA_MODEL"):
-            self.model = os.environ["NOVA_MODEL"]
-        if os.environ.get("NOVA_DPI"):
-            self.dpi = int(os.environ["NOVA_DPI"])
-        if os.environ.get("NOVA_TIMEOUT"):
-            self.timeout = int(os.environ["NOVA_TIMEOUT"])
-        if os.environ.get("NOVA_FORCE_AI"):
-            self.force_ai = os.environ["NOVA_FORCE_AI"].lower() in ("true", "1", "yes")
+        if os.environ.get("GLMOCR_API_KEY"):
+            self.api_key = os.environ["GLMOCR_API_KEY"]
+        if os.environ.get("GLMOCR_MODEL"):
+            self.model = os.environ["GLMOCR_MODEL"]
+        if os.environ.get("GLMOCR_DPI"):
+            self.dpi = int(os.environ["GLMOCR_DPI"])
+        if os.environ.get("GLMOCR_TIMEOUT"):
+            self.timeout = int(os.environ["GLMOCR_TIMEOUT"])
+        if os.environ.get("GLMOCR_FORCE_AI"):
+            self.force_ai = os.environ["GLMOCR_FORCE_AI"].lower() in ("true", "1", "yes")
\ No newline at end of file
diff --git a/packages/nova-pdf/src/nova_pdf/_converter.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
similarity index 75%
rename from packages/nova-pdf/src/nova_pdf/_converter.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
index 7ee1dd320..f1434c115 100644
--- a/packages/nova-pdf/src/nova_pdf/_converter.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
@@ -1,4 +1,4 @@
-"""Nova PDF Converter - Intelligent PDF to Markdown conversion."""
+"""GlmOcr PDF Converter - Intelligent PDF to Markdown conversion."""
 
 import io
 import sys
@@ -29,14 +29,14 @@
 ACCEPTED_FILE_EXTENSIONS = [".pdf"]
 
 
-class NovaPdfConverter(DocumentConverter):
+class GlmOcrPdfConverter(DocumentConverter):
     """
-    智能 PDF 转换器
+    Intelligent PDF converter using glm-ocr.
     
-    特性：
-    - 自动检测每页内容类型（纯文本 vs 包含图片/表格）
-    - 纯文本页面使用默认解析（pdfplumber/pdfminer）
-    - 复杂页面截图后调用 AI 转换为 Markdown
+    Features:
+    - Auto-detect page content type (plain text vs images/tables)
+    - Plain text pages use default parser (pdfplumber/pdfminer)
+    - Complex pages use AI screenshot conversion to Markdown
     """
 
     def __init__(
@@ -46,12 +46,12 @@ def __init__(
         force_ai: bool = False,
     ):
         """
-        初始化转换器
+        Initialize converter.
 
         Args:
-            ai_service: AI 服务实例
-            dpi: 截图分辨率（默认 150）
-            force_ai: 强制所有页面使用 AI（默认 False）
+            ai_service: AI service instance
+            dpi: Screenshot DPI (default: 150)
+            force_ai: Force all pages to use AI (default: False)
         """
         self.ai_service = ai_service
         self.dpi = dpi
@@ -92,47 +92,47 @@ def convert(
                 _dependency_exc_info[2]
             )
 
-        # 获取 AI 服务（从 kwargs 或实例）
+        # Get AI service (from kwargs or instance)
         ai_service = kwargs.get("ai_service") or self.ai_service
 
-        # 读取 PDF
+        # Read PDF
         pdf_stream = io.BytesIO(file_stream.read())
         markdown_parts = []
 
         try:
             with pdfplumber.open(pdf_stream) as pdf:
                 for page_num, page in enumerate(pdf.pages):
-                    # 分析页面类型
+                    # Analyze page type
                     page_type = analyze_page(page)
 
-                    # 根据类型选择处理方式
+                    # Choose processing method based on type
                     if self.force_ai or page_type != PageType.PLAIN_TEXT:
-                        # 复杂内容：截图 + AI
+                        # Complex content: screenshot + AI
                         if ai_service:
                             markdown = self._convert_with_ai(
                                 page, page_num, ai_service
                             )
                         else:
-                            # 无 AI 服务，回退到默认解析
+                            # No AI service, fallback to default
                             markdown = self._extract_text_with_tables(page)
                     else:
-                        # 纯文本：默认解析
+                        # Plain text: default parser
                         markdown = self._extract_text_with_tables(page)
 
                     if markdown.strip():
                         markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
 
-                    # 释放页面资源
+                    # Release page resources
                     page.close()
 
             markdown = "\n\n".join(markdown_parts).strip()
 
         except Exception:
-            # 异常情况：回退到 pdfminer
+            # Exception: fallback to pdfminer
             pdf_stream.seek(0)
             markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
 
-        # 最终回退
+        # Final fallback
         if not markdown:
             pdf_stream.seek(0)
             markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
@@ -146,52 +146,52 @@ def _convert_with_ai(
         ai_service: AIService,
     ) -> str:
         """
-        使用 AI 转换页面
+        Convert page using AI.
 
         Args:
-            page: pdfplumber 页面对象
-            page_num: 页码
-            ai_service: AI 服务
+            page: pdfplumber page object
+            page_num: Page number
+            ai_service: AI service
 
         Returns:
-            str: Markdown 内容
+            str: Markdown content
         """
         try:
-            # 截图
+            # Screenshot
             img_stream = render_page_to_image(page, self.dpi)
 
-            # 调用 AI（文件名使用页码）
+            # Call AI (filename uses page number)
             filename = f"page_{page_num + 1}.png"
             result = ai_service.image_to_markdown(img_stream, filename=filename)
 
             if result.success and result.text.strip():
                 return result.text
             else:
-                # AI 失败，回退到默认解析
+                # AI failed, fallback to default
                 return self._extract_text_with_tables(page)
 
         except Exception:
-            # 异常情况，回退到默认解析
+            # Exception, fallback to default
             return self._extract_text_with_tables(page)
 
     def _extract_text_with_tables(self, page: Any) -> str:
         """
-        提取文本和表格
+        Extract text and tables.
 
         Args:
-            page: pdfplumber 页面对象
+            page: pdfplumber page object
 
         Returns:
-            str: Markdown 内容
+            str: Markdown content
         """
         parts = []
 
-        # 提取文本
+        # Extract text
         text = page.extract_text() or ""
         if text.strip():
             parts.append(text.strip())
 
-        # 提取表格
+        # Extract tables
         try:
             tables = page.extract_tables()
             if tables:
@@ -207,45 +207,45 @@ def _extract_text_with_tables(self, page: Any) -> str:
 
     def _table_to_markdown(self, table: list[list[str]]) -> str:
         """
-        将表格转换为 Markdown
+        Convert table to Markdown.
 
         Args:
-            table: 2D 列表
+            table: 2D list
 
         Returns:
-            str: Markdown 表格
+            str: Markdown table
         """
         if not table:
             return ""
 
-        # 过滤 None 值
+        # Filter None values
         table = [[cell if cell is not None else "" for cell in row] for row in table]
 
-        # 过滤空行
+        # Filter empty rows
         table = [row for row in table if any(cell.strip() for cell in row)]
 
         if not table:
             return ""
 
-        # 计算列宽
+        # Calculate column widths
         col_widths = [
             max(len(str(row[i])) if i < len(row) else 0 for row in table)
             for i in range(max(len(row) for row in table))
         ]
 
-        # 格式化表格
+        # Format table
         lines = []
         for row_idx, row in enumerate(table):
-            # 补齐列数
+            # Pad columns
             padded_row = row + [""] * (len(col_widths) - len(row))
             line = "| " + " | ".join(
                 str(cell).ljust(width) for cell, width in zip(padded_row, col_widths)
             ) + " |"
             lines.append(line)
 
-            # 添加分隔行
+            # Add separator
             if row_idx == 0:
                 sep = "|" + "|".join("-" * (w + 2) for w in col_widths) + "|"
                 lines.append(sep)
 
-        return "\n".join(lines)
+        return "\n".join(lines)
\ No newline at end of file
diff --git a/packages/nova-pdf/src/nova_pdf/_page_analyzer.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_page_analyzer.py
similarity index 100%
rename from packages/nova-pdf/src/nova_pdf/_page_analyzer.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/_page_analyzer.py
diff --git a/packages/nova-pdf/src/nova_pdf/_page_renderer.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_page_renderer.py
similarity index 100%
rename from packages/nova-pdf/src/nova_pdf/_page_renderer.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/_page_renderer.py
diff --git a/packages/nova-pdf/src/nova_pdf/_plugin.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
similarity index 74%
rename from packages/nova-pdf/src/nova_pdf/_plugin.py
rename to packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
index 08a4fd8a0..8abf29ccb 100644
--- a/packages/nova-pdf/src/nova_pdf/_plugin.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
@@ -1,11 +1,11 @@
-"""Plugin registration for nova-pdf."""
+"""Plugin registration for markitdown-glmocr."""
 
 from typing import Any
 from markitdown import MarkItDown
 
-from ._config import NovaPdfConfig
+from ._config import GlmOcrConfig
 from ._ai_service import AIService
-from ._converter import NovaPdfConverter
+from ._converter import GlmOcrPdfConverter
 
 
 __plugin_interface_version__ = 1
@@ -13,7 +13,7 @@
 
 def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     """
-    Register nova-pdf converter.
+    Register markitdown-glmocr converter.
     
     Config sources (priority high to low):
     1. kwargs parameters
@@ -22,10 +22,10 @@ def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     4. Default values
     """
     # Load config
-    config = NovaPdfConfig.load()
+    config = GlmOcrConfig.load()
     
     # kwargs override config
-    api_key = kwargs.get("api_key") or kwargs.get("zhipu_api_key") or config.zhipu_api_key
+    api_key = kwargs.get("api_key") or config.api_key
     model = kwargs.get("model", config.model)
     dpi = kwargs.get("dpi", config.dpi)
     force_ai = kwargs.get("force_ai", config.force_ai)
@@ -44,13 +44,13 @@ def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
             pass
     
     # Register converter
-    PRIORITY_NOVA_PDF = -1.0
+    PRIORITY_GLMOCR = -1.0
     
     markitdown.register_converter(
-        NovaPdfConverter(
+        GlmOcrPdfConverter(
             ai_service=ai_service,
             dpi=dpi,
             force_ai=force_ai,
         ),
-        priority=PRIORITY_NOVA_PDF,
-    )
+        priority=PRIORITY_GLMOCR,
+    )
\ No newline at end of file
diff --git a/packages/nova-pdf/tests/__init__.py b/packages/markitdown-glmocr/tests/__init__.py
similarity index 100%
rename from packages/nova-pdf/tests/__init__.py
rename to packages/markitdown-glmocr/tests/__init__.py
diff --git a/packages/nova-pdf/tests/test_ai_service.py b/packages/markitdown-glmocr/tests/test_ai_service.py
similarity index 81%
rename from packages/nova-pdf/tests/test_ai_service.py
rename to packages/markitdown-glmocr/tests/test_ai_service.py
index 7c7636848..dbbe06d50 100644
--- a/packages/nova-pdf/tests/test_ai_service.py
+++ b/packages/markitdown-glmocr/tests/test_ai_service.py
@@ -4,8 +4,8 @@
 import pytest
 from unittest.mock import MagicMock, patch
 
-from nova_pdf._ai_service import AIService, AIResult
-from nova_pdf._config import NovaPdfConfig
+from markitdown_glmocr._ai_service import AIService, AIResult
+from markitdown_glmocr._config import GlmOcrConfig
 
 
 class TestAIService:
@@ -13,13 +13,13 @@ class TestAIService:
 
     def test_missing_zai_sdk_raises_error(self):
         """Missing zai-sdk raises error."""
-        with patch("nova_pdf._ai_service.ZhipuAiClient", None):
+        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", None):
             with pytest.raises(ImportError, match="zai-sdk is required"):
                 AIService(api_key="test")
 
     def test_missing_api_key_raises_error(self):
         """Missing API key raises error."""
-        with patch("nova_pdf._ai_service.ZhipuAiClient", MagicMock()):
+        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", MagicMock()):
             with pytest.raises(ValueError, match="API key is required"):
                 AIService(api_key="")
 
@@ -32,7 +32,7 @@ def test_successful_conversion(self):
         mock_response.layout_details = []
         mock_client.layout_parsing.create.return_value = mock_response
 
-        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
             service = AIService(api_key="test-api-key")
             result = service.image_to_markdown(io.BytesIO(b"fake-image"))
 
@@ -47,7 +47,7 @@ def test_html_table_conversion(self):
         mock_response.layout_details = []
         mock_client.layout_parsing.create.return_value = mock_response
 
-        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
             service = AIService(api_key="test-api-key")
             result = service.image_to_markdown(io.BytesIO(b"fake-image"))
 
@@ -64,7 +64,7 @@ def test_empty_result(self):
         mock_response.layout_details = []
         mock_client.layout_parsing.create.return_value = mock_response
 
-        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
             service = AIService(api_key="test-api-key")
             result = service.image_to_markdown(io.BytesIO(b"fake-image"))
 
@@ -76,7 +76,7 @@ def test_error_handling(self):
         mock_client = MagicMock()
         mock_client.layout_parsing.create.side_effect = Exception("API Error")
 
-        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
             service = AIService(api_key="test-api-key")
             result = service.image_to_markdown(io.BytesIO(b"fake-image"))
 
@@ -91,7 +91,7 @@ def test_base64_encoding(self):
         mock_response.layout_details = []
         mock_client.layout_parsing.create.return_value = mock_response
 
-        with patch("nova_pdf._ai_service.ZhipuAiClient", return_value=mock_client):
+        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
             service = AIService(api_key="test-api-key")
             result = service.image_to_markdown(io.BytesIO(b"fake-image"), "test.png")
 
@@ -100,4 +100,4 @@ def test_base64_encoding(self):
         # Verify data URI was used
         call_args = mock_client.layout_parsing.create.call_args
         file_arg = call_args.kwargs['file']
-        assert file_arg.startswith("data:image/png;base64,")
+        assert file_arg.startswith("data:image/png;base64,")
\ No newline at end of file
diff --git a/packages/nova-pdf/tests/test_analyzer.py b/packages/markitdown-glmocr/tests/test_analyzer.py
similarity index 98%
rename from packages/nova-pdf/tests/test_analyzer.py
rename to packages/markitdown-glmocr/tests/test_analyzer.py
index 137e486ab..6841f0b44 100644
--- a/packages/nova-pdf/tests/test_analyzer.py
+++ b/packages/markitdown-glmocr/tests/test_analyzer.py
@@ -3,7 +3,7 @@
 import pytest
 from unittest.mock import MagicMock
 
-from nova_pdf._page_analyzer import (
+from markitdown_glmocr._page_analyzer import (
     PageType,
     detect_images,
     detect_tables,
diff --git a/packages/nova-pdf/tests/test_converter.py b/packages/markitdown-glmocr/tests/test_converter.py
similarity index 72%
rename from packages/nova-pdf/tests/test_converter.py
rename to packages/markitdown-glmocr/tests/test_converter.py
index ea13266f7..d48c75f2d 100644
--- a/packages/nova-pdf/tests/test_converter.py
+++ b/packages/markitdown-glmocr/tests/test_converter.py
@@ -1,44 +1,44 @@
-"""Tests for nova-pdf converter."""
+"""Tests for markitdown-glmocr converter."""
 
 import io
 import pytest
 from unittest.mock import MagicMock, patch
 
-from nova_pdf._converter import NovaPdfConverter
-from nova_pdf._ai_service import AIService, AIResult
-from nova_pdf._page_analyzer import PageType
+from markitdown_glmocr._converter import GlmOcrPdfConverter
+from markitdown_glmocr._ai_service import AIService, AIResult
+from markitdown_glmocr._page_analyzer import PageType
 
 
-class TestNovaPdfConverter:
-    """转换器测试"""
+class TestGlmOcrPdfConverter:
+    """Converter tests."""
 
     def test_accepts_pdf_extension(self):
-        """接受 .pdf 扩展名"""
-        converter = NovaPdfConverter()
+        """Accept .pdf extension."""
+        converter = GlmOcrPdfConverter()
         stream = io.BytesIO(b"%PDF-1.4")
         stream_info = MagicMock(extension=".pdf", mimetype=None)
 
         assert converter.accepts(stream, stream_info) is True
 
     def test_accepts_pdf_mimetype(self):
-        """接受 PDF MIME 类型"""
-        converter = NovaPdfConverter()
+        """Accept PDF MIME type."""
+        converter = GlmOcrPdfConverter()
         stream = io.BytesIO(b"%PDF-1.4")
         stream_info = MagicMock(extension=None, mimetype="application/pdf")
 
         assert converter.accepts(stream, stream_info) is True
 
     def test_rejects_non_pdf(self):
-        """拒绝非 PDF 文件"""
-        converter = NovaPdfConverter()
+        """Reject non-PDF files."""
+        converter = GlmOcrPdfConverter()
         stream = io.BytesIO(b"not a pdf")
         stream_info = MagicMock(extension=".txt", mimetype="text/plain")
 
         assert converter.accepts(stream, stream_info) is False
 
     def test_table_to_markdown(self):
-        """表格转 Markdown"""
-        converter = NovaPdfConverter()
+        """Table to Markdown conversion."""
+        converter = GlmOcrPdfConverter()
         table = [
             ["Name", "Age", "City"],
             ["Alice", "25", "Beijing"],
@@ -50,13 +50,13 @@ def test_table_to_markdown(self):
         assert "|" in result
         assert "Name" in result
         assert "Alice" in result
-        assert "---" in result  # 分隔行
+        assert "---" in result  # Separator
 
     def test_plain_text_page_without_ai(self):
-        """纯文本页面不使用 AI"""
-        converter = NovaPdfConverter()
+        """Plain text page without AI."""
+        converter = GlmOcrPdfConverter()
 
-        # 模拟页面
+        # Mock page
         page = MagicMock()
         page.images = []
         page.objects = {}
@@ -64,11 +64,11 @@ def test_plain_text_page_without_ai(self):
         page.extract_text.return_value = "Hello World"
         page.close = MagicMock()
 
-        # 模拟 PDF
+        # Mock PDF
         mock_pdf = MagicMock()
         mock_pdf.pages = [page]
 
-        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+        with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
             mock_open.return_value.__enter__.return_value = mock_pdf
 
             stream = io.BytesIO(b"%PDF-1.4")
@@ -77,17 +77,17 @@ def test_plain_text_page_without_ai(self):
         assert "Hello World" in result.markdown
 
     def test_complex_page_with_ai(self):
-        """复杂页面使用 AI"""
-        # 模拟 AI 服务
+        """Complex page with AI."""
+        # Mock AI service
         ai_service = MagicMock(spec=AIService)
         ai_service.image_to_markdown.return_value = AIResult(
             success=True,
             text="# AI Generated\n\nThis is from AI."
         )
 
-        converter = NovaPdfConverter(ai_service=ai_service)
+        converter = GlmOcrPdfConverter(ai_service=ai_service)
 
-        # 模拟页面
+        # Mock page
         page = MagicMock()
         page.images = [MagicMock()]
         page.extract_tables.return_value = []
@@ -95,35 +95,35 @@ def test_complex_page_with_ai(self):
         page.to_image.return_value.original = MagicMock()
         page.close = MagicMock()
 
-        # 模拟图片保存
+        # Mock image save
         img_stream = io.BytesIO()
         page.to_image.return_value.original.save = lambda s, format: s.write(b"fake")
 
-        # 模拟 PDF
+        # Mock PDF
         mock_pdf = MagicMock()
         mock_pdf.pages = [page]
 
-        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+        with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
             mock_open.return_value.__enter__.return_value = mock_pdf
 
             stream = io.BytesIO(b"%PDF-1.4")
             result = converter.convert(stream, MagicMock())
 
-        # 应该调用 AI
+        # Should call AI
         ai_service.image_to_markdown.assert_called_once()
         assert "AI Generated" in result.markdown
 
     def test_force_ai_mode(self):
-        """强制 AI 模式"""
+        """Force AI mode."""
         ai_service = MagicMock(spec=AIService)
         ai_service.image_to_markdown.return_value = AIResult(
             success=True,
             text="AI result"
         )
 
-        converter = NovaPdfConverter(ai_service=ai_service, force_ai=True)
+        converter = GlmOcrPdfConverter(ai_service=ai_service, force_ai=True)
 
-        # 即使是纯文本页面
+        # Even plain text page
         page = MagicMock()
         page.images = []
         page.objects = {}
@@ -138,17 +138,17 @@ def test_force_ai_mode(self):
         mock_pdf = MagicMock()
         mock_pdf.pages = [page]
 
-        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+        with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
             mock_open.return_value.__enter__.return_value = mock_pdf
 
             stream = io.BytesIO(b"%PDF-1.4")
             result = converter.convert(stream, MagicMock())
 
-        # 应该调用 AI（因为 force_ai=True）
+        # Should call AI (because force_ai=True)
         ai_service.image_to_markdown.assert_called_once()
 
     def test_fallback_on_ai_failure(self):
-        """AI 失败时回退到默认解析"""
+        """Fallback on AI failure."""
         ai_service = MagicMock(spec=AIService)
         ai_service.image_to_markdown.return_value = AIResult(
             success=False,
@@ -156,7 +156,7 @@ def test_fallback_on_ai_failure(self):
             error="API error"
         )
 
-        converter = NovaPdfConverter(ai_service=ai_service)
+        converter = GlmOcrPdfConverter(ai_service=ai_service)
 
         page = MagicMock()
         page.images = [MagicMock()]
@@ -171,11 +171,11 @@ def test_fallback_on_ai_failure(self):
         mock_pdf = MagicMock()
         mock_pdf.pages = [page]
 
-        with patch("nova_pdf._converter.pdfplumber.open") as mock_open:
+        with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
             mock_open.return_value.__enter__.return_value = mock_pdf
 
             stream = io.BytesIO(b"%PDF-1.4")
             result = converter.convert(stream, MagicMock())
 
-        # 应该回退到默认文本
-        assert "Fallback text" in result.markdown
+        # Should fallback to default text
+        assert "Fallback text" in result.markdown
\ No newline at end of file

From 8f2dd6a1182d0c3e5a2dab2e73249eff230c0d70 Mon Sep 17 00:00:00 2001
From: hankl <your.email@example.com>
Date: Sun, 10 May 2026 10:08:00 +0800
Subject: [PATCH 04/15] Update spec.md

---
 docs/spec.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/spec.md b/docs/spec.md
index 660e90a15..d2d624ddf 100644
--- a/docs/spec.md
+++ b/docs/spec.md
@@ -1,3 +1,4 @@
+# sprint0
 # 目标
 重构调用ai接口解析PDF的功能：对包含图片/表格的页面截图后调用 AI 接口转 Markdown
 
@@ -28,4 +29,7 @@ print(response)
 
 详细文档：https://docs.bigmodel.cn/cn/guide/models/vlm/glm-ocr#python
 
-先设计重构方案
\ No newline at end of file
+先设计重构方案
+
+## sprint1
+重命名：nova-pdf 改成markitdown-glmocr

From f81ef9f34c2e0a266c3daa7ec3407e02967aecad Mon Sep 17 00:00:00 2001
From: hankl <your.email@example.com>
Date: Tue, 12 May 2026 16:54:35 +0800
Subject: [PATCH 05/15] =?UTF-8?q?=E6=9B=BF=E6=8D=A2=E4=B8=BAglmsdk?=
 =?UTF-8?q?=E6=9D=A5=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 packages/markitdown-glmocr/README.md          | 168 +++++++++------
 packages/markitdown-glmocr/pyproject.toml     |  20 +-
 .../src/markitdown_glmocr/__init__.py         |   9 +-
 .../src/markitdown_glmocr/_ai_service.py      | 202 ------------------
 .../src/markitdown_glmocr/_config.py          | 108 ++--------
 .../src/markitdown_glmocr/_converter.py       | 190 +++++++++-------
 .../src/markitdown_glmocr/_page_analyzer.py   | 117 ----------
 .../src/markitdown_glmocr/_page_renderer.py   |  32 ---
 .../src/markitdown_glmocr/_plugin.py          |  41 +---
 9 files changed, 240 insertions(+), 647 deletions(-)
 delete mode 100644 packages/markitdown-glmocr/src/markitdown_glmocr/_ai_service.py
 delete mode 100644 packages/markitdown-glmocr/src/markitdown_glmocr/_page_analyzer.py
 delete mode 100644 packages/markitdown-glmocr/src/markitdown_glmocr/_page_renderer.py

diff --git a/packages/markitdown-glmocr/README.md b/packages/markitdown-glmocr/README.md
index 746f923fe..d0193d2ed 100644
--- a/packages/markitdown-glmocr/README.md
+++ b/packages/markitdown-glmocr/README.md
@@ -1,13 +1,14 @@
 # markitdown-glmocr
 
-智能 PDF 转 Markdown 插件，使用 glm-ocr AI 驱动的图片和表格提取。
+智能 PDF 转 Markdown 插件，使用 glmocr SDK（智谱 GLM-OCR）驱动的图片和表格提取。
 
 ## 特性
 
 - 🔍 **智能检测**：自动识别每页内容类型（纯文本 vs 图片/表格）
 - 📄 **默认解析**：纯文本页面使用 pdfplumber/pdfminer 提取，速度快、成本低
-- 🤖 **AI 增强**：复杂页面（图片、表格）使用 glm-ocr 转换为 Markdown
-- ⚙️ **灵活配置**：支持配置文件、环境变量等多种配置方式
+- 🤖 **AI 增强**：复杂页面（图片、表格）使用 glmocr SDK 转换为 Markdown
+- ⚡ **一行调用**：`glmocr.parse("document.pdf")` 完成 OCR，无需手动截图编码
+- 📊 **结构化输出**：返回 Markdown + JSON 结构（含区域标签、边界框）
 
 ## 安装
 
@@ -16,45 +17,35 @@
 pip install markitdown-glmocr
 
 # 安装 AI 功能
-pip install markitdown-glmocr[zhipu]
+pip install markitdown-glmocr[glmocr]
 ```
 
 ## 配置
 
-### 本地敏感配置（推荐）
-
-项目根目录的 `.secrets.local` 文件存储敏感信息，此文件不会被提交到 Git：
+### 环境变量（推荐）
 
 ```bash
-# 创建 .secrets.local 文件
-echo 'GLMOCR_API_KEY="your-api-key"' > .secrets.local
+# 必需：智谱 API Key
+export ZHIPU_API_KEY="your-zhipu-api-key"
 
-# 加载配置
-source .secrets.local
+# 可选
+export GLMOCR_MODEL="glm-ocr"          # 模型名称
+export GLMOCR_TIMEOUT="600"             # 请求超时（秒）
+export GLMOCR_ENABLE_LAYOUT="true"      # 启用布局检测
+export GLMOCR_LOG_LEVEL="INFO"          # 日志级别
 ```
 
-### 环境变量
-
-```bash
-# 必需
-export GLMOCR_API_KEY="your-zhipu-api-key"
+### 配置优先级
 
-# 可选
-export GLMOCR_MODEL="glm-ocr"
-export GLMOCR_DPI="150"
-export GLMOCR_TIMEOUT="120"
+```
+构造函数参数 > 环境变量 > .env 文件 > config.yaml > 内置默认值
 ```
 
-### 配置文件
-
-在 `pyproject.toml` 中配置默认值：
+### 本地敏感配置
 
-```toml
-[tool.markitdown-glmocr]
-model = "glm-ocr"
-dpi = 150
-timeout = 120
-force_ai = false
+```bash
+# 创建 .env 文件（自动读取）
+echo "ZHIPU_API_KEY=your-api-key" > .env
 ```
 
 ## 使用方法
@@ -62,8 +53,8 @@ force_ai = false
 ### 命令行（推荐）
 
 ```bash
-# 1. 加载敏感配置
-source .secrets.local
+# 1. 设置 API Key
+export ZHIPU_API_KEY="sk-xxx"
 
 # 2. 查看已安装插件
 markitdown --list-plugins
@@ -79,50 +70,73 @@ markitdown -p document.pdf -o output.md
 
 ```python
 from markitdown import MarkItDown
+from markitdown_glmocr import GlmOcrConverter
 
-# 方式1：自动加载配置
-md = MarkItDown(enable_plugins=True)
+# 方式1：自动从环境变量读取 ZHIPU_API_KEY
+converter = GlmOcrConverter()
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
 result = md.convert("document.pdf")
 print(result.markdown)
 
-# 方式2：手动配置
-from markitdown_glmocr import GlmOcrConfig, AIService, GlmOcrPdfConverter
-
-config = GlmOcrConfig.load()
-ai_service = AIService(
-    api_key="your-api-key",
-    model="glm-ocr",
-)
-
-converter = GlmOcrPdfConverter(
-    ai_service=ai_service,
-    dpi=150,
-)
-
+# 方式2：手动传入 API Key
+converter = GlmOcrConverter(api_key="sk-xxx")
 md = MarkItDown(enable_plugins=False)
 md.register_converter(converter, priority=-1.0)
 result = md.convert("document.pdf")
+print(result.markdown)
+
+# 方式3：直接使用 glmocr SDK（更简单）
+import glmocr
+result = glmocr.parse("document.pdf")
+print(result.markdown_result)  # Markdown 输出
+print(result.json_result)      # 结构化 JSON（区域标签、边界框）
+```
+
+### 处理结果
+
+```python
+import glmocr
+
+result = glmocr.parse("report.pdf")
+
+# 获取 Markdown
+print(result.markdown_result)
+
+# 获取结构化数据（按页分组）
+for page_idx, page_regions in enumerate(result.json_result):
+    print(f"Page {page_idx + 1}: {len(page_regions)} regions")
+    for region in page_regions:
+        print(f"  [{region['label']}] {region['content'][:60]}")
+
+# 按标签筛选
+tables = [r for r in result.json_result[0] if r["label"] == "table"]
+formulas = [r for r in result.json_result[0] if r["label"] == "formula"]
+
+# 保存到磁盘
+result.save(output_dir="./output")
 ```
 
 ## 配置选项
 
-### GlmOcrConfig 参数
+### GlmOcrConverter 参数
 
 | 参数 | 类型 | 默认值 | 说明 |
 |------|------|--------|------|
-| `api_key` | str | 环境变量 `GLMOCR_API_KEY` | 智谱 API Key |
-| `model` | str | "glm-ocr" | 模型名称 |
-| `dpi` | int | 150 | 截图分辨率 |
-| `timeout` | int | 120 | 请求超时（秒） |
+| `api_key` | str | 环境变量 `ZHIPU_API_KEY` | 智谱 API Key |
+| `timeout` | int | 1800 | 请求超时（秒） |
+| `enable_layout` | bool | False | 启用布局检测 |
 | `force_ai` | bool | False | 强制所有页面使用 AI |
 
-### GlmOcrPdfConverter 参数
+### 环境变量
 
-| 参数 | 类型 | 默认值 | 说明 |
-|------|------|--------|------|
-| `ai_service` | AIService | None | AI 服务实例 |
-| `dpi` | int | 150 | 截图分辨率 |
-| `force_ai` | bool | False | 强制所有页面使用 AI |
+| 变量 | 说明 | 示例 |
+|------|------|------|
+| `ZHIPU_API_KEY` | API Key（必需） | `sk-abc123` |
+| `GLMOCR_MODEL` | 模型名称 | `glm-ocr` |
+| `GLMOCR_TIMEOUT` | 请求超时（秒） | `600` |
+| `GLMOCR_ENABLE_LAYOUT` | 布局检测 | `true` |
+| `GLMOCR_LOG_LEVEL` | 日志级别 | `INFO` |
 
 ## 工作原理
 
@@ -136,22 +150,38 @@ PDF 输入
     │
     └─ 复杂页面（图片/表格）
           │
-          ├─ 截图渲染 (150 DPI)
-          │
-          ├─ base64 编码
-          │
-          └─ 调用 glm-ocr API 转 Markdown
+          └─► glmocr.parse() 一行调用
+                │
+                ├─ 内置截图渲染
+                ├─ 内置 base64 编码
+                └─ 内置 OCR 识别
     │
     ▼
 合并输出完整 Markdown
 ```
 
+## 区域标签（json_result）
+
+glmocr SDK 返回的结构化数据支持以下标签：
+
+| 标签 | 说明 |
+|------|------|
+| `title` | 标题 |
+| `text` | 正文文本 |
+| `table` | 表格 |
+| `figure` | 图片 |
+| `formula` | 公式 |
+| `header` | 页眉 |
+| `footer` | 页脚 |
+| `page_number` | 页码 |
+| `reference` | 参考文献 |
+| `seal` | 印章 |
+
 ## 技术架构
 
-- **zai-sdk**: 智谱 AI 官方 SDK
-- **glm-ocr**: 智谱 OCR 模型，支持表格、图片识别
-- **pdfplumber**: PDF 页面分析和截图
-- **pdfminer**: 纯文本页面提取
+- **glmocr**: 智谱 OCR SDK，一行代码完成 PDF/图片解析
+- **pdfplumber**: PDF 页面分析和纯文本提取
+- **pdfminer**: 纯文本页面提取备用
 
 ## 依赖
 
@@ -159,8 +189,8 @@ PDF 输入
 - `pdfplumber>=0.11.9` - PDF 解析和截图
 - `pdfminer.six>=20251230` - 文本提取备用
 - `Pillow>=9.0.0` - 图像处理
-- `zai-sdk>=0.2.2` - 智谱 AI SDK（可选，AI 功能需要）
+- `glmocr` - 智谱 OCR SDK（可选，AI 功能需要）
 
 ## 许可证
 
-MIT
+MIT
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/pyproject.toml b/packages/markitdown-glmocr/pyproject.toml
index a9277c272..ea06823ce 100644
--- a/packages/markitdown-glmocr/pyproject.toml
+++ b/packages/markitdown-glmocr/pyproject.toml
@@ -5,11 +5,11 @@ build-backend = "hatchling.build"
 [project]
 name = "markitdown-glmocr"
 dynamic = ["version"]
-description = "Intelligent PDF to Markdown converter with glm-ocr AI-powered image/table extraction"
+description = "Intelligent PDF to Markdown converter using glmocr SDK"
 readme = "README.md"
 requires-python = ">=3.10"
 license = "MIT"
-keywords = ["markitdown", "pdf", "ocr", "ai", "llm", "vision", "glm-ocr"]
+keywords = ["markitdown", "pdf", "ocr", "ai", "llm", "vision", "glm-ocr", "glmocr"]
 authors = [
   { name = "Contributors", email = "noreply@github.com" },
 ]
@@ -27,12 +27,11 @@ dependencies = [
   "pdfminer.six>=20251230",
   "pdfplumber>=0.11.9",
   "Pillow>=9.0.0",
-  "tomli>=2.0.0;python_version<'3.11'",
 ]
 
 [project.optional-dependencies]
-zhipu = [
-  "zai-sdk>=0.2.2",
+glmocr = [
+  "glmocr",
 ]
 dev = [
   "pytest>=7.0.0",
@@ -58,13 +57,4 @@ packages = ["src/markitdown_glmocr"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-python_files = ["test_*.py"]
-
-# markitdown-glmocr configuration
-[tool.markitdown-glmocr]
-# API key - set via environment variable GLMOCR_API_KEY
-api_key = ""
-model = "glm-ocr"
-dpi = 150
-timeout = 120
-force_ai = false
\ No newline at end of file
+python_files = ["test_*.py"]
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/__init__.py b/packages/markitdown-glmocr/src/markitdown_glmocr/__init__.py
index 55b9caf87..45512966a 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/__init__.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/__init__.py
@@ -1,13 +1,12 @@
+"""markitdown-glmocr: Intelligent PDF to Markdown converter using glmocr SDK."""
+
 from ._plugin import register_converters
 from ._config import GlmOcrConfig
-from ._ai_service import AIService, AIResult
-from ._converter import GlmOcrPdfConverter
+from ._converter import GlmOcrConverter
 
 __plugin_interface_version__ = 1
 __all__ = [
     "register_converters",
     "GlmOcrConfig",
-    "AIService",
-    "AIResult",
-    "GlmOcrPdfConverter",
+    "GlmOcrConverter",
 ]
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_ai_service.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_ai_service.py
deleted file mode 100644
index 6c8e362a4..000000000
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_ai_service.py
+++ /dev/null
@@ -1,202 +0,0 @@
-"""AI service using zai-sdk and glm-ocr."""
-
-import base64
-import re
-from dataclasses import dataclass
-from typing import BinaryIO, Optional
-
-from ._config import GlmOcrConfig
-
-try:
-    from zai import ZhipuAiClient
-except ImportError:
-    ZhipuAiClient = None
-
-
-@dataclass
-class AIResult:
-    """Result from AI conversion."""
-    text: str
-    success: bool = True
-    error: Optional[str] = None
-
-
-class AIService:
-    """
-    AI Service using zai-sdk + glm-ocr.
-    
-    Features:
-    - Direct API call to glm-ocr layout_parsing
-    - Support image bytes via base64 data URI
-    - Return Markdown or HTML format content
-    """
-    
-    def __init__(
-        self,
-        api_key: Optional[str] = None,
-        model: str = "glm-ocr",
-        timeout: int = 120,
-        config: Optional[GlmOcrConfig] = None,
-    ):
-        if ZhipuAiClient is None:
-            raise ImportError(
-                "zai-sdk is required. Install with: pip install markitdown-glmocr[zhipu]"
-            )
-        
-        if config:
-            self.api_key = api_key or config.api_key
-            self.model = model or config.model
-            self.timeout = timeout or config.timeout
-        else:
-            config = GlmOcrConfig.load()
-            self.api_key = api_key or config.api_key
-            self.model = model
-            self.timeout = timeout
-        
-        if not self.api_key:
-            raise ValueError(
-                "API key is required. Set GLMOCR_API_KEY environment variable"
-            )
-        
-        self.client = ZhipuAiClient(api_key=self.api_key)
-    
-    def image_to_markdown(
-        self,
-        image_stream: BinaryIO,
-        filename: str = "page.png",
-        keep_html: bool = False,
-    ) -> AIResult:
-        """
-        Convert image to Markdown using glm-ocr.
-        
-        Args:
-            image_stream: Image stream
-            filename: Filename (for content type detection)
-            keep_html: Keep HTML format for complex tables (default: False, convert to MD)
-        
-        Returns:
-            AIResult: Conversion result
-        """
-        try:
-            image_stream.seek(0)
-            image_bytes = image_stream.read()
-            
-            base64_image = base64.b64encode(image_bytes).decode("utf-8")
-            content_type = "image/jpeg" if filename.lower().endswith((".jpg", ".jpeg")) else "image/png"
-            data_uri = f"data:{content_type};base64,{base64_image}"
-            
-            response = self.client.layout_parsing.create(
-                model=self.model,
-                file=data_uri
-            )
-            
-            # Get HTML content
-            html = response.md_results or ""
-            
-            if not html and response.layout_details:
-                parts = []
-                for detail_list in response.layout_details:
-                    for detail in detail_list:
-                        if detail.content:
-                            parts.append(detail.content)
-                html = "\n".join(parts)
-            
-            # Convert to Markdown or keep HTML
-            if keep_html:
-                text = html.strip()
-            else:
-                text = self._html_to_markdown(html.strip()) if html else ""
-            
-            return AIResult(text=text, success=True)
-        
-        except Exception as e:
-            return AIResult(text="", success=False, error=str(e))
-        finally:
-            image_stream.seek(0)
-    
-    def _html_to_markdown(self, html: str) -> str:
-        """Convert HTML to Markdown."""
-        if not html:
-            return ""
-        
-        # Extract titles from <div>
-        titles = []
-        div_pattern = r'<div[^>]*>(.*?)</div>'
-        for match in re.finditer(div_pattern, html, re.DOTALL | re.IGNORECASE):
-            title = re.sub(r'<[^>]+>', '', match.group(1)).strip()
-            if title:
-                titles.append(title)
-        
-        # Remove <div> from HTML
-        html = re.sub(div_pattern, '', html, flags=re.DOTALL | re.IGNORECASE)
-        
-        # Check for table
-        if '<table' in html.lower():
-            table_md = self._convert_html_table(html)
-            if titles:
-                return f"**{' '.join(titles)}**\n\n{table_md}"
-            return table_md
-        
-        # Plain text
-        text = re.sub(r'<[^>]+>', '', html).strip()
-        if titles:
-            return f"**{' '.join(titles)}**\n\n{text}"
-        return text
-    
-    def _convert_html_table(self, html: str) -> str:
-        """Convert HTML table to Markdown table."""
-        # Parse rows
-        rows = []
-        rowspan_cells = {}
-        
-        for row_idx, row_match in enumerate(re.finditer(r'<tr[^>]*>(.*?)</tr>', html, re.DOTALL | re.IGNORECASE)):
-            cells = []
-            col_idx = 0
-            
-            # Fill rowspan cells
-            while (row_idx, col_idx) in rowspan_cells:
-                cells.append(rowspan_cells[(row_idx, col_idx)])
-                col_idx += 1
-            
-            # Parse cells
-            for cell_match in re.finditer(r'<td([^>]*)>(.*?)</td>', row_match.group(1), re.DOTALL | re.IGNORECASE):
-                attrs, content = cell_match.groups()
-                content = re.sub(r'<[^>]+>', '', content).strip().replace('\n', ' ')
-                
-                rowspan = int(r.group(1)) if (r := re.search(r'rowspan\s*=\s*["\']?(\d+)', attrs, re.IGNORECASE)) else 1
-                colspan = int(c.group(1)) if (c := re.search(r'colspan\s*=\s*["\']?(\d+)', attrs, re.IGNORECASE)) else 1
-                
-                cells.append(content)
-                cells.extend([""] * (colspan - 1))
-                
-                if rowspan > 1:
-                    for r in range(1, rowspan):
-                        for c in range(colspan):
-                            rowspan_cells[(row_idx + r, col_idx + c)] = content
-                
-                col_idx += colspan
-            
-            # Fill remaining rowspan
-            while (row_idx, col_idx) in rowspan_cells:
-                cells.append(rowspan_cells[(row_idx, col_idx)])
-                col_idx += 1
-            
-            rows.append(cells)
-        
-        if not rows:
-            return ""
-        
-        # Normalize
-        max_cols = max(len(row) for row in rows)
-        for row in rows:
-            row.extend([""] * (max_cols - len(row)))
-        
-        # Simple output: first row as header
-        md_lines = []
-        for i, row in enumerate(rows):
-            md_row = "| " + " | ".join(c or " " for c in row) + " |"
-            md_lines.append(md_row)
-            if i == 0:
-                md_lines.append("|" + "|".join(["---"] * max_cols) + "|")
-        
-        return "\n".join(md_lines)
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
index 44c72da22..d1122524b 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
@@ -1,101 +1,25 @@
-"""Configuration management for markitdown-glmocr."""
+"""Configuration for markitdown-glmocr."""
 
-import os
-from pathlib import Path
-from typing import Optional
-from dataclasses import dataclass
-
-try:
-    import tomllib  # Python 3.11+
-except ImportError:
-    import tomli as tomllib
+from dataclasses import dataclass, field
 
 
 @dataclass
 class GlmOcrConfig:
-    """markitdown-glmocr configuration."""
-    
-    # API 配置
-    api_key: str = ""
-    
-    # OCR 配置
-    model: str = "glm-ocr"
-    dpi: int = 150
-    timeout: int = 120
-    
-    # 处理策略
-    force_ai: bool = False
+    """markitdown-glmocr configuration.
     
-    @classmethod
-    def load(cls, config_path: Optional[str] = None) -> "GlmOcrConfig":
-        """
-        Load configuration from multiple sources (priority high to low):
-        1. Environment variables
-        2. Config file (pyproject.toml or markitdown-glmocr.toml)
-        3. Default values
-        """
-        config = cls()
-        
-        # 1. Load from config file
-        config._load_from_file(config_path)
-        
-        # 2. Environment variables override
-        config._load_from_env()
-        
-        return config
+    Configuration priority (high to low):
+    1. Constructor kwargs
+    2. Environment variables
+    3. .env file
+    4. Built-in defaults
+    """
     
-    def _load_from_file(self, config_path: Optional[str] = None):
-        """Load from config file."""
-        search_paths = []
-        
-        if config_path:
-            search_paths.append(Path(config_path))
-        
-        # Current directory
-        search_paths.append(Path("pyproject.toml"))
-        search_paths.append(Path("markitdown-glmocr.toml"))
-        
-        # User config directory
-        search_paths.append(Path.home() / ".config" / "markitdown-glmocr" / "config.toml")
-        
-        for path in search_paths:
-            if path.exists():
-                try:
-                    with open(path, "rb") as f:
-                        data = tomllib.load(f)
-                    
-                    # Read [tool.markitdown-glmocr] section
-                    if "tool" in data and "markitdown-glmocr" in data["tool"]:
-                        self._apply_config(data["tool"]["markitdown-glmocr"])
-                    elif "markitdown-glmocr" in data:
-                        self._apply_config(data["markitdown-glmocr"])
-                    
-                    break
-                except Exception:
-                    pass
+    # API configuration
+    api_key: str = ""  # Reads from ZHIPU_API_KEY by default
     
-    def _apply_config(self, data: dict):
-        """Apply config from dict."""
-        if "api_key" in data:
-            self.api_key = data["api_key"]
-        if "model" in data:
-            self.model = data["model"]
-        if "dpi" in data:
-            self.dpi = data["dpi"]
-        if "timeout" in data:
-            self.timeout = data["timeout"]
-        if "force_ai" in data:
-            self.force_ai = data["force_ai"]
+    # OCR configuration
+    timeout: int = 1800
+    enable_layout: bool = False
     
-    def _load_from_env(self):
-        """Load from environment variables (highest priority)."""
-        if os.environ.get("GLMOCR_API_KEY"):
-            self.api_key = os.environ["GLMOCR_API_KEY"]
-        if os.environ.get("GLMOCR_MODEL"):
-            self.model = os.environ["GLMOCR_MODEL"]
-        if os.environ.get("GLMOCR_DPI"):
-            self.dpi = int(os.environ["GLMOCR_DPI"])
-        if os.environ.get("GLMOCR_TIMEOUT"):
-            self.timeout = int(os.environ["GLMOCR_TIMEOUT"])
-        if os.environ.get("GLMOCR_FORCE_AI"):
-            self.force_ai = os.environ["GLMOCR_FORCE_AI"].lower() in ("true", "1", "yes")
\ No newline at end of file
+    # Processing strategy
+    force_ai: bool = False
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
index f1434c115..60a488704 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
@@ -7,9 +7,7 @@
 from markitdown import DocumentConverter, DocumentConverterResult, StreamInfo
 from markitdown._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
 
-from ._page_analyzer import PageType, analyze_page
-from ._page_renderer import render_page_to_image
-from ._ai_service import AIService
+from ._config import GlmOcrConfig
 
 # Import dependencies
 _dependency_exc_info = None
@@ -20,6 +18,14 @@
 except ImportError:
     _dependency_exc_info = sys.exc_info()
 
+# glmocr SDK
+try:
+    import glmocr
+    from glmocr import GlmOcr
+except ImportError:
+    glmocr = None
+    GlmOcr = None
+
 
 ACCEPTED_MIME_TYPE_PREFIXES = [
     "application/pdf",
@@ -29,33 +35,63 @@
 ACCEPTED_FILE_EXTENSIONS = [".pdf"]
 
 
-class GlmOcrPdfConverter(DocumentConverter):
+class GlmOcrConverter(DocumentConverter):
     """
-    Intelligent PDF converter using glm-ocr.
+    Intelligent PDF converter using glmocr SDK.
     
     Features:
     - Auto-detect page content type (plain text vs images/tables)
-    - Plain text pages use default parser (pdfplumber/pdfminer)
-    - Complex pages use AI screenshot conversion to Markdown
+    - Plain text pages use pdfplumber/pdfminer (fast, free)
+    - Complex pages use glmocr SDK for AI-powered OCR
+    - One-liner: glmocr.parse("document.pdf") handles everything
     """
 
     def __init__(
         self,
-        ai_service: Optional[AIService] = None,
-        dpi: int = 150,
+        api_key: Optional[str] = None,
+        timeout: int = 1800,
+        enable_layout: bool = False,
         force_ai: bool = False,
+        config: Optional[GlmOcrConfig] = None,
     ):
         """
         Initialize converter.
 
         Args:
-            ai_service: AI service instance
-            dpi: Screenshot DPI (default: 150)
+            api_key: Zhipu API key (reads from ZHIPU_API_KEY env var if not provided)
+            timeout: Request timeout in seconds (default: 1800)
+            enable_layout: Enable layout detection (default: False)
             force_ai: Force all pages to use AI (default: False)
+            config: Optional GlmOcrConfig instance
         """
-        self.ai_service = ai_service
-        self.dpi = dpi
-        self.force_ai = force_ai
+        if glmocr is None:
+            raise ImportError(
+                "glmocr is required. Install with: pip install markitdown-glmocr[glmocr]"
+            )
+        
+        # Use config if provided
+        if config:
+            self.api_key = api_key or config.api_key
+            self.timeout = timeout if timeout != 1800 else config.timeout
+            self.enable_layout = enable_layout if enable_layout else config.enable_layout
+            self.force_ai = force_ai or config.force_ai
+        else:
+            self.api_key = api_key
+            self.timeout = timeout
+            self.enable_layout = enable_layout
+            self.force_ai = force_ai
+        
+        # Lazy init GlmOcr instance
+        self._glmocr: Optional[GlmOcr] = None
+
+    def _get_glmocr(self) -> GlmOcr:
+        """Get or create GlmOcr instance."""
+        if self._glmocr is None:
+            kwargs = {"timeout": self.timeout, "enable_layout": self.enable_layout}
+            if self.api_key:
+                kwargs["api_key"] = self.api_key
+            self._glmocr = GlmOcr(**kwargs)
+        return self._glmocr
 
     def accepts(
         self,
@@ -92,9 +128,6 @@ def convert(
                 _dependency_exc_info[2]
             )
 
-        # Get AI service (from kwargs or instance)
-        ai_service = kwargs.get("ai_service") or self.ai_service
-
         # Read PDF
         pdf_stream = io.BytesIO(file_stream.read())
         markdown_parts = []
@@ -103,32 +136,25 @@ def convert(
             with pdfplumber.open(pdf_stream) as pdf:
                 for page_num, page in enumerate(pdf.pages):
                     # Analyze page type
-                    page_type = analyze_page(page)
-
-                    # Choose processing method based on type
-                    if self.force_ai or page_type != PageType.PLAIN_TEXT:
-                        # Complex content: screenshot + AI
-                        if ai_service:
-                            markdown = self._convert_with_ai(
-                                page, page_num, ai_service
-                            )
-                        else:
-                            # No AI service, fallback to default
-                            markdown = self._extract_text_with_tables(page)
+                    page_type = self._analyze_page(page)
+
+                    # Choose processing method
+                    if self.force_ai or page_type != "plain_text":
+                        # Complex content: use glmocr
+                        markdown = self._convert_with_glmocr(page, page_num)
                     else:
-                        # Plain text: default parser
+                        # Plain text: use pdfplumber
                         markdown = self._extract_text_with_tables(page)
 
                     if markdown.strip():
                         markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
 
-                    # Release page resources
                     page.close()
 
             markdown = "\n\n".join(markdown_parts).strip()
 
         except Exception:
-            # Exception: fallback to pdfminer
+            # Fallback to pdfminer
             pdf_stream.seek(0)
             markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
 
@@ -139,51 +165,47 @@ def convert(
 
         return DocumentConverterResult(markdown=markdown)
 
-    def _convert_with_ai(
-        self,
-        page: Any,
-        page_num: int,
-        ai_service: AIService,
-    ) -> str:
-        """
-        Convert page using AI.
-
-        Args:
-            page: pdfplumber page object
-            page_num: Page number
-            ai_service: AI service
-
-        Returns:
-            str: Markdown content
-        """
+    def _analyze_page(self, page: Any) -> str:
+        """Analyze page content type."""
+        # Check for images
+        if hasattr(page, "images") and page.images:
+            return "complex"
+        
+        # Check for tables
+        tables = page.find_tables()
+        if tables:
+            return "complex"
+        
+        # Check for graphics/curves
+        if hasattr(page, "curves") and page.curves:
+            return "complex"
+        
+        return "plain_text"
+
+    def _convert_with_glmocr(self, page: Any, page_num: int) -> str:
+        """Convert page using glmocr SDK."""
         try:
-            # Screenshot
-            img_stream = render_page_to_image(page, self.dpi)
-
-            # Call AI (filename uses page number)
-            filename = f"page_{page_num + 1}.png"
-            result = ai_service.image_to_markdown(img_stream, filename=filename)
-
-            if result.success and result.text.strip():
-                return result.text
-            else:
-                # AI failed, fallback to default
+            # Render page to image
+            img = page.to_image(resolution=150)
+            img_bytes = io.BytesIO()
+            img.save(img_bytes, format="PNG")
+            img_bytes.seek(0)
+            
+            # Use glmocr to parse the image
+            result = self._get_glmocr().parse(img_bytes)
+            
+            # Check for errors
+            d = result.to_dict()
+            if "error" in d:
                 return self._extract_text_with_tables(page)
-
+            
+            return result.markdown_result or ""
+            
         except Exception:
-            # Exception, fallback to default
             return self._extract_text_with_tables(page)
 
     def _extract_text_with_tables(self, page: Any) -> str:
-        """
-        Extract text and tables.
-
-        Args:
-            page: pdfplumber page object
-
-        Returns:
-            str: Markdown content
-        """
+        """Extract text and tables from page."""
         parts = []
 
         # Extract text
@@ -206,15 +228,7 @@ def _extract_text_with_tables(self, page: Any) -> str:
         return "\n\n".join(parts)
 
     def _table_to_markdown(self, table: list[list[str]]) -> str:
-        """
-        Convert table to Markdown.
-
-        Args:
-            table: 2D list
-
-        Returns:
-            str: Markdown table
-        """
+        """Convert table to Markdown."""
         if not table:
             return ""
 
@@ -236,16 +250,26 @@ def _table_to_markdown(self, table: list[list[str]]) -> str:
         # Format table
         lines = []
         for row_idx, row in enumerate(table):
-            # Pad columns
             padded_row = row + [""] * (len(col_widths) - len(row))
             line = "| " + " | ".join(
                 str(cell).ljust(width) for cell, width in zip(padded_row, col_widths)
             ) + " |"
             lines.append(line)
 
-            # Add separator
             if row_idx == 0:
                 sep = "|" + "|".join("-" * (w + 2) for w in col_widths) + "|"
                 lines.append(sep)
 
-        return "\n".join(lines)
\ No newline at end of file
+        return "\n".join(lines)
+    
+    def close(self):
+        """Close the GlmOcr instance."""
+        if self._glmocr:
+            self._glmocr.close()
+            self._glmocr = None
+    
+    def __enter__(self):
+        return self
+    
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_page_analyzer.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_page_analyzer.py
deleted file mode 100644
index 1aa014043..000000000
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_page_analyzer.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""Page content analyzer for detecting images and tables."""
-
-from enum import Enum
-from typing import Any
-
-
-class PageType(Enum):
-    """Page content type classification."""
-    PLAIN_TEXT = "plain_text"      # 纯文本，使用默认解析
-    HAS_IMAGES = "has_images"      # 包含图片
-    HAS_TABLES = "has_tables"      # 包含表格
-    COMPLEX = "complex"            # 复杂内容（图片+表格+混合）
-
-
-def detect_images(page: Any) -> bool:
-    """
-    检测页面是否包含图片
-
-    Args:
-        page: pdfplumber 页面对象
-
-    Returns:
-        bool: 是否包含图片
-    """
-    # 方法1: 直接检测 page.images
-    if hasattr(page, 'images') and len(page.images) > 0:
-        return True
-
-    # 方法2: 检测页面对象中的图像资源
-    if hasattr(page, 'objects'):
-        objects = page.objects
-        if 'image' in objects and len(objects['image']) > 0:
-            return True
-        # 检测 XObject (可能包含内嵌图像)
-        if 'xobject' in objects and len(objects['xobject']) > 0:
-            for obj in objects['xobject']:
-                if isinstance(obj, dict) and obj.get('subtype') == 'Image':
-                    return True
-
-    # 方法3: 检测页面资源字典
-    try:
-        if hasattr(page, 'page') and hasattr(page.page, 'get_resources'):
-            resources = page.page.get_resources()
-            if resources and 'XObject' in resources:
-                return True
-    except Exception:
-        pass
-
-    return False
-
-
-def detect_tables(page: Any) -> bool:
-    """
-    检测页面是否包含表格
-
-    Args:
-        page: pdfplumber 页面对象
-
-    Returns:
-        bool: 是否包含表格
-    """
-    # 方法1: 使用 pdfplumber 的 extract_tables
-    try:
-        tables = page.extract_tables()
-        if tables and len(tables) > 0:
-            # 过滤空表格
-            for table in tables:
-                if table and any(any(cell for cell in row if cell) for row in table):
-                    return True
-    except Exception:
-        pass
-
-    # 方法2: 检测表格线（边框线）
-    try:
-        if hasattr(page, 'objects') and 'line' in page.objects:
-            lines = page.objects['line']
-            if len(lines) > 10:  # 大量线条可能构成表格
-                # 分析线条是否形成网格结构
-                h_lines = []
-                v_lines = []
-                for line in lines:
-                    # 水平线：高度很小
-                    if abs(line.get('height', 1)) < 2:
-                        h_lines.append(line)
-                    # 垂直线：宽度很小
-                    elif abs(line.get('width', 1)) < 2:
-                        v_lines.append(line)
-
-                if len(h_lines) > 2 and len(v_lines) > 2:
-                    return True
-    except Exception:
-        pass
-
-    return False
-
-
-def analyze_page(page: Any) -> PageType:
-    """
-    分析页面类型
-
-    Args:
-        page: pdfplumber 页面对象
-
-    Returns:
-        PageType: 页面类型
-    """
-    has_images = detect_images(page)
-    has_tables = detect_tables(page)
-
-    if has_images and has_tables:
-        return PageType.COMPLEX
-    elif has_images:
-        return PageType.HAS_IMAGES
-    elif has_tables:
-        return PageType.HAS_TABLES
-    else:
-        return PageType.PLAIN_TEXT
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_page_renderer.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_page_renderer.py
deleted file mode 100644
index d517e8780..000000000
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_page_renderer.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""Page renderer for converting PDF pages to images."""
-
-import io
-from typing import Any
-
-
-def render_page_to_image(page: Any, dpi: int = 150) -> io.BytesIO:
-    """
-    将 PDF 页面渲染为图片
-
-    Args:
-        page: pdfplumber 页面对象
-        dpi: 渲染分辨率，默认 150（平衡质量和速度）
-
-    Returns:
-        io.BytesIO: PNG 图片流
-    """
-    # 使用 pdfplumber 的 to_image 方法
-    page_image = page.to_image(resolution=dpi)
-
-    # 转换为 BytesIO
-    img_stream = io.BytesIO()
-    page_image.original.save(img_stream, format="PNG")
-    img_stream.seek(0)
-
-    return img_stream
-
-
-# DPI 预设值
-DPI_LOW = 72      # 快速预览，文件小
-DPI_MEDIUM = 150  # 平衡质量和速度（默认）
-DPI_HIGH = 300    # 高质量，适合复杂图表
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
index 8abf29ccb..a940acf7d 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
@@ -3,9 +3,7 @@
 from typing import Any
 from markitdown import MarkItDown
 
-from ._config import GlmOcrConfig
-from ._ai_service import AIService
-from ._converter import GlmOcrPdfConverter
+from ._converter import GlmOcrConverter
 
 
 __plugin_interface_version__ = 1
@@ -17,40 +15,19 @@ def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     
     Config sources (priority high to low):
     1. kwargs parameters
-    2. Environment variables
-    3. Config file (pyproject.toml)
-    4. Default values
+    2. Environment variables (ZHIPU_API_KEY)
+    3. .env file
+    4. Built-in defaults
     """
-    # Load config
-    config = GlmOcrConfig.load()
-    
-    # kwargs override config
-    api_key = kwargs.get("api_key") or config.api_key
-    model = kwargs.get("model", config.model)
-    dpi = kwargs.get("dpi", config.dpi)
-    force_ai = kwargs.get("force_ai", config.force_ai)
-    timeout = kwargs.get("timeout", config.timeout)
-    
-    # Create AI service
-    ai_service = None
-    if api_key:
-        try:
-            ai_service = AIService(
-                api_key=api_key,
-                model=model,
-                timeout=timeout,
-            )
-        except Exception:
-            pass
-    
     # Register converter
     PRIORITY_GLMOCR = -1.0
     
     markitdown.register_converter(
-        GlmOcrPdfConverter(
-            ai_service=ai_service,
-            dpi=dpi,
-            force_ai=force_ai,
+        GlmOcrConverter(
+            api_key=kwargs.get("api_key"),
+            timeout=kwargs.get("timeout", 1800),
+            enable_layout=kwargs.get("enable_layout", False),
+            force_ai=kwargs.get("force_ai", False),
         ),
         priority=PRIORITY_GLMOCR,
     )
\ No newline at end of file

From 972fbeda96f597c4af5eddc40260a9b7ee74e8e3 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Tue, 12 May 2026 18:14:12 +0800
Subject: [PATCH 06/15] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9B=BE=E7=89=87ocr?=
 =?UTF-8?q?=E8=83=BD=E5=8A=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/markitdown_glmocr/_converter.py       | 45 +++++++++++++++----
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
index 60a488704..cafee2ec0 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
@@ -1,4 +1,4 @@
-"""GlmOcr PDF Converter - Intelligent PDF to Markdown conversion."""
+"""GlmOcr PDF/Image Converter - Intelligent PDF and Image to Markdown conversion."""
 
 import io
 import sys
@@ -30,19 +30,22 @@
 ACCEPTED_MIME_TYPE_PREFIXES = [
     "application/pdf",
     "application/x-pdf",
+    "image/jpeg",
+    "image/png",
 ]
 
-ACCEPTED_FILE_EXTENSIONS = [".pdf"]
+ACCEPTED_FILE_EXTENSIONS = [".pdf", ".jpg", ".jpeg", ".png"]
 
 
 class GlmOcrConverter(DocumentConverter):
     """
-    Intelligent PDF converter using glmocr SDK.
+    Intelligent PDF/Image converter using glmocr SDK.
     
     Features:
     - Auto-detect page content type (plain text vs images/tables)
     - Plain text pages use pdfplumber/pdfminer (fast, free)
     - Complex pages use glmocr SDK for AI-powered OCR
+    - Image files (PNG, JPG) use glmocr SDK directly
     - One-liner: glmocr.parse("document.pdf") handles everything
     """
 
@@ -128,7 +131,36 @@ def convert(
                 _dependency_exc_info[2]
             )
 
-        # Read PDF
+        extension = (stream_info.extension or "").lower()
+
+        # Image files: use glmocr directly
+        if extension in (".jpg", ".jpeg", ".png"):
+            return self._convert_image(file_stream, extension)
+
+        # PDF files: use hybrid approach
+        return self._convert_pdf(file_stream)
+
+    def _convert_image(self, file_stream: BinaryIO, extension: str = ".png") -> DocumentConverterResult:
+        """Convert image file using glmocr SDK."""
+        img_bytes = file_stream.read()
+
+        try:
+            result = self._get_glmocr().parse(img_bytes)
+
+            # Check for errors
+            d = result.to_dict()
+            if "error" in d:
+                return DocumentConverterResult(markdown="")
+
+            return DocumentConverterResult(
+                markdown=result.markdown_result or ""
+            )
+        except Exception as e:
+            return DocumentConverterResult(
+                markdown=f"<!-- Error converting image: {e} -->"
+            )
+
+    def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
         pdf_stream = io.BytesIO(file_stream.read())
         markdown_parts = []
 
@@ -189,10 +221,7 @@ def _convert_with_glmocr(self, page: Any, page_num: int) -> str:
             img = page.to_image(resolution=150)
             img_bytes = io.BytesIO()
             img.save(img_bytes, format="PNG")
-            img_bytes.seek(0)
-            
-            # Use glmocr to parse the image
-            result = self._get_glmocr().parse(img_bytes)
+            result = self._get_glmocr().parse(img_bytes.getvalue())
             
             # Check for errors
             d = result.to_dict()

From bc349838ca6788eaf3063b88430758fc93458ef7 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Wed, 20 May 2026 18:08:13 +0800
Subject: [PATCH 07/15] =?UTF-8?q?=E6=96=B0=E5=A2=9Epaddleocr=E6=8F=92?=
 =?UTF-8?q?=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                                    |    1 +
 docs/distribution-and-publishing.md           |  679 ++++++++++
 docs/nova-markitdown/SKILL.md                 |  173 +++
 .../references/advanced-usage.md              |  253 ++++
 docs/nova-pdf-refactor-zhipu.md               |  565 --------
 docs/nova-pdf-technical-design.md             | 1175 -----------------
 docs/paddleocr-plugin-design.md               |  102 ++
 ...72\344\276\213\344\273\243\347\240\201.md" |  122 ++
 packages/markitdown-paddleocr/README.md       |  157 +++
 packages/markitdown-paddleocr/pyproject.toml  |   58 +
 .../src/markitdown_paddleocr/__about__.py     |    1 +
 .../src/markitdown_paddleocr/__init__.py      |   16 +
 .../src/markitdown_paddleocr/_config.py       |   46 +
 .../src/markitdown_paddleocr/_converter.py    |  304 +++++
 .../markitdown_paddleocr/_dual_converter.py   |  160 +++
 .../markitdown_paddleocr/_paddle_client.py    |  189 +++
 .../src/markitdown_paddleocr/_plugin.py       |   35 +
 .../markitdown-paddleocr/tests/__init__.py    |    1 +
 .../tests/test_converter.py                   |  214 +++
 .../tests/test_paddle_client.py               |  241 ++++
 20 files changed, 2752 insertions(+), 1740 deletions(-)
 create mode 100644 docs/distribution-and-publishing.md
 create mode 100644 docs/nova-markitdown/SKILL.md
 create mode 100644 docs/nova-markitdown/references/advanced-usage.md
 delete mode 100644 docs/nova-pdf-refactor-zhipu.md
 delete mode 100644 docs/nova-pdf-technical-design.md
 create mode 100644 docs/paddleocr-plugin-design.md
 create mode 100644 "docs/panddle\347\244\272\344\276\213\344\273\243\347\240\201.md"
 create mode 100644 packages/markitdown-paddleocr/README.md
 create mode 100644 packages/markitdown-paddleocr/pyproject.toml
 create mode 100644 packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
 create mode 100644 packages/markitdown-paddleocr/src/markitdown_paddleocr/__init__.py
 create mode 100644 packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
 create mode 100644 packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
 create mode 100644 packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py
 create mode 100644 packages/markitdown-paddleocr/src/markitdown_paddleocr/_paddle_client.py
 create mode 100644 packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
 create mode 100644 packages/markitdown-paddleocr/tests/__init__.py
 create mode 100644 packages/markitdown-paddleocr/tests/test_converter.py
 create mode 100644 packages/markitdown-paddleocr/tests/test_paddle_client.py

diff --git a/.gitignore b/.gitignore
index 5a6b7d117..dc473bfb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ src/.DS_Store
 .secrets.local
 *.secrets
 .env.local
+test-data/
diff --git a/docs/distribution-and-publishing.md b/docs/distribution-and-publishing.md
new file mode 100644
index 000000000..c2240b0db
--- /dev/null
+++ b/docs/distribution-and-publishing.md
@@ -0,0 +1,679 @@
+# MarkItDown 分发与发布方案
+
+## 背景
+
+本地 fork 版本包含两个核心包：
+- **markitdown** `0.1.6b2`（官方 PyPI 最新为 `0.1.5`）
+- **markitdown-glmocr** `0.1.0`（PyPI 上不存在，纯本地新增插件）
+
+目标：让其他人能方便使用包含 glmocr 插件的 markitdown，不依赖官方是否合并 PR。
+
+---
+
+## 方案总览
+
+| 方案 | 适用场景 | 用户体验 | 维护成本 | 分发方式 |
+|------|---------|----------|---------|---------|
+| **A. PyPI 独立发布** | 面向 Python 开发者 | `pip install` 即用 | 低 | PyPI |
+| **B. Pyx 打包独立可执行文件** | 面向非技术用户 | 双击/命令行直接运行 | 中 | GitHub Releases |
+| **C. Docker 镜像** | 服务端/CI 场景 | `docker run` 即用 | 低 | Docker Hub / GHCR |
+| **D. 混合方案（推荐）** | 覆盖所有场景 | 按需选择 | 中 | PyPI + GitHub Releases |
+
+---
+
+## 方案 A：PyPI 独立发布（推荐优先执行）
+
+### 核心思路
+
+不改动 `markitdown` 主包名，仅将 `markitdown-glmocr` 发布到 PyPI。用户安装方式：
+
+```bash
+pip install markitdown[all] markitdown-glmocr[glmocr]
+```
+
+使用时加 `-p` 参数启用插件：
+
+```bash
+markitdown -p document.pdf
+```
+
+### 为什么不 fork 一个 `markitdown-glmocr-all` 包？
+
+1. `markitdown` 的插件机制（entry_points）已经设计好，`markitdown-glmocr` 作为插件包完全解耦
+2. 避免维护 markitdown 核心代码的 fork 副本
+3. 官方更新 markitdown 核心时，用户直接 `pip install -U markitdown` 即可升级
+
+### 详细步骤
+
+#### 1. 修改 `markitdown-glmocr` 的 pyproject.toml
+
+```toml
+[project]
+name = "markitdown-glmocr"
+version = "0.1.0"  # 改为静态版本，首次发布不用 dynamic
+description = "Intelligent PDF/Image to Markdown converter using GLM-OCR SDK"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+authors = [
+  { name = "Your Name", email = "your@email.com" },
+]
+
+# 关键：声明对 markitdown 的版本范围依赖
+dependencies = [
+  "markitdown>=0.1.0,<1.0.0",
+  "pdfminer.six>=20251230",
+  "pdfplumber>=0.11.9",
+  "Pillow>=9.0.0",
+]
+
+[project.optional-dependencies]
+glmocr = ["glmocr>=0.1.0"]
+all = [
+  "glmocr>=0.1.0",
+  "markitdown[all]",
+]
+dev = ["pytest>=7.0.0", "build", "twine"]
+
+# 插件入口点（已有，无需修改）
+[project.entry-points."markitdown.plugin"]
+markitdown_glmocr = "markitdown_glmocr"
+```
+
+#### 2. 编写 README.md
+
+在 `packages/markitdown-glmocr/` 下创建完善的 README：
+
+```markdown
+# markitdown-glmocr
+
+Intelligent PDF/Image to Markdown converter plugin for [markitdown](https://github.com/microsoft/markitdown),
+powered by [GLM-OCR](https://github.com/zai-org/glm-ocr) SDK.
+
+## Installation
+
+pip install markitdown-glmocr[glmocr]
+
+## Usage
+
+# Enable plugins with -p flag
+markitdown -p document.pdf
+markitdown -p image.png
+
+# Or use programmatically
+from markitdown import MarkItDown
+md = MarkItDown(enable_plugins=True)
+result = md.convert("document.pdf")
+print(result.markdown)
+
+## Configuration
+
+Set your Zhipu API key:
+
+export ZHIPU_API_KEY=your_api_key_here
+```
+
+#### 3. 构建并发布
+
+```bash
+cd packages/markitdown-glmocr
+
+# 安装构建工具
+pip install build twine
+
+# 构建 wheel 和 sdist
+python -m build
+
+# 检查包
+twine check dist/*
+
+# 上传到 TestPyPI 先验证
+twine upload --repository testpypi dist/*
+
+# 验证安装
+pip install --index-url https://test.pypi.org/simple/ markitdown-glmocr[glmocr]
+
+# 正式发布到 PyPI
+twine upload dist/*
+```
+
+#### 4. PyPI 账号准备
+
+- 注册 https://pypi.org 账号
+- 配置 API Token：Account settings → API tokens → Add API token
+- 配置 `~/.pypirc`：
+
+```ini
+[pypi]
+username = __token__
+password = pypi-xxxxxxxxxxxx
+
+[testpypi]
+username = __token__
+password = pypi-test-xxxxxxxxxxxx
+```
+
+### 优缺点
+
+| 优点 | 缺点 |
+|------|------|
+| 标准Python生态分发方式 | 需要用户有Python环境 |
+| 插件机制天然解耦，官方更新不受影响 | glmocr SDK 依赖较多（numpy, pymupdf等） |
+| 版本管理清晰 | 需要维护PyPI账号和token |
+| `pip install` 一行搞定 | |
+
+---
+
+## 方案 B：PyInstaller 打包独立可执行文件
+
+### 核心思路
+
+将 markitdown + markitdown-glmocr + glmocr + 所有依赖打包成单个可执行文件，用户无需安装 Python。
+
+### 详细步骤
+
+#### 1. 创建打包配置
+
+在项目根目录创建 `build_standalone/` 目录：
+
+```
+build_standalone/
+├── build.py          # 构建脚本
+├── markitdown.spec   # PyInstaller spec 文件
+└── README.md         # 使用说明
+```
+
+#### 2. 编写 PyInstaller spec 文件
+
+`build_standalone/markitdown.spec`：
+
+```python
+# -*- mode: python ; coding: utf-8 -*-
+import sys
+from pathlib import Path
+
+block_cipher = None
+
+# 收集所有隐式导入的模块
+hiddenimports = [
+    'markitdown',
+    'markitdown.converters',
+    'markitdown_glmocr',
+    'glmocr',
+    'pdfminer',
+    'pdfminer.high_level',
+    'pdfminer.layout',
+    'pdfminer.utils',
+    'pdfplumber',
+    'PIL',
+    'magika',
+    'charset_normalizer',
+    'markdownify',
+    'beautifulsoup4',
+    'bs4',
+    'mammoth',
+    'openpyxl',
+    'pandas',
+    'python_pptx',
+    'lxml',
+    'numpy',
+    'pydantic',
+    'pymupdf',
+    'fitz',           # pymupdf 的内部名
+    'tqdm',
+    'yaml',
+    'dotenv',
+    'requests',
+    'defusedxml',
+]
+
+a = Analysis(
+    ['entry_point.py'],
+    pathex=[],
+    binaries=[],
+    datas=[
+        # 包含 magika 的模型文件
+        ('magika/models', 'magika/models'),
+    ],
+    hiddenimports=hiddenimports,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    [],
+    name='markitdown',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+    icon=None,
+)
+```
+
+#### 3. 编写入口文件
+
+`build_standalone/entry_point.py`：
+
+```python
+"""Entry point for PyInstaller build."""
+import sys
+import os
+
+# 确保插件被启用
+if '-p' not in sys.argv and '--use-plugins' not in sys.argv:
+    # 自动启用 glmocr 插件
+    sys.argv.insert(1, '-p')
+
+from markitdown.__main__ import main
+
+if __name__ == '__main__':
+    main()
+```
+
+#### 4. 编写构建脚本
+
+`build_standalone/build.py`：
+
+```python
+#!/usr/bin/env python3
+"""Build standalone markitdown executable with PyInstaller."""
+import subprocess
+import sys
+import platform
+import shutil
+from pathlib import Path
+
+def main():
+    project_root = Path(__file__).parent.parent
+    build_dir = Path(__file__).parent
+
+    # 1. 确保依赖已安装
+    print(">>> Installing dependencies...")
+    subprocess.run([
+        sys.executable, "-m", "pip", "install", "-e",
+        str(project_root / "packages" / "markitdown[all]"),
+    ], check=True)
+    subprocess.run([
+        sys.executable, "-m", "pip", "install", "-e",
+        str(project_root / "packages" / "markitdown-glmocr[glmocr]"),
+    ], check=True)
+    subprocess.run([
+        sys.executable, "-m", "pip", "install", "pyinstaller",
+    ], check=True)
+
+    # 2. 执行 PyInstaller
+    print(">>> Building executable...")
+    subprocess.run([
+        sys.executable, "-m", "PyInstaller",
+        "--clean",
+        "--noconfirm",
+        str(build_dir / "markitdown.spec"),
+    ], cwd=str(build_dir), check=True)
+
+    # 3. 输出结果
+    dist_dir = build_dir / "dist"
+    exe_name = "markitdown.exe" if platform.system() == "Windows" else "markitdown"
+    exe_path = dist_dir / exe_name
+
+    if exe_path.exists():
+        size_mb = exe_path.stat().st_size / (1024 * 1024)
+        print(f"\n✅ Build successful!")
+        print(f"   Executable: {exe_path}")
+        print(f"   Size: {size_mb:.1f} MB")
+        print(f"   Platform: {platform.system()} {platform.machine()}")
+    else:
+        print("\n❌ Build failed - executable not found")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
+```
+
+#### 5. GitHub Actions 自动构建多平台
+
+`.github/workflows/build-standalone.yml`：
+
+```yaml
+name: Build Standalone Executable
+
+on:
+  push:
+    tags: ['v*']
+  workflow_dispatch:
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        include:
+          - os: windows-latest
+            artifact: markitdown-windows-x64.exe
+          - os: ubuntu-latest
+            artifact: markitdown-linux-x64
+          - os: macos-latest
+            artifact: markitdown-macos-x64
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Install dependencies
+        run: |
+          pip install -e ./packages/markitdown[all]
+          pip install -e ./packages/markitdown-glmocr[glmocr]
+          pip install pyinstaller
+
+      - name: Build with PyInstaller
+        run: |
+          pyinstaller --clean --noconfirm build_standalone/markitdown.spec
+        working-directory: .
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.artifact }}
+          path: dist/markitdown*
+
+  release:
+    needs: build
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          path: artifacts
+
+      - name: Create Release
+        uses: softprops/action-gh-release@v2
+        with:
+          files: artifacts/**
+          generate_release_notes: true
+```
+
+### 预估产物大小
+
+| 平台 | 预估大小 | 说明 |
+|------|---------|------|
+| Windows x64 | ~80-120 MB | 含 Python 运行时 + numpy + pymupdf 等 |
+| Linux x64 | ~60-90 MB | |
+| macOS x64 | ~70-100 MB | |
+
+### 优缺点
+
+| 优点 | 缺点 |
+|------|------|
+| 无需Python环境，双击可用 | 产物体积大（80-120MB） |
+| 非技术用户友好 | 每次更新需重新打包 |
+| 可离线使用 | PyInstaller 隐式导入容易遗漏，调试成本高 |
+| 可通过 GitHub Releases 分发 | 跨平台需分别构建 |
+| | 杀毒软件可能误报 |
+
+### 替代方案：Nuitka
+
+如果 PyInstaller 遇到问题，可考虑 [Nuitka](https://nuitka.net/)：
+
+```bash
+pip install nuitka
+python -m nuitka --standalone --onefile \
+    --enable-plugin=numpy,pandas \
+    --include-data-dir=magika/models=magika/models \
+    entry_point.py
+```
+
+Nuitka 编译为真正的机器码，性能更好，但构建时间更长。
+
+---
+
+## 方案 C：Docker 镜像
+
+### 核心思路
+
+基于官方 Dockerfile 扩展，加入 glmocr 插件。
+
+### Dockerfile
+
+```dockerfile
+FROM python:3.13-slim-bullseye
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg exiftool && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY packages/markitdown /app/packages/markitdown
+COPY packages/markitdown-glmocr /app/packages/markitdown-glmocr
+
+RUN pip --no-cache-dir install \
+    /app/packages/markitdown[all] \
+    /app/packages/markitdown-glmocr[glmocr]
+
+ENTRYPOINT ["markitdown"]
+```
+
+### 使用方式
+
+```bash
+# 构建
+docker build -t markitdown-glmocr .
+
+# 使用
+docker run --rm -v $(pwd):/data markitdown-glmocr -p /data/document.pdf
+
+# 发布到 GHCR
+docker tag markitdown-glmocr ghcr.io/yourname/markitdown-glmocr:latest
+docker push ghcr.io/yourname/markitdown-glmocr:latest
+```
+
+### 优缺点
+
+| 优点 | 缺点 |
+|------|------|
+| 环境完全隔离 | 需要 Docker 环境 |
+| 适合 CI/CD 集成 | 镜像体积 ~500MB+ |
+| 服务端部署友好 | 桌面用户不友好 |
+
+---
+
+## 方案 D：混合方案（推荐）
+
+### 执行优先级
+
+```
+1️⃣ 方案A：PyPI 发布 markitdown-glmocr    →  Python 开发者首选
+2️⃣ 方案B：PyInstaller 打包               →  非技术用户 / 离线场景
+3️⃣ 方案C：Docker 镜像                    →  服务端 / CI 场景（可选）
+```
+
+### 具体执行计划
+
+#### Phase 1：PyPI 发布（1-2 天）
+
+1. **完善 markitdown-glmocr 包**
+   - [ ] 补充 README.md（安装、使用、配置说明）
+   - [ ] 补充 LICENSE 文件
+   - [ ] 添加 `py.typed` 标记（如需类型提示支持）
+   - [ ] 修复 `__about__.py` 版本号为 `0.1.0`
+   - [ ] 确保所有依赖版本范围合理
+
+2. **本地验证**
+   - [ ] 在全新虚拟环境中测试安装流程
+   ```bash
+   python -m venv /tmp/test-env
+   source /tmp/test-env/bin/activate
+   pip install markitdown[all] markitdown-glmocr[glmocr]
+   markitdown -p --list-plugins  # 应显示 markitdown_glmocr
+   markitdown -p test.pdf        # 功能测试
+   ```
+
+3. **发布到 TestPyPI 验证**
+   - [ ] `python -m build`
+   - [ ] `twine upload --repository testpypi dist/*`
+   - [ ] 从 TestPyPI 安装并测试
+
+4. **正式发布到 PyPI**
+   - [ ] `twine upload dist/*`
+
+5. **发布后验证**
+   - [ ] `pip install markitdown-glmocr[glmocr]`
+   - [ ] 功能测试通过
+
+#### Phase 2：独立可执行文件（2-3 天）
+
+1. **搭建 PyInstaller 构建流程**
+   - [ ] 创建 `build_standalone/` 目录和配置
+   - [ ] 本地 Windows 构建测试
+   - [ ] 解决隐式导入问题（最耗时）
+
+2. **GitHub Actions CI/CD**
+   - [ ] 配置多平台构建 workflow
+   - [ ] 打 tag 触发自动构建和 Release
+
+3. **分发**
+   - [ ] GitHub Releases 页面提供下载
+   - [ ] README 中添加下载链接
+
+#### Phase 3：Docker 镜像（可选，0.5 天）
+
+1. **编写 Dockerfile**
+2. **发布到 GHCR**
+3. **文档补充**
+
+---
+
+## 关于 PR 合并的判断
+
+### 官方接受 PR 的可能性分析
+
+| 因素 | 评估 |
+|------|------|
+| markitdown 已有插件机制 | ✅ 架构上完全兼容 |
+| glmocr 是第三方商业API | ⚠️ 官方可能不愿绑定特定商业服务 |
+| 官方已有 azure-doc-intel 集成 | ✅ 有先例，但 Azure 是微软自家产品 |
+| PR 贡献者不是微软员工 | ⚠️ 可能需要较长时间审核 |
+| markitdown 版本还在 0.x (Beta) | ✅ 正是引入新功能的阶段 |
+
+**结论**：官方大概率不会直接接受 glmocr 插件 PR（因为绑定了非微软的商业 API），但插件机制的存在意味着**不需要官方接受 PR**，独立发布到 PyPI 是完全合理的路径。
+
+### 建议策略
+
+1. **先独立发布到 PyPI**（方案A），不依赖官方
+2. **同时提交 PR**，作为"贡献回社区"的姿态，即使被拒也无所谓
+3. PR 描述中强调：
+   - 完全通过插件机制扩展，不修改核心代码
+   - 可作为"第三方插件集成"的参考实现
+   - 有完整的测试和文档
+
+---
+
+## 快速开始：5分钟发布到 PyPI
+
+如果你现在就想发布，执行以下命令：
+
+```bash
+# 1. 进入 glmocr 插件目录
+cd D:/15-AI-Coding/markitdown/packages/markitdown-glmocr
+
+# 2. 安装构建工具
+pip install build twine
+
+# 3. 构建
+python -m build
+
+# 4. 检查
+twine check dist/*
+
+# 5. 发布到 TestPyPI（先测试）
+twine upload --repository testpypi dist/*
+
+# 6. 确认无误后发布到正式 PyPI
+twine upload dist/*
+```
+
+发布后，其他人只需：
+
+```bash
+pip install markitdown-glmocr[glmocr]
+export ZHIPU_API_KEY=your_key
+markitdown -p your-file.pdf
+```
+
+---
+
+## 附录：常见问题
+
+### Q1: 用户不装 glmocr SDK，只装 markitdown-glmocr 会怎样？
+
+不会报错。`_converter.py` 中 glmocr 是 lazy import，只在实际转换时才检查。
+但建议用户安装 `markitdown-glmocr[glmocr]` 以获得完整功能。
+
+### Q2: 如何处理 markitdown 核心包的版本兼容性？
+
+`markitdown-glmocr` 的 `pyproject.toml` 中声明 `markitdown>=0.1.0,<1.0.0`。
+markitdown 的插件接口（entry_points）是稳定的，0.x 版本间不会 breaking change。
+
+### Q3: PyInstaller 打包后 API Key 如何配置？
+
+通过环境变量 `ZHIPU_API_KEY` 传入，或在运行时通过 `.env` 文件：
+```bash
+# 方式1：环境变量
+set ZHIPU_API_KEY=your_key
+markitdown -p document.pdf
+
+# 方式2：.env 文件（glmocr SDK 自动读取）
+echo ZHIPU_API_KEY=your_key > .env
+markitdown -p document.pdf
+```
+
+### Q4: 能否做一个"一键安装包"给非技术用户？
+
+可以结合 PyInstaller + Inno Setup（Windows）或 create-dmg（macOS）做安装向导：
+
+```
+Windows: PyInstaller → .exe → Inno Setup → .exe 安装向导
+macOS:   PyInstaller → binary → create-dmg → .dmg
+Linux:   PyInstaller → binary → AppImage → .AppImage
+```
+
+但这增加了维护成本，建议先只提供裸 executable，待有需求再加安装向导。
+
+### Q5: uvx / pipx 支持吗？
+
+支持！发布到 PyPI 后：
+
+```bash
+# 一次性运行（无需安装）
+uvx --from markitdown-glmocr[glmocr] markitdown -p document.pdf
+
+# 或用 pipx
+pipx run markitdown -p document.pdf
+```
+
+这是最推荐的非技术用户使用方式——比 PyInstaller 更轻量，且始终使用最新版。
diff --git a/docs/nova-markitdown/SKILL.md b/docs/nova-markitdown/SKILL.md
new file mode 100644
index 000000000..c9c53a7dc
--- /dev/null
+++ b/docs/nova-markitdown/SKILL.md
@@ -0,0 +1,173 @@
+---
+name: nova-markitdown
+description:
+  Convert various file formats (PDF, Word, Excel, PPT, images, HTML, audio, video) to Markdown using markitdown CLI with dual OCR fallback:glmocr (primary) → paddleocr (fallback). Activate when users need file-to-markdown conversion, OCR recognition, content extraction, structured data from documents, or batch document processing. Keywords:PDF to markdown, image OCR, document conversion, markitdown, glmocr, paddleocr, file extraction.
+compatibility:
+  Python 3.10+, pip packages:markitdown[all], markitdown-glmocr[glmocr], markitdown-paddleocr.  Requires ZHIPU_API_KEY for glmocr, BAIDU_PADDLE_TOKEN for paddleocr fallback.  Network access to Zhipu AI API and Baidu PaddleOCR API.
+metadata:
+  author: hankl
+  version: "2.0.0"
+---
+
+# nova-markitdown
+
+使用 markitdown 命令行工具将各种文件格式转换为 Markdown，**双 OCR 引擎自动降级**：glmocr（主）→ paddleocr（备）。
+
+## 触发条件
+
+当用户需要以下操作时激活此技能：
+
+- 将文件（PDF、Word、Excel、PPT、图片、HTML、音频、视频等）转换为 Markdown 文本
+- 提取文件中的文本内容、表格、图片描述等
+- 对 PDF 或图片进行 OCR 识别和结构化提取
+- 批量转换多个文件为 Markdown
+
+## 环境设置
+
+### 安装依赖
+
+```bash
+# 基础 markitdown（支持大部分文件格式）
+pip install 'markitdown'
+
+# markitdown-glmocr 插件（主 OCR，智谱 GLM-OCR）
+pip install 'markitdown-glmocr[glmocr]'
+
+# markitdown-paddleocr 插件（备 OCR，百度 PaddleOCR）
+pip install 'markitdown-paddleocr'
+```
+
+### 环境变量
+
+```bash
+# 主 OCR：智谱 API Key（glmocr）
+export ZHIPU_API_KEY="your-zhipu-api-key"
+
+# 备 OCR：百度 PaddleOCR Token（paddleocr，glmocr 失败时自动切换）
+export BAIDU_PADDLE_TOKEN="your-paddle-token"
+
+# 可选配置
+export GLMOCR_MODEL="glm-ocr"          # glmocr 模型名称
+export GLMOCR_TIMEOUT="600"             # glmocr 请求超时秒数
+export PADDLE_OCR_MODEL="PaddleOCR-VL-1.5"  # paddleocr 模型名称
+```
+
+> **重要**：`ZHIPU_API_KEY` 用于 glmocr（主），`BAIDU_PADDLE_TOKEN` 用于 paddleocr（备）。两者都设置可实现自动降级。
+
+### 验证安装
+
+```bash
+markitdown --version
+markitdown --list-plugins  # 输出中应包含 markitdown_glmocr 和 markitdown_paddleocr
+```
+
+## 核心规则
+
+1. **优先使用 markitdown 命令行**：所有文件转换优先通过 `markitdown` CLI 完成。
+2. **PDF 和图片使用双 OCR 降级策略**：
+   - **第一步**：使用 `markitdown -p`（glmocr 插件）尝试解析
+   - **第二步**：若 glmocr 报错（API 错误、超时、Key 失效等），自动切换到 paddleocr 插件重试
+   - **实现方式**：通过 Python 脚本封装，捕获异常后切换
+3. **其他文件类型不使用 `-p`**：Word、Excel、PPT、HTML、音频等使用不带 `-p` 的 markitdown 命令。
+4. **复杂场景回退到 Python SDK**：需要结构化 JSON 输出、按区域筛选、自定义处理流程时，使用 Python 代码。详见 [advanced-usage.md](references/advanced-usage.md)。
+
+## 快速参考
+
+| 文件类型 | 命令 | `-p` | 说明 |
+|----------|------|:---:|------|
+| PDF | `markitdown -p file.pdf -o out.md` | Yes | glmocr AI OCR |
+| 图片 (.jpg/.png) | `markitdown -p image.png -o out.md` | Yes | glmocr AI OCR |
+| Word (.docx) | `markitdown file.docx -o out.md` | No | 内置转换器 |
+| Excel (.xlsx/.xls) | `markitdown file.xlsx -o out.md` | No | 内置转换器 |
+| PPT (.pptx) | `markitdown file.pptx -o out.md` | No | 内置转换器 |
+| HTML | `markitdown file.html -o out.md` | No | 内置转换器 |
+| CSV/JSON/XML | `markitdown file.csv -o out.md` | No | 内置转换器 |
+| 音频 | `markitdown audio.mp3 -o out.md` | No | 内置转换器 |
+| ZIP | `markitdown archive.zip -o out.md` | No | 自动遍历 |
+| YouTube | `markitdown "https://youtube.com/..." -o out.md` | No | 视频转录 |
+
+## 使用指南
+
+### PDF 转换（双 OCR 降级）
+
+```bash
+# 方式1：CLI 直接调用（仅 glmocr，无降级）
+markitdown -p document.pdf -o output.md
+
+# 方式2：Python 双 OCR 降级（推荐，glmocr 失败自动切 paddleocr）
+python -c "
+from markitdown import MarkItDown
+from markitdown_glmocr import GlmOcrConverter
+from markitdown_paddleocr import PaddleOcrConverter
+
+md = MarkItDown(enable_plugins=False)
+try:
+    md.register_converter(GlmOcrConverter(), priority=-1.0)
+    result = md.convert('document.pdf')
+    if not result.markdown.strip():
+        raise Exception('Empty result')
+except Exception as e:
+    print(f'glmocr failed: {e}, falling back to paddleocr...')
+    md = MarkItDown(enable_plugins=False)
+    md.register_converter(PaddleOcrConverter(), priority=-1.0)
+    result = md.convert('document.pdf')
+print(result.markdown)
+"
+```
+
+工作原理：纯文本页面使用 pdfplumber/pdfminer 快速提取；复杂页面（含图片、表格、公式）自动使用 AI OCR。glmocr 失败时自动降级到 paddleocr。
+
+### 图片转换（双 OCR 降级）
+
+```bash
+# CLI 直接调用（仅 glmocr）
+markitdown -p photo.jpg -o photo.md
+
+# Python 双 OCR 降级（推荐）
+python -c "
+from markitdown import MarkItDown
+from markitdown_glmocr import GlmOcrConverter
+from markitdown_paddleocr import PaddleOcrConverter
+
+md = MarkItDown(enable_plugins=False)
+try:
+    md.register_converter(GlmOcrConverter(), priority=-1.0)
+    result = md.convert('photo.jpg')
+    if not result.markdown.strip():
+        raise Exception('Empty result')
+except Exception as e:
+    print(f'glmocr failed: {e}, falling back to paddleocr...')
+    md = MarkItDown(enable_plugins=False)
+    md.register_converter(PaddleOcrConverter(), priority=-1.0)
+    result = md.convert('photo.jpg')
+print(result.markdown)
+"
+```
+
+### 其他文件格式
+
+```bash
+markitdown document.docx -o document.md     # Word
+markitdown spreadsheet.xlsx -o data.md      # Excel
+markitdown presentation.pptx -o slides.md   # PPT
+markitdown webpage.html -o webpage.md       # HTML
+markitdown data.csv -o data.md              # CSV
+markitdown config.json -o config.md         # JSON
+markitdown archive.zip -o archive.md        # ZIP
+```
+
+## 故障排查
+
+**插件未发现**：运行 `markitdown --list-plugins`，若无 glmocr 则 `pip install 'markitdown-glmocr[glmocr]'`，若无 paddleocr 则 `pip install markitdown-paddleocr`。
+
+**glmocr API Key 错误**：检查 `echo $ZHIPU_API_KEY`，或在 `.env` 中设置。glmocr 失败时会自动降级到 paddleocr。
+
+**paddleocr Token 错误**：检查 `echo $BAIDU_PADDLE_TOKEN`，或在 `.env` 中设置。
+
+**PDF 输出为空或质量差**：确保使用 `-p` 参数，检查 API Key/Token，可设置 `GLMOCR_ENABLE_LAYOUT=true` 提升结构化输出。
+
+**两个 OCR 都失败**：检查网络连接，确认两个 API Key/Token 都有效。
+
+## 高级用法
+
+需要结构化 JSON 输出、按区域筛选、批量处理、自定义参数、**双 OCR 降级封装**等高级场景，请参考 [advanced-usage.md](references/advanced-usage.md)，包含 Python SDK 的完整示例和 `DualOcrConverter` 统一封装。
diff --git a/docs/nova-markitdown/references/advanced-usage.md b/docs/nova-markitdown/references/advanced-usage.md
new file mode 100644
index 000000000..f21a3699d
--- /dev/null
+++ b/docs/nova-markitdown/references/advanced-usage.md
@@ -0,0 +1,253 @@
+# 高级用法：Python SDK + 双 OCR 降级
+
+当 markitdown 命令行无法满足需求时（如需要结构化 JSON 输出、按区域筛选、自定义处理流程、双 OCR 降级等），使用 Python 代码实现。
+
+## 场景 0：DualOcrConverter — 双 OCR 自动降级（推荐）
+
+`DualOcrConverter` 封装了 glmocr（主）→ paddleocr（备）的自动降级逻辑，是 PDF/图片处理的推荐方式。
+
+```python
+from markitdown import MarkItDown
+from markitdown_glmocr import GlmOcrConverter
+from markitdown_paddleocr import PaddleOcrConverter
+
+class DualOcrConverter:
+    """双 OCR 转换器：glmocr（主）→ paddleocr（备）自动降级。"""
+
+    def __init__(self, glmocr_kwargs=None, paddleocr_kwargs=None):
+        self.glmocr_kwargs = glmocr_kwargs or {}
+        self.paddleocr_kwargs = paddleocr_kwargs or {}
+
+    def convert(self, file_path: str) -> str:
+        """转换文件，glmocr 失败自动降级到 paddleocr。"""
+        # 第一步：尝试 glmocr
+        try:
+            md = MarkItDown(enable_plugins=False)
+            md.register_converter(GlmOcrConverter(**self.glmocr_kwargs), priority=-1.0)
+            result = md.convert(file_path)
+            if result.markdown and result.markdown.strip():
+                print("✓ glmocr 解析成功")
+                return result.markdown
+            raise Exception("glmocr returned empty result")
+        except Exception as e:
+            print(f"⚠ glmocr 失败: {e}")
+
+        # 第二步：降级到 paddleocr
+        try:
+            md = MarkItDown(enable_plugins=False)
+            md.register_converter(PaddleOcrConverter(**self.paddleocr_kwargs), priority=-1.0)
+            result = md.convert(file_path)
+            if result.markdown and result.markdown.strip():
+                print("✓ paddleocr 解析成功（降级）")
+                return result.markdown
+            raise Exception("paddleocr returned empty result")
+        except Exception as e:
+            print(f"✗ paddleocr 也失败: {e}")
+            raise RuntimeError(f"Both OCR engines failed. glmocr error preceded paddleocr fallback error.")
+
+# 使用
+converter = DualOcrConverter()
+markdown = converter.convert("document.pdf")
+```
+
+### 自定义参数
+
+```python
+converter = DualOcrConverter(
+    glmocr_kwargs={
+        "api_key": "sk-xxx",
+        "enable_layout": True,
+        "force_ai": True,
+    },
+    paddleocr_kwargs={
+        "token": "your-paddle-token",
+        "model": "PaddleOCR-VL-1.5",
+        "use_chart_recognition": True,
+    }
+)
+markdown = converter.convert("complex_report.pdf")
+```
+
+### 批量处理 + 双 OCR
+
+```python
+from pathlib import Path
+
+converter = DualOcrConverter()
+pdf_dir = Path("./documents")
+output_dir = pdf_dir / "output"
+output_dir.mkdir(exist_ok=True)
+
+for pdf_file in pdf_dir.glob("*.pdf"):
+    try:
+        markdown = converter.convert(str(pdf_file))
+        (output_dir / f"{pdf_file.stem}.md").write_text(markdown, encoding="utf-8")
+        print(f"✓ {pdf_file.name}")
+    except RuntimeError:
+        print(f"✗ {pdf_file.name} — both OCR engines failed")
+```
+
+## 场景 1：结构化 JSON 输出（glmocr 区域标签、边界框）
+
+```python
+import glmocr
+
+# 一行调用完成 OCR
+result = glmocr.parse("report.pdf")
+
+# 获取 Markdown 文本
+print(result.markdown_result)
+
+# 获取结构化数据（按页分组，每页包含多个区域）
+for page_idx, page_regions in enumerate(result.json_result):
+    print(f"Page {page_idx + 1}: {len(page_regions)} regions")
+    for region in page_regions:
+        print(f"  [{region['label']}] {region['content'][:60]}")
+
+# 按标签筛选特定类型内容
+tables = [r for r in result.json_result[0] if r["label"] == "table"]
+formulas = [r for r in result.json_result[0] if r["label"] == "formula"]
+titles = [r for r in result.json_result[0] if r["label"] == "title"]
+
+# 保存到磁盘（Markdown + JSON 同时保存）
+result.save(output_dir="./output")
+```
+
+### 支持的区域标签
+
+| 标签 | 说明 |
+|------|------|
+| `title` | 标题 |
+| `text` | 正文文本 |
+| `table` | 表格 |
+| `figure` | 图片 |
+| `formula` | 公式 |
+| `header` | 页眉 |
+| `footer` | 页脚 |
+| `page_number` | 页码 |
+| `reference` | 参考文献 |
+| `seal` | 印章 |
+
+## 场景 2：单独使用 PaddleClient（paddleocr 直接调用）
+
+```python
+from markitdown_paddleocr import PaddleClient
+
+client = PaddleClient(token="your-paddle-token")
+
+# 本地文件 OCR
+with open("image.png", "rb") as f:
+    markdown = client.ocr(file_bytes=f.read(), filename="image.png")
+print(markdown)
+
+# URL 模式 OCR
+markdown = client.ocr(file_url="https://example.com/document.pdf")
+print(markdown)
+```
+
+## 场景 3：MarkItDown Python API + 单个 Converter
+
+```python
+from markitdown import MarkItDown
+from markitdown_glmocr import GlmOcrConverter
+# 或 from markitdown_paddleocr import PaddleOcrConverter
+
+# glmocr
+converter = GlmOcrConverter()
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+result = md.convert("document.pdf")
+print(result.text_content)
+
+# paddleocr
+from markitdown_paddleocr import PaddleOcrConverter
+converter = PaddleOcrConverter()
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+result = md.convert("document.pdf")
+print(result.text_content)
+```
+
+## 场景 4：自定义转换器参数
+
+```python
+from markitdown import MarkItDown
+from markitdown_glmocr import GlmOcrConverter
+from markitdown_paddleocr import PaddleOcrConverter
+
+# glmocr 自定义
+glmocr_converter = GlmOcrConverter(
+    api_key="sk-xxx",
+    timeout=600,
+    enable_layout=True,
+    force_ai=True,
+)
+
+# paddleocr 自定义
+paddleocr_converter = PaddleOcrConverter(
+    token="your-token",
+    model="PaddleOCR-VL-1.5",
+    poll_interval=3.0,
+    poll_timeout=600.0,
+    force_ai=True,
+    use_chart_recognition=True,
+)
+
+# 使用 DualOcrConverter 封装
+converter = DualOcrConverter(
+    glmocr_kwargs={"api_key": "sk-xxx", "enable_layout": True},
+    paddleocr_kwargs={"token": "your-token", "use_chart_recognition": True},
+)
+markdown = converter.convert("complex_document.pdf")
+```
+
+## 场景 5：只处理图片（不经过 PDF）
+
+```python
+import glmocr
+
+# glmocr 直接对图片 OCR
+result = glmocr.parse("screenshot.png")
+print(result.markdown_result)
+
+# paddleocr 直接对图片 OCR
+from markitdown_paddleocr import PaddleClient
+client = PaddleClient(token="your-token")
+with open("photo.jpg", "rb") as f:
+    markdown = client.ocr(file_bytes=f.read(), filename="photo.jpg")
+print(markdown)
+```
+
+## 场景 6：批量处理多个文件
+
+```python
+from pathlib import Path
+
+# 使用 DualOcrConverter 批量处理（推荐）
+converter = DualOcrConverter()
+
+pdf_dir = Path("./documents")
+for pdf_file in pdf_dir.glob("*.pdf"):
+    try:
+        markdown = converter.convert(str(pdf_file))
+        output_path = pdf_dir / "output" / f"{pdf_file.stem}.md"
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path.write_text(markdown, encoding="utf-8")
+        print(f"✓ {pdf_file.name}")
+    except RuntimeError:
+        print(f"✗ {pdf_file.name} — both OCR engines failed")
+```
+
+## OCR 引擎对比
+
+| 维度 | glmocr | paddleocr |
+|------|--------|-----------|
+| API 风格 | 同步 SDK 调用 | 异步 Job 轮询（submit → poll → fetch） |
+| 认证 | `ZHIPU_API_KEY` | `BAIDU_PADDLE_TOKEN` |
+| 结果格式 | SDK 封装对象 | JSONL 流 |
+| 结构化输出 | ✅ 区域标签 + 边界框 | ❌ 仅 Markdown |
+| 表格识别 | ✅ HTML → Markdown | ✅ HTML 表格 |
+| 公式识别 | ✅ LaTeX | ✅ LaTeX |
+| 印章识别 | ✅ | ✅ |
+| 响应速度 | 快（同步） | 较慢（需轮询，2-30s） |
+| 适用场景 | 首选，结构化需求 | 降级备选，glmocr 不可用时 |
diff --git a/docs/nova-pdf-refactor-zhipu.md b/docs/nova-pdf-refactor-zhipu.md
deleted file mode 100644
index cf6b2b5ff..000000000
--- a/docs/nova-pdf-refactor-zhipu.md
+++ /dev/null
@@ -1,565 +0,0 @@
-# Nova-PDF 重构方案：使用 zai-sdk + glm-ocr
-
-## 1. 重构目标
-
-将现有的自定义 AI 服务替换为 zai-sdk + glm-ocr，简化代码并提升 OCR 能力。
-
-## 2. 技术对比
-
-| 项目 | 原方案 | 新方案 |
-|------|--------|--------|
-| SDK | requests (手动调用) | zai-sdk (官方 SDK) |
-| 模型 | 自定义 Workflow | glm-ocr |
-| 接口 | 两步上传（上传+调用） | 直接调用 layout_parsing |
-| 认证 | 双 token (upload + workflow) | 单 API key |
-| 配置 | 环境变量 | 配置文件 + 环境变量 |
-
-## 3. 接口分析
-
-### 3.1 glm-ocr API
-
-```python
-from zai import ZhipuAiClient
-
-client = ZhipuAiClient(api_key="your-api-key")
-
-# 支持图片 URL
-response = client.layout_parsing.create(
-    model="glm-ocr",
-    file="https://example.com/image.png"
-)
-
-# 支持本地文件路径
-response = client.layout_parsing.create(
-    model="glm-ocr",
-    file="/path/to/image.png"
-)
-
-# 返回结果（包含 Markdown 格式的内容）
-print(response)
-```
-
-### 3.2 响应结构
-
-```python
-# response 包含解析后的结构化内容
-# 具体字段需查看实际返回，通常包括：
-# - 文本内容
-# - 布局信息
-# - 表格识别结果
-# - Markdown 格式输出
-```
-
-## 4. 架构设计
-
-### 4.1 组件变更
-
-```
-原架构：
-┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
-│  Page Renderer  │────►│  File Uploader  │────►│  Workflow API   │
-│  (截图)          │     │  (上传获取URL)   │     │  (自定义接口)    │
-└─────────────────┘     └─────────────────┘     └─────────────────┘
-
-新架构：
-┌─────────────────┐     ┌─────────────────┐
-│  Page Renderer  │────►│   glm-ocr API   │
-│  (截图→临时文件) │     │  (layout_parsing)│
-└─────────────────┘     └─────────────────┘
-```
-
-### 4.2 文件变更清单
-
-| 文件 | 变更类型 | 说明 |
-|------|----------|------|
-| `_ai_service.py` | **重写** | 使用 zai-sdk + glm-ocr |
-| `_converter.py` | 微调 | 适配新 AIService 接口 |
-| `_plugin.py` | 微调 | 简化配置参数 |
-| `pyproject.toml` | 更新 | 添加 zai-sdk 依赖 |
-| `_config.py` | **新增** | 配置文件读取 |
-| `README.md` | 更新 | 新的使用说明 |
-
-## 5. 详细设计
-
-### 5.1 配置模块 (_config.py)
-
-```python
-"""Configuration management for nova-pdf."""
-
-import os
-from pathlib import Path
-from typing import Optional
-from dataclasses import dataclass
-
-try:
-    import tomllib  # Python 3.11+
-except ImportError:
-    import tomli as tomllib
-
-
-@dataclass
-class NovaPdfConfig:
-    """nova-pdf configuration."""
-    
-    # API 配置
-    zhipu_api_key: str = ""
-    
-    # OCR 配置
-    model: str = "glm-ocr"
-    dpi: int = 150
-    timeout: int = 120
-    
-    # 处理策略
-    force_ai: bool = False
-    
-    @classmethod
-    def load(cls, config_path: Optional[str] = None) -> "NovaPdfConfig":
-        """
-        从多个来源加载配置（优先级从高到低）：
-        1. 环境变量
-        2. 配置文件 (pyproject.toml 或 nova-pdf.toml)
-        3. 默认值
-        """
-        config = cls()
-        
-        # 1. 从配置文件加载
-        config._load_from_file(config_path)
-        
-        # 2. 环境变量覆盖
-        config._load_from_env()
-        
-        return config
-    
-    def _load_from_file(self, config_path: Optional[str] = None):
-        """从配置文件加载"""
-        # 查找配置文件
-        search_paths = []
-        
-        if config_path:
-            search_paths.append(Path(config_path))
-        
-        # 当前目录的 pyproject.toml
-        search_paths.append(Path("pyproject.toml"))
-        
-        # 当前目录的 nova-pdf.toml
-        search_paths.append(Path("nova-pdf.toml"))
-        
-        # 用户目录
-        search_paths.append(Path.home() / ".config" / "nova-pdf" / "config.toml")
-        
-        for path in search_paths:
-            if path.exists():
-                try:
-                    with open(path, "rb") as f:
-                        data = tomllib.load(f)
-                    
-                    # 读取 [tool.nova-pdf] 配置段
-                    if "tool" in data and "nova-pdf" in data["tool"]:
-                        self._apply_config(data["tool"]["nova-pdf"])
-                    elif "nova-pdf" in data:
-                        self._apply_config(data["nova-pdf"])
-                    
-                    break
-                except Exception:
-                    pass
-    
-    def _apply_config(self, data: dict):
-        """应用配置"""
-        if "api_key" in data:
-            self.zhipu_api_key = data["api_key"]
-        if "model" in data:
-            self.model = data["model"]
-        if "dpi" in data:
-            self.dpi = data["dpi"]
-        if "timeout" in data:
-            self.timeout = data["timeout"]
-        if "force_ai" in data:
-            self.force_ai = data["force_ai"]
-    
-    def _load_from_env(self):
-        """从环境变量加载（优先级最高）"""
-        if os.environ.get("NOVA_ZHIPU_API_KEY"):
-            self.zhipu_api_key = os.environ["NOVA_ZHIPU_API_KEY"]
-        if os.environ.get("NOVA_MODEL"):
-            self.model = os.environ["NOVA_MODEL"]
-        if os.environ.get("NOVA_DPI"):
-            self.dpi = int(os.environ["NOVA_DPI"])
-        if os.environ.get("NOVA_TIMEOUT"):
-            self.timeout = int(os.environ["NOVA_TIMEOUT"])
-        if os.environ.get("NOVA_FORCE_AI"):
-            self.force_ai = os.environ["NOVA_FORCE_AI"].lower() in ("true", "1", "yes")
-```
-
-### 5.2 AI 服务模块 (_ai_service.py)
-
-```python
-"""AI service using zai-sdk and glm-ocr."""
-
-import io
-import os
-import tempfile
-from dataclasses import dataclass
-from typing import BinaryIO, Optional
-
-try:
-    from zai import ZhipuAiClient
-except ImportError:
-    ZhipuAiClient = None
-
-from ._config import NovaPdfConfig
-
-
-@dataclass
-class AIResult:
-    """Result from AI conversion."""
-    text: str
-    success: bool = True
-    error: Optional[str] = None
-
-
-class AIService:
-    """
-    AI 服务 - 使用 zai-sdk + glm-ocr
-    
-    特点：
-    - 直接调用 glm-ocr 的 layout_parsing API
-    - 支持本地文件路径或图片 URL
-    - 自动处理图片格式转换
-    """
-    
-    def __init__(
-        self,
-        api_key: Optional[str] = None,
-        model: str = "glm-ocr",
-        timeout: int = 120,
-        config: Optional[NovaPdfConfig] = None,
-    ):
-        """
-        初始化 AI 服务
-        
-        Args:
-            api_key: 智谱 API Key，默认从配置读取
-            model: 模型名称，默认 glm-ocr
-            timeout: 请求超时时间（秒）
-            config: 配置对象
-        """
-        if ZhipuAiClient is None:
-            raise ImportError(
-                "zai-sdk is required for AIService. "
-                "Install with: pip install nova-pdf[zhipu]"
-            )
-        
-        # 从配置加载
-        if config:
-            self.api_key = api_key or config.zhipu_api_key
-            self.model = model or config.model
-            self.timeout = timeout or config.timeout
-        else:
-            config = NovaPdfConfig.load()
-            self.api_key = api_key or config.zhipu_api_key
-            self.model = model
-            self.timeout = timeout
-        
-        if not self.api_key:
-            raise ValueError(
-                "API key is required. Set NOVA_ZHIPU_API_KEY environment variable "
-                "or add 'api_key' to [tool.nova-pdf] in pyproject.toml"
-            )
-        
-        # 初始化客户端
-        self.client = ZhipuAiClient(api_key=self.api_key)
-    
-    def image_to_markdown(
-        self,
-        image_stream: BinaryIO,
-        filename: str = "page.png",
-    ) -> AIResult:
-        """
-        将图片转换为 Markdown
-        
-        Args:
-            image_stream: 图片流
-            filename: 文件名（用于临时文件）
-        
-        Returns:
-            AIResult: 转换结果
-        """
-        try:
-            # 方案1：保存为临时文件，传文件路径
-            with tempfile.NamedTemporaryFile(
-                suffix=".png",
-                delete=False
-            ) as tmp:
-                tmp.write(image_stream.read())
-                tmp_path = tmp.name
-            
-            image_stream.seek(0)
-            
-            # 调用 glm-ocr API
-            response = self.client.layout_parsing.create(
-                model=self.model,
-                file=tmp_path
-            )
-            
-            # 清理临时文件
-            try:
-                os.unlink(tmp_path)
-            except Exception:
-                pass
-            
-            # 解析响应
-            # 响应格式可能是字符串或对象，需要适配
-            if hasattr(response, 'content'):
-                text = response.content
-            elif hasattr(response, 'text'):
-                text = response.text
-            elif isinstance(response, str):
-                text = response
-            else:
-                text = str(response)
-            
-            return AIResult(
-                text=text.strip() if text else "",
-                success=True,
-            )
-        
-        except Exception as e:
-            return AIResult(
-                text="",
-                success=False,
-                error=str(e),
-            )
-```
-
-### 5.3 插件注册 (_plugin.py)
-
-```python
-"""Plugin registration for nova-pdf."""
-
-from typing import Any
-from markitdown import MarkItDown
-
-from ._config import NovaPdfConfig
-from ._ai_service import AIService
-from ._converter import NovaPdfConverter
-
-
-__plugin_interface_version__ = 1
-
-
-def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
-    """
-    注册 nova-pdf 转换器
-    
-    配置来源（优先级从高到低）：
-    1. kwargs 参数
-    2. 环境变量
-    3. 配置文件 (pyproject.toml)
-    4. 默认值
-    """
-    # 加载配置
-    config = NovaPdfConfig.load()
-    
-    # kwargs 覆盖配置
-    api_key = kwargs.get("api_key") or kwargs.get("zhipu_api_key") or config.zhipu_api_key
-    model = kwargs.get("model", config.model)
-    dpi = kwargs.get("dpi", config.dpi)
-    force_ai = kwargs.get("force_ai", config.force_ai)
-    timeout = kwargs.get("timeout", config.timeout)
-    
-    # 创建 AI 服务
-    ai_service = None
-    if api_key:
-        try:
-            ai_service = AIService(
-                api_key=api_key,
-                model=model,
-                timeout=timeout,
-            )
-        except Exception:
-            pass
-    
-    # 注册转换器
-    PRIORITY_NOVA_PDF = -1.0
-    
-    markitdown.register_converter(
-        NovaPdfConverter(
-            ai_service=ai_service,
-            dpi=dpi,
-            force_ai=force_ai,
-        ),
-        priority=PRIORITY_NOVA_PDF,
-    )
-```
-
-### 5.4 pyproject.toml 更新
-
-```toml
-[project]
-name = "nova-pdf"
-dependencies = [
-    "markitdown>=0.1.0",
-    "pdfminer.six>=20251230",
-    "pdfplumber>=0.11.9",
-    "Pillow>=9.0.0",
-    "tomli>=2.0.0;python_version<'3.11'",
-]
-
-[project.optional-dependencies]
-zhipu = [
-    "zai-sdk>=0.2.2",
-]
-dev = [
-    "pytest>=7.0.0",
-]
-
-[project.entry-points."markitdown.plugin"]
-nova_pdf = "nova_pdf"
-
-[tool.nova-pdf]
-# API 配置
-api_key = ""
-model = "glm-ocr"
-dpi = 150
-timeout = 120
-force_ai = false
-```
-
-## 6. 配置方式
-
-### 6.1 本地敏感配置文件（推荐）
-
-项目根目录下的 `.secrets.local` 文件存储敏感信息，此文件不会被提交到 Git：
-
-```bash
-# .secrets.local
-NOVA_ZHIPU_API_KEY="your-api-key-here"
-```
-
-使用方式：
-```bash
-# 加载敏感配置
-source .secrets.local
-
-# 或使用脚本
-source scripts/load_secrets.sh
-
-# 然后运行
-markitdown -p document.pdf
-```
-
-### 6.2 配置文件 (pyproject.toml)
-
-```toml
-[tool.nova-pdf]
-# API key 请通过环境变量或 .secrets.local 文件设置，不要硬编码
-api_key = ""
-model = "glm-ocr"
-dpi = 150
-timeout = 120
-```
-
-### 6.3 环境变量（推荐）
-
-```bash
-export NOVA_ZHIPU_API_KEY="your-api-key-here"
-export NOVA_MODEL="glm-ocr"
-export NOVA_DPI="150"
-```
-
-### 6.3 Python API
-
-```python
-from markitdown import MarkItDown
-
-md = MarkItDown(
-    enable_plugins=True,
-    api_key="your-api-key",
-)
-```
-
-### 6.4 命令行
-
-```bash
-export NOVA_ZHIPU_API_KEY="your-api-key"
-markitdown -p document.pdf
-```
-
-## 7. 使用示例
-
-```python
-from markitdown import MarkItDown
-from nova_pdf import AIService, NovaPdfConverter
-
-# 方式1：自动加载配置
-md = MarkItDown(enable_plugins=True)
-result = md.convert("document.pdf")
-
-# 方式2：手动配置
-from nova_pdf import NovaPdfConfig, AIService
-
-config = NovaPdfConfig.load()
-ai_service = AIService(
-    api_key="your-api-key",
-    model="glm-ocr",
-)
-
-converter = NovaPdfConverter(
-    ai_service=ai_service,
-    dpi=150,
-)
-
-md = MarkItDown(enable_plugins=False)
-md.register_converter(converter, priority=-1.0)
-result = md.convert("document.pdf")
-```
-
-## 8. 迁移路径
-
-### 8.1 从旧版本迁移
-
-| 旧配置 | 新配置 |
-|--------|--------|
-| `NOVA_UPLOAD_TOKEN` | `NOVA_ZHIPU_API_KEY` |
-| `NOVA_WORKFLOW_TOKEN` | （删除） |
-| `NOVA_BASE_URL` | （删除） |
-| `NOVA_APP_ID` | （删除） |
-
-### 8.2 API 兼容性
-
-- 旧版 `AIService(upload_token, workflow_token, ...)` → 废弃
-- 新版 `AIService(api_key, ...)` → 推荐
-
-## 9. 实施计划
-
-### ✅ Phase 1: 核心实现（已完成）
-- [x] 设计配置模块
-- [x] 实现 `_config.py`
-- [x] 重写 `_ai_service.py`（使用 zai-sdk + glm-ocr）
-- [x] 更新 `_plugin.py`
-
-### ✅ Phase 2: 集成测试（已完成）
-- [x] 更新 `pyproject.toml`
-- [x] 测试 glm-ocr API
-- [x] 测试插件集成
-
-### Phase 3: 文档更新（进行中）
-- [x] 更新 README.md
-- [ ] 更新技术方案文档
-- [ ] 添加迁移指南
-
-## 10. 风险与缓解
-
-| 风险 | 缓解措施 |
-|------|----------|
-| zai-sdk 接口变化 | 封装适配层，隔离 SDK 细节 |
-| glm-ocr 返回格式不确定 | 做多种格式兼容处理 |
-| 临时文件清理失败 | 使用 try-finally 确保清理 |
-| API key 泄露 | 支持环境变量，避免硬编码 |
-
-## 11. 待确认事项
-
-- [ ] glm-ocr 返回的具体数据结构
-- [ ] 是否支持直接传图片字节流（不保存临时文件）
-- [ ] 超时和重试策略
-- [ ] 并发请求限制
diff --git a/docs/nova-pdf-technical-design.md b/docs/nova-pdf-technical-design.md
deleted file mode 100644
index 25128e33a..000000000
--- a/docs/nova-pdf-technical-design.md
+++ /dev/null
@@ -1,1175 +0,0 @@
-# Nova-PDF 插件技术方案
-
-## 1. 概述
-
-### 1.1 目标
-开发一个智能 PDF 解析插件 `nova-pdf`，实现：
-- 自动检测 PDF 每页内容类型（纯文本 vs 包含图片/表格）
-- 对纯文本页面使用默认解析能力（pdfminer/pdfplumber）
-- 对包含图片/表格的页面截图后调用 AI 接口转 Markdown
-
-### 1.2 核心价值
-- **提升复杂 PDF 解析质量**：图表、扫描件等传统方法效果差的内容
-- **降低成本**：纯文本页面不调用 AI，节省 API 费用
-- **灵活配置**：支持自定义 AI 模型、分辨率、提示词等
-
----
-
-## 2. 架构设计
-
-### 2.1 插件结构
-```
-packages/nova-pdf/
-├── src/
-│   └── nova_pdf/
-│       ├── __init__.py           # 导出和版本信息
-│       ├── __about__.py          # 版本号
-│       ├── _plugin.py            # 插件注册入口
-│       ├── _converter.py         # PDF 转换器核心实现
-│       ├── _page_analyzer.py     # 页面内容分析器
-│       ├── _page_renderer.py     # 页面截图渲染器
-│       └── _ai_service.py        # AI 接口封装
-├── tests/
-│   ├── __init__.py
-│   ├── test_converter.py
-│   ├── test_analyzer.py
-│   └── fixtures/
-│       ├── text_only.pdf
-│       ├── with_images.pdf
-│       └── mixed_content.pdf
-├── pyproject.toml
-└── README.md
-```
-
-### 2.2 组件职责
-
-| 组件 | 职责 |
-|------|------|
-| `_plugin.py` | 实现 `register_converters` 入口，注册转换器 |
-| `_converter.py` | 继承 `DocumentConverter`，协调整体流程 |
-| `_page_analyzer.py` | 分析页面是否包含图片/表格 |
-| `_page_renderer.py` | 将 PDF 页面渲染为图片 |
-| `_ai_service.py` | 调用 AI Vision API 转换图片为 Markdown |
-
-### 2.3 流程图
-
-```
-┌──────────────────────────────────────────────────────────────────┐
-│                        PDF 文件输入                                │
-└──────────────────────────────────────────────────────────────────┘
-                                │
-                                ▼
-┌──────────────────────────────────────────────────────────────────┐
-│                     逐页分析 (PageAnalyzer)                        │
-│  ┌────────────────────────────────────────────────────────────┐  │
-│  │  对每一页:                                                   │  │
-│  │  1. 检测是否包含图片 (images)                                │  │
-│  │  2. 检测是否包含表格 (tables)                                 │  │
-│  │  3. 标记页面类型: PLAIN_TEXT / COMPLEX                       │  │
-│  └────────────────────────────────────────────────────────────┘  │
-└──────────────────────────────────────────────────────────────────┘
-                                │
-          ┌─────────────────────┴─────────────────────┐
-          ▼                                           ▼
-┌─────────────────────┐                    ┌─────────────────────┐
-│   PLAIN_TEXT 页面    │                    │    COMPLEX 页面      │
-│                     │                    │                     │
-│  使用默认解析:        │                    │  1. 截图渲染         │
-│  - pdfplumber 提取   │                    │  2. 调用 AI 接口     │
-│  - pdfminer 备用     │                    │  3. 转换为 Markdown   │
-└─────────────────────┘                    └─────────────────────┘
-          │                                           │
-          └─────────────────────┬─────────────────────┘
-                                │
-                                ▼
-┌──────────────────────────────────────────────────────────────────┐
-│                    合并所有页面结果                                │
-│                    输出完整 Markdown                               │
-└──────────────────────────────────────────────────────────────────┘
-```
-
----
-
-## 3. 核心算法设计
-
-### 3.1 页面内容检测 (PageAnalyzer)
-
-#### 检测策略
-```python
-class PageType(Enum):
-    PLAIN_TEXT = "plain_text"      # 纯文本，使用默认解析
-    HAS_IMAGES = "has_images"      # 包含图片
-    HAS_TABLES = "has_tables"      # 包含表格
-    COMPLEX = "complex"            # 复杂内容（图片+表格+混合）
-```
-
-#### 图片检测方法
-使用 **pdfplumber** 的页面对象检测：
-
-```python
-def detect_images(page) -> bool:
-    """检测页面是否包含图片"""
-    # 方法1: 直接检测 page.images
-    if hasattr(page, 'images') and len(page.images) > 0:
-        return True
-
-    # 方法2: 检测页面对象中的图像资源
-    if hasattr(page, 'objects'):
-        if 'image' in page.objects and len(page.objects['image']) > 0:
-            return True
-        # 检测 XObject (可能包含内嵌图像)
-        if 'xobject' in page.objects and len(page.objects['xobject']) > 0:
-            for obj in page.objects['xobject']:
-                if obj.get('subtype') == 'Image':
-                    return True
-
-    # 方法3: 检测页面资源字典
-    try:
-        if hasattr(page.page, 'get_resources'):
-            resources = page.page.get_resources()
-            if resources and 'XObject' in resources:
-                return True
-    except Exception:
-        pass
-
-    return False
-```
-
-#### 表格检测方法
-```python
-def detect_tables(page) -> bool:
-    """检测页面是否包含表格"""
-    # 方法1: 使用 pdfplumber 的 extract_tables
-    tables = page.extract_tables()
-    if tables and len(tables) > 0:
-        # 过滤空表格
-        for table in tables:
-            if table and any(any(cell for cell in row) for row in table):
-                return True
-
-    # 方法2: 检测表格线（边框线）
-    if hasattr(page, 'objects') and 'line' in page.objects:
-        lines = page.objects['line']
-        if len(lines) > 10:  # 大量线条可能构成表格
-            # 分析线条是否形成网格结构
-            h_lines = [l for l in lines if l.get('height', 1) < 2]
-            v_lines = [l for l in lines if l.get('width', 1) < 2]
-            if len(h_lines) > 2 and len(v_lines) > 2:
-                return True
-
-    return False
-```
-
-#### 综合判断
-```python
-def analyze_page(page) -> PageType:
-    """分析页面类型"""
-    has_images = detect_images(page)
-    has_tables = detect_tables(page)
-
-    if has_images and has_tables:
-        return PageType.COMPLEX
-    elif has_images:
-        return PageType.HAS_IMAGES
-    elif has_tables:
-        return PageType.HAS_TABLES
-    else:
-        return PageType.PLAIN_TEXT
-```
-
-### 3.2 页面截图渲染 (PageRenderer)
-
-#### 技术选型
-
-使用 **pdfplumber.to_image**，理由：
-- 已是项目依赖，无需额外安装
-- 实现简单，代码量少
-- 底层使用 PIL，满足需求
-
-#### 实现方案
-```python
-import io
-
-def render_page_to_image(page, dpi: int = 150) -> io.BytesIO:
-    """
-    将 PDF 页面渲染为图片
-
-    Args:
-        page: pdfplumber 页面对象
-        dpi: 渲染分辨率，默认 150（平衡质量和速度）
-
-    Returns:
-        BytesIO: PNG 图片流
-    """
-    # 使用 pdfplumber 的 to_image 方法
-    page_image = page.to_image(resolution=dpi)
-
-    # 转换为 BytesIO
-    img_stream = io.BytesIO()
-    page_image.original.save(img_stream, format="PNG")
-    img_stream.seek(0)
-
-    return img_stream
-```
-
-#### DPI 推荐值
-```python
-DPI_SETTINGS = {
-    "low": 72,      # 快速预览，文件小
-    "medium": 150,  # 平衡质量和速度（默认）
-    "high": 300,   # 高质量，适合复杂图表
-}
-```
-
-### 3.3 AI 接口调用 (AIService)
-
-#### 复用 markitdown 的 LLM 客户端机制
-```python
-from markitdown.converters._llm_caption import llm_caption
-
-class AIService:
-    """AI Vision 服务封装"""
-
-    def __init__(
-        self,
-        client,                    # OpenAI 兼容客户端
-        model: str = "gpt-4o",     # 模型名称
-        prompt: str | None = None, # 自定义提示词
-    ):
-        self.client = client
-        self.model = model
-        self.prompt = prompt or self._default_prompt()
-
-    def _default_prompt(self) -> str:
-        return """请将这张图片的内容转换为 Markdown 格式。
-
-要求：
-1. 保持原有的文档结构（标题、段落、列表等）
-2. 表格使用 Markdown 表格语法
-3. 图片中的文字清晰转写
-4. 数学公式使用 LaTeX 语法
-5. 如有图表，用文字描述其内容
-6. 不要添加任何额外的解释或评论"""
-
-    def image_to_markdown(
-        self,
-        image_stream: io.BytesIO,
-        stream_info: StreamInfo,
-    ) -> str:
-        """调用 AI 将图片转为 Markdown"""
-        result = llm_caption(
-            image_stream,
-            stream_info,
-            client=self.client,
-            model=self.model,
-            prompt=self.prompt,
-        )
-        return result or ""
-```
-
----
-
-## 4. 转换器实现 (_converter.py)
-
-### 4.1 核心流程
-```python
-class NovaPdfConverter(DocumentConverter):
-    """智能 PDF 转换器"""
-
-    def __init__(
-        self,
-        ai_service: AIService | None = None,
-        dpi: int = 150,
-        force_ai: bool = False,  # 强制所有页面使用 AI
-    ):
-        self.ai_service = ai_service
-        self.dpi = dpi
-        self.force_ai = force_ai
-
-    def convert(
-        self,
-        file_stream: BinaryIO,
-        stream_info: StreamInfo,
-        **kwargs: Any,
-    ) -> DocumentConverterResult:
-        # 读取 PDF
-        pdf_stream = io.BytesIO(file_stream.read())
-
-        markdown_parts = []
-
-        with pdfplumber.open(pdf_stream) as pdf:
-            for page_num, page in enumerate(pdf.pages):
-                # 分析页面类型
-                page_type = analyze_page(page)
-
-                # 根据类型选择处理方式
-                if self.force_ai or page_type != PageType.PLAIN_TEXT:
-                    # 复杂内容：截图 + AI
-                    if self.ai_service:
-                        img = render_page_to_image(page, self.dpi)
-                        md = self.ai_service.image_to_markdown(img, StreamInfo())
-                    else:
-                        # 无 AI 服务，回退到默认解析
-                        md = page.extract_text() or ""
-                else:
-                    # 纯文本：默认解析
-                    md = page.extract_text() or ""
-
-                if md.strip():
-                    markdown_parts.append(f"## Page {page_num + 1}\n\n{md}")
-
-        return DocumentConverterResult(
-            markdown="\n\n".join(markdown_parts),
-        )
-```
-
----
-
-## 5. 配置选项
-
-### 5.1 初始化参数
-```python
-class NovaPdfConfig:
-    """nova-pdf 配置"""
-
-    # AI 服务配置
-    llm_client: Any = None           # OpenAI 兼容客户端（必需）
-    llm_model: str = "gpt-4o"        # 模型名称
-    llm_prompt: str | None = None   # 自定义提示词
-
-    # 渲染配置
-    dpi: int = 150                   # 截图分辨率
-    image_format: str = "png"       # 图片格式
-
-    # 处理策略
-    force_ai: bool = False          # 强制所有页面使用 AI
-    skip_tables: bool = False       # 跳过表格检测（表格用默认解析）
-    skip_images: bool = False       # 跳过图片检测（图片用默认解析）
-
-    # 性能配置
-    max_concurrent: int = 5          # 并发请求数
-    timeout: int = 60                # 单页 AI 调用超时（秒）
-```
-
-### 5.2 使用示例
-```python
-from openai import OpenAI
-from markitdown import MarkItDown
-
-# 初始化 LLM 客户端
-client = OpenAI(api_key="your-api-key")
-
-# 创建 MarkItDown 实例并启用 nova-pdf 插件
-md = MarkItDown(
-    enable_plugins=True,
-    llm_client=client,
-    llm_model="gpt-4o",
-)
-
-# 转换 PDF
-result = md.convert("complex_document.pdf")
-print(result.markdown)
-```
-
----
-
-## 6. 依赖管理
-
-### 6.1 pyproject.toml
-```toml
-[project]
-name = "nova-pdf"
-dependencies = [
-    "markitdown>=0.1.0",
-    "pdfminer.six>=20251230",
-    "pdfplumber>=0.11.9",   # 页面解析和截图渲染
-    "Pillow>=9.0.0",        # 图像处理（pdfplumber.to_image 底层依赖）
-]
-
-[project.optional-dependencies]
-dev = [
-    "pytest>=7.0.0",
-    "pytest-asyncio>=0.21.0",
-]
-
-# 插件入口点
-[project.entry-points."markitdown.plugin"]
-nova_pdf = "nova_pdf"
-```
-
----
-
-## 7. 错误处理
-
-### 7.1 降级策略
-```python
-def convert_with_fallback(
-    self,
-    pdf_bytes: bytes,
-    page_num: int,
-    page_type: PageType,
-) -> str:
-    """带降级的转换"""
-
-    # 尝试 AI 转换
-    if self.ai_service and page_type != PageType.PLAIN_TEXT:
-        try:
-            img = render_page_to_image(pdf_bytes, page_num, self.dpi)
-            result = self.ai_service.image_to_markdown(img, StreamInfo())
-            if result.strip():
-                return result
-        except AIServiceError as e:
-            logger.warning(f"AI 转换失败，降级到默认解析: {e}")
-
-    # 降级到默认解析
-    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
-        page = pdf.pages[page_num]
-        text = page.extract_text() or ""
-
-        # 尝试提取表格
-        tables = page.extract_tables()
-        if tables:
-            for table in tables:
-                text += "\n\n" + self._table_to_markdown(table)
-
-        return text
-```
-
----
-
-## 8. 性能优化
-
-### 8.1 异步处理
-```python
-import asyncio
-from typing import List
-
-async def convert_pages_async(
-    self,
-    pdf_bytes: bytes,
-    pages: List[PageInfo],
-) -> List[str]:
-    """异步并发处理多页"""
-
-    async def process_page(page_info: PageInfo) -> str:
-        if page_info.type == PageType.PLAIN_TEXT:
-            return self._extract_text(pdf_bytes, page_info.num)
-        else:
-            return await self._ai_convert_async(pdf_bytes, page_info.num)
-
-    # 使用信号量限制并发
-    semaphore = asyncio.Semaphore(self.max_concurrent)
-
-    async def limited_process(page_info):
-        async with semaphore:
-            return await process_page(page_info)
-
-    tasks = [limited_process(p) for p in pages]
-    return await asyncio.gather(*tasks)
-```
-
-### 8.2 缓存机制
-```python
-from functools import lru_cache
-import hashlib
-
-class CachedAIService(AIService):
-    """带缓存的 AI 服务"""
-
-    @lru_cache(maxsize=100)
-    def _get_cache_key(self, image_hash: str) -> str | None:
-        """获取缓存结果"""
-        # 可接入 Redis 等
-        pass
-
-    def image_to_markdown(self, image_stream: io.BytesIO, ...) -> str:
-        # 计算图片哈希
-        image_hash = hashlib.md5(image_stream.read()).hexdigest()
-        image_stream.seek(0)
-
-        # 检查缓存
-        cached = self._get_cache_key(image_hash)
-        if cached:
-            return cached
-
-        # 调用 AI
-        result = super().image_to_markdown(image_stream, ...)
-
-        # 存入缓存
-        self._cache_result(image_hash, result)
-        return result
-```
-
----
-
-## 9. 测试策略
-
-### 9.1 测试用例设计
-```python
-class TestNovaPdfConverter:
-    """nova-pdf 转换器测试"""
-
-    def test_plain_text_pdf(self):
-        """纯文本 PDF 应使用默认解析"""
-        pass
-
-    def test_pdf_with_images(self):
-        """包含图片的 PDF 应调用 AI"""
-        pass
-
-    def test_pdf_with_tables(self):
-        """包含表格的 PDF 应调用 AI"""
-        pass
-
-    def test_mixed_content_pdf(self):
-        """混合内容应正确区分处理"""
-        pass
-
-    def test_ai_service_fallback(self):
-        """AI 服务失败时应降级"""
-        pass
-
-    def test_dpi_settings(self):
-        """不同 DPI 设置的渲染质量"""
-        pass
-
-    def test_concurrent_processing(self):
-        """并发处理性能测试"""
-        pass
-```
-
----
-
-## 10. 扩展性设计
-
-### 10.1 自定义页面分析器
-```python
-class PageAnalyzerPlugin(ABC):
-    """页面分析器插件接口"""
-
-    @abstractmethod
-    def analyze(self, page) -> PageType:
-        """分析页面类型"""
-        pass
-
-# 允许用户注入自定义分析器
-class NovaPdfConverter(DocumentConverter):
-    def __init__(
-        self,
-        page_analyzer: PageAnalyzerPlugin | None = None,
-        ...
-    ):
-        self.page_analyzer = page_analyzer or DefaultPageAnalyzer()
-```
-
-### 10.2 自定义 AI 提示词模板
-```python
-PROMPT_TEMPLATES = {
-    "default": "...",
-    "academic": "学术论文模板...",
-    "financial": "财务报表模板...",
-    "legal": "法律文档模板...",
-}
-
-class AIService:
-    def __init__(self, prompt_template: str = "default", ...):
-        self.prompt = PROMPT_TEMPLATES.get(prompt_template, PROMPT_TEMPLATES["default"])
-```
-
----
-
-## 11. 风险与缓解措施
-
-| 风险 | 影响 | 缓解措施 |
-|------|------|----------|
-| AI API 调用失败 | 转换中断 | 实现降级策略，回退到默认解析 |
-| 大文件内存溢出 | 程序崩溃 | 分页处理，控制内存占用 |
-| AI 响应慢 | 用户体验差 | 异步处理、进度反馈、超时控制 |
-| 解析质量不稳定 | 输出错误 | 多模型对比、人工审核机制 |
-| API 费用过高 | 成本失控 | 智能跳过纯文本页面、缓存机制 |
-
----
-
-## 12. 实施计划
-
-### ✅ Phase 1: 基础框架（已完成）
-- [x] 创建项目结构
-- [x] 实现插件注册入口
-- [x] 实现基础转换器框架
-
-### ✅ Phase 2: 核心功能（已完成）
-- [x] 实现页面内容检测 (`_page_analyzer.py`)
-- [x] 实现页面截图渲染 (`_page_renderer.py`)
-- [x] 实现 AI 服务接口 (`_ai_service.py`)
-- [x] 实现完整转换流程 (`_converter.py`)
-
-### ⏳ Phase 3: 测试与优化（待进行）
-- [ ] 运行单元测试
-- [ ] 添加测试 PDF 样本
-- [ ] 性能测试和优化
-
-### ⏳ Phase 4: 文档与发布（待进行）
-- [x] 编写 README 和使用文档
-- [x] 准备示例代码
-- [ ] 打包发布
-
----
-
-## 代码结构
-
-```
-packages/nova-pdf/
-├── src/nova_pdf/
-│   ├── __about__.py          # 版本号 (0.1.0)
-│   ├── __init__.py           # 导出 register_converters
-│   ├── _plugin.py            # 插件注册入口
-│   ├── _converter.py         # PDF 转换器核心
-│   ├── _page_analyzer.py     # 图片/表格检测
-│   ├── _page_renderer.py     # 页面截图 (pdfplumber.to_image)
-│   └── _ai_service.py        # AI 接口封装（两步上传）
-├── tests/
-│   ├── test_analyzer.py      # 分析器测试
-│   ├── test_converter.py     # 转换器测试
-│   └── test_ai_service.py    # AI 服务测试
-├── pyproject.toml            # 项目配置 + nova-pdf 配置
-└── README.md                 # 使用文档
-```
-
-**语法验证**: ✓ 所有 Python 文件通过语法检查
-
----
-
-## 15. 改造完成总结
-
-### 15.1 主要变更
-
-| 文件 | 变更内容 |
-|------|----------|
-| `_ai_service.py` | 重写为两步调用：上传 → Workflow |
-| `_plugin.py` | 适配新 AIService 初始化参数 |
-| `_converter.py` | 传递文件名给 AI 服务 |
-| `pyproject.toml` | 添加 `[tool.nova-pdf]` 配置段 |
-| `README.md` | 更新环境变量和配置说明 |
-| `tests/test_ai_service.py` | 新增 AI 服务测试（13 个用例）|
-
-### 15.2 环境变量
-
-```bash
-export NOVA_UPLOAD_TOKEN="your-fastgpt-token"      # 必需
-export NOVA_WORKFLOW_TOKEN="your-workflow-token"  # 必需
-export NOVA_BASE_URL="https://xny-test.glodon.com/jsf-ai"  # 可选
-export NOVA_APP_ID="69fc37113fedac1eaaf65c82"     # 可选
-```
-
-### 15.3 快速开始
-
-```python
-from markitdown import MarkItDown
-
-# 启用插件
-md = MarkItDown(enable_plugins=True)
-
-# 转换 PDF（复杂页面自动调用 AI）
-result = md.convert("document.pdf")
-print(result.markdown)
-```
-
-### 15.4 实测结果
-
-**测试图片**: `数位顺序表.png` (22KB)
-
-**测试结果**: ✓ 成功转换
-
-```markdown
-|  | 整数部分 | | | | | | | 小数部分 | | | | |
-|:---:|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
-| 数位 | ...... | 万位 | 千位 | 百位 | 十位 | 个位 | . | 十分位 | 百分位 | 千分位 | 万分位 | ...... |
-| 单位 | ...... | 万 | 千 | 百 | 十 | 个 | | 十分之一 0.1 | 百分之一 0.01 | 千分之一 0.001 | 万分之一 0.0001 | ...... |
-```
-
-**关键修正**:
-1. 上传接口返回 `code: 200`（不是 0）
-2. Workflow 接口需要 `messages` 字段（OpenAI 兼容格式）
-3. SSL 验证跳过（`verify=False`）以适配内部 API
-
----
-
-## 13. 附录
-
-### 13.1 参考实现
-- `markitdown-ocr`: 已有的 OCR 插件，可参考架构
-- `markitdown-sample-plugin`: 官方插件示例
-- `_pdf_converter.py`: 默认 PDF 转换器实现
-
-### 13.2 关键代码参考
-```python
-# 参考 markitdown-ocr 的插件注册方式
-def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
-    PRIORITY_NOVA_PDF = -1.0  # 优先于默认 PDF 转换器
-
-    llm_client = kwargs.get("llm_client")
-    llm_model = kwargs.get("llm_model", "gpt-4o")
-
-    ai_service = None
-    if llm_client:
-        ai_service = AIService(client=llm_client, model=llm_model)
-
-    markitdown.register_converter(
-        NovaPdfConverter(ai_service=ai_service),
-        priority=PRIORITY_NOVA_PDF,
-    )
-
-# 页面截图渲染（简化版）
-def render_page_to_image(page, dpi: int = 150) -> io.BytesIO:
-    """使用 pdfplumber.to_image 渲染页面"""
-    page_image = page.to_image(resolution=dpi)
-    img_stream = io.BytesIO()
-    page_image.original.save(img_stream, format="PNG")
-    img_stream.seek(0)
-    return img_stream
-```
-
----
-
-## 14. AI 接口改造方案（自定义两步调用）
-
-### 14.1 背景
-
-原方案使用 OpenAI 兼容的 base64 图片上传方式，现需改造为自定义两步流程：
-1. 上传图片到文件服务，获取 URL
-2. 调用 Workflow 接口处理图片
-
-### 14.2 接口分析
-
-#### Step 1: 文件上传接口
-
-**请求**
-```
-POST https://xny-test.glodon.com/jsf-ai/api/common/file/upload
-Content-Type: multipart/form-data
-Cookie: fastgpt_token=<token>
-```
-
-**表单参数**
-| 字段 | 类型 | 必填 | 说明 |
-|------|------|------|------|
-| metadata | string | ✓ | JSON 字符串，如 `{"chatId":"<uuid>"}`，每次动态生成 |
-| bucketName | string | ✓ | 固定值 `chat` |
-| file | binary | ✓ | 图片文件（PNG/JPEG） |
-| data | string | ✓ | JSON 字符串，如 `{"appId":"69fc37113fedac1eaaf65c82"}` |
-
-**响应示例**
-```json
-{
-  "code": 200,
-  "data": {
-    "previewUrl": "https://xny-test.glodon.com/jsf-ai/api/common/file/read/xxx.png?token=...",
-    "fileId": "69fc42e024457b47b7e22b4a"
-  }
-}
-```
-
-> 注意：接口返回 `code: 200` 表示成功（不是 0）
-
-#### Step 2: Workflow 调用接口
-
-**请求**
-```
-POST https://xny-test.glodon.com/jsf-ai/api/v1/chat/completions
-Content-Type: application/json
-Authorization: Bearer <workflow_image2markdown_key>
-```
-
-**请求体**（OpenAI 兼容格式）
-```json
-{
-  "messages": [
-    {
-      "role": "user",
-      "content": [
-        {"type": "text", "text": "请将这张图片的内容转换为 Markdown 格式。"},
-        {"type": "image_url", "image_url": {"url": "https://...previewUrl..."}}
-      ]
-    }
-  ]
-}
-```
-
-**响应示例**（OpenAI 兼容格式）
-```json
-{
-  "id": "",
-  "model": "",
-  "choices": [
-    {
-      "message": {
-        "role": "assistant",
-        "content": "| 数位顺序表 |\n|---|"
-      }
-    }
-  ]
-}
-```
-
-> 注意：Workflow 接口使用 OpenAI 兼容的消息格式，需要 `messages` 字段
-
-### 14.3 改造后的 AIService
-
-```python
-"""AI service with custom two-step API calls."""
-
-import io
-import json
-import requests
-from dataclasses import dataclass
-from typing import Any, BinaryIO, Optional
-
-
-@dataclass
-class AIResult:
-    """Result from AI conversion."""
-    text: str
-    success: bool = True
-    error: Optional[str] = None
-
-
-class AIService:
-    """
-    AI 服务 - 自定义两步调用方式
-
-    流程：
-    1. 上传图片到文件服务，获取 previewUrl
-    2. 调用 Workflow 接口，传入 fileUrls 参数
-    """
-
-    def __init__(
-        self,
-        base_url: str = "https://xny-test.glodon.com/jsf-ai",
-        upload_token: str = "",           # fastgpt_token (Cookie)
-        workflow_token: str = "",         # workflow_image2markdown_key (Authorization)
-        chat_id: str = "",                # 用于上传接口的 chatId
-        app_id: str = "",                 # 用于上传接口的 appId
-        timeout: int = 60,
-    ):
-        """
-        初始化 AI 服务
-
-        Args:
-            base_url: API 基础地址
-            upload_token: 文件上传认证 token（fastgpt_token）
-            workflow_token: Workflow 接口认证 token
-            chat_id: 会话 ID
-            app_id: 应用 ID
-            timeout: 请求超时时间（秒）
-        """
-        self.base_url = base_url.rstrip("/")
-        self.upload_token = upload_token
-        self.workflow_token = workflow_token
-        self.chat_id = chat_id
-        self.app_id = app_id
-        self.timeout = timeout
-
-    def image_to_markdown(
-        self,
-        image_stream: BinaryIO,
-        filename: str = "page.png",
-    ) -> AIResult:
-        """
-        将图片转换为 Markdown（两步调用）
-
-        Args:
-            image_stream: 图片流
-            filename: 文件名
-
-        Returns:
-            AIResult: 转换结果
-        """
-        try:
-            # Step 1: 上传图片
-            upload_result = self._upload_file(image_stream, filename)
-            if not upload_result["success"]:
-                return AIResult(
-                    text="",
-                    success=False,
-                    error=f"Upload failed: {upload_result.get('error')}"
-                )
-
-            file_url = upload_result["preview_url"]
-
-            # Step 2: 调用 Workflow
-            workflow_result = self._call_workflow(file_url)
-            if not workflow_result["success"]:
-                return AIResult(
-                    text="",
-                    success=False,
-                    error=f"Workflow failed: {workflow_result.get('error')}"
-                )
-
-            return AIResult(
-                text=workflow_result["text"],
-                success=True,
-            )
-
-        except Exception as e:
-            return AIResult(
-                text="",
-                success=False,
-                error=str(e),
-            )
-
-    def _upload_file(
-        self,
-        image_stream: BinaryIO,
-        filename: str,
-    ) -> dict:
-        """
-        上传文件到文件服务
-
-        Args:
-            image_stream: 图片流
-            filename: 文件名
-
-        Returns:
-            dict: {"success": bool, "preview_url": str, "error": str}
-        """
-        url = f"{self.base_url}/api/common/file/upload"
-
-        # 准备 multipart/form-data
-        files = {
-            "file": (filename, image_stream, "image/png")
-        }
-
-        data = {
-            "metadata": json.dumps({"chatId": self.chat_id}),
-            "bucketName": "chat",
-            "data": json.dumps({"appId": self.app_id}),
-        }
-
-        headers = {
-            "Cookie": f"fastgpt_token={self.upload_token}",
-        }
-
-        try:
-            response = requests.post(
-                url,
-                files=files,
-                data=data,
-                headers=headers,
-                timeout=self.timeout,
-            )
-            response.raise_for_status()
-
-            result = response.json()
-
-            if result.get("code") == 0 and result.get("data", {}).get("previewUrl"):
-                return {
-                    "success": True,
-                    "preview_url": result["data"]["previewUrl"],
-                }
-            else:
-                return {
-                    "success": False,
-                    "error": result.get("message", "Unknown error"),
-                }
-
-        except requests.RequestException as e:
-            return {
-                "success": False,
-                "error": str(e),
-            }
-
-    def _call_workflow(self, file_url: str) -> dict:
-        """
-        调用 Workflow 接口处理图片
-
-        Args:
-            file_url: 文件 URL
-
-        Returns:
-            dict: {"success": bool, "text": str, "error": str}
-        """
-        url = f"{self.base_url}/api/v1/chat/completions"
-
-        headers = {
-            "Authorization": f"Bearer {self.workflow_token}",
-            "Content-Type": "application/json",
-        }
-
-        payload = {
-            "fileUrls": [file_url],
-        }
-
-        try:
-            response = requests.post(
-                url,
-                json=payload,
-                headers=headers,
-                timeout=self.timeout,
-            )
-            response.raise_for_status()
-
-            result = response.json()
-
-            # 解析 OpenAI 兼容响应格式
-            choices = result.get("choices", [])
-            if choices:
-                content = choices[0].get("message", {}).get("content", "")
-                return {
-                    "success": True,
-                    "text": content.strip(),
-                }
-            else:
-                return {
-                    "success": False,
-                    "error": "No response content",
-                }
-
-        except requests.RequestException as e:
-            return {
-                "success": False,
-                "error": str(e),
-            }
-```
-
-### 14.4 使用示例
-
-```python
-from markitdown import MarkItDown
-from nova_pdf import AIService, NovaPdfConverter
-
-# 创建自定义 AI 服务
-ai_service = AIService(
-    base_url="https://xny-test.glodon.com/jsf-ai",
-    upload_token="<your-fastgpt-token>",  # fastgpt_token
-    workflow_token="your-workflow-token",
-    chat_id="tv1cyJFTt4wEKLqTKEx1KPEN",
-    app_id="69fc37113fedac1eaaf65c82",
-    timeout=120,
-)
-
-# 创建转换器
-converter = NovaPdfConverter(
-    ai_service=ai_service,
-    dpi=150,
-)
-
-# 手动注册
-md = MarkItDown(enable_plugins=False)
-md.register_converter(converter, priority=-1.0)
-
-# 转换 PDF
-result = md.convert("document.pdf")
-print(result.markdown)
-```
-
-### 14.5 配置参数说明
-
-| 参数 | 类型 | 必填 | 说明 |
-|------|------|------|------|
-| `base_url` | str | ✓ | API 基础地址 |
-| `upload_token` | str | ✓ | 文件上传认证 token（fastgpt_token） |
-| `workflow_token` | str | ✓ | Workflow 接口认证 token |
-| `chat_id` | str | ✓ | 会话 ID（用于上传接口） |
-| `app_id` | str | ✓ | 应用 ID（用于上传接口） |
-| `timeout` | int | | 超时时间，默认 60 秒 |
-
-### 14.6 错误处理
-
-```python
-def image_to_markdown(self, image_stream, filename="page.png") -> AIResult:
-    """带完善错误处理的转换"""
-    try:
-        # Step 1: 上传
-        upload_result = self._upload_file(image_stream, filename)
-        if not upload_result["success"]:
-            # 上传失败，返回详细错误
-            return AIResult(
-                text="",
-                success=False,
-                error=f"上传失败: {upload_result.get('error')}"
-            )
-
-        # Step 2: Workflow
-        workflow_result = self._call_workflow(upload_result["preview_url"])
-        if not workflow_result["success"]:
-            # Workflow 失败，返回详细错误
-            return AIResult(
-                text="",
-                success=False,
-                error=f"AI 处理失败: {workflow_result.get('error')}"
-            )
-
-        return AIResult(
-            text=workflow_result["text"],
-            success=True,
-        )
-
-    except requests.Timeout:
-        return AIResult(
-            text="",
-            success=False,
-            error="请求超时，请检查网络或增加 timeout 设置"
-        )
-    except requests.ConnectionError:
-        return AIResult(
-            text="",
-            success=False,
-            error="网络连接失败，请检查网络设置"
-        )
-    except json.JSONDecodeError:
-        return AIResult(
-            text="",
-            success=False,
-            error="响应解析失败，接口返回非 JSON 格式"
-        )
-    except Exception as e:
-        return AIResult(
-            text="",
-            success=False,
-            error=f"未知错误: {str(e)}"
-        )
-```
-
-### 14.7 与原方案的对比
-
-| 对比项 | 原方案（base64） | 新方案（两步上传） |
-|--------|-----------------|-------------------|
-| 图片传输 | base64 内嵌 | URL 引用 |
-| 请求大小 | 大（含图片数据） | 小（仅 URL） |
-| 适用场景 | 小图片 | 大图片、多图片 |
-| 依赖 | OpenAI SDK | requests |
-| 认证方式 | API Key | Token + Cookie |
-| 接口格式 | OpenAI 标准 | 自定义 |
-
-### 14.8 配置确认
-
-- [x] ~~`chat_id` 是否需要每次动态生成？~~ **是的，每次生成 UUID**
-- [x] ~~`app_id` 是否固定？~~ **是的，固定值**
-- [x] ~~`workflow_image2markdown_key` 如何获取？~~ **在 pyproject.toml 中配置**
-- [x] ~~是否需要支持并发上传？~~ **否**
-
-### 14.9 配置文件设计
-
-**pyproject.toml 新增配置项**
-```toml
-[project.optional-dependencies]
-nova-api = [
-  "requests>=2.28.0",
-]
-
-[tool.nova-pdf]
-# AI 服务配置
-base_url = "https://xny-test.glodon.com/jsf-ai"
-app_id = "69fc37113fedac1eaaf65c82"
-timeout = 120
-
-# 认证配置（建议通过环境变量覆盖）
-# upload_token = ""    # 环境变量: NOVA_UPLOAD_TOKEN
-# workflow_token = ""  # 环境变量: NOVA_WORKFLOW_TOKEN
-```
-
-**环境变量**
-- `NOVA_UPLOAD_TOKEN`: 上传接口认证 token (fastgpt_token)
-- `NOVA_WORKFLOW_TOKEN`: Workflow 接口认证 token
-- `NOVA_BASE_URL`: API 基础地址（可选，覆盖配置文件）
-- `NOVA_APP_ID`: 应用 ID（可选，覆盖配置文件）
diff --git a/docs/paddleocr-plugin-design.md b/docs/paddleocr-plugin-design.md
new file mode 100644
index 000000000..8adeb8cfa
--- /dev/null
+++ b/docs/paddleocr-plugin-design.md
@@ -0,0 +1,102 @@
+# markitdown-paddleocr 方案设计
+
+## 概述
+
+基于百度 PaddleOCR 云端 API 实现的 markitdown OCR 插件，参考 markitdown-glmocr 架构。
+
+## 与 glmocr 的核心差异
+
+| 维度 | glmocr | paddleocr |
+|------|--------|-----------|
+| API 风格 | 同步 SDK 调用 | 异步 Job 轮询（submit → poll → fetch result） |
+| 认证 | `ZHIPU_API_KEY` | `BAIDU_PADDLE_TOKEN` (bearer token) |
+| 结果格式 | SDK 封装对象 | JSONL 流（逐行 JSON，含 layoutParsingResults） |
+| 图片处理 | SDK 内置 base64 编码 | 需手动上传文件或传 fileUrl |
+| 模型 | glm-ocr | PaddleOCR-VL-1.5 |
+
+## 架构
+
+```
+markitdown-paddleocr/
+├── pyproject.toml
+├── README.md
+└── src/markitdown_paddleocr/
+    ├── __init__.py          # 导出 + __plugin_interface_version__
+    ├── __about__.py         # __version__
+    ├── _config.py           # PaddleOcrConfig dataclass
+    ├── _paddle_client.py    # PaddleOCR API 客户端（submit/poll/fetch）
+    ├── _converter.py        # PaddleOcrConverter(DocumentConverter)
+    └── _plugin.py           # register_converters 入口
+```
+
+## 核心流程
+
+```
+文件输入 (PDF/图片)
+    │
+    ▼
+PaddleOcrConverter.convert()
+    │
+    ├─ 图片文件 ──► _convert_image() ──► PaddleClient.ocr() ──► markdown
+    │
+    └─ PDF 文件 ──► _convert_pdf()
+          │
+          ├─ 逐页分析 (pdfplumber)
+          ├─ 纯文本页 ──► pdfplumber 提取
+          └─ 复杂页 ──► 渲染为图片 ──► PaddleClient.ocr() ──► markdown
+```
+
+## PaddleClient 核心逻辑
+
+```python
+class PaddleClient:
+    JOB_URL = "https://paddleocr.aistudio-app.com/api/v2/ocr/jobs"
+
+    def ocr(self, file_bytes, filename=None, file_url=None) -> str:
+        # 1. 提交 Job（本地文件用 multipart，URL 用 JSON）
+        job_id = self._submit(file_bytes, filename, file_url)
+        # 2. 轮询 Job 状态（pending → running → done）
+        result_url = self._poll(job_id)
+        # 3. 获取 JSONL 结果，拼接 markdown
+        return self._fetch_markdown(result_url)
+```
+
+## 关键设计决策
+
+1. **异步轮询间隔**: 默认 2s，可配置，最大等待 300s
+2. **PDF 处理策略**: 与 glmocr 一致，纯文本页用 pdfplumber，复杂页用 OCR
+3. **图片上传**: 使用 multipart/form-data 上传本地文件；支持 fileUrl 模式
+4. **结果解析**: 从 JSONL 的 `layoutParsingResults[].markdown.text` 提取 markdown
+5. **环境变量**: `BAIDU_PADDLE_TOKEN`（必需），`PADDLE_OCR_MODEL`（默认 PaddleOCR-VL-1.5）
+6. **可选参数**: `useDocOrientationClassify`, `useDocUnwarping`, `useChartRecognition`
+
+## 依赖
+
+```
+markitdown>=0.1.0
+pdfminer.six>=20251230
+pdfplumber>=0.11.9
+Pillow>=9.0.0
+requests>=2.28.0
+```
+
+## 入口点
+
+```toml
+[project.entry-points."markitdown.plugin"]
+markitdown_paddleocr = "markitdown_paddleocr"
+```
+
+## 使用方式
+
+```bash
+# 环境变量
+export BAIDU_PADDLE_TOKEN="your-token"
+
+# CLI
+markitdown -p document.pdf
+
+# Python
+from markitdown_paddleocr import PaddleOcrConverter
+converter = PaddleOcrConverter(token="your-token")
+```
diff --git "a/docs/panddle\347\244\272\344\276\213\344\273\243\347\240\201.md" "b/docs/panddle\347\244\272\344\276\213\344\273\243\347\240\201.md"
new file mode 100644
index 000000000..b1d68059a
--- /dev/null
+++ "b/docs/panddle\347\244\272\344\276\213\344\273\243\347\240\201.md"
@@ -0,0 +1,122 @@
+# Please make sure the requests library is installed
+# pip install requests
+import json
+import os
+import requests
+import sys
+import time
+
+JOB_URL = "https://paddleocr.aistudio-app.com/api/v2/ocr/jobs"
+TOKEN = "7963b85a6bac7a4f5243d26210f1b8fa86daf5ef"
+MODEL = "PaddleOCR-VL-1.5"
+
+file_path = "<local file path or file url>"
+
+headers = {
+    "Authorization": f"bearer {TOKEN}",
+}
+
+optional_payload = {
+    "useDocOrientationClassify": False,
+    "useDocUnwarping": False,
+    "useChartRecognition": False,
+}
+
+print(f"Processing file: {file_path}")
+
+if file_path.startswith("http"):
+    # URL Mode
+    headers["Content-Type"] = "application/json"
+    payload = {
+        "fileUrl": file_path,
+        "model": MODEL,
+        "optionalPayload": optional_payload
+    }
+    job_response = requests.post(JOB_URL, json=payload, headers=headers)
+else:
+    # Local File Mode
+    if not os.path.exists(file_path):
+        print(f"Error: File not found at {file_path}")
+        sys.exit(1)
+        
+    data = {
+        "model": MODEL,
+        "optionalPayload": json.dumps(optional_payload)
+    }
+    
+    with open(file_path, "rb") as f:
+        files = {"file": f}
+        job_response = requests.post(JOB_URL, headers=headers, data=data, files=files)
+
+print(f"Response status: {job_response.status_code}")
+if job_response.status_code != 200:
+    print(f"Response content: {job_response.text}")
+
+assert job_response.status_code == 200
+jobId = job_response.json()["data"]["jobId"]
+print(f"Job submitted successfully. job id: {jobId}")
+print("Start polling for results")
+
+jsonl_url = ""
+while True:
+    job_result_response = requests.get(f"{JOB_URL}/{jobId}", headers=headers)
+    assert job_result_response.status_code == 200
+    state = job_result_response.json()["data"]["state"]
+    if state == 'pending':
+        print("The current status of the job is pending")
+    elif state == 'running':
+        try:
+            total_pages = job_result_response.json()['data']['extractProgress']['totalPages']
+            extracted_pages = job_result_response.json()['data']['extractProgress']['extractedPages']
+            print(f"The current status of the job is running, total pages: {total_pages}, extracted pages: {extracted_pages}")
+        except KeyError:
+             print("The current status of the job is running...")
+    elif state == 'done':
+        extracted_pages = job_result_response.json()['data']['extractProgress']['extractedPages']
+        start_time = job_result_response.json()['data']['extractProgress']['startTime']
+        end_time = job_result_response.json()['data']['extractProgress']['endTime']
+        print(f"Job completed, successfully extracted pages: {extracted_pages}, start time: {start_time}, end time: {end_time}")
+        jsonl_url = job_result_response.json()['data']['resultUrl']['jsonUrl']
+        break
+    elif state == "failed":
+        error_msg = job_result_response.json()['data']['errorMsg']
+        print(f"Job failed, failure reason：{error_msg}")
+        sys.exit()
+
+    time.sleep(5)
+
+if jsonl_url:
+    jsonl_response = requests.get(jsonl_url)
+    jsonl_response.raise_for_status()
+    lines = jsonl_response.text.strip().split('\n')
+    output_dir = "output"
+    os.makedirs(output_dir, exist_ok=True)
+    page_num = 0
+    for line_num, line in enumerate(lines, start=1):
+        line = line.strip()
+        if not line:
+            continue
+        result = json.loads(line)["result"]
+        for i, res in enumerate(result["layoutParsingResults"]):
+            md_filename = os.path.join(output_dir, f"doc_{page_num}.md")
+            with open(md_filename, "w", encoding="utf-8") as md_file:
+                md_file.write(res["markdown"]["text"])
+            print(f"Markdown document saved at {md_filename}")
+            for img_path, img in res["markdown"]["images"].items():
+                full_img_path = os.path.join(output_dir, img_path)
+                os.makedirs(os.path.dirname(full_img_path), exist_ok=True)
+                img_bytes = requests.get(img).content
+                with open(full_img_path, "wb") as img_file:
+                    img_file.write(img_bytes)
+                print(f"Image saved to: {full_img_path}")
+            for img_name, img in res["outputImages"].items():
+                img_response = requests.get(img)
+                if img_response.status_code == 200:
+                    # Save image to local
+                    filename = os.path.join(output_dir, f"{img_name}_{page_num}.jpg")
+                    with open(filename, "wb") as f:
+                        f.write(img_response.content)
+                    print(f"Image saved to: {filename}")
+                else:
+                    print(f"Failed to download image, status code: {img_response.status_code}")
+            page_num += 1
diff --git a/packages/markitdown-paddleocr/README.md b/packages/markitdown-paddleocr/README.md
new file mode 100644
index 000000000..4685a343f
--- /dev/null
+++ b/packages/markitdown-paddleocr/README.md
@@ -0,0 +1,157 @@
+# markitdown-paddleocr
+
+智能 PDF/图片转 Markdown 插件，使用百度 PaddleOCR 云端 API 驱动的 OCR 识别。
+
+## 特性
+
+- 🔍 **智能检测**：自动识别每页内容类型（纯文本 vs 图片/表格）
+- 📄 **默认解析**：纯文本页面使用 pdfplumber/pdfminer 提取，速度快、成本低
+- 🤖 **AI 增强**：复杂页面（图片、表格）使用 PaddleOCR API 转换为 Markdown
+- 🔄 **异步 Job 模型**：提交 OCR 任务 → 轮询状态 → 获取结果
+- 📊 **结构化输出**：返回 Markdown（含表格、公式、图表等）
+
+## 安装
+
+```bash
+pip install markitdown-paddleocr
+```
+
+## 配置
+
+### 环境变量（推荐）
+
+```bash
+# 必需：百度 PaddleOCR Token
+export BAIDU_PADDLE_TOKEN="your-paddle-token"
+
+# 可选
+export PADDLE_OCR_MODEL="PaddleOCR-VL-1.5"   # 模型名称
+```
+
+### 配置优先级
+
+```
+构造函数参数 > 环境变量 > 内置默认值
+```
+
+## 使用方法
+
+### 命令行（推荐）
+
+```bash
+# 1. 设置 Token
+export BAIDU_PADDLE_TOKEN="your-token"
+
+# 2. 查看已安装插件
+markitdown --list-plugins
+
+# 3. 使用插件转换 PDF
+markitdown -p document.pdf
+
+# 4. 保存到文件
+markitdown -p document.pdf -o output.md
+```
+
+### Python API
+
+```python
+from markitdown import MarkItDown
+from markitdown_paddleocr import PaddleOcrConverter
+
+# 方式1：自动从环境变量读取 BAIDU_PADDLE_TOKEN
+converter = PaddleOcrConverter()
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+result = md.convert("document.pdf")
+print(result.markdown)
+
+# 方式2：手动传入 Token
+converter = PaddleOcrConverter(token="your-token")
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+result = md.convert("document.pdf")
+print(result.markdown)
+
+# 方式3：强制所有页面使用 OCR
+converter = PaddleOcrConverter(token="your-token", force_ai=True)
+md = MarkItDown(enable_plugins=False)
+md.register_converter(converter, priority=-1.0)
+result = md.convert("document.pdf")
+print(result.markdown)
+```
+
+### 直接使用 PaddleClient
+
+```python
+from markitdown_paddleocr import PaddleClient
+
+client = PaddleClient(token="your-token")
+
+# 本地文件
+markdown = client.ocr(file_bytes=open("image.png", "rb").read(), filename="image.png")
+print(markdown)
+
+# URL 模式
+markdown = client.ocr(file_url="https://example.com/document.pdf")
+print(markdown)
+```
+
+## 配置选项
+
+### PaddleOcrConverter 参数
+
+| 参数 | 类型 | 默认值 | 说明 |
+|------|------|--------|------|
+| `token` | str | 环境变量 `BAIDU_PADDLE_TOKEN` | PaddleOCR Token |
+| `model` | str | `PaddleOCR-VL-1.5` | OCR 模型名称 |
+| `poll_interval` | float | 2.0 | 轮询间隔（秒） |
+| `poll_timeout` | float | 300.0 | 轮询超时（秒） |
+| `force_ai` | bool | False | 强制所有页面使用 OCR |
+| `use_doc_orientation_classify` | bool | False | 文档方向分类 |
+| `use_doc_unwarping` | bool | False | 文档去扭曲 |
+| `use_chart_recognition` | bool | False | 图表识别 |
+
+### 环境变量
+
+| 变量 | 说明 | 示例 |
+|------|------|------|
+| `BAIDU_PADDLE_TOKEN` | Token（必需） | `7963b85a...` |
+| `PADDLE_OCR_MODEL` | 模型名称 | `PaddleOCR-VL-1.5` |
+
+## 工作原理
+
+```
+PDF/图片 输入
+    │
+    ▼
+PaddleOcrConverter.convert()
+    │
+    ├─ 图片文件 ──► PaddleClient.ocr() ──► markdown
+    │
+    └─ PDF 文件 ──► 逐页分析内容类型
+          │
+          ├─ 纯文本页 ──► pdfplumber 提取文本
+          │
+          └─ 复杂页（图片/表格）
+                │
+                └─► 渲染为图片 ──► PaddleClient.ocr()
+                      │
+                      ├─ POST /api/v2/ocr/jobs  (提交 Job)
+                      ├─ GET  /api/v2/ocr/jobs/{id}  (轮询状态)
+                      └─ GET  jsonUrl  (获取 JSONL 结果)
+    │
+    ▼
+合并输出完整 Markdown
+```
+
+## 依赖
+
+- `markitdown>=0.1.0` - 基础框架
+- `pdfplumber>=0.11.9` - PDF 解析和截图
+- `pdfminer.six>=20251230` - 文本提取备用
+- `Pillow>=9.0.0` - 图像处理
+- `requests>=2.28.0` - HTTP 请求
+
+## 许可证
+
+MIT
diff --git a/packages/markitdown-paddleocr/pyproject.toml b/packages/markitdown-paddleocr/pyproject.toml
new file mode 100644
index 000000000..f3326cd04
--- /dev/null
+++ b/packages/markitdown-paddleocr/pyproject.toml
@@ -0,0 +1,58 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "markitdown-paddleocr"
+dynamic = ["version"]
+description = "Intelligent PDF/Image to Markdown converter using PaddleOCR cloud API"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+keywords = ["markitdown", "pdf", "ocr", "paddleocr", "baidu", "vision"]
+authors = [
+  { name = "Contributors", email = "noreply@github.com" },
+]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+]
+
+dependencies = [
+  "markitdown>=0.1.0",
+  "pdfminer.six>=20251230",
+  "pdfplumber>=0.11.9",
+  "Pillow>=9.0.0",
+  "requests>=2.28.0",
+]
+
+[project.optional-dependencies]
+dev = [
+  "pytest>=7.0.0",
+]
+
+[project.urls]
+Documentation = "https://github.com/microsoft/markitdown#readme"
+Issues = "https://github.com/microsoft/markitdown/issues"
+Source = "https://github.com/microsoft/markitdown"
+
+[tool.hatch.version]
+path = "src/markitdown_paddleocr/__about__.py"
+
+# Plugin entry point - MarkItDown will discover this plugin
+[project.entry-points."markitdown.plugin"]
+markitdown_paddleocr = "markitdown_paddleocr"
+
+[tool.hatch.build.targets.sdist]
+only-include = ["src/markitdown_paddleocr"]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/markitdown_paddleocr"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
new file mode 100644
index 000000000..3dc1f76bc
--- /dev/null
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
@@ -0,0 +1 @@
+__version__ = "0.1.0"
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__init__.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__init__.py
new file mode 100644
index 000000000..00b431621
--- /dev/null
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__init__.py
@@ -0,0 +1,16 @@
+"""markitdown-paddleocr: PDF/Image to Markdown converter using PaddleOCR cloud API."""
+
+from ._plugin import register_converters
+from ._config import PaddleOcrConfig
+from ._converter import PaddleOcrConverter
+from ._paddle_client import PaddleClient
+from ._dual_converter import DualOcrConverter
+
+__plugin_interface_version__ = 1
+__all__ = [
+    "register_converters",
+    "PaddleOcrConfig",
+    "PaddleOcrConverter",
+    "PaddleClient",
+    "DualOcrConverter",
+]
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
new file mode 100644
index 000000000..51fc00d60
--- /dev/null
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
@@ -0,0 +1,46 @@
+"""Configuration for markitdown-paddleocr."""
+
+import os
+from dataclasses import dataclass
+
+
+@dataclass
+class PaddleOcrConfig:
+    """markitdown-paddleocr configuration.
+
+    Configuration priority (high to low):
+    1. Constructor kwargs
+    2. Environment variables
+    3. Built-in defaults
+    """
+
+    # API configuration
+    token: str = ""  # Reads from BAIDU_PADDLE_TOKEN by default
+
+    # OCR model
+    model: str = "PaddleOCR-VL-1.5"
+
+    # API endpoint
+    job_url: str = "https://paddleocr.aistudio-app.com/api/v2/ocr/jobs"
+
+    # Polling configuration
+    poll_interval: float = 2.0  # seconds between polls
+    poll_timeout: float = 300.0  # max seconds to wait for job completion
+
+    # Optional OCR features
+    use_doc_orientation_classify: bool = False
+    use_doc_unwarping: bool = False
+    use_chart_recognition: bool = False
+
+    # Processing strategy
+    force_ai: bool = False
+
+    @classmethod
+    def from_env(cls, **overrides) -> "PaddleOcrConfig":
+        """Create config from environment variables with optional overrides."""
+        defaults = {
+            "token": os.environ.get("BAIDU_PADDLE_TOKEN", ""),
+            "model": os.environ.get("PADDLE_OCR_MODEL", "PaddleOCR-VL-1.5"),
+        }
+        defaults.update(overrides)
+        return cls(**defaults)
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
new file mode 100644
index 000000000..6d8ae5e63
--- /dev/null
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
@@ -0,0 +1,304 @@
+"""PaddleOcr Converter - PDF/Image to Markdown using PaddleOCR cloud API."""
+
+import io
+import sys
+from typing import Any, BinaryIO, Optional
+
+from markitdown import DocumentConverter, DocumentConverterResult, StreamInfo
+from markitdown._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
+
+from ._config import PaddleOcrConfig
+from ._paddle_client import PaddleClient
+
+# Import PDF dependencies
+_dependency_exc_info = None
+try:
+    import pdfminer
+    import pdfminer.high_level
+    import pdfplumber
+except ImportError:
+    _dependency_exc_info = sys.exc_info()
+
+
+ACCEPTED_MIME_TYPE_PREFIXES = [
+    "application/pdf",
+    "application/x-pdf",
+    "image/jpeg",
+    "image/png",
+]
+
+ACCEPTED_FILE_EXTENSIONS = [".pdf", ".jpg", ".jpeg", ".png"]
+
+
+class PaddleOcrConverter(DocumentConverter):
+    """Intelligent PDF/Image converter using PaddleOCR cloud API.
+
+    Features:
+    - Auto-detect page content type (plain text vs images/tables)
+    - Plain text pages use pdfplumber/pdfminer (fast, free)
+    - Complex pages use PaddleOCR API for AI-powered OCR
+    - Image files (PNG, JPG) use PaddleOCR API directly
+    - Asynchronous job model: submit → poll → fetch result
+    """
+
+    def __init__(
+        self,
+        token: Optional[str] = None,
+        model: str = "PaddleOCR-VL-1.5",
+        poll_interval: float = 2.0,
+        poll_timeout: float = 300.0,
+        force_ai: bool = False,
+        use_doc_orientation_classify: bool = False,
+        use_doc_unwarping: bool = False,
+        use_chart_recognition: bool = False,
+        config: Optional[PaddleOcrConfig] = None,
+    ):
+        """Initialize converter.
+
+        Args:
+            token: Baidu PaddleOCR token (reads from BAIDU_PADDLE_TOKEN env var if not provided)
+            model: OCR model name (default: PaddleOCR-VL-1.5)
+            poll_interval: Seconds between status polls (default: 2.0)
+            poll_timeout: Max seconds to wait for job completion (default: 300.0)
+            force_ai: Force all pages to use OCR (default: False)
+            use_doc_orientation_classify: Enable document orientation classification
+            use_doc_unwarping: Enable document unwarping
+            use_chart_recognition: Enable chart recognition
+            config: Optional PaddleOcrConfig instance
+        """
+        # Build config from explicit params or provided config
+        if config:
+            self.token = token or config.token
+            self.model = model if model != "PaddleOCR-VL-1.5" else config.model
+            self.poll_interval = poll_interval if poll_interval != 2.0 else config.poll_interval
+            self.poll_timeout = poll_timeout if poll_timeout != 300.0 else config.poll_timeout
+            self.force_ai = force_ai or config.force_ai
+            self.use_doc_orientation_classify = use_doc_orientation_classify or config.use_doc_orientation_classify
+            self.use_doc_unwarping = use_doc_unwarping or config.use_doc_unwarping
+            self.use_chart_recognition = use_chart_recognition or config.use_chart_recognition
+        else:
+            self.token = token
+            self.model = model
+            self.poll_interval = poll_interval
+            self.poll_timeout = poll_timeout
+            self.force_ai = force_ai
+            self.use_doc_orientation_classify = use_doc_orientation_classify
+            self.use_doc_unwarping = use_doc_unwarping
+            self.use_chart_recognition = use_chart_recognition
+
+        # Lazy init client
+        self._client: Optional[PaddleClient] = None
+
+    def _get_client(self) -> PaddleClient:
+        """Get or create PaddleClient instance."""
+        if self._client is None:
+            config = PaddleOcrConfig(
+                token=self.token or "",
+                model=self.model,
+                poll_interval=self.poll_interval,
+                poll_timeout=self.poll_timeout,
+                force_ai=self.force_ai,
+                use_doc_orientation_classify=self.use_doc_orientation_classify,
+                use_doc_unwarping=self.use_doc_unwarping,
+                use_chart_recognition=self.use_chart_recognition,
+            )
+            self._client = PaddleClient(config=config)
+        return self._client
+
+    def accepts(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> bool:
+        mimetype = (stream_info.mimetype or "").lower()
+        extension = (stream_info.extension or "").lower()
+
+        if extension in ACCEPTED_FILE_EXTENSIONS:
+            return True
+
+        for prefix in ACCEPTED_MIME_TYPE_PREFIXES:
+            if mimetype.startswith(prefix):
+                return True
+
+        return False
+
+    def convert(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> DocumentConverterResult:
+        if _dependency_exc_info is not None:
+            raise MissingDependencyException(
+                MISSING_DEPENDENCY_MESSAGE.format(
+                    converter=type(self).__name__,
+                    extension=".pdf",
+                    feature="pdf",
+                )
+            ) from _dependency_exc_info[1].with_traceback(
+                _dependency_exc_info[2]
+            )
+
+        extension = (stream_info.extension or "").lower()
+
+        # Image files: use PaddleOCR directly
+        if extension in (".jpg", ".jpeg", ".png"):
+            return self._convert_image(file_stream, extension)
+
+        # PDF files: use hybrid approach
+        return self._convert_pdf(file_stream)
+
+    def _convert_image(self, file_stream: BinaryIO, extension: str = ".png") -> DocumentConverterResult:
+        """Convert image file using PaddleOCR API."""
+        img_bytes = file_stream.read()
+        filename = f"image{extension}"
+
+        try:
+            markdown = self._get_client().ocr(file_bytes=img_bytes, filename=filename)
+            return DocumentConverterResult(markdown=markdown)
+        except Exception as e:
+            return DocumentConverterResult(
+                markdown=f"<!-- Error converting image with PaddleOCR: {e} -->"
+            )
+
+    def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
+        """Convert PDF using hybrid approach (pdfplumber for text, PaddleOCR for complex pages)."""
+        pdf_stream = io.BytesIO(file_stream.read())
+        markdown_parts = []
+
+        try:
+            with pdfplumber.open(pdf_stream) as pdf:
+                for page_num, page in enumerate(pdf.pages):
+                    # Analyze page type
+                    page_type = self._analyze_page(page)
+
+                    # Choose processing method
+                    if self.force_ai or page_type != "plain_text":
+                        # Complex content: use PaddleOCR
+                        markdown = self._convert_with_paddleocr(page, page_num)
+                    else:
+                        # Plain text: use pdfplumber
+                        markdown = self._extract_text_with_tables(page)
+
+                    if markdown.strip():
+                        markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
+
+                    page.close()
+
+            markdown = "\n\n".join(markdown_parts).strip()
+
+        except Exception:
+            # Fallback to pdfminer
+            pdf_stream.seek(0)
+            markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
+
+        # Final fallback
+        if not markdown:
+            pdf_stream.seek(0)
+            markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
+
+        return DocumentConverterResult(markdown=markdown)
+
+    def _analyze_page(self, page: Any) -> str:
+        """Analyze page content type."""
+        # Check for images
+        if hasattr(page, "images") and page.images:
+            return "complex"
+
+        # Check for tables
+        tables = page.find_tables()
+        if tables:
+            return "complex"
+
+        # Check for graphics/curves
+        if hasattr(page, "curves") and page.curves:
+            return "complex"
+
+        return "plain_text"
+
+    def _convert_with_paddleocr(self, page: Any, page_num: int) -> str:
+        """Convert page using PaddleOCR API."""
+        try:
+            # Render page to image
+            img = page.to_image(resolution=150)
+            img_bytes = io.BytesIO()
+            img.save(img_bytes, format="PNG")
+
+            markdown = self._get_client().ocr(
+                file_bytes=img_bytes.getvalue(),
+                filename=f"page_{page_num + 1}.png",
+            )
+            return markdown
+
+        except Exception:
+            # Fallback to pdfplumber text extraction
+            return self._extract_text_with_tables(page)
+
+    def _extract_text_with_tables(self, page: Any) -> str:
+        """Extract text and tables from page."""
+        parts = []
+
+        # Extract text
+        text = page.extract_text() or ""
+        if text.strip():
+            parts.append(text.strip())
+
+        # Extract tables
+        try:
+            tables = page.extract_tables()
+            if tables:
+                for table in tables:
+                    if table:
+                        md_table = self._table_to_markdown(table)
+                        if md_table.strip():
+                            parts.append(md_table)
+        except Exception:
+            pass
+
+        return "\n\n".join(parts)
+
+    def _table_to_markdown(self, table: list[list[str]]) -> str:
+        """Convert table to Markdown."""
+        if not table:
+            return ""
+
+        # Filter None values
+        table = [[cell if cell is not None else "" for cell in row] for row in table]
+
+        # Filter empty rows
+        table = [row for row in table if any(cell.strip() for cell in row)]
+
+        if not table:
+            return ""
+
+        # Calculate column widths
+        col_widths = [
+            max(len(str(row[i])) if i < len(row) else 0 for row in table)
+            for i in range(max(len(row) for row in table))
+        ]
+
+        # Format table
+        lines = []
+        for row_idx, row in enumerate(table):
+            padded_row = row + [""] * (len(col_widths) - len(row))
+            line = "| " + " | ".join(
+                str(cell).ljust(width) for cell, width in zip(padded_row, col_widths)
+            ) + " |"
+            lines.append(line)
+
+            if row_idx == 0:
+                sep = "|" + "|".join("-" * (w + 2) for w in col_widths) + "|"
+                lines.append(sep)
+
+        return "\n".join(lines)
+
+    def close(self):
+        """Close the client."""
+        self._client = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py
new file mode 100644
index 000000000..e27395c4d
--- /dev/null
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py
@@ -0,0 +1,160 @@
+"""DualOcrConverter - glmocr (primary) → paddleocr (fallback) automatic degradation."""
+
+import logging
+from typing import Optional
+
+from markitdown import MarkItDown, DocumentConverter, DocumentConverterResult, StreamInfo
+from typing import BinaryIO, Any
+
+logger = logging.getLogger(__name__)
+
+
+class DualOcrConverter(DocumentConverter):
+    """Dual OCR converter with automatic fallback: glmocr → paddleocr.
+
+    Usage:
+        converter = DualOcrConverter()
+        md = MarkItDown(enable_plugins=False)
+        md.register_converter(converter, priority=-1.0)
+        result = md.convert("document.pdf")
+    """
+
+    def __init__(
+        self,
+        # glmocr kwargs
+        glmocr_api_key: Optional[str] = None,
+        glmocr_timeout: int = 1800,
+        glmocr_enable_layout: bool = False,
+        glmocr_force_ai: bool = False,
+        # paddleocr kwargs
+        paddleocr_token: Optional[str] = None,
+        paddleocr_model: str = "PaddleOCR-VL-1.5",
+        paddleocr_poll_interval: float = 2.0,
+        paddleocr_poll_timeout: float = 300.0,
+        paddleocr_force_ai: bool = False,
+        paddleocr_use_doc_orientation_classify: bool = False,
+        paddleocr_use_doc_unwarping: bool = False,
+        paddleocr_use_chart_recognition: bool = False,
+    ):
+        self.glmocr_kwargs = {
+            "api_key": glmocr_api_key,
+            "timeout": glmocr_timeout,
+            "enable_layout": glmocr_enable_layout,
+            "force_ai": glmocr_force_ai,
+        }
+        self.paddleocr_kwargs = {
+            "token": paddleocr_token,
+            "model": paddleocr_model,
+            "poll_interval": paddleocr_poll_interval,
+            "poll_timeout": paddleocr_poll_timeout,
+            "force_ai": paddleocr_force_ai,
+            "use_doc_orientation_classify": paddleocr_use_doc_orientation_classify,
+            "use_doc_unwarping": paddleocr_use_doc_unwarping,
+            "use_chart_recognition": paddleocr_use_chart_recognition,
+        }
+
+        self._primary = None
+        self._fallback = None
+        self._init_converters()
+
+    def _init_converters(self):
+        """Lazily init both converters."""
+        try:
+            from markitdown_glmocr import GlmOcrConverter
+            # Filter out None values
+            kwargs = {k: v for k, v in self.glmocr_kwargs.items() if v is not None}
+            self._primary = GlmOcrConverter(**kwargs)
+            logger.info("glmocr converter initialized (primary)")
+        except Exception as e:
+            logger.warning("glmocr init failed: %s", e)
+            self._primary = None
+
+        try:
+            from markitdown_paddleocr import PaddleOcrConverter
+            kwargs = {k: v for k, v in self.paddleocr_kwargs.items() if v is not None}
+            self._fallback = PaddleOcrConverter(**kwargs)
+            logger.info("paddleocr converter initialized (fallback)")
+        except Exception as e:
+            logger.warning("paddleocr init failed: %s", e)
+            self._fallback = None
+
+    def accepts(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> bool:
+        """Accept if either converter accepts."""
+        if self._primary:
+            try:
+                file_stream.seek(0)
+                if self._primary.accepts(file_stream, stream_info, **kwargs):
+                    return True
+            except Exception:
+                pass
+
+        if self._fallback:
+            try:
+                file_stream.seek(0)
+                if self._fallback.accepts(file_stream, stream_info, **kwargs):
+                    return True
+            except Exception:
+                pass
+
+        return False
+
+    def convert(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> DocumentConverterResult:
+        """Convert with primary, fallback on failure."""
+        data = file_stream.read()
+
+        # Try primary (glmocr)
+        if self._primary:
+            try:
+                result = self._primary.convert(io_bytes(data), stream_info, **kwargs)
+                if result.markdown and result.markdown.strip():
+                    logger.info("✓ glmocr succeeded")
+                    return result
+                logger.warning("glmocr returned empty result, falling back")
+            except Exception as e:
+                logger.warning("glmocr failed: %s, falling back to paddleocr", e)
+
+        # Fallback (paddleocr)
+        if self._fallback:
+            try:
+                result = self._fallback.convert(io_bytes(data), stream_info, **kwargs)
+                if result.markdown and result.markdown.strip():
+                    logger.info("✓ paddleocr succeeded (fallback)")
+                    return result
+                logger.warning("paddleocr returned empty result")
+            except Exception as e:
+                logger.error("paddleocr also failed: %s", e)
+
+        # Both failed
+        return DocumentConverterResult(
+            markdown="<!-- Both OCR engines (glmocr, paddleocr) failed to convert this file -->"
+        )
+
+    def close(self):
+        if self._primary and hasattr(self._primary, "close"):
+            self._primary.close()
+        if self._fallback and hasattr(self._fallback, "close"):
+            self._fallback.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+
+def io_bytes(data: bytes):
+    """Create a seekable BytesIO from bytes."""
+    import io
+    buf = io.BytesIO(data)
+    buf.seek(0)
+    return buf
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_paddle_client.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_paddle_client.py
new file mode 100644
index 000000000..ba12e51c9
--- /dev/null
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_paddle_client.py
@@ -0,0 +1,189 @@
+"""PaddleOCR API Client - handles job submission, polling, and result fetching."""
+
+import json
+import logging
+import time
+from typing import Optional
+
+import requests
+
+from ._config import PaddleOcrConfig
+
+logger = logging.getLogger(__name__)
+
+
+class PaddleOcrError(Exception):
+    """PaddleOCR API error."""
+
+    pass
+
+
+class PaddleClient:
+    """Client for PaddleOCR cloud API.
+
+    Workflow: submit job → poll status → fetch JSONL result → extract markdown.
+    """
+
+    def __init__(self, config: Optional[PaddleOcrConfig] = None, **kwargs):
+        if config is None:
+            config = PaddleOcrConfig(**kwargs)
+        self.config = config
+
+        # Token from config or env
+        self.token = config.token
+        if not self.token:
+            import os
+            self.token = os.environ.get("BAIDU_PADDLE_TOKEN", "")
+
+    def _headers(self) -> dict:
+        """Build authorization headers."""
+        return {"Authorization": f"bearer {self.token}"}
+
+    def _optional_payload(self) -> dict:
+        """Build optional payload flags."""
+        return {
+            "useDocOrientationClassify": self.config.use_doc_orientation_classify,
+            "useDocUnwarping": self.config.use_doc_unwarping,
+            "useChartRecognition": self.config.use_chart_recognition,
+        }
+
+    def ocr(
+        self,
+        file_bytes: Optional[bytes] = None,
+        filename: Optional[str] = None,
+        file_url: Optional[str] = None,
+    ) -> str:
+        """Run OCR on a file or URL, return concatenated markdown.
+
+        Args:
+            file_bytes: File content bytes (for local file upload).
+            filename: Filename for multipart upload (e.g. "page.png").
+            file_url: File URL (for URL mode, alternative to file_bytes).
+
+        Returns:
+            Markdown text extracted from all pages.
+
+        Raises:
+            PaddleOcrError: On API errors or timeout.
+        """
+        # 1. Submit job
+        job_id = self._submit(file_bytes=file_bytes, filename=filename, file_url=file_url)
+        logger.info("Job submitted: %s", job_id)
+
+        # 2. Poll until done
+        result_url = self._poll(job_id)
+        logger.info("Job completed, result URL obtained")
+
+        # 3. Fetch and parse results
+        return self._fetch_markdown(result_url)
+
+    def _submit(
+        self,
+        file_bytes: Optional[bytes] = None,
+        filename: Optional[str] = None,
+        file_url: Optional[str] = None,
+    ) -> str:
+        """Submit an OCR job, return job ID."""
+        headers = self._headers()
+
+        if file_url:
+            # URL mode
+            headers["Content-Type"] = "application/json"
+            payload = {
+                "fileUrl": file_url,
+                "model": self.config.model,
+                "optionalPayload": self._optional_payload(),
+            }
+            resp = requests.post(self.config.job_url, json=payload, headers=headers)
+        elif file_bytes is not None:
+            # Local file mode - multipart upload
+            data = {
+                "model": self.config.model,
+                "optionalPayload": json.dumps(self._optional_payload()),
+            }
+            fname = filename or "document"
+            files = {"file": (fname, file_bytes)}
+            resp = requests.post(self.config.job_url, headers=headers, data=data, files=files)
+        else:
+            raise PaddleOcrError("Either file_bytes or file_url must be provided")
+
+        if resp.status_code != 200:
+            raise PaddleOcrError(f"Submit failed (HTTP {resp.status_code}): {resp.text}")
+
+        result = resp.json()
+        job_id = result.get("data", {}).get("jobId")
+        if not job_id:
+            raise PaddleOcrError(f"No jobId in response: {result}")
+
+        return job_id
+
+    def _poll(self, job_id: str) -> str:
+        """Poll job status until done, return JSONL result URL."""
+        headers = self._headers()
+        url = f"{self.config.job_url}/{job_id}"
+        start = time.time()
+
+        while True:
+            resp = requests.get(url, headers=headers)
+            if resp.status_code != 200:
+                raise PaddleOcrError(f"Poll failed (HTTP {resp.status_code}): {resp.text}")
+
+            data = resp.json().get("data", {})
+            state = data.get("state", "")
+
+            if state == "done":
+                result_url = data.get("resultUrl", {}).get("jsonUrl", "")
+                if not result_url:
+                    raise PaddleOcrError("Job done but no resultUrl in response")
+                return result_url
+
+            if state == "failed":
+                error_msg = data.get("errorMsg", "Unknown error")
+                raise PaddleOcrError(f"Job failed: {error_msg}")
+
+            # Still pending or running
+            if state == "running":
+                progress = data.get("extractProgress", {})
+                total = progress.get("totalPages", "?")
+                extracted = progress.get("extractedPages", "?")
+                logger.debug("Running: %s/%s pages", extracted, total)
+            else:
+                logger.debug("State: %s", state)
+
+            # Check timeout
+            elapsed = time.time() - start
+            if elapsed > self.config.poll_timeout:
+                raise PaddleOcrError(
+                    f"Job polling timed out after {self.config.poll_timeout}s (state={state})"
+                )
+
+            time.sleep(self.config.poll_interval)
+
+    def _fetch_markdown(self, jsonl_url: str) -> str:
+        """Fetch JSONL result and extract markdown from all pages."""
+        resp = requests.get(jsonl_url)
+        resp.raise_for_status()
+
+        markdown_parts = []
+        lines = resp.text.strip().split("\n")
+
+        for line in lines:
+            line = line.strip()
+            if not line:
+                continue
+
+            try:
+                page_data = json.loads(line)
+            except json.JSONDecodeError:
+                logger.warning("Skipping invalid JSONL line")
+                continue
+
+            result = page_data.get("result", {})
+            layout_results = result.get("layoutParsingResults", [])
+
+            for layout in layout_results:
+                md_text = layout.get("markdown", {}).get("text", "")
+                if md_text.strip():
+                    markdown_parts.append(md_text.strip())
+
+        return "\n\n".join(markdown_parts)
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
new file mode 100644
index 000000000..09ae96e6d
--- /dev/null
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
@@ -0,0 +1,35 @@
+"""Plugin registration for markitdown-paddleocr."""
+
+from typing import Any
+from markitdown import MarkItDown
+
+from ._converter import PaddleOcrConverter
+
+
+__plugin_interface_version__ = 1
+
+
+def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
+    """Register markitdown-paddleocr converter.
+
+    Config sources (priority high to low):
+    1. kwargs parameters
+    2. Environment variables (BAIDU_PADDLE_TOKEN)
+    3. Built-in defaults
+    """
+    # Register converter with higher priority than default PDF converter
+    PRIORITY_PADDLEOCR = -1.0
+
+    markitdown.register_converter(
+        PaddleOcrConverter(
+            token=kwargs.get("token"),
+            model=kwargs.get("model", "PaddleOCR-VL-1.5"),
+            poll_interval=kwargs.get("poll_interval", 2.0),
+            poll_timeout=kwargs.get("poll_timeout", 300.0),
+            force_ai=kwargs.get("force_ai", False),
+            use_doc_orientation_classify=kwargs.get("use_doc_orientation_classify", False),
+            use_doc_unwarping=kwargs.get("use_doc_unwarping", False),
+            use_chart_recognition=kwargs.get("use_chart_recognition", False),
+        ),
+        priority=PRIORITY_PADDLEOCR,
+    )
diff --git a/packages/markitdown-paddleocr/tests/__init__.py b/packages/markitdown-paddleocr/tests/__init__.py
new file mode 100644
index 000000000..4be5c24f3
--- /dev/null
+++ b/packages/markitdown-paddleocr/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for markitdown-paddleocr."""
diff --git a/packages/markitdown-paddleocr/tests/test_converter.py b/packages/markitdown-paddleocr/tests/test_converter.py
new file mode 100644
index 000000000..aaca74c8c
--- /dev/null
+++ b/packages/markitdown-paddleocr/tests/test_converter.py
@@ -0,0 +1,214 @@
+"""Tests for PaddleOcrConverter."""
+
+import io
+import pytest
+from unittest.mock import MagicMock, patch
+
+from markitdown_paddleocr._converter import PaddleOcrConverter
+
+
+class TestPaddleOcrConverterAccepts:
+    """Accepts method tests."""
+
+    def test_accepts_pdf_extension(self):
+        """Accept .pdf extension."""
+        converter = PaddleOcrConverter()
+        stream = io.BytesIO(b"%PDF-1.4")
+        stream_info = MagicMock(extension=".pdf", mimetype=None)
+        assert converter.accepts(stream, stream_info) is True
+
+    def test_accepts_pdf_mimetype(self):
+        """Accept PDF MIME type."""
+        converter = PaddleOcrConverter()
+        stream = io.BytesIO(b"%PDF-1.4")
+        stream_info = MagicMock(extension=None, mimetype="application/pdf")
+        assert converter.accepts(stream, stream_info) is True
+
+    def test_accepts_image_extensions(self):
+        """Accept image extensions."""
+        converter = PaddleOcrConverter()
+        for ext in [".jpg", ".jpeg", ".png"]:
+            stream = io.BytesIO(b"fake")
+            stream_info = MagicMock(extension=ext, mimetype=None)
+            assert converter.accepts(stream, stream_info) is True
+
+    def test_rejects_non_supported(self):
+        """Reject non-supported files."""
+        converter = PaddleOcrConverter()
+        stream = io.BytesIO(b"not a pdf")
+        stream_info = MagicMock(extension=".txt", mimetype="text/plain")
+        assert converter.accepts(stream, stream_info) is False
+
+
+class TestPaddleOcrConverterTable:
+    """Table to Markdown conversion tests."""
+
+    def test_table_to_markdown(self):
+        """Table to Markdown conversion."""
+        converter = PaddleOcrConverter()
+        table = [
+            ["Name", "Age", "City"],
+            ["Alice", "25", "Beijing"],
+            ["Bob", "30", "Shanghai"],
+        ]
+        result = converter._table_to_markdown(table)
+        assert "|" in result
+        assert "Name" in result
+        assert "Alice" in result
+        assert "---" in result
+
+    def test_empty_table(self):
+        """Empty table returns empty string."""
+        converter = PaddleOcrConverter()
+        assert converter._table_to_markdown([]) == ""
+
+    def test_table_with_none_values(self):
+        """Table with None values."""
+        converter = PaddleOcrConverter()
+        table = [
+            ["A", None, "C"],
+            ["1", "2", None],
+        ]
+        result = converter._table_to_markdown(table)
+        assert "|" in result
+        assert "A" in result
+
+
+class TestPaddleOcrConverterImage:
+    """Image conversion tests."""
+
+    def test_convert_image_success(self):
+        """Convert image with PaddleOCR success."""
+        converter = PaddleOcrConverter(token="test-token")
+
+        mock_client = MagicMock()
+        mock_client.ocr.return_value = "# Image Title\n\nContent"
+        converter._client = mock_client
+
+        stream = io.BytesIO(b"fake-image")
+        stream_info = MagicMock(extension=".png", mimetype="image/png")
+        result = converter.convert(stream, stream_info)
+
+        assert "# Image Title" in result.markdown
+        mock_client.ocr.assert_called_once()
+
+    def test_convert_image_error(self):
+        """Convert image with PaddleOCR error returns comment."""
+        converter = PaddleOcrConverter(token="test-token")
+
+        mock_client = MagicMock()
+        mock_client.ocr.side_effect = Exception("API Error")
+        converter._client = mock_client
+
+        stream = io.BytesIO(b"fake-image")
+        stream_info = MagicMock(extension=".png", mimetype="image/png")
+        result = converter.convert(stream, stream_info)
+
+        assert "Error converting image" in result.markdown
+
+
+class TestPaddleOcrConverterPdf:
+    """PDF conversion tests."""
+
+    def test_plain_text_page(self):
+        """Plain text page uses pdfplumber."""
+        converter = PaddleOcrConverter()
+
+        page = MagicMock()
+        page.images = []
+        page.find_tables.return_value = []
+        page.extract_tables.return_value = []
+        page.extract_text.return_value = "Hello World"
+        page.close = MagicMock()
+
+        mock_pdf = MagicMock()
+        mock_pdf.pages = [page]
+
+        with patch("markitdown_paddleocr._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = mock_pdf
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter.convert(stream, MagicMock(extension=".pdf", mimetype=None))
+
+        assert "Hello World" in result.markdown
+
+    def test_complex_page_uses_paddleocr(self):
+        """Complex page uses PaddleOCR."""
+        converter = PaddleOcrConverter(token="test-token")
+
+        mock_client = MagicMock()
+        mock_client.ocr.return_value = "OCR result for complex page"
+        converter._client = mock_client
+
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.find_tables.return_value = []
+        page.to_image.return_value.save = MagicMock(
+            side_effect=lambda buf, format: buf.write(b"fake-png")
+        )
+        page.close = MagicMock()
+
+        mock_pdf = MagicMock()
+        mock_pdf.pages = [page]
+
+        with patch("markitdown_paddleocr._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = mock_pdf
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter.convert(stream, MagicMock(extension=".pdf", mimetype=None))
+
+        mock_client.ocr.assert_called_once()
+        assert "OCR result" in result.markdown
+
+    def test_force_ai_mode(self):
+        """Force AI mode uses PaddleOCR for all pages."""
+        converter = PaddleOcrConverter(token="test-token", force_ai=True)
+
+        mock_client = MagicMock()
+        mock_client.ocr.return_value = "AI result"
+        converter._client = mock_client
+
+        page = MagicMock()
+        page.images = []
+        page.find_tables.return_value = []
+        page.to_image.return_value.save = MagicMock(
+            side_effect=lambda buf, format: buf.write(b"fake-png")
+        )
+        page.close = MagicMock()
+
+        mock_pdf = MagicMock()
+        mock_pdf.pages = [page]
+
+        with patch("markitdown_paddleocr._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = mock_pdf
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter.convert(stream, MagicMock(extension=".pdf", mimetype=None))
+
+        mock_client.ocr.assert_called_once()
+
+
+class TestPaddleOcrConverterConfig:
+    """Config initialization tests."""
+
+    def test_default_config(self):
+        """Default configuration values."""
+        converter = PaddleOcrConverter()
+        assert converter.model == "PaddleOCR-VL-1.5"
+        assert converter.poll_interval == 2.0
+        assert converter.poll_timeout == 300.0
+        assert converter.force_ai is False
+
+    def test_custom_config(self):
+        """Custom configuration values."""
+        converter = PaddleOcrConverter(
+            token="my-token",
+            model="custom-model",
+            poll_interval=5.0,
+            poll_timeout=600.0,
+            force_ai=True,
+            use_chart_recognition=True,
+        )
+        assert converter.token == "my-token"
+        assert converter.model == "custom-model"
+        assert converter.poll_interval == 5.0
+        assert converter.poll_timeout == 600.0
+        assert converter.force_ai is True
+        assert converter.use_chart_recognition is True
diff --git a/packages/markitdown-paddleocr/tests/test_paddle_client.py b/packages/markitdown-paddleocr/tests/test_paddle_client.py
new file mode 100644
index 000000000..361a329b6
--- /dev/null
+++ b/packages/markitdown-paddleocr/tests/test_paddle_client.py
@@ -0,0 +1,241 @@
+"""Tests for PaddleClient."""
+
+import json
+import pytest
+from unittest.mock import MagicMock, patch
+
+from markitdown_paddleocr._paddle_client import PaddleClient, PaddleOcrError
+from markitdown_paddleocr._config import PaddleOcrConfig
+
+
+class TestPaddleClientInit:
+    """Client initialization tests."""
+
+    def test_init_with_token(self):
+        """Init with explicit token."""
+        client = PaddleClient(token="test-token")
+        assert client.token == "test-token"
+
+    @patch.dict("os.environ", {"BAIDU_PADDLE_TOKEN": "env-token"})
+    def test_init_from_env(self):
+        """Init from environment variable."""
+        client = PaddleClient()
+        assert client.token == "env-token"
+
+    def test_init_with_config(self):
+        """Init with PaddleOcrConfig."""
+        config = PaddleOcrConfig(token="config-token", model="custom-model")
+        client = PaddleClient(config=config)
+        assert client.token == "config-token"
+        assert client.config.model == "custom-model"
+
+
+class TestPaddleClientSubmit:
+    """Job submission tests."""
+
+    def test_submit_local_file(self):
+        """Submit local file via multipart upload."""
+        client = PaddleClient(token="test-token")
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"data": {"jobId": "job-123"}}
+
+        with patch("requests.post", return_value=mock_response) as mock_post:
+            job_id = client._submit(file_bytes=b"fake-image", filename="test.png")
+
+        assert job_id == "job-123"
+        # Verify multipart upload was used (files parameter)
+        call_kwargs = mock_post.call_args
+        assert "files" in call_kwargs.kwargs or len(call_kwargs.args) > 0
+
+    def test_submit_url_mode(self):
+        """Submit file URL via JSON."""
+        client = PaddleClient(token="test-token")
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"data": {"jobId": "job-456"}}
+
+        with patch("requests.post", return_value=mock_response) as mock_post:
+            job_id = client._submit(file_url="https://example.com/doc.pdf")
+
+        assert job_id == "job-456"
+
+    def test_submit_error(self):
+        """Submit with API error."""
+        client = PaddleClient(token="test-token")
+
+        mock_response = MagicMock()
+        mock_response.status_code = 500
+        mock_response.text = "Internal Server Error"
+
+        with patch("requests.post", return_value=mock_response):
+            with pytest.raises(PaddleOcrError, match="Submit failed"):
+                client._submit(file_bytes=b"fake", filename="test.png")
+
+    def test_submit_no_input(self):
+        """Submit without file or URL raises error."""
+        client = PaddleClient(token="test-token")
+        with pytest.raises(PaddleOcrError, match="Either file_bytes or file_url"):
+            client._submit()
+
+
+class TestPaddleClientPoll:
+    """Job polling tests."""
+
+    def test_poll_done_immediately(self):
+        """Job is done on first poll."""
+        client = PaddleClient(token="test-token")
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "data": {
+                "state": "done",
+                "resultUrl": {"jsonUrl": "https://result.url/data.jsonl"},
+            }
+        }
+
+        with patch("requests.get", return_value=mock_response):
+            result_url = client._poll("job-123")
+
+        assert result_url == "https://result.url/data.jsonl"
+
+    def test_poll_failed(self):
+        """Job fails."""
+        client = PaddleClient(token="test-token")
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "data": {"state": "failed", "errorMsg": "Processing error"}
+        }
+
+        with patch("requests.get", return_value=mock_response):
+            with pytest.raises(PaddleOcrError, match="Job failed"):
+                client._poll("job-123")
+
+    def test_poll_timeout(self):
+        """Polling timeout."""
+        config = PaddleOcrConfig(token="test-token", poll_interval=0.01, poll_timeout=0.05)
+        client = PaddleClient(config=config)
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"data": {"state": "pending"}}
+
+        with patch("requests.get", return_value=mock_response):
+            with pytest.raises(PaddleOcrError, match="timed out"):
+                client._poll("job-123")
+
+
+class TestPaddleClientFetchMarkdown:
+    """Result fetching tests."""
+
+    def test_fetch_single_page(self):
+        """Fetch single page result."""
+        client = PaddleClient(token="test-token")
+
+        jsonl_content = json.dumps({
+            "result": {
+                "layoutParsingResults": [
+                    {"markdown": {"text": "# Title\n\nHello world"}}
+                ]
+            }
+        })
+
+        mock_response = MagicMock()
+        mock_response.text = jsonl_content
+        mock_response.raise_for_status = MagicMock()
+
+        with patch("requests.get", return_value=mock_response):
+            markdown = client._fetch_markdown("https://result.url/data.jsonl")
+
+        assert "# Title" in markdown
+        assert "Hello world" in markdown
+
+    def test_fetch_multi_page(self):
+        """Fetch multi-page result."""
+        client = PaddleClient(token="test-token")
+
+        page1 = json.dumps({
+            "result": {
+                "layoutParsingResults": [
+                    {"markdown": {"text": "Page 1 content"}}
+                ]
+            }
+        })
+        page2 = json.dumps({
+            "result": {
+                "layoutParsingResults": [
+                    {"markdown": {"text": "Page 2 content"}}
+                ]
+            }
+        })
+        jsonl_content = f"{page1}\n{page2}"
+
+        mock_response = MagicMock()
+        mock_response.text = jsonl_content
+        mock_response.raise_for_status = MagicMock()
+
+        with patch("requests.get", return_value=mock_response):
+            markdown = client._fetch_markdown("https://result.url/data.jsonl")
+
+        assert "Page 1 content" in markdown
+        assert "Page 2 content" in markdown
+
+    def test_fetch_empty_result(self):
+        """Fetch empty result."""
+        client = PaddleClient(token="test-token")
+
+        mock_response = MagicMock()
+        mock_response.text = ""
+        mock_response.raise_for_status = MagicMock()
+
+        with patch("requests.get", return_value=mock_response):
+            markdown = client._fetch_markdown("https://result.url/data.jsonl")
+
+        assert markdown == ""
+
+
+class TestPaddleClientOcr:
+    """Full OCR workflow tests."""
+
+    def test_ocr_workflow(self):
+        """Complete OCR workflow: submit → poll → fetch."""
+        client = PaddleClient(token="test-token")
+
+        # Mock submit
+        submit_resp = MagicMock()
+        submit_resp.status_code = 200
+        submit_resp.json.return_value = {"data": {"jobId": "job-789"}}
+
+        # Mock poll
+        poll_resp = MagicMock()
+        poll_resp.status_code = 200
+        poll_resp.json.return_value = {
+            "data": {
+                "state": "done",
+                "resultUrl": {"jsonUrl": "https://result.url/data.jsonl"},
+            }
+        }
+
+        # Mock fetch
+        jsonl_content = json.dumps({
+            "result": {
+                "layoutParsingResults": [
+                    {"markdown": {"text": "# OCR Result\n\nExtracted text."}}
+                ]
+            }
+        })
+        fetch_resp = MagicMock()
+        fetch_resp.text = jsonl_content
+        fetch_resp.raise_for_status = MagicMock()
+
+        with patch("requests.post", return_value=submit_resp), \
+             patch("requests.get", side_effect=[poll_resp, fetch_resp]):
+            markdown = client.ocr(file_bytes=b"fake-image", filename="test.png")
+
+        assert "# OCR Result" in markdown
+        assert "Extracted text." in markdown

From 8420af6bb2996fc5f483cca2022a77d32e9ca724 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Thu, 21 May 2026 11:13:38 +0800
Subject: [PATCH 08/15] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=8F=91=E5=B8=83?=
 =?UTF-8?q?=E5=88=B0Pypi=E7=9A=84=E8=AF=B4=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 packages/markitdown-glmocr/README.md    | 48 +++++++++++++++++++++++++
 packages/markitdown-paddleocr/README.md | 48 +++++++++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/packages/markitdown-glmocr/README.md b/packages/markitdown-glmocr/README.md
index d0193d2ed..15c2b819e 100644
--- a/packages/markitdown-glmocr/README.md
+++ b/packages/markitdown-glmocr/README.md
@@ -191,6 +191,54 @@ glmocr SDK 返回的结构化数据支持以下标签：
 - `Pillow>=9.0.0` - 图像处理
 - `glmocr` - 智谱 OCR SDK（可选，AI 功能需要）
 
+## 发布到 PyPI
+
+### 前置条件
+
+- 确保已安装 `build` 和 `twine`：
+
+```bash
+pip install build twine
+```
+
+- 确保环境变量 `PyPI_API_Token` 已设置为你的 PyPI API Token：
+
+```bash
+export PyPI_API_Token="pypi-..."
+```
+
+### 发布步骤
+
+```bash
+# 1. 进入项目根目录（包含 pyproject.toml）
+cd packages/markitdown-glmocr
+
+# 2. 构建分发包（生成 dist/ 目录下的 .tar.gz 和 .whl 文件）
+python -m build
+
+# 3. 检查包的元数据和内容
+twine check dist/*
+
+# 4. 上传到 PyPI（使用环境变量中的 Token 认证）
+twine upload dist/* -u __token__ -p "$PyPI_API_Token"
+```
+
+### 发布到 TestPyPI（测试）
+
+```bash
+# 先上传到 TestPyPI 验证包是否正确
+twine upload --repository testpypi dist/* -u __token__ -p "$PyPI_API_Token"
+
+# 从 TestPyPI 安装验证
+pip install --index-url https://test.pypi.org/simple/ markitdown-glmocr
+```
+
+### 注意事项
+
+- 发布前确保 `pyproject.toml` 中的版本号已更新
+- 同一版本号不能重复上传，如需修正必须 bump 版本号
+- `PyPI_API_Token` 环境变量切勿硬编码到脚本或提交到代码仓库
+
 ## 许可证
 
 MIT
\ No newline at end of file
diff --git a/packages/markitdown-paddleocr/README.md b/packages/markitdown-paddleocr/README.md
index 4685a343f..2f4c49a13 100644
--- a/packages/markitdown-paddleocr/README.md
+++ b/packages/markitdown-paddleocr/README.md
@@ -152,6 +152,54 @@ PaddleOcrConverter.convert()
 - `Pillow>=9.0.0` - 图像处理
 - `requests>=2.28.0` - HTTP 请求
 
+## 发布到 PyPI
+
+### 前置条件
+
+- 确保已安装 `build` 和 `twine`：
+
+```bash
+pip install build twine
+```
+
+- 确保环境变量 `PyPI_API_Token` 已设置为你的 PyPI API Token：
+
+```bash
+export PyPI_API_Token="pypi-..."
+```
+
+### 发布步骤
+
+```bash
+# 1. 进入项目根目录（包含 pyproject.toml）
+cd packages/markitdown-paddleocr
+
+# 2. 构建分发包（生成 dist/ 目录下的 .tar.gz 和 .whl 文件）
+python -m build
+
+# 3. 检查包的元数据和内容
+twine check dist/*
+
+# 4. 上传到 PyPI（使用环境变量中的 Token 认证）
+twine upload dist/* -u __token__ -p "$PyPI_API_Token"
+```
+
+### 发布到 TestPyPI（测试）
+
+```bash
+# 先上传到 TestPyPI 验证包是否正确
+twine upload --repository testpypi dist/* -u __token__ -p "$PyPI_API_Token"
+
+# 从 TestPyPI 安装验证
+pip install --index-url https://test.pypi.org/simple/ markitdown-paddleocr
+```
+
+### 注意事项
+
+- 发布前确保 `pyproject.toml` 中的版本号已更新
+- 同一版本号不能重复上传，如需修正必须 bump 版本号
+- `PyPI_API_Token` 环境变量切勿硬编码到脚本或提交到代码仓库
+
 ## 许可证
 
 MIT

From 7ba7e585c543d4f1ec7becd21348c5bb4a2a32ad Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Thu, 21 May 2026 13:34:35 +0800
Subject: [PATCH 09/15] =?UTF-8?q?=E4=BC=98=E5=85=88=E9=80=89=E6=8B=A9glmoc?=
 =?UTF-8?q?r=EF=BC=8Cglmocr=20=E5=A4=B1=E8=B4=A5=EF=BC=88=E6=8A=9B?=
 =?UTF-8?q?=E5=BC=82=E5=B8=B8=EF=BC=89=E2=86=92=20PaddleOcrConverter=20?=
 =?UTF-8?q?=E5=B0=9D=E8=AF=95=20=E2=86=92=20=E5=86=85=E7=BD=AE=20PdfConver?=
 =?UTF-8?q?ter=20=E5=85=9C=E5=BA=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/markitdown_glmocr/_converter.py       | 202 +++++++++++-------
 .../src/markitdown_glmocr/_plugin.py          |  31 ++-
 .../src/markitdown_paddleocr/_converter.py    | 141 +++++++++---
 .../src/markitdown_paddleocr/_plugin.py       |  29 ++-
 .../tests/test_converter.py                   |  34 +--
 .../markitdown/src/markitdown/__main__.py     |  23 +-
 6 files changed, 324 insertions(+), 136 deletions(-)

diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
index cafee2ec0..afa7500d4 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
@@ -1,11 +1,15 @@
 """GlmOcr PDF/Image Converter - Intelligent PDF and Image to Markdown conversion."""
 
 import io
+import logging
 import sys
 from typing import Any, BinaryIO, Optional
 
 from markitdown import DocumentConverter, DocumentConverterResult, StreamInfo
-from markitdown._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
+from markitdown._exceptions import (
+    MISSING_DEPENDENCY_MESSAGE,
+    MissingDependencyException,
+)
 
 from ._config import GlmOcrConfig
 
@@ -37,10 +41,13 @@
 ACCEPTED_FILE_EXTENSIONS = [".pdf", ".jpg", ".jpeg", ".png"]
 
 
+logger = logging.getLogger(__name__)
+
+
 class GlmOcrConverter(DocumentConverter):
     """
     Intelligent PDF/Image converter using glmocr SDK.
-    
+
     Features:
     - Auto-detect page content type (plain text vs images/tables)
     - Plain text pages use pdfplumber/pdfminer (fast, free)
@@ -71,19 +78,21 @@ def __init__(
             raise ImportError(
                 "glmocr is required. Install with: pip install markitdown-glmocr[glmocr]"
             )
-        
+
         # Use config if provided
         if config:
             self.api_key = api_key or config.api_key
             self.timeout = timeout if timeout != 1800 else config.timeout
-            self.enable_layout = enable_layout if enable_layout else config.enable_layout
+            self.enable_layout = (
+                enable_layout if enable_layout else config.enable_layout
+            )
             self.force_ai = force_ai or config.force_ai
         else:
             self.api_key = api_key
             self.timeout = timeout
             self.enable_layout = enable_layout
             self.force_ai = force_ai
-        
+
         # Lazy init GlmOcr instance
         self._glmocr: Optional[GlmOcr] = None
 
@@ -127,12 +136,12 @@ def convert(
                     extension=".pdf",
                     feature="pdf",
                 )
-            ) from _dependency_exc_info[1].with_traceback(
-                _dependency_exc_info[2]
-            )
+            ) from _dependency_exc_info[1].with_traceback(_dependency_exc_info[2])
 
         extension = (stream_info.extension or "").lower()
 
+        logger.info("GlmOcrConverter: 开始转换, 文件类型=%s", extension)
+
         # Image files: use glmocr directly
         if extension in (".jpg", ".jpeg", ".png"):
             return self._convert_image(file_stream, extension)
@@ -140,61 +149,77 @@ def convert(
         # PDF files: use hybrid approach
         return self._convert_pdf(file_stream)
 
-    def _convert_image(self, file_stream: BinaryIO, extension: str = ".png") -> DocumentConverterResult:
+    def _convert_image(
+        self, file_stream: BinaryIO, extension: str = ".png"
+    ) -> DocumentConverterResult:
         """Convert image file using glmocr SDK."""
         img_bytes = file_stream.read()
 
+        logger.info("GlmOcrConverter: 开始 OCR 识别图片, 格式=%s", extension)
         try:
             result = self._get_glmocr().parse(img_bytes)
-
-            # Check for errors
-            d = result.to_dict()
-            if "error" in d:
-                return DocumentConverterResult(markdown="")
-
-            return DocumentConverterResult(
-                markdown=result.markdown_result or ""
-            )
         except Exception as e:
-            return DocumentConverterResult(
-                markdown=f"<!-- Error converting image: {e} -->"
+            logger.error(
+                "GlmOcrConverter: 图片 OCR 识别异常, 格式=%s, 错误=%s", extension, e
+            )
+            raise
+
+        # Check for errors
+        d = result.to_dict()
+        if "error" in d:
+            logger.error(
+                "GlmOcrConverter: 图片 OCR 返回错误, 格式=%s, 错误=%s",
+                extension,
+                d["error"],
+            )
+            raise RuntimeError(
+                f"GlmOcrConverter: glmocr SDK returned error: {d['error']}"
             )
 
+        markdown = result.markdown_result or ""
+        logger.info("GlmOcrConverter: 图片 OCR 识别完成, 输出长度=%d", len(markdown))
+        return DocumentConverterResult(markdown=markdown)
+
     def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
         pdf_stream = io.BytesIO(file_stream.read())
         markdown_parts = []
 
-        try:
-            with pdfplumber.open(pdf_stream) as pdf:
-                for page_num, page in enumerate(pdf.pages):
-                    # Analyze page type
-                    page_type = self._analyze_page(page)
-
-                    # Choose processing method
-                    if self.force_ai or page_type != "plain_text":
-                        # Complex content: use glmocr
-                        markdown = self._convert_with_glmocr(page, page_num)
-                    else:
-                        # Plain text: use pdfplumber
-                        markdown = self._extract_text_with_tables(page)
-
-                    if markdown.strip():
-                        markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
-
-                    page.close()
-
-            markdown = "\n\n".join(markdown_parts).strip()
-
-        except Exception:
-            # Fallback to pdfminer
-            pdf_stream.seek(0)
-            markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
-
-        # Final fallback
-        if not markdown:
-            pdf_stream.seek(0)
-            markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
-
+        with pdfplumber.open(pdf_stream) as pdf:
+            total_pages = len(pdf.pages)
+            logger.info("GlmOcrConverter: 开始处理 PDF, 总页数=%d", total_pages)
+
+            for page_num, page in enumerate(pdf.pages):
+                # Analyze page type
+                page_type = self._analyze_page(page)
+
+                # Choose processing method
+                if self.force_ai or page_type != "plain_text":
+                    # Complex content: use glmocr
+                    # Let exceptions propagate so the framework can try the next converter
+                    logger.info(
+                        "GlmOcrConverter: 第 %d/%d 页, 类型=%s, 使用 glmocr OCR",
+                        page_num + 1,
+                        total_pages,
+                        page_type,
+                    )
+                    markdown = self._convert_with_glmocr(page, page_num)
+                else:
+                    # Plain text: use pdfplumber
+                    logger.info(
+                        "GlmOcrConverter: 第 %d/%d 页, 类型=%s, 使用 pdfplumber",
+                        page_num + 1,
+                        total_pages,
+                        page_type,
+                    )
+                    markdown = self._extract_text_with_tables(page)
+
+                if markdown.strip():
+                    markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
+
+                page.close()
+
+        markdown = "\n\n".join(markdown_parts).strip()
+        logger.info("GlmOcrConverter: PDF 转换完成, 输出长度=%d", len(markdown))
         return DocumentConverterResult(markdown=markdown)
 
     def _analyze_page(self, page: Any) -> str:
@@ -202,36 +227,56 @@ def _analyze_page(self, page: Any) -> str:
         # Check for images
         if hasattr(page, "images") and page.images:
             return "complex"
-        
+
         # Check for tables
         tables = page.find_tables()
         if tables:
             return "complex"
-        
+
         # Check for graphics/curves
         if hasattr(page, "curves") and page.curves:
             return "complex"
-        
+
         return "plain_text"
 
     def _convert_with_glmocr(self, page: Any, page_num: int) -> str:
-        """Convert page using glmocr SDK."""
+        """Convert page using glmocr SDK.
+
+        Raises RuntimeError on OCR failure so the framework can try the next converter.
+        """
+        # Render page to image
+        img = page.to_image(resolution=150)
+        img_bytes = io.BytesIO()
+        img.save(img_bytes, format="PNG")
+
+        logger.info("GlmOcrConverter: glmocr SDK 开始识别第 %d 页", page_num + 1)
         try:
-            # Render page to image
-            img = page.to_image(resolution=150)
-            img_bytes = io.BytesIO()
-            img.save(img_bytes, format="PNG")
             result = self._get_glmocr().parse(img_bytes.getvalue())
-            
-            # Check for errors
-            d = result.to_dict()
-            if "error" in d:
-                return self._extract_text_with_tables(page)
-            
-            return result.markdown_result or ""
-            
-        except Exception:
-            return self._extract_text_with_tables(page)
+        except Exception as e:
+            logger.error(
+                "GlmOcrConverter: glmocr SDK 第 %d 页识别异常, 错误=%s", page_num + 1, e
+            )
+            raise
+
+        # Check for errors
+        d = result.to_dict()
+        if "error" in d:
+            logger.error(
+                "GlmOcrConverter: glmocr SDK 第 %d 页返回错误, 错误=%s",
+                page_num + 1,
+                d["error"],
+            )
+            raise RuntimeError(
+                f"GlmOcrConverter: glmocr SDK returned error on page {page_num + 1}: {d['error']}"
+            )
+
+        markdown = result.markdown_result or ""
+        logger.info(
+            "GlmOcrConverter: glmocr SDK 第 %d 页识别完成, 输出长度=%d",
+            page_num + 1,
+            len(markdown),
+        )
+        return markdown
 
     def _extract_text_with_tables(self, page: Any) -> str:
         """Extract text and tables from page."""
@@ -280,9 +325,14 @@ def _table_to_markdown(self, table: list[list[str]]) -> str:
         lines = []
         for row_idx, row in enumerate(table):
             padded_row = row + [""] * (len(col_widths) - len(row))
-            line = "| " + " | ".join(
-                str(cell).ljust(width) for cell, width in zip(padded_row, col_widths)
-            ) + " |"
+            line = (
+                "| "
+                + " | ".join(
+                    str(cell).ljust(width)
+                    for cell, width in zip(padded_row, col_widths)
+                )
+                + " |"
+            )
             lines.append(line)
 
             if row_idx == 0:
@@ -290,15 +340,15 @@ def _table_to_markdown(self, table: list[list[str]]) -> str:
                 lines.append(sep)
 
         return "\n".join(lines)
-    
+
     def close(self):
         """Close the GlmOcr instance."""
         if self._glmocr:
             self._glmocr.close()
             self._glmocr = None
-    
+
     def __enter__(self):
         return self
-    
+
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.close()
\ No newline at end of file
+        self.close()
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
index a940acf7d..5963dd43c 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
@@ -1,33 +1,46 @@
 """Plugin registration for markitdown-glmocr."""
 
+import logging
 from typing import Any
+
 from markitdown import MarkItDown
 
 from ._converter import GlmOcrConverter
 
-
 __plugin_interface_version__ = 1
 
+logger = logging.getLogger(__name__)
+
 
 def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     """
     Register markitdown-glmocr converter.
-    
+
     Config sources (priority high to low):
     1. kwargs parameters
     2. Environment variables (ZHIPU_API_KEY)
     3. .env file
     4. Built-in defaults
     """
+    logger.info("markitdown-glmocr: 开始注册插件")
+
     # Register converter
-    PRIORITY_GLMOCR = -1.0
-    
-    markitdown.register_converter(
-        GlmOcrConverter(
+    # Priority -2.0: higher priority than PaddleOcrConverter (-1.0),
+    # so glmocr is tried first and paddleocr serves as fallback.
+    PRIORITY_GLMOCR = -2.0
+
+    try:
+        converter = GlmOcrConverter(
             api_key=kwargs.get("api_key"),
             timeout=kwargs.get("timeout", 1800),
             enable_layout=kwargs.get("enable_layout", False),
             force_ai=kwargs.get("force_ai", False),
-        ),
-        priority=PRIORITY_GLMOCR,
-    )
\ No newline at end of file
+        )
+        markitdown.register_converter(
+            converter,
+            priority=PRIORITY_GLMOCR,
+        )
+        logger.info("markitdown-glmocr: 插件注册成功, priority=%.1f", PRIORITY_GLMOCR)
+    except Exception as e:
+        logger.error("markitdown-glmocr: 插件注册失败, 错误=%s", e)
+        raise
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
index 6d8ae5e63..627b2dfd5 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
@@ -1,11 +1,15 @@
 """PaddleOcr Converter - PDF/Image to Markdown using PaddleOCR cloud API."""
 
 import io
+import logging
 import sys
 from typing import Any, BinaryIO, Optional
 
 from markitdown import DocumentConverter, DocumentConverterResult, StreamInfo
-from markitdown._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
+from markitdown._exceptions import (
+    MISSING_DEPENDENCY_MESSAGE,
+    MissingDependencyException,
+)
 
 from ._config import PaddleOcrConfig
 from ._paddle_client import PaddleClient
@@ -30,6 +34,9 @@
 ACCEPTED_FILE_EXTENSIONS = [".pdf", ".jpg", ".jpeg", ".png"]
 
 
+logger = logging.getLogger(__name__)
+
+
 class PaddleOcrConverter(DocumentConverter):
     """Intelligent PDF/Image converter using PaddleOCR cloud API.
 
@@ -70,12 +77,20 @@ def __init__(
         if config:
             self.token = token or config.token
             self.model = model if model != "PaddleOCR-VL-1.5" else config.model
-            self.poll_interval = poll_interval if poll_interval != 2.0 else config.poll_interval
-            self.poll_timeout = poll_timeout if poll_timeout != 300.0 else config.poll_timeout
+            self.poll_interval = (
+                poll_interval if poll_interval != 2.0 else config.poll_interval
+            )
+            self.poll_timeout = (
+                poll_timeout if poll_timeout != 300.0 else config.poll_timeout
+            )
             self.force_ai = force_ai or config.force_ai
-            self.use_doc_orientation_classify = use_doc_orientation_classify or config.use_doc_orientation_classify
+            self.use_doc_orientation_classify = (
+                use_doc_orientation_classify or config.use_doc_orientation_classify
+            )
             self.use_doc_unwarping = use_doc_unwarping or config.use_doc_unwarping
-            self.use_chart_recognition = use_chart_recognition or config.use_chart_recognition
+            self.use_chart_recognition = (
+                use_chart_recognition or config.use_chart_recognition
+            )
         else:
             self.token = token
             self.model = model
@@ -105,12 +120,25 @@ def _get_client(self) -> PaddleClient:
             self._client = PaddleClient(config=config)
         return self._client
 
+    def _has_token(self) -> bool:
+        """Check if a valid token is available."""
+        if self.token:
+            return True
+        import os
+
+        return bool(os.environ.get("BAIDU_PADDLE_TOKEN", ""))
+
     def accepts(
         self,
         file_stream: BinaryIO,
         stream_info: StreamInfo,
         **kwargs: Any,
     ) -> bool:
+        # Without a token, PaddleOCR API cannot work — decline so other
+        # converters (e.g. GlmOcrConverter) get a chance.
+        if not self._has_token():
+            return False
+
         mimetype = (stream_info.mimetype or "").lower()
         extension = (stream_info.extension or "").lower()
 
@@ -136,12 +164,12 @@ def convert(
                     extension=".pdf",
                     feature="pdf",
                 )
-            ) from _dependency_exc_info[1].with_traceback(
-                _dependency_exc_info[2]
-            )
+            ) from _dependency_exc_info[1].with_traceback(_dependency_exc_info[2])
 
         extension = (stream_info.extension or "").lower()
 
+        logger.info("PaddleOcrConverter: 开始转换, 文件类型=%s", extension)
+
         # Image files: use PaddleOCR directly
         if extension in (".jpg", ".jpeg", ".png"):
             return self._convert_image(file_stream, extension)
@@ -149,36 +177,68 @@ def convert(
         # PDF files: use hybrid approach
         return self._convert_pdf(file_stream)
 
-    def _convert_image(self, file_stream: BinaryIO, extension: str = ".png") -> DocumentConverterResult:
+    def _convert_image(
+        self, file_stream: BinaryIO, extension: str = ".png"
+    ) -> DocumentConverterResult:
         """Convert image file using PaddleOCR API."""
         img_bytes = file_stream.read()
         filename = f"image{extension}"
 
+        logger.info("PaddleOcrConverter: 开始 OCR 识别图片, 格式=%s", extension)
         try:
             markdown = self._get_client().ocr(file_bytes=img_bytes, filename=filename)
-            return DocumentConverterResult(markdown=markdown)
         except Exception as e:
-            return DocumentConverterResult(
-                markdown=f"<!-- Error converting image with PaddleOCR: {e} -->"
+            logger.error(
+                "PaddleOcrConverter: 图片 OCR 识别异常, 格式=%s, 错误=%s", extension, e
             )
+            raise
+
+        logger.info("PaddleOcrConverter: 图片 OCR 识别完成, 输出长度=%d", len(markdown))
+        return DocumentConverterResult(markdown=markdown)
 
     def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
         """Convert PDF using hybrid approach (pdfplumber for text, PaddleOCR for complex pages)."""
         pdf_stream = io.BytesIO(file_stream.read())
         markdown_parts = []
+        ocr_failed = False
 
         try:
             with pdfplumber.open(pdf_stream) as pdf:
+                total_pages = len(pdf.pages)
+                logger.info("PaddleOcrConverter: 开始处理 PDF, 总页数=%d", total_pages)
+
                 for page_num, page in enumerate(pdf.pages):
                     # Analyze page type
                     page_type = self._analyze_page(page)
 
                     # Choose processing method
                     if self.force_ai or page_type != "plain_text":
-                        # Complex content: use PaddleOCR
-                        markdown = self._convert_with_paddleocr(page, page_num)
+                        # Complex content: try PaddleOCR, fallback to pdfplumber on failure
+                        logger.info(
+                            "PaddleOcrConverter: 第 %d/%d 页, 类型=%s, 使用 PaddleOCR",
+                            page_num + 1,
+                            total_pages,
+                            page_type,
+                        )
+                        try:
+                            markdown = self._convert_with_paddleocr(page, page_num)
+                        except Exception as e:
+                            logger.warning(
+                                "PaddleOcrConverter: 第 %d/%d 页 OCR 失败, 降级为 pdfplumber, 错误=%s",
+                                page_num + 1,
+                                total_pages,
+                                e,
+                            )
+                            ocr_failed = True
+                            markdown = self._extract_text_with_tables(page)
                     else:
                         # Plain text: use pdfplumber
+                        logger.info(
+                            "PaddleOcrConverter: 第 %d/%d 页, 类型=%s, 使用 pdfplumber",
+                            page_num + 1,
+                            total_pages,
+                            page_type,
+                        )
                         markdown = self._extract_text_with_tables(page)
 
                     if markdown.strip():
@@ -188,7 +248,10 @@ def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
 
             markdown = "\n\n".join(markdown_parts).strip()
 
-        except Exception:
+        except Exception as e:
+            logger.error(
+                "PaddleOcrConverter: PDF 处理异常, 降级为 pdfminer, 错误=%s", e
+            )
             # Fallback to pdfminer
             pdf_stream.seek(0)
             markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
@@ -198,6 +261,15 @@ def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
             pdf_stream.seek(0)
             markdown = pdfminer.high_level.extract_text(pdf_stream) or ""
 
+        # If OCR failed and result is empty, raise so the framework can try
+        # the next converter (e.g. GlmOcrConverter) instead of returning empty.
+        if ocr_failed and not markdown.strip():
+            logger.error("PaddleOcrConverter: OCR 失败且所有兜底结果为空, 抛出异常")
+            raise RuntimeError(
+                "PaddleOcrConverter: OCR failed and all fallbacks returned empty"
+            )
+
+        logger.info("PaddleOcrConverter: PDF 转换完成, 输出长度=%d", len(markdown))
         return DocumentConverterResult(markdown=markdown)
 
     def _analyze_page(self, page: Any) -> str:
@@ -219,21 +291,31 @@ def _analyze_page(self, page: Any) -> str:
 
     def _convert_with_paddleocr(self, page: Any, page_num: int) -> str:
         """Convert page using PaddleOCR API."""
-        try:
-            # Render page to image
-            img = page.to_image(resolution=150)
-            img_bytes = io.BytesIO()
-            img.save(img_bytes, format="PNG")
+        # Render page to image
+        img = page.to_image(resolution=150)
+        img_bytes = io.BytesIO()
+        img.save(img_bytes, format="PNG")
 
+        logger.info("PaddleOcrConverter: PaddleOCR API 开始识别第 %d 页", page_num + 1)
+        try:
             markdown = self._get_client().ocr(
                 file_bytes=img_bytes.getvalue(),
                 filename=f"page_{page_num + 1}.png",
             )
-            return markdown
+        except Exception as e:
+            logger.error(
+                "PaddleOcrConverter: PaddleOCR API 第 %d 页识别异常, 错误=%s",
+                page_num + 1,
+                e,
+            )
+            raise
 
-        except Exception:
-            # Fallback to pdfplumber text extraction
-            return self._extract_text_with_tables(page)
+        logger.info(
+            "PaddleOcrConverter: PaddleOCR API 第 %d 页识别完成, 输出长度=%d",
+            page_num + 1,
+            len(markdown),
+        )
+        return markdown
 
     def _extract_text_with_tables(self, page: Any) -> str:
         """Extract text and tables from page."""
@@ -282,9 +364,14 @@ def _table_to_markdown(self, table: list[list[str]]) -> str:
         lines = []
         for row_idx, row in enumerate(table):
             padded_row = row + [""] * (len(col_widths) - len(row))
-            line = "| " + " | ".join(
-                str(cell).ljust(width) for cell, width in zip(padded_row, col_widths)
-            ) + " |"
+            line = (
+                "| "
+                + " | ".join(
+                    str(cell).ljust(width)
+                    for cell, width in zip(padded_row, col_widths)
+                )
+                + " |"
+            )
             lines.append(line)
 
             if row_idx == 0:
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
index 09ae96e6d..d24916ac5 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
@@ -1,13 +1,16 @@
 """Plugin registration for markitdown-paddleocr."""
 
+import logging
 from typing import Any
+
 from markitdown import MarkItDown
 
 from ._converter import PaddleOcrConverter
 
-
 __plugin_interface_version__ = 1
 
+logger = logging.getLogger(__name__)
+
 
 def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     """Register markitdown-paddleocr converter.
@@ -17,19 +20,31 @@ def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     2. Environment variables (BAIDU_PADDLE_TOKEN)
     3. Built-in defaults
     """
+    logger.info("markitdown-paddleocr: 开始注册插件")
+
     # Register converter with higher priority than default PDF converter
     PRIORITY_PADDLEOCR = -1.0
 
-    markitdown.register_converter(
-        PaddleOcrConverter(
+    try:
+        converter = PaddleOcrConverter(
             token=kwargs.get("token"),
             model=kwargs.get("model", "PaddleOCR-VL-1.5"),
             poll_interval=kwargs.get("poll_interval", 2.0),
             poll_timeout=kwargs.get("poll_timeout", 300.0),
             force_ai=kwargs.get("force_ai", False),
-            use_doc_orientation_classify=kwargs.get("use_doc_orientation_classify", False),
+            use_doc_orientation_classify=kwargs.get(
+                "use_doc_orientation_classify", False
+            ),
             use_doc_unwarping=kwargs.get("use_doc_unwarping", False),
             use_chart_recognition=kwargs.get("use_chart_recognition", False),
-        ),
-        priority=PRIORITY_PADDLEOCR,
-    )
+        )
+        markitdown.register_converter(
+            converter,
+            priority=PRIORITY_PADDLEOCR,
+        )
+        logger.info(
+            "markitdown-paddleocr: 插件注册成功, priority=%.1f", PRIORITY_PADDLEOCR
+        )
+    except Exception as e:
+        logger.error("markitdown-paddleocr: 插件注册失败, 错误=%s", e)
+        raise
diff --git a/packages/markitdown-paddleocr/tests/test_converter.py b/packages/markitdown-paddleocr/tests/test_converter.py
index aaca74c8c..0e569dc94 100644
--- a/packages/markitdown-paddleocr/tests/test_converter.py
+++ b/packages/markitdown-paddleocr/tests/test_converter.py
@@ -10,28 +10,35 @@
 class TestPaddleOcrConverterAccepts:
     """Accepts method tests."""
 
-    def test_accepts_pdf_extension(self):
-        """Accept .pdf extension."""
-        converter = PaddleOcrConverter()
+    def test_accepts_pdf_extension_with_token(self):
+        """Accept .pdf extension when token is available."""
+        converter = PaddleOcrConverter(token="test-token")
         stream = io.BytesIO(b"%PDF-1.4")
         stream_info = MagicMock(extension=".pdf", mimetype=None)
         assert converter.accepts(stream, stream_info) is True
 
-    def test_accepts_pdf_mimetype(self):
-        """Accept PDF MIME type."""
-        converter = PaddleOcrConverter()
+    def test_accepts_pdf_mimetype_with_token(self):
+        """Accept PDF MIME type when token is available."""
+        converter = PaddleOcrConverter(token="test-token")
         stream = io.BytesIO(b"%PDF-1.4")
         stream_info = MagicMock(extension=None, mimetype="application/pdf")
         assert converter.accepts(stream, stream_info) is True
 
-    def test_accepts_image_extensions(self):
-        """Accept image extensions."""
-        converter = PaddleOcrConverter()
+    def test_accepts_image_extensions_with_token(self):
+        """Accept image extensions when token is available."""
+        converter = PaddleOcrConverter(token="test-token")
         for ext in [".jpg", ".jpeg", ".png"]:
             stream = io.BytesIO(b"fake")
             stream_info = MagicMock(extension=ext, mimetype=None)
             assert converter.accepts(stream, stream_info) is True
 
+    def test_rejects_without_token(self):
+        """Reject all files when no token is available."""
+        converter = PaddleOcrConverter()  # no token
+        stream = io.BytesIO(b"%PDF-1.4")
+        stream_info = MagicMock(extension=".pdf", mimetype="application/pdf")
+        assert converter.accepts(stream, stream_info) is False
+
     def test_rejects_non_supported(self):
         """Reject non-supported files."""
         converter = PaddleOcrConverter()
@@ -92,8 +99,8 @@ def test_convert_image_success(self):
         assert "# Image Title" in result.markdown
         mock_client.ocr.assert_called_once()
 
-    def test_convert_image_error(self):
-        """Convert image with PaddleOCR error returns comment."""
+    def test_convert_image_error_raises(self):
+        """Convert image with PaddleOCR error raises exception (for framework fallback)."""
         converter = PaddleOcrConverter(token="test-token")
 
         mock_client = MagicMock()
@@ -102,9 +109,8 @@ def test_convert_image_error(self):
 
         stream = io.BytesIO(b"fake-image")
         stream_info = MagicMock(extension=".png", mimetype="image/png")
-        result = converter.convert(stream, stream_info)
-
-        assert "Error converting image" in result.markdown
+        with pytest.raises(Exception, match="API Error"):
+            converter.convert(stream, stream_info)
 
 
 class TestPaddleOcrConverterPdf:
diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py
index 6085ad6bb..934b3df72 100644
--- a/packages/markitdown/src/markitdown/__main__.py
+++ b/packages/markitdown/src/markitdown/__main__.py
@@ -2,12 +2,14 @@
 #
 # SPDX-License-Identifier: MIT
 import argparse
-import sys
 import codecs
-from textwrap import dedent
+import logging
+import sys
 from importlib.metadata import entry_points
+from textwrap import dedent
+
 from .__about__ import __version__
-from ._markitdown import MarkItDown, StreamInfo, DocumentConverterResult
+from ._markitdown import DocumentConverterResult, MarkItDown, StreamInfo
 
 
 def main():
@@ -104,6 +106,14 @@ def main():
         help="List installed 3rd-party plugins. Plugins are loaded when using the -p or --use-plugin option.",
     )
 
+    parser.add_argument(
+        "--log-level",
+        type=str,
+        default="WARNING",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        help="Set the logging level (default: WARNING). Use INFO or DEBUG to see plugin logs.",
+    )
+
     parser.add_argument(
         "--keep-data-uris",
         action="store_true",
@@ -113,6 +123,13 @@ def main():
     parser.add_argument("filename", nargs="?")
     args = parser.parse_args()
 
+    # Configure logging
+    logging.basicConfig(
+        level=getattr(logging, args.log_level),
+        format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
+        datefmt="%H:%M:%S",
+    )
+
     # Parse the extension hint
     extension_hint = args.extension
     if extension_hint is not None:

From e88628cd64915376e832e31cb07472ecdfdec126 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Thu, 21 May 2026 14:59:08 +0800
Subject: [PATCH 10/15] =?UTF-8?q?=E6=89=B9=E9=87=8Focr?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/markitdown_glmocr/_config.py          |  20 +-
 .../src/markitdown_glmocr/_converter.py       | 229 ++++++++-
 .../tests/test_ai_service.py                  | 103 -----
 .../markitdown-glmocr/tests/test_analyzer.py  | 131 ------
 .../markitdown-glmocr/tests/test_converter.py | 146 ++----
 .../tests/test_scan_detection.py              | 437 ++++++++++++++++++
 .../src/markitdown_paddleocr/_config.py       |  18 +
 .../src/markitdown_paddleocr/_converter.py    | 211 ++++++++-
 .../tests/test_scan_detection.py              | 430 +++++++++++++++++
 9 files changed, 1355 insertions(+), 370 deletions(-)
 delete mode 100644 packages/markitdown-glmocr/tests/test_ai_service.py
 delete mode 100644 packages/markitdown-glmocr/tests/test_analyzer.py
 create mode 100644 packages/markitdown-glmocr/tests/test_scan_detection.py
 create mode 100644 packages/markitdown-paddleocr/tests/test_scan_detection.py

diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
index d1122524b..6f2531fb8 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_config.py
@@ -1,6 +1,19 @@
 """Configuration for markitdown-glmocr."""
 
 from dataclasses import dataclass, field
+from enum import Enum
+
+
+class ScanDetectionMode(str, Enum):
+    """扫描检测模式。
+
+    - PAGE_BY_PAGE: 逐页分析，当前默认行为
+    - FIRST_PAGE_HINT: 首页是扫描件则全文档使用OCR
+    - SAMPLING: 抽样前N页，多数是扫描件则全部OCR
+    """
+    PAGE_BY_PAGE = "page_by_page"
+    FIRST_PAGE_HINT = "first_page_hint"
+    SAMPLING = "sampling"
 
 
 @dataclass
@@ -22,4 +35,9 @@ class GlmOcrConfig:
     enable_layout: bool = False
     
     # Processing strategy
-    force_ai: bool = False
\ No newline at end of file
+    force_ai: bool = False
+    
+    # Scan detection mode for optimization
+    scan_detection_mode: ScanDetectionMode = ScanDetectionMode.SAMPLING
+    scan_sample_pages: int = 3  # Number of pages to sample in SAMPLING mode
+    scan_text_threshold: int = 50  # Min text length to consider page as non-scanned
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
index afa7500d4..19fa0b90f 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
@@ -11,7 +11,7 @@
     MissingDependencyException,
 )
 
-from ._config import GlmOcrConfig
+from ._config import GlmOcrConfig, ScanDetectionMode
 
 # Import dependencies
 _dependency_exc_info = None
@@ -62,6 +62,9 @@ def __init__(
         timeout: int = 1800,
         enable_layout: bool = False,
         force_ai: bool = False,
+        scan_detection_mode: Optional[ScanDetectionMode] = None,
+        scan_sample_pages: Optional[int] = None,
+        scan_text_threshold: Optional[int] = None,
         config: Optional[GlmOcrConfig] = None,
     ):
         """
@@ -72,6 +75,9 @@ def __init__(
             timeout: Request timeout in seconds (default: 1800)
             enable_layout: Enable layout detection (default: False)
             force_ai: Force all pages to use AI (default: False)
+            scan_detection_mode: 扫描检测模式，优化扫描PDF处理
+            scan_sample_pages: SAMPLING模式下抽样页数 (default: 3)
+            scan_text_threshold: 判定为扫描件的最小文本长度阈值 (default: 50)
             config: Optional GlmOcrConfig instance
         """
         if glmocr is None:
@@ -87,11 +93,35 @@ def __init__(
                 enable_layout if enable_layout else config.enable_layout
             )
             self.force_ai = force_ai or config.force_ai
+            self.scan_detection_mode = (
+                scan_detection_mode
+                if scan_detection_mode is not None
+                else config.scan_detection_mode
+            )
+            self.scan_sample_pages = (
+                scan_sample_pages
+                if scan_sample_pages is not None
+                else config.scan_sample_pages
+            )
+            self.scan_text_threshold = (
+                scan_text_threshold
+                if scan_text_threshold is not None
+                else config.scan_text_threshold
+            )
         else:
             self.api_key = api_key
             self.timeout = timeout
             self.enable_layout = enable_layout
             self.force_ai = force_ai
+            self.scan_detection_mode = (
+                scan_detection_mode
+                if scan_detection_mode is not None
+                else ScanDetectionMode.SAMPLING
+            )
+            self.scan_sample_pages = scan_sample_pages if scan_sample_pages is not None else 3
+            self.scan_text_threshold = (
+                scan_text_threshold if scan_text_threshold is not None else 50
+            )
 
         # Lazy init GlmOcr instance
         self._glmocr: Optional[GlmOcr] = None
@@ -182,36 +212,84 @@ def _convert_image(
 
     def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
         pdf_stream = io.BytesIO(file_stream.read())
+        pdf_bytes = pdf_stream.getvalue()  # Keep original bytes for batch OCR
         markdown_parts = []
 
         with pdfplumber.open(pdf_stream) as pdf:
             total_pages = len(pdf.pages)
             logger.info("GlmOcrConverter: 开始处理 PDF, 总页数=%d", total_pages)
 
-            for page_num, page in enumerate(pdf.pages):
-                # Analyze page type
-                page_type = self._analyze_page(page)
+            # Optimization: detect if entire PDF is scanned
+            all_scanned = self._detect_all_scanned(pdf)
+
+            if all_scanned and not self.force_ai:
+                # Batch mode: upload entire PDF to glmocr SDK (single API call)
+                logger.info(
+                    "GlmOcrConverter: 全文档扫描模式, 批量上传PDF, 页数=%d",
+                    total_pages,
+                )
+                try:
+                    markdown = self._convert_pdf_batch(pdf_bytes)
+                    if markdown.strip():
+                        logger.info(
+                            "GlmOcrConverter: 批量OCR完成, 输出长度=%d",
+                            len(markdown),
+                        )
+                        return DocumentConverterResult(markdown=markdown)
+                except Exception as e:
+                    logger.warning(
+                        "GlmOcrConverter: 批量OCR失败, 降级为逐页处理, 错误=%s",
+                        e,
+                    )
+                    # Fall through to per-page processing
 
+            # Per-page processing (PAGE_BY_PAGE mode or batch failed)
+            for page_num, page in enumerate(pdf.pages):
                 # Choose processing method
-                if self.force_ai or page_type != "plain_text":
-                    # Complex content: use glmocr
-                    # Let exceptions propagate so the framework can try the next converter
+                if self.force_ai or all_scanned:
+                    # All scanned (after batch failed) or force_ai
                     logger.info(
-                        "GlmOcrConverter: 第 %d/%d 页, 类型=%s, 使用 glmocr OCR",
+                        "GlmOcrConverter: 第 %d/%d 页, 使用 glmocr OCR",
                         page_num + 1,
                         total_pages,
-                        page_type,
                     )
-                    markdown = self._convert_with_glmocr(page, page_num)
+                    try:
+                        markdown = self._convert_with_glmocr(page, page_num)
+                    except Exception as e:
+                        logger.error(
+                            "GlmOcrConverter: 第 %d/%d 页识别异常, 错误=%s",
+                            page_num + 1,
+                            e,
+                        )
+                        raise
                 else:
-                    # Plain text: use pdfplumber
-                    logger.info(
-                        "GlmOcrConverter: 第 %d/%d 页, 类型=%s, 使用 pdfplumber",
-                        page_num + 1,
-                        total_pages,
-                        page_type,
-                    )
-                    markdown = self._extract_text_with_tables(page)
+                    # Per-page analysis (PAGE_BY_PAGE mode or non-scanned doc)
+                    page_type = self._analyze_page(page)
+
+                    if page_type != "plain_text":
+                        logger.info(
+                            "GlmOcrConverter: 第 %d/%d 页, 类型=%s, 使用 glmocr OCR",
+                            page_num + 1,
+                            total_pages,
+                            page_type,
+                        )
+                        try:
+                            markdown = self._convert_with_glmocr(page, page_num)
+                        except Exception as e:
+                            logger.error(
+                                "GlmOcrConverter: 第 %d/%d 页识别异常, 错误=%s",
+                                page_num + 1,
+                                e,
+                            )
+                            raise
+                    else:
+                        logger.info(
+                            "GlmOcrConverter: 第 %d/%d 页, 类型=%s, 使用 pdfplumber",
+                            page_num + 1,
+                            total_pages,
+                            page_type,
+                        )
+                        markdown = self._extract_text_with_tables(page)
 
                 if markdown.strip():
                     markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
@@ -222,6 +300,34 @@ def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
         logger.info("GlmOcrConverter: PDF 转换完成, 输出长度=%d", len(markdown))
         return DocumentConverterResult(markdown=markdown)
 
+    def _convert_pdf_batch(self, pdf_bytes: bytes) -> str:
+        """Convert entire PDF in a single API call.
+
+        More efficient for scanned PDFs: one API call instead of N calls for N pages.
+
+        Args:
+            pdf_bytes: Raw PDF file content.
+
+        Returns:
+            Markdown text from all pages.
+        """
+        logger.info("GlmOcrConverter: 批量上传PDF到glmocr SDK, 大小=%d bytes", len(pdf_bytes))
+        result = self._get_glmocr().parse(pdf_bytes)
+
+        # Check for errors
+        d = result.to_dict()
+        if "error" in d:
+            logger.error(
+                "GlmOcrConverter: 批量OCR返回错误, 错误=%s",
+                d["error"],
+            )
+            raise RuntimeError(
+                f"GlmOcrConverter: glmocr SDK batch OCR error: {d['error']}"
+            )
+
+        markdown = result.markdown_result or ""
+        return markdown
+
     def _analyze_page(self, page: Any) -> str:
         """Analyze page content type."""
         # Check for images
@@ -239,6 +345,93 @@ def _analyze_page(self, page: Any) -> str:
 
         return "plain_text"
 
+    def _is_scanned_page(self, page: Any) -> bool:
+        """Check if a page is likely a scanned image.
+
+        A page is considered scanned if:
+        1. It contains images, AND
+        2. It has very little extractable text (below threshold)
+
+        Args:
+            page: pdfplumber page object
+
+        Returns:
+            True if the page appears to be a scanned image
+        """
+        # Must have images to be a scan
+        has_images = hasattr(page, "images") and bool(page.images)
+        if not has_images:
+            return False
+
+        # Check extractable text length
+        try:
+            text = page.extract_text() or ""
+            text_len = len(text.strip())
+            # If there's substantial text, it might be a mixed page or
+            # a digital PDF with embedded images
+            if text_len >= self.scan_text_threshold:
+                return False
+        except Exception:
+            # If text extraction fails, assume it's a scan
+            return True
+
+        return True
+
+    def _detect_all_scanned(self, pdf: Any) -> bool:
+        """Detect if entire PDF is scanned based on scan_detection_mode.
+
+        Optimization: When first few pages are scanned, we can assume
+        all pages are scanned and skip per-page analysis.
+
+        Args:
+            pdf: pdfplumber PDF object
+
+        Returns:
+            True if entire PDF should be treated as scanned
+        """
+        if self.scan_detection_mode == ScanDetectionMode.PAGE_BY_PAGE:
+            return False
+
+        total_pages = len(pdf.pages)
+        if total_pages == 0:
+            return False
+
+        if self.scan_detection_mode == ScanDetectionMode.FIRST_PAGE_HINT:
+            # Check only first page
+            first_page = pdf.pages[0]
+            is_scanned = self._is_scanned_page(first_page)
+            first_page.close()
+            if is_scanned:
+                logger.info(
+                    "GlmOcrConverter: 首页检测为扫描件, 模式=FIRST_PAGE_HINT, 全文档使用OCR"
+                )
+            return is_scanned
+
+        if self.scan_detection_mode == ScanDetectionMode.SAMPLING:
+            # Sample first N pages
+            sample_count = min(self.scan_sample_pages, total_pages)
+            scanned_count = 0
+
+            for i in range(sample_count):
+                page = pdf.pages[i]
+                if self._is_scanned_page(page):
+                    scanned_count += 1
+
+            # If majority of sampled pages are scanned, treat all as scanned
+            majority_threshold = sample_count // 2 + 1
+            all_scanned = scanned_count >= majority_threshold
+
+            if all_scanned:
+                logger.info(
+                    "GlmOcrConverter: 抽样检测 %d/%d 页为扫描件, 模式=SAMPLING, 全文档使用OCR",
+                    scanned_count,
+                    sample_count,
+                )
+
+            return all_scanned
+
+        return False
+
     def _convert_with_glmocr(self, page: Any, page_num: int) -> str:
         """Convert page using glmocr SDK.
 
diff --git a/packages/markitdown-glmocr/tests/test_ai_service.py b/packages/markitdown-glmocr/tests/test_ai_service.py
deleted file mode 100644
index dbbe06d50..000000000
--- a/packages/markitdown-glmocr/tests/test_ai_service.py
+++ /dev/null
@@ -1,103 +0,0 @@
-"""Tests for AI service with zai-sdk."""
-
-import io
-import pytest
-from unittest.mock import MagicMock, patch
-
-from markitdown_glmocr._ai_service import AIService, AIResult
-from markitdown_glmocr._config import GlmOcrConfig
-
-
-class TestAIService:
-    """AI Service tests with zai-sdk."""
-
-    def test_missing_zai_sdk_raises_error(self):
-        """Missing zai-sdk raises error."""
-        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", None):
-            with pytest.raises(ImportError, match="zai-sdk is required"):
-                AIService(api_key="test")
-
-    def test_missing_api_key_raises_error(self):
-        """Missing API key raises error."""
-        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", MagicMock()):
-            with pytest.raises(ValueError, match="API key is required"):
-                AIService(api_key="")
-
-    def test_successful_conversion(self):
-        """Successful conversion."""
-        # Mock ZhipuAiClient
-        mock_client = MagicMock()
-        mock_response = MagicMock()
-        mock_response.md_results = "<table><tr><td>Test</td></tr></table>"
-        mock_response.layout_details = []
-        mock_client.layout_parsing.create.return_value = mock_response
-
-        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
-            service = AIService(api_key="test-api-key")
-            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
-
-        assert result.success is True
-        assert "Test" in result.text
-
-    def test_html_table_conversion(self):
-        """HTML table to Markdown conversion."""
-        mock_client = MagicMock()
-        mock_response = MagicMock()
-        mock_response.md_results = '<table><tr><td>A</td><td>B</td></tr><tr><td>1</td><td>2</td></tr></table>'
-        mock_response.layout_details = []
-        mock_client.layout_parsing.create.return_value = mock_response
-
-        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
-            service = AIService(api_key="test-api-key")
-            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
-
-        assert result.success is True
-        assert "| A | B |" in result.text
-        assert "|---|---|" in result.text
-        assert "| 1 | 2 |" in result.text
-
-    def test_empty_result(self):
-        """Empty result handling."""
-        mock_client = MagicMock()
-        mock_response = MagicMock()
-        mock_response.md_results = ""
-        mock_response.layout_details = []
-        mock_client.layout_parsing.create.return_value = mock_response
-
-        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
-            service = AIService(api_key="test-api-key")
-            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
-
-        assert result.success is True
-        assert result.text == ""
-
-    def test_error_handling(self):
-        """Error handling."""
-        mock_client = MagicMock()
-        mock_client.layout_parsing.create.side_effect = Exception("API Error")
-
-        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
-            service = AIService(api_key="test-api-key")
-            result = service.image_to_markdown(io.BytesIO(b"fake-image"))
-
-        assert result.success is False
-        assert "API Error" in result.error
-
-    def test_base64_encoding(self):
-        """Test base64 encoding of image."""
-        mock_client = MagicMock()
-        mock_response = MagicMock()
-        mock_response.md_results = "test"
-        mock_response.layout_details = []
-        mock_client.layout_parsing.create.return_value = mock_response
-
-        with patch("markitdown_glmocr._ai_service.ZhipuAiClient", return_value=mock_client):
-            service = AIService(api_key="test-api-key")
-            result = service.image_to_markdown(io.BytesIO(b"fake-image"), "test.png")
-
-        assert result.success is True
-        
-        # Verify data URI was used
-        call_args = mock_client.layout_parsing.create.call_args
-        file_arg = call_args.kwargs['file']
-        assert file_arg.startswith("data:image/png;base64,")
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/tests/test_analyzer.py b/packages/markitdown-glmocr/tests/test_analyzer.py
deleted file mode 100644
index 6841f0b44..000000000
--- a/packages/markitdown-glmocr/tests/test_analyzer.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""Tests for page analyzer."""
-
-import pytest
-from unittest.mock import MagicMock
-
-from markitdown_glmocr._page_analyzer import (
-    PageType,
-    detect_images,
-    detect_tables,
-    analyze_page,
-)
-
-
-class TestDetectImages:
-    """图片检测测试"""
-
-    def test_no_images(self):
-        """无图片页面"""
-        page = MagicMock()
-        page.images = []
-        page.objects = {}
-
-        assert detect_images(page) is False
-
-    def test_has_images_via_images_attr(self):
-        """通过 page.images 检测图片"""
-        page = MagicMock()
-        page.images = [MagicMock(x0=0, y0=0, x1=100, y1=100)]
-
-        assert detect_images(page) is True
-
-    def test_has_images_via_objects(self):
-        """通过 page.objects 检测图片"""
-        page = MagicMock()
-        page.images = []
-        page.objects = {"image": [MagicMock()]}
-
-        assert detect_images(page) is True
-
-    def test_has_xobject_image(self):
-        """通过 XObject 检测图片"""
-        page = MagicMock()
-        page.images = []
-        page.objects = {
-            "xobject": [{"subtype": "Image"}]
-        }
-
-        assert detect_images(page) is True
-
-
-class TestDetectTables:
-    """表格检测测试"""
-
-    def test_no_tables(self):
-        """无表格页面"""
-        page = MagicMock()
-        page.extract_tables.return_value = []
-
-        assert detect_tables(page) is False
-
-    def test_has_tables_via_extract_tables(self):
-        """通过 extract_tables 检测表格"""
-        page = MagicMock()
-        page.extract_tables.return_value = [
-            [["A", "B", "C"], ["1", "2", "3"]]
-        ]
-
-        assert detect_tables(page) is True
-
-    def test_empty_table_not_detected(self):
-        """空表格不应被检测"""
-        page = MagicMock()
-        page.extract_tables.return_value = [
-            [["", "", ""], ["", "", ""]]
-        ]
-
-        assert detect_tables(page) is False
-
-    def test_has_table_lines(self):
-        """通过线条检测表格"""
-        page = MagicMock()
-        page.extract_tables.return_value = []
-
-        # 模拟网格线条
-        lines = []
-        for i in range(5):
-            # 水平线
-            lines.append({"height": 0.5, "width": 100})
-            # 垂直线
-            lines.append({"height": 100, "width": 0.5})
-
-        page.objects = {"line": lines}
-
-        assert detect_tables(page) is True
-
-
-class TestAnalyzePage:
-    """页面分析测试"""
-
-    def test_plain_text_page(self):
-        """纯文本页面"""
-        page = MagicMock()
-        page.images = []
-        page.objects = {}
-        page.extract_tables.return_value = []
-
-        assert analyze_page(page) == PageType.PLAIN_TEXT
-
-    def test_page_with_images(self):
-        """仅包含图片"""
-        page = MagicMock()
-        page.images = [MagicMock()]
-        page.extract_tables.return_value = []
-
-        assert analyze_page(page) == PageType.HAS_IMAGES
-
-    def test_page_with_tables(self):
-        """仅包含表格"""
-        page = MagicMock()
-        page.images = []
-        page.extract_tables.return_value = [[["A", "B"]]]
-
-        assert analyze_page(page) == PageType.HAS_TABLES
-
-    def test_complex_page(self):
-        """同时包含图片和表格"""
-        page = MagicMock()
-        page.images = [MagicMock()]
-        page.extract_tables.return_value = [[["A", "B"]]]
-
-        assert analyze_page(page) == PageType.COMPLEX
\ No newline at end of file
diff --git a/packages/markitdown-glmocr/tests/test_converter.py b/packages/markitdown-glmocr/tests/test_converter.py
index d48c75f2d..d91c7d995 100644
--- a/packages/markitdown-glmocr/tests/test_converter.py
+++ b/packages/markitdown-glmocr/tests/test_converter.py
@@ -2,43 +2,46 @@
 
 import io
 import pytest
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, patch, PropertyMock
 
-from markitdown_glmocr._converter import GlmOcrPdfConverter
-from markitdown_glmocr._ai_service import AIService, AIResult
-from markitdown_glmocr._page_analyzer import PageType
+from markitdown_glmocr._converter import GlmOcrConverter
+from markitdown_glmocr._config import ScanDetectionMode
 
 
-class TestGlmOcrPdfConverter:
+class TestGlmOcrConverter:
     """Converter tests."""
 
-    def test_accepts_pdf_extension(self):
+    @patch("markitdown_glmocr._converter.glmocr")
+    def test_accepts_pdf_extension(self, mock_glmocr):
         """Accept .pdf extension."""
-        converter = GlmOcrPdfConverter()
+        converter = GlmOcrConverter()
         stream = io.BytesIO(b"%PDF-1.4")
         stream_info = MagicMock(extension=".pdf", mimetype=None)
 
         assert converter.accepts(stream, stream_info) is True
 
-    def test_accepts_pdf_mimetype(self):
+    @patch("markitdown_glmocr._converter.glmocr")
+    def test_accepts_pdf_mimetype(self, mock_glmocr):
         """Accept PDF MIME type."""
-        converter = GlmOcrPdfConverter()
+        converter = GlmOcrConverter()
         stream = io.BytesIO(b"%PDF-1.4")
         stream_info = MagicMock(extension=None, mimetype="application/pdf")
 
         assert converter.accepts(stream, stream_info) is True
 
-    def test_rejects_non_pdf(self):
+    @patch("markitdown_glmocr._converter.glmocr")
+    def test_rejects_non_pdf(self, mock_glmocr):
         """Reject non-PDF files."""
-        converter = GlmOcrPdfConverter()
+        converter = GlmOcrConverter()
         stream = io.BytesIO(b"not a pdf")
         stream_info = MagicMock(extension=".txt", mimetype="text/plain")
 
         assert converter.accepts(stream, stream_info) is False
 
-    def test_table_to_markdown(self):
+    @patch("markitdown_glmocr._converter.glmocr")
+    def test_table_to_markdown(self, mock_glmocr):
         """Table to Markdown conversion."""
-        converter = GlmOcrPdfConverter()
+        converter = GlmOcrConverter()
         table = [
             ["Name", "Age", "City"],
             ["Alice", "25", "Beijing"],
@@ -46,59 +49,28 @@ def test_table_to_markdown(self):
         ]
 
         result = converter._table_to_markdown(table)
-        
+
         assert "|" in result
         assert "Name" in result
         assert "Alice" in result
         assert "---" in result  # Separator
 
-    def test_plain_text_page_without_ai(self):
+    @patch("markitdown_glmocr._converter.glmocr")
+    def test_plain_text_page_without_ai(self, mock_glmocr):
         """Plain text page without AI."""
-        converter = GlmOcrPdfConverter()
+        converter = GlmOcrConverter(
+            scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+        )
 
         # Mock page
         page = MagicMock()
         page.images = []
-        page.objects = {}
-        page.extract_tables.return_value = []
+        page.find_tables.return_value = []
+        page.curves = []
         page.extract_text.return_value = "Hello World"
-        page.close = MagicMock()
-
-        # Mock PDF
-        mock_pdf = MagicMock()
-        mock_pdf.pages = [page]
-
-        with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
-            mock_open.return_value.__enter__.return_value = mock_pdf
-
-            stream = io.BytesIO(b"%PDF-1.4")
-            result = converter.convert(stream, MagicMock())
-
-        assert "Hello World" in result.markdown
-
-    def test_complex_page_with_ai(self):
-        """Complex page with AI."""
-        # Mock AI service
-        ai_service = MagicMock(spec=AIService)
-        ai_service.image_to_markdown.return_value = AIResult(
-            success=True,
-            text="# AI Generated\n\nThis is from AI."
-        )
-
-        converter = GlmOcrPdfConverter(ai_service=ai_service)
-
-        # Mock page
-        page = MagicMock()
-        page.images = [MagicMock()]
         page.extract_tables.return_value = []
-        page.extract_text.return_value = "Plain text"
-        page.to_image.return_value.original = MagicMock()
         page.close = MagicMock()
 
-        # Mock image save
-        img_stream = io.BytesIO()
-        page.to_image.return_value.original.save = lambda s, format: s.write(b"fake")
-
         # Mock PDF
         mock_pdf = MagicMock()
         mock_pdf.pages = [page]
@@ -109,64 +81,36 @@ def test_complex_page_with_ai(self):
             stream = io.BytesIO(b"%PDF-1.4")
             result = converter.convert(stream, MagicMock())
 
-        # Should call AI
-        ai_service.image_to_markdown.assert_called_once()
-        assert "AI Generated" in result.markdown
+        assert "Hello World" in result.markdown
 
-    def test_force_ai_mode(self):
+    @patch("markitdown_glmocr._converter.glmocr")
+    def test_force_ai_mode(self, mock_glmocr):
         """Force AI mode."""
-        ai_service = MagicMock(spec=AIService)
-        ai_service.image_to_markdown.return_value = AIResult(
-            success=True,
-            text="AI result"
-        )
+        # Mock glmocr instance
+        mock_result = MagicMock()
+        mock_result.markdown_result = "AI result"
+        mock_result.to_dict.return_value = {}
 
-        converter = GlmOcrPdfConverter(ai_service=ai_service, force_ai=True)
+        mock_glmocr_instance = MagicMock()
+        mock_glmocr_instance.parse.return_value = mock_result
+        mock_glmocr.GlmOcr.return_value = mock_glmocr_instance
+
+        converter = GlmOcrConverter(force_ai=True)
+        # Force initialization of the mocked glmocr
+        converter._get_glmocr = lambda: mock_glmocr_instance
 
         # Even plain text page
         page = MagicMock()
         page.images = []
-        page.objects = {}
-        page.extract_tables.return_value = []
+        page.find_tables.return_value = []
+        page.curves = []
         page.extract_text.return_value = "Plain text"
-        page.to_image.return_value.original = MagicMock()
-        page.close = MagicMock()
-
-        img_stream = io.BytesIO()
-        page.to_image.return_value.original.save = lambda s, format: s.write(b"fake")
-
-        mock_pdf = MagicMock()
-        mock_pdf.pages = [page]
-
-        with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
-            mock_open.return_value.__enter__.return_value = mock_pdf
-
-            stream = io.BytesIO(b"%PDF-1.4")
-            result = converter.convert(stream, MagicMock())
-
-        # Should call AI (because force_ai=True)
-        ai_service.image_to_markdown.assert_called_once()
-
-    def test_fallback_on_ai_failure(self):
-        """Fallback on AI failure."""
-        ai_service = MagicMock(spec=AIService)
-        ai_service.image_to_markdown.return_value = AIResult(
-            success=False,
-            text="",
-            error="API error"
-        )
-
-        converter = GlmOcrPdfConverter(ai_service=ai_service)
-
-        page = MagicMock()
-        page.images = [MagicMock()]
         page.extract_tables.return_value = []
-        page.extract_text.return_value = "Fallback text"
-        page.to_image.return_value.original = MagicMock()
         page.close = MagicMock()
 
-        img_stream = io.BytesIO()
-        page.to_image.return_value.original.save = lambda s, format: s.write(b"fake")
+        # Mock to_image
+        mock_img = MagicMock()
+        page.to_image.return_value = mock_img
 
         mock_pdf = MagicMock()
         mock_pdf.pages = [page]
@@ -177,5 +121,5 @@ def test_fallback_on_ai_failure(self):
             stream = io.BytesIO(b"%PDF-1.4")
             result = converter.convert(stream, MagicMock())
 
-        # Should fallback to default text
-        assert "Fallback text" in result.markdown
\ No newline at end of file
+        # Should call AI (because force_ai=True)
+        mock_glmocr_instance.parse.assert_called_once()
diff --git a/packages/markitdown-glmocr/tests/test_scan_detection.py b/packages/markitdown-glmocr/tests/test_scan_detection.py
new file mode 100644
index 000000000..01b2442a6
--- /dev/null
+++ b/packages/markitdown-glmocr/tests/test_scan_detection.py
@@ -0,0 +1,437 @@
+"""Tests for scan detection optimization in GlmOcrConverter."""
+
+import pytest
+from unittest.mock import MagicMock, patch
+
+from markitdown_glmocr._config import GlmOcrConfig, ScanDetectionMode
+from markitdown_glmocr._converter import GlmOcrConverter
+
+
+class TestScanDetectionMode:
+    """扫描检测模式配置测试"""
+
+    def test_default_mode_is_sampling(self):
+        """默认模式应为 SAMPLING"""
+        config = GlmOcrConfig()
+        assert config.scan_detection_mode == ScanDetectionMode.SAMPLING
+
+    def test_custom_mode_from_config(self):
+        """从配置对象读取自定义模式"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            config = GlmOcrConfig(scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT)
+            converter = GlmOcrConverter(config=config)
+            assert converter.scan_detection_mode == ScanDetectionMode.FIRST_PAGE_HINT
+
+    def test_custom_mode_from_constructor(self):
+        """从构造函数传入自定义模式"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+            )
+            assert converter.scan_detection_mode == ScanDetectionMode.PAGE_BY_PAGE
+
+    def test_constructor_overrides_config(self):
+        """构造函数参数优先于配置对象"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            config = GlmOcrConfig(scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT)
+            converter = GlmOcrConverter(
+                config=config,
+                scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+            )
+            assert converter.scan_detection_mode == ScanDetectionMode.PAGE_BY_PAGE
+
+
+class TestIsScannedPage:
+    """扫描页面检测测试"""
+
+    def test_page_without_images_not_scanned(self):
+        """无图片的页面不是扫描件"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter()
+
+            page = MagicMock()
+            page.images = []
+            page.extract_text.return_value = "Some text content here"
+
+            assert converter._is_scanned_page(page) is False
+
+    def test_page_with_images_and_text_not_scanned(self):
+        """有图片但有足够文本的页面不是扫描件"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(scan_text_threshold=50)
+
+            page = MagicMock()
+            page.images = [MagicMock()]
+            page.extract_text.return_value = "This is more than 50 characters of text content that should be extracted"
+
+            assert converter._is_scanned_page(page) is False
+
+    def test_page_with_images_no_text_is_scanned(self):
+        """有图片但无文本的页面是扫描件"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(scan_text_threshold=50)
+
+            page = MagicMock()
+            page.images = [MagicMock()]
+            page.extract_text.return_value = ""
+
+            assert converter._is_scanned_page(page) is True
+
+    def test_page_with_images_little_text_is_scanned(self):
+        """有图片但文本少于阈值的页面是扫描件"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(scan_text_threshold=50)
+
+            page = MagicMock()
+            page.images = [MagicMock()]
+            page.extract_text.return_value = "Short text"  # Only 10 chars
+
+            assert converter._is_scanned_page(page) is True
+
+    def test_text_extraction_error_assumes_scanned(self):
+        """文本提取失败时假定是扫描件"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter()
+
+            page = MagicMock()
+            page.images = [MagicMock()]
+            page.extract_text.side_effect = Exception("Extraction failed")
+
+            assert converter._is_scanned_page(page) is True
+
+    def test_custom_threshold(self):
+        """自定义阈值生效"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(scan_text_threshold=100)
+
+            # Text below threshold
+            page1 = MagicMock()
+            page1.images = [MagicMock()]
+            page1.extract_text.return_value = "This is exactly 50 characters"  # ~30 chars
+
+            assert converter._is_scanned_page(page1) is True
+
+            # Text above threshold
+            page2 = MagicMock()
+            page2.images = [MagicMock()]
+            page2.extract_text.return_value = "This is definitely more than 100 characters of text content here for testing and verification purposes"  # 106 chars
+
+            assert converter._is_scanned_page(page2) is False
+
+
+class TestDetectAllScanned:
+    """全文档扫描检测测试"""
+
+    def test_page_by_page_mode_returns_false(self):
+        """PAGE_BY_PAGE 模式永远返回 False"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+            )
+
+            # Even with all scanned pages
+            pdf = MagicMock()
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+            scanned_page.close = MagicMock()
+            pdf.pages = [scanned_page, scanned_page, scanned_page]
+
+            assert converter._detect_all_scanned(pdf) is False
+
+    def test_first_page_hint_first_page_scanned(self):
+        """FIRST_PAGE_HINT 模式，首页扫描则全文档扫描"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT,
+            )
+
+            # First page scanned
+            pdf = MagicMock()
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+            scanned_page.close = MagicMock()
+
+            normal_page = MagicMock()
+            normal_page.images = []
+            normal_page.extract_text.return_value = "Normal text"
+
+            pdf.pages = [scanned_page, normal_page, normal_page]
+
+            assert converter._detect_all_scanned(pdf) is True
+
+    def test_first_page_hint_first_page_not_scanned(self):
+        """FIRST_PAGE_HINT 模式，首页非扫描则不判定全扫描"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT,
+            )
+
+            # First page not scanned
+            pdf = MagicMock()
+            normal_page = MagicMock()
+            normal_page.images = []
+            normal_page.extract_text.return_value = "Normal text"
+
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+
+            pdf.pages = [normal_page, scanned_page, scanned_page]
+
+            assert converter._detect_all_scanned(pdf) is False
+
+    def test_sampling_mode_majority_scanned(self):
+        """SAMPLING 模式，多数页面扫描则全文档扫描"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=3,
+            )
+
+            # 3 pages, 2 scanned, 1 normal -> majority scanned
+            pdf = MagicMock()
+
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+
+            normal_page = MagicMock()
+            normal_page.images = []
+            normal_page.extract_text.return_value = "Normal text"
+
+            pdf.pages = [scanned_page, scanned_page, normal_page]
+
+            assert converter._detect_all_scanned(pdf) is True
+
+    def test_sampling_mode_minority_scanned(self):
+        """SAMPLING 模式，少数页面扫描则不判定全扫描"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=3,
+            )
+
+            # 3 pages, 1 scanned, 2 normal -> minority scanned
+            pdf = MagicMock()
+
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+
+            normal_page = MagicMock()
+            normal_page.images = []
+            normal_page.extract_text.return_value = "Normal text"
+
+            pdf.pages = [normal_page, normal_page, scanned_page]
+
+            assert converter._detect_all_scanned(pdf) is False
+
+    def test_sampling_mode_all_scanned(self):
+        """SAMPLING 模式，所有抽样页扫描则全文档扫描"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=3,
+            )
+
+            pdf = MagicMock()
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+
+            pdf.pages = [scanned_page, scanned_page, scanned_page, scanned_page]
+
+            assert converter._detect_all_scanned(pdf) is True
+
+    def test_sampling_mode_custom_sample_count(self):
+        """SAMPLING 模式，自定义抽样页数"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=5,
+            )
+
+            # 5 pages sampled, 3 scanned -> majority
+            pdf = MagicMock()
+
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+
+            normal_page = MagicMock()
+            normal_page.images = []
+            normal_page.extract_text.return_value = "Normal text"
+
+            pdf.pages = [scanned_page, scanned_page, scanned_page, normal_page, normal_page]
+
+            assert converter._detect_all_scanned(pdf) is True
+
+    def test_empty_pdf_returns_false(self):
+        """空 PDF 返回 False"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter()
+
+            pdf = MagicMock()
+            pdf.pages = []
+
+            assert converter._detect_all_scanned(pdf) is False
+
+    def test_pdf_with_less_pages_than_sample_count(self):
+        """PDF 页数少于抽样数时使用实际页数"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=5,
+            )
+
+            # Only 2 pages, both scanned -> majority
+            pdf = MagicMock()
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+
+            pdf.pages = [scanned_page, scanned_page]
+
+            assert converter._detect_all_scanned(pdf) is True
+
+
+class TestConvertPdfWithScanDetection:
+    """PDF 转换中的扫描检测集成测试"""
+
+    def test_all_scanned_uses_batch_mode(self):
+        """全扫描模式优先使用批量上传"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=3,
+            )
+
+            # Mock _detect_all_scanned to return True
+            converter._detect_all_scanned = MagicMock(return_value=True)
+            converter._convert_pdf_batch = MagicMock(return_value="Batch OCR result")
+            converter._convert_with_glmocr = MagicMock(return_value="Page OCR result")
+
+            # Mock PDF
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+            scanned_page.close = MagicMock()
+
+            pdf = MagicMock()
+            pdf.pages = [scanned_page, scanned_page]
+
+            with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
+                mock_open.return_value.__enter__.return_value = pdf
+
+                import io
+                stream = io.BytesIO(b"%PDF-1.4")
+                result = converter._convert_pdf(stream)
+
+            # Should call batch mode (1 API call)
+            converter._convert_pdf_batch.assert_called_once()
+            # Should NOT call per-page OCR
+            converter._convert_with_glmocr.assert_not_called()
+            assert "Batch OCR result" in result.markdown
+
+    def test_batch_failure_fallback_to_per_page(self):
+        """批量OCR失败后降级为逐页处理"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=3,
+            )
+
+            # Mock _detect_all_scanned to return True
+            converter._detect_all_scanned = MagicMock(return_value=True)
+            converter._convert_pdf_batch = MagicMock(side_effect=RuntimeError("Batch API error"))
+            converter._convert_with_glmocr = MagicMock(return_value="Page OCR result")
+
+            # Mock PDF
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+            scanned_page.close = MagicMock()
+
+            pdf = MagicMock()
+            pdf.pages = [scanned_page, scanned_page]
+
+            with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
+                mock_open.return_value.__enter__.return_value = pdf
+
+                import io
+                stream = io.BytesIO(b"%PDF-1.4")
+                result = converter._convert_pdf(stream)
+
+            # Should have tried batch first
+            converter._convert_pdf_batch.assert_called_once()
+            # Should fall back to per-page OCR
+            assert converter._convert_with_glmocr.call_count == 2
+
+    def test_all_scanned_skips_per_page_analysis(self):
+        """全扫描模式跳过逐页分析"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.SAMPLING,
+                scan_sample_pages=3,
+            )
+
+            # Mock _detect_all_scanned to return True
+            converter._detect_all_scanned = MagicMock(return_value=True)
+            converter._convert_pdf_batch = MagicMock(return_value="Batch OCR result")
+            converter._analyze_page = MagicMock(return_value="plain_text")
+
+            # Mock PDF
+            scanned_page = MagicMock()
+            scanned_page.images = [MagicMock()]
+            scanned_page.extract_text.return_value = ""
+            scanned_page.close = MagicMock()
+
+            pdf = MagicMock()
+            pdf.pages = [scanned_page, scanned_page]
+
+            with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
+                mock_open.return_value.__enter__.return_value = pdf
+
+                import io
+                stream = io.BytesIO(b"%PDF-1.4")
+                result = converter._convert_pdf(stream)
+
+            # Should call batch mode, not _analyze_page
+            converter._convert_pdf_batch.assert_called_once()
+            converter._analyze_page.assert_not_called()
+
+    def test_page_by_page_mode_analyzes_each_page(self):
+        """PAGE_BY_PAGE 模式分析每页"""
+        with patch("markitdown_glmocr._converter.glmocr"):
+            converter = GlmOcrConverter(
+                scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+            )
+
+            # Mock _analyze_page to return different results
+            converter._analyze_page = MagicMock(side_effect=["plain_text", "complex"])
+            converter._convert_with_glmocr = MagicMock(return_value="OCR result")
+            converter._extract_text_with_tables = MagicMock(return_value="Text result")
+
+            # Mock PDF
+            page1 = MagicMock()
+            page1.close = MagicMock()
+            page2 = MagicMock()
+            page2.close = MagicMock()
+
+            pdf = MagicMock()
+            pdf.pages = [page1, page2]
+
+            with patch("markitdown_glmocr._converter.pdfplumber.open") as mock_open:
+                mock_open.return_value.__enter__.return_value = pdf
+
+                import io
+                stream = io.BytesIO(b"%PDF-1.4")
+                result = converter._convert_pdf(stream)
+
+            # Should analyze each page
+            assert converter._analyze_page.call_count == 2
+            # Should use different methods for different pages
+            converter._extract_text_with_tables.assert_called_once()
+            converter._convert_with_glmocr.assert_called_once()
\ No newline at end of file
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
index 51fc00d60..ddd6ca794 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
@@ -2,6 +2,19 @@
 
 import os
 from dataclasses import dataclass
+from enum import Enum
+
+
+class ScanDetectionMode(str, Enum):
+    """扫描检测模式。
+
+    - PAGE_BY_PAGE: 逐页分析，当前默认行为
+    - FIRST_PAGE_HINT: 首页是扫描件则全文档使用OCR
+    - SAMPLING: 抽样前N页，多数是扫描件则全部OCR
+    """
+    PAGE_BY_PAGE = "page_by_page"
+    FIRST_PAGE_HINT = "first_page_hint"
+    SAMPLING = "sampling"
 
 
 @dataclass
@@ -35,6 +48,11 @@ class PaddleOcrConfig:
     # Processing strategy
     force_ai: bool = False
 
+    # Scan detection mode for optimization
+    scan_detection_mode: ScanDetectionMode = ScanDetectionMode.SAMPLING
+    scan_sample_pages: int = 3  # Number of pages to sample in SAMPLING mode
+    scan_text_threshold: int = 50  # Min text length to consider page as non-scanned
+
     @classmethod
     def from_env(cls, **overrides) -> "PaddleOcrConfig":
         """Create config from environment variables with optional overrides."""
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
index 627b2dfd5..48e5c2bd6 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
@@ -11,7 +11,7 @@
     MissingDependencyException,
 )
 
-from ._config import PaddleOcrConfig
+from ._config import PaddleOcrConfig, ScanDetectionMode
 from ._paddle_client import PaddleClient
 
 # Import PDF dependencies
@@ -58,6 +58,9 @@ def __init__(
         use_doc_orientation_classify: bool = False,
         use_doc_unwarping: bool = False,
         use_chart_recognition: bool = False,
+        scan_detection_mode: Optional[ScanDetectionMode] = None,
+        scan_sample_pages: Optional[int] = None,
+        scan_text_threshold: Optional[int] = None,
         config: Optional[PaddleOcrConfig] = None,
     ):
         """Initialize converter.
@@ -71,6 +74,9 @@ def __init__(
             use_doc_orientation_classify: Enable document orientation classification
             use_doc_unwarping: Enable document unwarping
             use_chart_recognition: Enable chart recognition
+            scan_detection_mode: 扫描检测模式，优化扫描PDF处理
+            scan_sample_pages: SAMPLING模式下抽样页数 (default: 3)
+            scan_text_threshold: 判定为扫描件的最小文本长度阈值 (default: 50)
             config: Optional PaddleOcrConfig instance
         """
         # Build config from explicit params or provided config
@@ -91,6 +97,21 @@ def __init__(
             self.use_chart_recognition = (
                 use_chart_recognition or config.use_chart_recognition
             )
+            self.scan_detection_mode = (
+                scan_detection_mode
+                if scan_detection_mode is not None
+                else config.scan_detection_mode
+            )
+            self.scan_sample_pages = (
+                scan_sample_pages
+                if scan_sample_pages is not None
+                else config.scan_sample_pages
+            )
+            self.scan_text_threshold = (
+                scan_text_threshold
+                if scan_text_threshold is not None
+                else config.scan_text_threshold
+            )
         else:
             self.token = token
             self.model = model
@@ -100,6 +121,15 @@ def __init__(
             self.use_doc_orientation_classify = use_doc_orientation_classify
             self.use_doc_unwarping = use_doc_unwarping
             self.use_chart_recognition = use_chart_recognition
+            self.scan_detection_mode = (
+                scan_detection_mode
+                if scan_detection_mode is not None
+                else ScanDetectionMode.SAMPLING
+            )
+            self.scan_sample_pages = scan_sample_pages if scan_sample_pages is not None else 3
+            self.scan_text_threshold = (
+                scan_text_threshold if scan_text_threshold is not None else 50
+            )
 
         # Lazy init client
         self._client: Optional[PaddleClient] = None
@@ -199,6 +229,7 @@ def _convert_image(
     def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
         """Convert PDF using hybrid approach (pdfplumber for text, PaddleOCR for complex pages)."""
         pdf_stream = io.BytesIO(file_stream.read())
+        pdf_bytes = pdf_stream.getvalue()  # Keep original bytes for batch OCR
         markdown_parts = []
         ocr_failed = False
 
@@ -207,18 +238,40 @@ def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
                 total_pages = len(pdf.pages)
                 logger.info("PaddleOcrConverter: 开始处理 PDF, 总页数=%d", total_pages)
 
-                for page_num, page in enumerate(pdf.pages):
-                    # Analyze page type
-                    page_type = self._analyze_page(page)
+                # Optimization: detect if entire PDF is scanned
+                all_scanned = self._detect_all_scanned(pdf)
+
+                if all_scanned and not self.force_ai:
+                    # Batch mode: upload entire PDF to OCR API (single API call)
+                    logger.info(
+                        "PaddleOcrConverter: 全文档扫描模式, 批量上传PDF, 页数=%d",
+                        total_pages,
+                    )
+                    try:
+                        markdown = self._convert_pdf_batch(pdf_bytes)
+                        if markdown.strip():
+                            logger.info(
+                                "PaddleOcrConverter: 批量OCR完成, 输出长度=%d",
+                                len(markdown),
+                            )
+                            return DocumentConverterResult(markdown=markdown)
+                    except Exception as e:
+                        logger.warning(
+                            "PaddleOcrConverter: 批量OCR失败, 降级为逐页处理, 错误=%s",
+                            e,
+                        )
+                        ocr_failed = True
+                        # Fall through to per-page processing
 
+                # Per-page processing (PAGE_BY_PAGE mode or batch failed)
+                for page_num, page in enumerate(pdf.pages):
                     # Choose processing method
-                    if self.force_ai or page_type != "plain_text":
-                        # Complex content: try PaddleOCR, fallback to pdfplumber on failure
+                    if self.force_ai or all_scanned:
+                        # All scanned (after batch failed) or force_ai
                         logger.info(
-                            "PaddleOcrConverter: 第 %d/%d 页, 类型=%s, 使用 PaddleOCR",
+                            "PaddleOcrConverter: 第 %d/%d 页, 使用 PaddleOCR",
                             page_num + 1,
                             total_pages,
-                            page_type,
                         )
                         try:
                             markdown = self._convert_with_paddleocr(page, page_num)
@@ -232,14 +285,35 @@ def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
                             ocr_failed = True
                             markdown = self._extract_text_with_tables(page)
                     else:
-                        # Plain text: use pdfplumber
-                        logger.info(
-                            "PaddleOcrConverter: 第 %d/%d 页, 类型=%s, 使用 pdfplumber",
-                            page_num + 1,
-                            total_pages,
-                            page_type,
-                        )
-                        markdown = self._extract_text_with_tables(page)
+                        # Per-page analysis (PAGE_BY_PAGE mode or non-scanned doc)
+                        page_type = self._analyze_page(page)
+
+                        if page_type != "plain_text":
+                            logger.info(
+                                "PaddleOcrConverter: 第 %d/%d 页, 类型=%s, 使用 PaddleOCR",
+                                page_num + 1,
+                                total_pages,
+                                page_type,
+                            )
+                            try:
+                                markdown = self._convert_with_paddleocr(page, page_num)
+                            except Exception as e:
+                                logger.warning(
+                                    "PaddleOcrConverter: 第 %d/%d 页 OCR 失败, 降级为 pdfplumber, 错误=%s",
+                                    page_num + 1,
+                                    total_pages,
+                                    e,
+                                )
+                                ocr_failed = True
+                                markdown = self._extract_text_with_tables(page)
+                        else:
+                            logger.info(
+                                "PaddleOcrConverter: 第 %d/%d 页, 类型=%s, 使用 pdfplumber",
+                                page_num + 1,
+                                total_pages,
+                                page_type,
+                            )
+                            markdown = self._extract_text_with_tables(page)
 
                     if markdown.strip():
                         markdown_parts.append(f"## Page {page_num + 1}\n\n{markdown}")
@@ -272,6 +346,24 @@ def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
         logger.info("PaddleOcrConverter: PDF 转换完成, 输出长度=%d", len(markdown))
         return DocumentConverterResult(markdown=markdown)
 
+    def _convert_pdf_batch(self, pdf_bytes: bytes) -> str:
+        """Convert entire PDF in a single API call.
+
+        More efficient for scanned PDFs: one API call instead of N calls for N pages.
+
+        Args:
+            pdf_bytes: Raw PDF file content.
+
+        Returns:
+            Markdown text from all pages.
+        """
+        logger.info("PaddleOcrConverter: 批量上传PDF到OCR API, 大小=%d bytes", len(pdf_bytes))
+        markdown = self._get_client().ocr(
+            file_bytes=pdf_bytes,
+            filename="document.pdf",
+        )
+        return markdown
+
     def _analyze_page(self, page: Any) -> str:
         """Analyze page content type."""
         # Check for images
@@ -289,6 +381,93 @@ def _analyze_page(self, page: Any) -> str:
 
         return "plain_text"
 
+    def _is_scanned_page(self, page: Any) -> bool:
+        """Check if a page is likely a scanned image.
+
+        A page is considered scanned if:
+        1. It contains images, AND
+        2. It has very little extractable text (below threshold)
+
+        Args:
+            page: pdfplumber page object
+
+        Returns:
+            True if the page appears to be a scanned image
+        """
+        # Must have images to be a scan
+        has_images = hasattr(page, "images") and bool(page.images)
+        if not has_images:
+            return False
+
+        # Check extractable text length
+        try:
+            text = page.extract_text() or ""
+            text_len = len(text.strip())
+            # If there's substantial text, it might be a mixed page or
+            # a digital PDF with embedded images
+            if text_len >= self.scan_text_threshold:
+                return False
+        except Exception:
+            # If text extraction fails, assume it's a scan
+            return True
+
+        return True
+
+    def _detect_all_scanned(self, pdf: Any) -> bool:
+        """Detect if entire PDF is scanned based on scan_detection_mode.
+
+        Optimization: When first few pages are scanned, we can assume
+        all pages are scanned and skip per-page analysis.
+
+        Args:
+            pdf: pdfplumber PDF object
+
+        Returns:
+            True if entire PDF should be treated as scanned
+        """
+        if self.scan_detection_mode == ScanDetectionMode.PAGE_BY_PAGE:
+            return False
+
+        total_pages = len(pdf.pages)
+        if total_pages == 0:
+            return False
+
+        if self.scan_detection_mode == ScanDetectionMode.FIRST_PAGE_HINT:
+            # Check only first page
+            first_page = pdf.pages[0]
+            is_scanned = self._is_scanned_page(first_page)
+            first_page.close()
+            if is_scanned:
+                logger.info(
+                    "PaddleOcrConverter: 首页检测为扫描件, 模式=FIRST_PAGE_HINT, 全文档使用OCR"
+                )
+            return is_scanned
+
+        if self.scan_detection_mode == ScanDetectionMode.SAMPLING:
+            # Sample first N pages
+            sample_count = min(self.scan_sample_pages, total_pages)
+            scanned_count = 0
+
+            for i in range(sample_count):
+                page = pdf.pages[i]
+                if self._is_scanned_page(page):
+                    scanned_count += 1
+
+            # If majority of sampled pages are scanned, treat all as scanned
+            majority_threshold = sample_count // 2 + 1
+            all_scanned = scanned_count >= majority_threshold
+
+            if all_scanned:
+                logger.info(
+                    "PaddleOcrConverter: 抽样检测 %d/%d 页为扫描件, 模式=SAMPLING, 全文档使用OCR",
+                    scanned_count,
+                    sample_count,
+                )
+
+            return all_scanned
+
+        return False
+
     def _convert_with_paddleocr(self, page: Any, page_num: int) -> str:
         """Convert page using PaddleOCR API."""
         # Render page to image
diff --git a/packages/markitdown-paddleocr/tests/test_scan_detection.py b/packages/markitdown-paddleocr/tests/test_scan_detection.py
new file mode 100644
index 000000000..116197fe6
--- /dev/null
+++ b/packages/markitdown-paddleocr/tests/test_scan_detection.py
@@ -0,0 +1,430 @@
+"""Tests for scan detection optimization."""
+
+import pytest
+from unittest.mock import MagicMock, patch
+
+from markitdown_paddleocr._config import PaddleOcrConfig, ScanDetectionMode
+from markitdown_paddleocr._converter import PaddleOcrConverter
+
+
+class TestScanDetectionMode:
+    """扫描检测模式配置测试"""
+
+    def test_default_mode_is_sampling(self):
+        """默认模式应为 SAMPLING"""
+        config = PaddleOcrConfig()
+        assert config.scan_detection_mode == ScanDetectionMode.SAMPLING
+
+    def test_custom_mode_from_config(self):
+        """从配置对象读取自定义模式"""
+        config = PaddleOcrConfig(scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT)
+        converter = PaddleOcrConverter(config=config, token="test_token")
+        assert converter.scan_detection_mode == ScanDetectionMode.FIRST_PAGE_HINT
+
+    def test_custom_mode_from_constructor(self):
+        """从构造函数传入自定义模式"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+        )
+        assert converter.scan_detection_mode == ScanDetectionMode.PAGE_BY_PAGE
+
+    def test_constructor_overrides_config(self):
+        """构造函数参数优先于配置对象"""
+        config = PaddleOcrConfig(scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT)
+        converter = PaddleOcrConverter(
+            config=config,
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+        )
+        assert converter.scan_detection_mode == ScanDetectionMode.PAGE_BY_PAGE
+
+
+class TestIsScannedPage:
+    """扫描页面检测测试"""
+
+    def test_page_without_images_not_scanned(self):
+        """无图片的页面不是扫描件"""
+        converter = PaddleOcrConverter(token="test_token")
+
+        page = MagicMock()
+        page.images = []
+        page.extract_text.return_value = "Some text content here"
+
+        assert converter._is_scanned_page(page) is False
+
+    def test_page_with_images_and_text_not_scanned(self):
+        """有图片但有足够文本的页面不是扫描件"""
+        converter = PaddleOcrConverter(token="test_token", scan_text_threshold=50)
+
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_text.return_value = "This is more than 50 characters of text content that should be extracted"
+
+        assert converter._is_scanned_page(page) is False
+
+    def test_page_with_images_no_text_is_scanned(self):
+        """有图片但无文本的页面是扫描件"""
+        converter = PaddleOcrConverter(token="test_token", scan_text_threshold=50)
+
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_text.return_value = ""
+
+        assert converter._is_scanned_page(page) is True
+
+    def test_page_with_images_little_text_is_scanned(self):
+        """有图片但文本少于阈值的页面是扫描件"""
+        converter = PaddleOcrConverter(token="test_token", scan_text_threshold=50)
+
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_text.return_value = "Short text"  # Only 10 chars
+
+        assert converter._is_scanned_page(page) is True
+
+    def test_text_extraction_error_assumes_scanned(self):
+        """文本提取失败时假定是扫描件"""
+        converter = PaddleOcrConverter(token="test_token")
+
+        page = MagicMock()
+        page.images = [MagicMock()]
+        page.extract_text.side_effect = Exception("Extraction failed")
+
+        assert converter._is_scanned_page(page) is True
+
+    def test_custom_threshold(self):
+        """自定义阈值生效"""
+        converter = PaddleOcrConverter(token="test_token", scan_text_threshold=100)
+
+        # Text below threshold
+        page1 = MagicMock()
+        page1.images = [MagicMock()]
+        page1.extract_text.return_value = "This is exactly 50 characters"  # ~30 chars
+
+        assert converter._is_scanned_page(page1) is True
+
+        # Text above threshold
+        page2 = MagicMock()
+        page2.images = [MagicMock()]
+        page2.extract_text.return_value = "This is definitely more than 100 characters of text content here for testing and verification purposes"  # 106 chars
+
+        assert converter._is_scanned_page(page2) is False
+
+
+class TestDetectAllScanned:
+    """全文档扫描检测测试"""
+
+    def test_page_by_page_mode_returns_false(self):
+        """PAGE_BY_PAGE 模式永远返回 False"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+        )
+
+        # Even with all scanned pages
+        pdf = MagicMock()
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+        scanned_page.close = MagicMock()
+        pdf.pages = [scanned_page, scanned_page, scanned_page]
+
+        assert converter._detect_all_scanned(pdf) is False
+
+    def test_first_page_hint_first_page_scanned(self):
+        """FIRST_PAGE_HINT 模式，首页扫描则全文档扫描"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT,
+        )
+
+        # First page scanned
+        pdf = MagicMock()
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+        scanned_page.close = MagicMock()
+
+        normal_page = MagicMock()
+        normal_page.images = []
+        normal_page.extract_text.return_value = "Normal text"
+
+        pdf.pages = [scanned_page, normal_page, normal_page]
+
+        assert converter._detect_all_scanned(pdf) is True
+
+    def test_first_page_hint_first_page_not_scanned(self):
+        """FIRST_PAGE_HINT 模式，首页非扫描则不判定全扫描"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.FIRST_PAGE_HINT,
+        )
+
+        # First page not scanned
+        pdf = MagicMock()
+        normal_page = MagicMock()
+        normal_page.images = []
+        normal_page.extract_text.return_value = "Normal text"
+
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+
+        pdf.pages = [normal_page, scanned_page, scanned_page]
+
+        assert converter._detect_all_scanned(pdf) is False
+
+    def test_sampling_mode_majority_scanned(self):
+        """SAMPLING 模式，多数页面扫描则全文档扫描"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=3,
+        )
+
+        # 3 pages, 2 scanned, 1 normal -> majority scanned
+        pdf = MagicMock()
+
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+
+        normal_page = MagicMock()
+        normal_page.images = []
+        normal_page.extract_text.return_value = "Normal text"
+
+        pdf.pages = [scanned_page, scanned_page, normal_page]
+
+        assert converter._detect_all_scanned(pdf) is True
+
+    def test_sampling_mode_minority_scanned(self):
+        """SAMPLING 模式，少数页面扫描则不判定全扫描"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=3,
+        )
+
+        # 3 pages, 1 scanned, 2 normal -> minority scanned
+        pdf = MagicMock()
+
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+
+        normal_page = MagicMock()
+        normal_page.images = []
+        normal_page.extract_text.return_value = "Normal text"
+
+        pdf.pages = [normal_page, normal_page, scanned_page]
+
+        assert converter._detect_all_scanned(pdf) is False
+
+    def test_sampling_mode_all_scanned(self):
+        """SAMPLING 模式，所有抽样页扫描则全文档扫描"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=3,
+        )
+
+        pdf = MagicMock()
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+
+        pdf.pages = [scanned_page, scanned_page, scanned_page, scanned_page]
+
+        assert converter._detect_all_scanned(pdf) is True
+
+    def test_sampling_mode_custom_sample_count(self):
+        """SAMPLING 模式，自定义抽样页数"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=5,
+        )
+
+        # 5 pages sampled, 3 scanned -> majority
+        pdf = MagicMock()
+
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+
+        normal_page = MagicMock()
+        normal_page.images = []
+        normal_page.extract_text.return_value = "Normal text"
+
+        pdf.pages = [scanned_page, scanned_page, scanned_page, normal_page, normal_page]
+
+        assert converter._detect_all_scanned(pdf) is True
+
+    def test_empty_pdf_returns_false(self):
+        """空 PDF 返回 False"""
+        converter = PaddleOcrConverter(token="test_token")
+
+        pdf = MagicMock()
+        pdf.pages = []
+
+        assert converter._detect_all_scanned(pdf) is False
+
+    def test_pdf_with_less_pages_than_sample_count(self):
+        """PDF 页数少于抽样数时使用实际页数"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=5,
+        )
+
+        # Only 2 pages, both scanned -> majority
+        pdf = MagicMock()
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+
+        pdf.pages = [scanned_page, scanned_page]
+
+        assert converter._detect_all_scanned(pdf) is True
+
+
+class TestConvertPdfWithScanDetection:
+    """PDF 转换中的扫描检测集成测试"""
+
+    def test_all_scanned_uses_batch_mode(self):
+        """全扫描模式优先使用批量上传"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=3,
+        )
+
+        # Mock _detect_all_scanned to return True
+        converter._detect_all_scanned = MagicMock(return_value=True)
+        converter._convert_pdf_batch = MagicMock(return_value="Batch OCR result")
+        converter._convert_with_paddleocr = MagicMock(return_value="Page OCR result")
+
+        # Mock PDF
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+        scanned_page.close = MagicMock()
+
+        pdf = MagicMock()
+        pdf.pages = [scanned_page, scanned_page]
+
+        with patch("markitdown_paddleocr._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = pdf
+
+            import io
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter._convert_pdf(stream)
+
+        # Should call batch mode (1 API call)
+        converter._convert_pdf_batch.assert_called_once()
+        # Should NOT call per-page OCR
+        converter._convert_with_paddleocr.assert_not_called()
+        assert "Batch OCR result" in result.markdown
+
+    def test_batch_failure_fallback_to_per_page(self):
+        """批量OCR失败后降级为逐页处理"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=3,
+        )
+
+        # Mock _detect_all_scanned to return True
+        converter._detect_all_scanned = MagicMock(return_value=True)
+        converter._convert_pdf_batch = MagicMock(side_effect=RuntimeError("Batch API error"))
+        converter._convert_with_paddleocr = MagicMock(return_value="Page OCR result")
+
+        # Mock PDF
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+        scanned_page.close = MagicMock()
+
+        pdf = MagicMock()
+        pdf.pages = [scanned_page, scanned_page]
+
+        with patch("markitdown_paddleocr._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = pdf
+
+            import io
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter._convert_pdf(stream)
+
+        # Should have tried batch first
+        converter._convert_pdf_batch.assert_called_once()
+        # Should fall back to per-page OCR
+        assert converter._convert_with_paddleocr.call_count == 2
+
+    def test_all_scanned_skips_per_page_analysis(self):
+        """全扫描模式跳过逐页分析"""
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.SAMPLING,
+            scan_sample_pages=3,
+        )
+
+        # Mock _detect_all_scanned to return True
+        converter._detect_all_scanned = MagicMock(return_value=True)
+        converter._convert_pdf_batch = MagicMock(return_value="Batch OCR result")
+        converter._analyze_page = MagicMock(return_value="plain_text")
+
+        # Mock PDF
+        scanned_page = MagicMock()
+        scanned_page.images = [MagicMock()]
+        scanned_page.extract_text.return_value = ""
+        scanned_page.close = MagicMock()
+
+        pdf = MagicMock()
+        pdf.pages = [scanned_page, scanned_page]
+
+        with patch("markitdown_paddleocr._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = pdf
+
+            import io
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter._convert_pdf(stream)
+
+        # Should call batch mode, not _analyze_page
+        converter._convert_pdf_batch.assert_called_once()
+        converter._analyze_page.assert_not_called()
+
+    def test_page_by_page_mode_analyzes_each_page(self):
+        """PAGE_BY_PAGE 模式分析每页"""
+
+        converter = PaddleOcrConverter(
+            token="test_token",
+            scan_detection_mode=ScanDetectionMode.PAGE_BY_PAGE,
+        )
+
+        # Mock _analyze_page to return different results
+        converter._analyze_page = MagicMock(side_effect=["plain_text", "complex"])
+        converter._convert_with_paddleocr = MagicMock(return_value="OCR result")
+        converter._extract_text_with_tables = MagicMock(return_value="Text result")
+
+        # Mock PDF
+        page1 = MagicMock()
+        page1.close = MagicMock()
+        page2 = MagicMock()
+        page2.close = MagicMock()
+
+        pdf = MagicMock()
+        pdf.pages = [page1, page2]
+
+        with patch("markitdown_paddleocr._converter.pdfplumber.open") as mock_open:
+            mock_open.return_value.__enter__.return_value = pdf
+
+            import io
+            stream = io.BytesIO(b"%PDF-1.4")
+            result = converter._convert_pdf(stream)
+
+        # Should analyze each page
+        assert converter._analyze_page.call_count == 2
+        # Should use different methods for different pages
+        converter._extract_text_with_tables.assert_called_once()
+        converter._convert_with_paddleocr.assert_called_once()
\ No newline at end of file

From 67e871b40520fbbd50a0195211f6b474a1272660 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Thu, 21 May 2026 16:22:20 +0800
Subject: [PATCH 11/15] =?UTF-8?q?=E4=B8=8A=E4=BC=A0pypi=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 packages/markitdown-glmocr/README.md          | 69 ++++++++++++----
 .../src/markitdown_glmocr/__about__.py        |  2 +-
 packages/markitdown-paddleocr/README.md       | 69 ++++++++++++----
 .../src/markitdown_paddleocr/__about__.py     |  2 +-
 scripts/pypi-upload.ps1                       | 76 ++++++++++++++++++
 scripts/pypi-upload.sh                        | 79 +++++++++++++++++++
 6 files changed, 265 insertions(+), 32 deletions(-)
 create mode 100644 scripts/pypi-upload.ps1
 create mode 100644 scripts/pypi-upload.sh

diff --git a/packages/markitdown-glmocr/README.md b/packages/markitdown-glmocr/README.md
index 15c2b819e..35c221524 100644
--- a/packages/markitdown-glmocr/README.md
+++ b/packages/markitdown-glmocr/README.md
@@ -195,39 +195,78 @@ glmocr SDK 返回的结构化数据支持以下标签：
 
 ### 前置条件
 
-- 确保已安装 `build` 和 `twine`：
+1. 安装构建工具：
 
 ```bash
-pip install build twine
+pip install build twine hatch
 ```
 
-- 确保环境变量 `PyPI_API_Token` 已设置为你的 PyPI API Token：
+2. 配置 PyPI API Token（Windows 用户环境变量）：
 
+```powershell
+# PowerShell 设置用户环境变量
+[System.Environment]::SetEnvironmentVariable('PYPI_API_TOKEN', 'pypi-...', 'User')
+```
+
+或在 Bash/Zsh 中：
+
+```bash
+export PYPI_API_TOKEN="pypi-..."
+```
+
+### 快速发布（推荐）
+
+项目根目录提供了上传脚本，可一键发布两个插件：
+
+**Bash / Git Bash:**
 ```bash
-export PyPI_API_Token="pypi-..."
+# 构建两个插件
+cd packages/markitdown-glmocr && hatch build
+
+cd ../markitdown-paddleocr && hatch build
+
+# 上传（自动上传所有构建的版本）
+cd ../..
+./scripts/pypi-upload.sh
+
+# 或指定版本号
+./scripts/pypi-upload.sh 0.2.0
+```
+
+**PowerShell:**
+```powershell
+# 构建两个插件
+cd packages/markitdown-glmocr; hatch build
+cd ../markitdown-paddleocr; hatch build
+
+# 上传
+cd ../..
+.\scripts\pypi-upload.ps1
+
+# 或指定版本号
+.\scripts\pypi-upload.ps1 -Version "0.2.0"
 ```
 
-### 发布步骤
+### 手动发布
 
 ```bash
-# 1. 进入项目根目录（包含 pyproject.toml）
+# 1. 进入项目目录
 cd packages/markitdown-glmocr
 
-# 2. 构建分发包（生成 dist/ 目录下的 .tar.gz 和 .whl 文件）
-python -m build
+# 2. 构建
+hatch build
 
-# 3. 检查包的元数据和内容
+# 3. 检查
 twine check dist/*
 
-# 4. 上传到 PyPI（使用环境变量中的 Token 认证）
-twine upload dist/* -u __token__ -p "$PyPI_API_Token"
+# 4. 上传
+twine upload --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
 ```
 
 ### 发布到 TestPyPI（测试）
 
 ```bash
-# 先上传到 TestPyPI 验证包是否正确
-twine upload --repository testpypi dist/* -u __token__ -p "$PyPI_API_Token"
+twine upload --repository testpypi --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
 
 # 从 TestPyPI 安装验证
 pip install --index-url https://test.pypi.org/simple/ markitdown-glmocr
@@ -235,9 +274,9 @@ pip install --index-url https://test.pypi.org/simple/ markitdown-glmocr
 
 ### 注意事项
 
-- 发布前确保 `pyproject.toml` 中的版本号已更新
+- 发布前确保 `src/markitdown_glmocr/__about__.py` 中的版本号已更新
 - 同一版本号不能重复上传，如需修正必须 bump 版本号
-- `PyPI_API_Token` 环境变量切勿硬编码到脚本或提交到代码仓库
+- `PYPI_API_TOKEN` 切勿提交到代码仓库
 
 ## 许可证
 
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py b/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
index 3dc1f76bc..d3ec452c3 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
@@ -1 +1 @@
-__version__ = "0.1.0"
+__version__ = "0.2.0"
diff --git a/packages/markitdown-paddleocr/README.md b/packages/markitdown-paddleocr/README.md
index 2f4c49a13..ac7011d4d 100644
--- a/packages/markitdown-paddleocr/README.md
+++ b/packages/markitdown-paddleocr/README.md
@@ -156,39 +156,78 @@ PaddleOcrConverter.convert()
 
 ### 前置条件
 
-- 确保已安装 `build` 和 `twine`：
+1. 安装构建工具：
 
 ```bash
-pip install build twine
+pip install build twine hatch
 ```
 
-- 确保环境变量 `PyPI_API_Token` 已设置为你的 PyPI API Token：
+2. 配置 PyPI API Token（Windows 用户环境变量）：
 
+```powershell
+# PowerShell 设置用户环境变量
+[System.Environment]::SetEnvironmentVariable('PYPI_API_TOKEN', 'pypi-...', 'User')
+```
+
+或在 Bash/Zsh 中：
+
+```bash
+export PYPI_API_TOKEN="pypi-..."
+```
+
+### 快速发布（推荐）
+
+项目根目录提供了上传脚本，可一键发布两个插件：
+
+**Bash / Git Bash:**
 ```bash
-export PyPI_API_Token="pypi-..."
+# 构建两个插件
+cd packages/markitdown-glmocr && hatch build
+
+cd ../markitdown-paddleocr && hatch build
+
+# 上传（自动上传所有构建的版本）
+cd ../..
+./scripts/pypi-upload.sh
+
+# 或指定版本号
+./scripts/pypi-upload.sh 0.2.0
+```
+
+**PowerShell:**
+```powershell
+# 构建两个插件
+cd packages/markitdown-glmocr; hatch build
+cd ../markitdown-paddleocr; hatch build
+
+# 上传
+cd ../..
+.\scripts\pypi-upload.ps1
+
+# 或指定版本号
+.\scripts\pypi-upload.ps1 -Version "0.2.0"
 ```
 
-### 发布步骤
+### 手动发布
 
 ```bash
-# 1. 进入项目根目录（包含 pyproject.toml）
+# 1. 进入项目目录
 cd packages/markitdown-paddleocr
 
-# 2. 构建分发包（生成 dist/ 目录下的 .tar.gz 和 .whl 文件）
-python -m build
+# 2. 构建
+hatch build
 
-# 3. 检查包的元数据和内容
+# 3. 检查
 twine check dist/*
 
-# 4. 上传到 PyPI（使用环境变量中的 Token 认证）
-twine upload dist/* -u __token__ -p "$PyPI_API_Token"
+# 4. 上传
+twine upload --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
 ```
 
 ### 发布到 TestPyPI（测试）
 
 ```bash
-# 先上传到 TestPyPI 验证包是否正确
-twine upload --repository testpypi dist/* -u __token__ -p "$PyPI_API_Token"
+twine upload --repository testpypi --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
 
 # 从 TestPyPI 安装验证
 pip install --index-url https://test.pypi.org/simple/ markitdown-paddleocr
@@ -196,9 +235,9 @@ pip install --index-url https://test.pypi.org/simple/ markitdown-paddleocr
 
 ### 注意事项
 
-- 发布前确保 `pyproject.toml` 中的版本号已更新
+- 发布前确保 `src/markitdown_paddleocr/__about__.py` 中的版本号已更新
 - 同一版本号不能重复上传，如需修正必须 bump 版本号
-- `PyPI_API_Token` 环境变量切勿硬编码到脚本或提交到代码仓库
+- `PYPI_API_TOKEN` 切勿提交到代码仓库
 
 ## 许可证
 
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
index 3dc1f76bc..d3ec452c3 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
@@ -1 +1 @@
-__version__ = "0.1.0"
+__version__ = "0.2.0"
diff --git a/scripts/pypi-upload.ps1 b/scripts/pypi-upload.ps1
new file mode 100644
index 000000000..a1dbec0b6
--- /dev/null
+++ b/scripts/pypi-upload.ps1
@@ -0,0 +1,76 @@
+# 上传 markitdown-glmocr 和 markitdown-paddleocr 到 PyPI
+# 用法: .\scripts\pypi-upload.ps1 [-Version "0.2.0"]
+#   -Version: 可选，指定版本号，默认上传 dist 目录下所有文件
+
+param(
+    [string]$Version = ""
+)
+
+$ErrorActionPreference = "Stop"
+
+Write-Host "=== PyPI Upload Script ===" -ForegroundColor Green
+Write-Host ""
+
+# 从用户环境变量读取 PYPI_API_TOKEN
+$PypiToken = [System.Environment]::GetEnvironmentVariable('PYPI_API_TOKEN', 'User')
+
+if ([string]::IsNullOrEmpty($PypiToken)) {
+    Write-Host "错误: 未找到 PYPI_API_TOKEN 环境变量" -ForegroundColor Red
+    Write-Host "请在 Windows 用户环境变量中配置 PYPI_API_TOKEN"
+    exit 1
+}
+
+Write-Host "✓ PyPI API Token 已加载" -ForegroundColor Green
+Write-Host ""
+
+# 设置 UTF-8 编码
+$env:PYTHONUTF8 = "1"
+
+$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
+$ProjectRoot = Split-Path -Parent $ScriptDir
+
+$Packages = @("markitdown-glmocr", "markitdown-paddleocr")
+
+foreach ($Pkg in $Packages) {
+    $PkgDir = Join-Path $ProjectRoot "packages\$Pkg"
+    $DistDir = Join-Path $PkgDir "dist"
+    
+    if (-not (Test-Path $DistDir)) {
+        Write-Host "跳过 $Pkg : dist 目录不存在" -ForegroundColor Yellow
+        continue
+    }
+    
+    Write-Host "--- 上传 $Pkg ---" -ForegroundColor Green
+    
+    # 获取包名格式 (markitdown-glmocr -> markitdown_glmocr)
+    $PkgName = $Pkg -replace '-', '_'
+    
+    # 确定要上传的文件
+    if ($Version) {
+        $Pattern = "$PkgName-$Version*"
+    } else {
+        $Pattern = "$PkgName*"
+    }
+    
+    $UploadFiles = Get-ChildItem -Path $DistDir -Filter $Pattern -ErrorAction SilentlyContinue
+    
+    if ($UploadFiles) {
+        Write-Host "文件:"
+        $UploadFiles | ForEach-Object { Write-Host "  $($_.Name)" }
+        Write-Host ""
+        
+        $FilesArg = $UploadFiles | ForEach-Object { $_.FullName }
+        & twine upload --username __token__ --password $PypiToken --disable-progress-bar @FilesArg
+        
+        # 提取版本号
+        $LatestVersion = ($UploadFiles[0].Name | Select-String -Pattern '\d+\.\d+\.\d+').Matches.Value
+        Write-Host "✓ $Pkg 上传成功!" -ForegroundColor Green
+        Write-Host "  https://pypi.org/project/$Pkg/$LatestVersion/" -ForegroundColor Cyan
+        Write-Host ""
+    } else {
+        Write-Host "跳过 $Pkg : 未找到版本 $Version 的构建文件" -ForegroundColor Yellow
+        Write-Host ""
+    }
+}
+
+Write-Host "=== 上传完成 ===" -ForegroundColor Green
diff --git a/scripts/pypi-upload.sh b/scripts/pypi-upload.sh
new file mode 100644
index 000000000..dcd3ca6e6
--- /dev/null
+++ b/scripts/pypi-upload.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# 上传 markitdown-glmocr 和 markitdown-paddleocr 到 PyPI
+# 用法: ./scripts/pypi-upload.sh [version]
+#   version: 可选，指定版本号，默认上传 dist 目录下所有文件
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+
+# 颜色输出
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+echo -e "${GREEN}=== PyPI Upload Script ===${NC}"
+echo ""
+
+# 从 Windows 用户环境变量读取 PYPI_API_TOKEN
+if [ -z "$PYPI_API_TOKEN" ]; then
+    PYPI_API_TOKEN=$(powershell -Command "[System.Environment]::GetEnvironmentVariable('PYPI_API_TOKEN', 'User')" 2>/dev/null)
+fi
+
+if [ -z "$PYPI_API_TOKEN" ] || [ "$PYPI_API_TOKEN" = "(no output)" ]; then
+    echo -e "${RED}错误: 未找到 PYPI_API_TOKEN 环境变量${NC}"
+    echo "请设置 PYPI_API_TOKEN 环境变量或在 Windows 用户环境变量中配置"
+    exit 1
+fi
+
+echo -e "${GREEN}✓ PyPI API Token 已加载${NC}"
+echo ""
+
+# 设置 UTF-8 编码避免 Windows GBK 问题
+export PYTHONUTF8=1
+
+VERSION="${1:-}"
+PACKAGES=("markitdown-glmocr" "markitdown-paddleocr")
+
+for PKG in "${PACKAGES[@]}"; do
+    PKG_DIR="$PROJECT_ROOT/packages/$PKG"
+    
+    if [ ! -d "$PKG_DIR/dist" ]; then
+        echo -e "${YELLOW}跳过 $PKG: dist 目录不存在${NC}"
+        continue
+    fi
+    
+    echo -e "${GREEN}--- 上传 $PKG ---${NC}"
+    
+    # 获取包名格式 (markitdown-glmocr -> markitdown_glmocr)
+    PKG_NAME=$(echo "$PKG" | tr '-' '_')
+    
+    # 确定要上传的文件
+    if [ -n "$VERSION" ]; then
+        UPLOAD_FILES="$PKG_DIR/dist/${PKG_NAME}-${VERSION}*"
+    else
+        UPLOAD_FILES="$PKG_DIR/dist/${PKG_NAME}*"
+    fi
+    
+    # 检查文件是否存在
+    if ls $UPLOAD_FILES 1> /dev/null 2>&1; then
+        echo "文件:"
+        ls $UPLOAD_FILES
+        echo ""
+        
+        twine upload --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar $UPLOAD_FILES
+        
+        # 从输出中提取版本号
+        LATEST_VERSION=$(ls $UPLOAD_FILES | head -1 | grep -oP '\d+\.\d+\.\d+' | head -1)
+        echo -e "${GREEN}✓ $PKG 上传成功!${NC}"
+        echo "  https://pypi.org/project/$PKG/${LATEST_VERSION:-latest}/"
+        echo ""
+    else
+        echo -e "${YELLOW}跳过 $PKG: 未找到版本 ${VERSION:-任何} 的构建文件${NC}"
+        echo ""
+    fi
+done
+
+echo -e "${GREEN}=== 上传完成 ===${NC}"

From 3d674e76f120171f0c9436adccc475f2dc023c1d Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Tue, 2 Jun 2026 11:18:31 +0800
Subject: [PATCH 12/15] =?UTF-8?q?markitdown-glmocr:=20=E6=89=B9=E9=87=8FOC?=
 =?UTF-8?q?R=E5=A4=B1=E8=B4=A5=E6=97=B6=E7=9B=B4=E6=8E=A5=E6=8A=9B?=
 =?UTF-8?q?=E5=BC=82=E5=B8=B8=E8=AE=A9=E6=A1=86=E6=9E=B6fallback=EF=BC=8C?=
 =?UTF-8?q?=E4=B8=8D=E5=86=8D=E9=99=8D=E7=BA=A7=E4=B8=BA=E9=80=90=E9=A1=B5?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=EF=BC=88=E9=81=BF=E5=85=8D429=E9=99=90?=
 =?UTF-8?q?=E6=B5=81=E6=97=B6N=E6=AC=A1=E6=97=A0=E6=95=88=E9=87=8D?=
 =?UTF-8?q?=E8=AF=95=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/markitdown_glmocr/_converter.py            | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
index 19fa0b90f..35e00900c 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_converter.py
@@ -118,7 +118,9 @@ def __init__(
                 if scan_detection_mode is not None
                 else ScanDetectionMode.SAMPLING
             )
-            self.scan_sample_pages = scan_sample_pages if scan_sample_pages is not None else 3
+            self.scan_sample_pages = (
+                scan_sample_pages if scan_sample_pages is not None else 3
+            )
             self.scan_text_threshold = (
                 scan_text_threshold if scan_text_threshold is not None else 50
             )
@@ -237,11 +239,11 @@ def _convert_pdf(self, file_stream: BinaryIO) -> DocumentConverterResult:
                         )
                         return DocumentConverterResult(markdown=markdown)
                 except Exception as e:
-                    logger.warning(
-                        "GlmOcrConverter: 批量OCR失败, 降级为逐页处理, 错误=%s",
+                    logger.error(
+                        "GlmOcrConverter: 批量OCR失败, 抛出异常让框架fallback到下一个converter, 错误=%s",
                         e,
                     )
-                    # Fall through to per-page processing
+                    raise
 
             # Per-page processing (PAGE_BY_PAGE mode or batch failed)
             for page_num, page in enumerate(pdf.pages):
@@ -311,7 +313,9 @@ def _convert_pdf_batch(self, pdf_bytes: bytes) -> str:
         Returns:
             Markdown text from all pages.
         """
-        logger.info("GlmOcrConverter: 批量上传PDF到glmocr SDK, 大小=%d bytes", len(pdf_bytes))
+        logger.info(
+            "GlmOcrConverter: 批量上传PDF到glmocr SDK, 大小=%d bytes", len(pdf_bytes)
+        )
         result = self._get_glmocr().parse(pdf_bytes)
 
         # Check for errors

From b537542233ae890ec932dd6ffb152212d6e27987 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Tue, 2 Jun 2026 15:04:56 +0800
Subject: [PATCH 13/15] bump version: markitdown-glmocr 0.2.1,
 markitdown-paddleocr 0.2.1

---
 packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py   | 2 +-
 .../markitdown-paddleocr/src/markitdown_paddleocr/__about__.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py b/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
index d3ec452c3..3ced3581b 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
@@ -1 +1 @@
-__version__ = "0.2.0"
+__version__ = "0.2.1"
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
index d3ec452c3..3ced3581b 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
@@ -1 +1 @@
-__version__ = "0.2.0"
+__version__ = "0.2.1"

From 4f94d9b10012ec1faf863f462c00822932e8d181 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Tue, 2 Jun 2026 16:13:00 +0800
Subject: [PATCH 14/15] =?UTF-8?q?glmocr=E5=92=8Cpaddleocr=E4=BC=98?=
 =?UTF-8?q?=E5=85=88=E7=BA=A7=E5=B9=B3=E7=BA=A7(-1.0)=EF=BC=8C=E7=94=B1?=
 =?UTF-8?q?=E4=B8=8A=E5=B1=82agent=20skills=E6=8E=A7=E5=88=B6=E8=B0=83?=
 =?UTF-8?q?=E7=94=A8=E9=A1=BA=E5=BA=8F=EF=BC=9Bbump=20version=200.2.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../markitdown-glmocr/src/markitdown_glmocr/__about__.py    | 2 +-
 packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py | 6 +++---
 .../src/markitdown_paddleocr/__about__.py                   | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py b/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
index 3ced3581b..b5fdc7530 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/__about__.py
@@ -1 +1 @@
-__version__ = "0.2.1"
+__version__ = "0.2.2"
diff --git a/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
index 5963dd43c..025a5ffd7 100644
--- a/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
+++ b/packages/markitdown-glmocr/src/markitdown_glmocr/_plugin.py
@@ -25,9 +25,9 @@ def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     logger.info("markitdown-glmocr: 开始注册插件")
 
     # Register converter
-    # Priority -2.0: higher priority than PaddleOcrConverter (-1.0),
-    # so glmocr is tried first and paddleocr serves as fallback.
-    PRIORITY_GLMOCR = -2.0
+    # Priority -1.0: same level as PaddleOcrConverter,
+    # the upper-level agent's skills control which plugin to call first.
+    PRIORITY_GLMOCR = -1.0
 
     try:
         converter = GlmOcrConverter(
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
index 3ced3581b..b5fdc7530 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
@@ -1 +1 @@
-__version__ = "0.2.1"
+__version__ = "0.2.2"

From 3f04dbcdfedb450a4a87cf258e6f5e8eb7a34e91 Mon Sep 17 00:00:00 2001
From: hankl <hankl@glodon.com>
Date: Tue, 2 Jun 2026 16:28:19 +0800
Subject: [PATCH 15/15] =?UTF-8?q?markitdown-paddleocr:=20PaddleOCR?=
 =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E4=BB=8EVL-1.5=E5=88=87=E6=8D=A2=E5=88=B0VL-?=
 =?UTF-8?q?1.6;=20bump=20version=200.2.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 packages/markitdown-paddleocr/README.md           |  6 +++---
 .../src/markitdown_paddleocr/__about__.py         |  2 +-
 .../src/markitdown_paddleocr/_config.py           |  5 +++--
 .../src/markitdown_paddleocr/_converter.py        | 14 +++++++++-----
 .../src/markitdown_paddleocr/_dual_converter.py   | 15 +++++++++++----
 .../src/markitdown_paddleocr/_plugin.py           |  2 +-
 6 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/packages/markitdown-paddleocr/README.md b/packages/markitdown-paddleocr/README.md
index ac7011d4d..e64f7120a 100644
--- a/packages/markitdown-paddleocr/README.md
+++ b/packages/markitdown-paddleocr/README.md
@@ -25,7 +25,7 @@ pip install markitdown-paddleocr
 export BAIDU_PADDLE_TOKEN="your-paddle-token"
 
 # 可选
-export PADDLE_OCR_MODEL="PaddleOCR-VL-1.5"   # 模型名称
+export PADDLE_OCR_MODEL="PaddleOCR-VL-1.6"   # 模型名称
 ```
 
 ### 配置优先级
@@ -103,7 +103,7 @@ print(markdown)
 | 参数 | 类型 | 默认值 | 说明 |
 |------|------|--------|------|
 | `token` | str | 环境变量 `BAIDU_PADDLE_TOKEN` | PaddleOCR Token |
-| `model` | str | `PaddleOCR-VL-1.5` | OCR 模型名称 |
+| `model` | str | `PaddleOCR-VL-1.6` | OCR 模型名称 |
 | `poll_interval` | float | 2.0 | 轮询间隔（秒） |
 | `poll_timeout` | float | 300.0 | 轮询超时（秒） |
 | `force_ai` | bool | False | 强制所有页面使用 OCR |
@@ -116,7 +116,7 @@ print(markdown)
 | 变量 | 说明 | 示例 |
 |------|------|------|
 | `BAIDU_PADDLE_TOKEN` | Token（必需） | `7963b85a...` |
-| `PADDLE_OCR_MODEL` | 模型名称 | `PaddleOCR-VL-1.5` |
+| `PADDLE_OCR_MODEL` | 模型名称 | `PaddleOCR-VL-1.6` |
 
 ## 工作原理
 
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
index b5fdc7530..d31c31eae 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/__about__.py
@@ -1 +1 @@
-__version__ = "0.2.2"
+__version__ = "0.2.3"
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
index ddd6ca794..e66bb21e6 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_config.py
@@ -12,6 +12,7 @@ class ScanDetectionMode(str, Enum):
     - FIRST_PAGE_HINT: 首页是扫描件则全文档使用OCR
     - SAMPLING: 抽样前N页，多数是扫描件则全部OCR
     """
+
     PAGE_BY_PAGE = "page_by_page"
     FIRST_PAGE_HINT = "first_page_hint"
     SAMPLING = "sampling"
@@ -31,7 +32,7 @@ class PaddleOcrConfig:
     token: str = ""  # Reads from BAIDU_PADDLE_TOKEN by default
 
     # OCR model
-    model: str = "PaddleOCR-VL-1.5"
+    model: str = "PaddleOCR-VL-1.6"
 
     # API endpoint
     job_url: str = "https://paddleocr.aistudio-app.com/api/v2/ocr/jobs"
@@ -58,7 +59,7 @@ def from_env(cls, **overrides) -> "PaddleOcrConfig":
         """Create config from environment variables with optional overrides."""
         defaults = {
             "token": os.environ.get("BAIDU_PADDLE_TOKEN", ""),
-            "model": os.environ.get("PADDLE_OCR_MODEL", "PaddleOCR-VL-1.5"),
+            "model": os.environ.get("PADDLE_OCR_MODEL", "PaddleOCR-VL-1.6"),
         }
         defaults.update(overrides)
         return cls(**defaults)
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
index 48e5c2bd6..6a11b8c85 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_converter.py
@@ -51,7 +51,7 @@ class PaddleOcrConverter(DocumentConverter):
     def __init__(
         self,
         token: Optional[str] = None,
-        model: str = "PaddleOCR-VL-1.5",
+        model: str = "PaddleOCR-VL-1.6",
         poll_interval: float = 2.0,
         poll_timeout: float = 300.0,
         force_ai: bool = False,
@@ -67,7 +67,7 @@ def __init__(
 
         Args:
             token: Baidu PaddleOCR token (reads from BAIDU_PADDLE_TOKEN env var if not provided)
-            model: OCR model name (default: PaddleOCR-VL-1.5)
+            model: OCR model name (default: PaddleOCR-VL-1.6)
             poll_interval: Seconds between status polls (default: 2.0)
             poll_timeout: Max seconds to wait for job completion (default: 300.0)
             force_ai: Force all pages to use OCR (default: False)
@@ -82,7 +82,7 @@ def __init__(
         # Build config from explicit params or provided config
         if config:
             self.token = token or config.token
-            self.model = model if model != "PaddleOCR-VL-1.5" else config.model
+            self.model = model if model != "PaddleOCR-VL-1.6" else config.model
             self.poll_interval = (
                 poll_interval if poll_interval != 2.0 else config.poll_interval
             )
@@ -126,7 +126,9 @@ def __init__(
                 if scan_detection_mode is not None
                 else ScanDetectionMode.SAMPLING
             )
-            self.scan_sample_pages = scan_sample_pages if scan_sample_pages is not None else 3
+            self.scan_sample_pages = (
+                scan_sample_pages if scan_sample_pages is not None else 3
+            )
             self.scan_text_threshold = (
                 scan_text_threshold if scan_text_threshold is not None else 50
             )
@@ -357,7 +359,9 @@ def _convert_pdf_batch(self, pdf_bytes: bytes) -> str:
         Returns:
             Markdown text from all pages.
         """
-        logger.info("PaddleOcrConverter: 批量上传PDF到OCR API, 大小=%d bytes", len(pdf_bytes))
+        logger.info(
+            "PaddleOcrConverter: 批量上传PDF到OCR API, 大小=%d bytes", len(pdf_bytes)
+        )
         markdown = self._get_client().ocr(
             file_bytes=pdf_bytes,
             filename="document.pdf",
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py
index e27395c4d..0957b9b87 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_dual_converter.py
@@ -1,10 +1,14 @@
 """DualOcrConverter - glmocr (primary) → paddleocr (fallback) automatic degradation."""
 
 import logging
-from typing import Optional
+from typing import Any, BinaryIO, Optional
 
-from markitdown import MarkItDown, DocumentConverter, DocumentConverterResult, StreamInfo
-from typing import BinaryIO, Any
+from markitdown import (
+    DocumentConverter,
+    DocumentConverterResult,
+    MarkItDown,
+    StreamInfo,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -28,7 +32,7 @@ def __init__(
         glmocr_force_ai: bool = False,
         # paddleocr kwargs
         paddleocr_token: Optional[str] = None,
-        paddleocr_model: str = "PaddleOCR-VL-1.5",
+        paddleocr_model: str = "PaddleOCR-VL-1.6",
         paddleocr_poll_interval: float = 2.0,
         paddleocr_poll_timeout: float = 300.0,
         paddleocr_force_ai: bool = False,
@@ -61,6 +65,7 @@ def _init_converters(self):
         """Lazily init both converters."""
         try:
             from markitdown_glmocr import GlmOcrConverter
+
             # Filter out None values
             kwargs = {k: v for k, v in self.glmocr_kwargs.items() if v is not None}
             self._primary = GlmOcrConverter(**kwargs)
@@ -71,6 +76,7 @@ def _init_converters(self):
 
         try:
             from markitdown_paddleocr import PaddleOcrConverter
+
             kwargs = {k: v for k, v in self.paddleocr_kwargs.items() if v is not None}
             self._fallback = PaddleOcrConverter(**kwargs)
             logger.info("paddleocr converter initialized (fallback)")
@@ -155,6 +161,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 def io_bytes(data: bytes):
     """Create a seekable BytesIO from bytes."""
     import io
+
     buf = io.BytesIO(data)
     buf.seek(0)
     return buf
diff --git a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
index d24916ac5..e84e70bb8 100644
--- a/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
+++ b/packages/markitdown-paddleocr/src/markitdown_paddleocr/_plugin.py
@@ -28,7 +28,7 @@ def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
     try:
         converter = PaddleOcrConverter(
             token=kwargs.get("token"),
-            model=kwargs.get("model", "PaddleOCR-VL-1.5"),
+            model=kwargs.get("model", "PaddleOCR-VL-1.6"),
             poll_interval=kwargs.get("poll_interval", 2.0),
             poll_timeout=kwargs.get("poll_timeout", 300.0),
             force_ai=kwargs.get("force_ai", False),