newSafari/summarize_safari.py at main · qoli/newSafari · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#!/Users/ronnie/.pyenv/versions/3.10.4/bin/python

import sys
sys.path.insert(0, "/Users/ronnie/.pyenv/versions/3.10.4/lib/python3.10/site-packages")

import subprocess
import re
import argparse
from openai import OpenAI
from readability import Document
import os
from rich.console import Console, Group
from rich.markdown import Markdown
from rich.live import Live
from rich.text import Text
from rich.panel import Panel

# LLM API settings
# Base URL of the OpenAI-compatible endpoint; passed to the OpenAI client in main().
LLM_BASE_URL = "http://192.168.10.1:1234/v1"
# Model identifier sent with every chat-completion request in summarize_text().
LLM_MODEL = "qwen/qwen3-30b-a3b-2507"

def parse_arguments():
    """Parse the command-line options of the Safari assistant.

    Returns:
        argparse.Namespace: the parsed options; ``api_key`` holds the value
        given to the mandatory ``--api-key`` flag.  When the flag is absent
        argparse prints a usage message and exits the process.
    """
    api_key_option = {"required": True, "help": "Glama API Key"}

    cli = argparse.ArgumentParser(description="Safari 網頁內容擷取與對話助手")
    cli.add_argument("--api-key", **api_key_option)

    namespace = cli.parse_args()
    return namespace

def get_safari_content():
    """Fetch the URL, title and HTML source of Safari's first document.

    Two AppleScript snippets are executed through ``osascript``: the first
    returns ``"<url>, <title>"`` for ``document 1``, the second returns that
    document's page source.

    Returns:
        dict | None: ``{"url": ..., "title": ..., "html": ...}`` on success.
        ``None`` is returned, after printing an error message, when either
        ``osascript`` call exits with a non-zero status, when the URL/title
        output cannot be split, or when any exception is raised.
    """
    try:
        # Fetch the current page's URL and title (script taken as-is from main.py).
        url_script = """
            tell application "Safari"
                set currentURL to URL of document 1
                set pageTitle to name of document 1
                return currentURL & ", " & pageTitle
            end tell
        """
        url_result = subprocess.run(
            ["osascript", "-e", url_script], capture_output=True, text=True
        )

        if url_result.returncode != 0:
            print(f"錯誤：無法獲取 URL 和標題\n{url_result.stderr}")
            return None

        # Remove only the line terminator.  A plain strip() would also eat the
        # space of the ", " separator when the page title is empty, leaving
        # nothing to split on and making a perfectly readable page fail.
        raw = url_result.stdout.rstrip("\r\n")
        url, separator, title = raw.partition(", ")
        if not separator:
            print(f"錯誤：無法解析 URL 和標題\n{raw}")
            return None
        url = url.strip()
        title = title.strip()

        # Fetch the page source (script taken as-is from main.py).
        html_script = """
            tell application "Safari"
                tell document 1
                    set theSource to source
                end tell
            end tell
            return theSource
        """

        html_result = subprocess.run(
            ["osascript", "-e", html_script], capture_output=True, text=True
        )
        if html_result.returncode != 0:
            print(f"錯誤：無法獲取頁面源代碼\n{html_result.stderr}")
            return None

        html = html_result.stdout
        return {"url": url, "title": title, "html": html}

    except Exception as e:
        # Deliberate best-effort boundary: any failure (e.g. osascript not
        # being available) is reported and turned into None for the caller.
        print(f"發生錯誤：{str(e)}")
        return None

def extract_text(html):
    """Extract the title and main content of a page with python-readability.

    Args:
        html: HTML source of the page.

    Returns:
        tuple: ``(title, summary)`` as produced by ``Document.title()`` and
        ``Document.summary()``, or ``(None, None)`` when extraction raises
        (the error is printed).
        NOTE(review): ``summary()`` presumably returns cleaned HTML markup
        rather than plain text -- confirm against python-readability.
    """
    try:
        document = Document(html)
        page_title, main_content = document.title(), document.summary()
    except Exception as e:
        print(f"提取文本時發生錯誤：{e}")
        return None, None
    return page_title, main_content

def _highlight_summary(summary):
    """Return *summary* as a rich ``Text`` with its heading lines coloured.

    Styles are attached to ``Text`` objects instead of being spliced into a
    markup string, so square brackets produced by the model are shown
    literally and can never be parsed as (possibly invalid) markup tags.
    """
    styled_lines = []
    for line in summary.split("\n"):
        if line.startswith("總結："):
            styled_lines.append(Text(line, style="bold cyan"))
        elif "：" in line:
            styled_lines.append(Text(line, style="bold yellow"))
        else:
            styled_lines.append(Text(line))
    return Text("\n").join(styled_lines)


def summarize_text(client, text, title, user_input=None):
    """Stream a summary or a chat answer from the LLM and render it live.

    Args:
        client: OpenAI-compatible client; only
            ``client.chat.completions.create`` is used.
        text: In summary mode (``user_input is None``) the page content to
            summarise.  In chat mode the earlier summary, replayed as the
            assistant's previous turn.
        title: Page title; only used in summary mode.
        user_input: The user's question, or ``None`` to request the initial
            summary.

    Returns:
        An object exposing ``.choices[0].message.content`` with the complete
        generated text (the same access path as a non-streaming response),
        or ``None`` when the request fails; the error is printed.
    """
    from types import SimpleNamespace

    console = Console()
    # A single flag selects the mode everywhere.  Mixing ``is None`` checks
    # with truthiness checks made an empty question use chat messages but
    # summary-style rendering.
    chat_mode = user_input is not None

    try:
        if not chat_mode:
            # Initial summary mode
            messages = [
                {
                    "role": "system",
                    "content": "為最後提供的文字內容進行按要求的總結。如果是其他語言，請翻譯到繁體中文。\n\n請嚴格按照下面的格式進行輸出，在格式以外的地方，不需要多餘的文本內容。這裡是格式指導：總結：簡短的一句話概括內容，此單獨佔用一行，記得輸出換行符號；要點：對文字內容提出多個要點內容，並每一個要點都附加一個裝飾用的 emoji，每一個要點佔用一行，注意記得輸出換行符號；下面為需要總結的文字內容：",
                },
                {
                    "role": "user",
                    "content": f"Title: {title}\n\nContent:\n{text}",
                },
            ]
            temperature = 0.1
            prefix = "\n[bold cyan]📝 網頁摘要：[/]\n"
        else:
            # Chat mode
            messages = [
                {
                    "role": "system",
                    "content": "你是一個熱心的助手。基於先前提供的網頁內容進行對話。回答時請使用繁體中文。",
                },
                {
                    "role": "assistant",
                    "content": text,  # here ``text`` carries the earlier summary
                },
                {
                    "role": "user",
                    "content": user_input,
                },
            ]
            temperature = 0.7
            prefix = "\n[bold green]🤖 回答：[/]\n"

        # Show the "thinking" notice
        console.print("\n[bold yellow]🤔 正在思考...[/]")

        # Open the streaming request
        stream = client.chat.completions.create(
            model=LLM_MODEL,
            messages=messages,
            temperature=temperature,
            max_tokens=8192,
            top_p=0.95,
            presence_penalty=0.1,
            stream=True,
        )

        full_response = []

        # Show the chat-mode banner when answering a question
        if chat_mode:
            console.rule("[bold cyan]💬 對話模式 [/]", characters="─")
            console.print("[dim] 您可以詢問任何關於該網頁內容的問題。輸入 'exit' 退出，輸入 're' 重新開始。[/]")

        # The prefix is a trusted constant, so it is parsed as markup once.
        header = Text.from_markup(prefix)

        # Redraw the partial answer in place as chunks arrive
        with Live(
            Text("正在生成回應...", style="yellow"),
            console=console,
            refresh_per_second=4,
            vertical_overflow="visible",
        ) as live:
            for chunk in stream:
                # Streams may contain chunks without any choice (for example
                # usage-only chunks); indexing them would raise IndexError
                # and discard the whole answer.
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if not piece:
                    continue

                full_response.append(piece)
                current_text = "".join(full_response)

                if chat_mode:
                    # Chat answers are rendered as Markdown
                    body = Markdown(current_text)
                else:
                    # Summaries get their heading lines highlighted
                    body = _highlight_summary(current_text)

                live.update(Group(header, body))

        # Blank line as a separator
        console.print()

        # Mimic the attribute layout of a non-streaming response
        message = SimpleNamespace(content="".join(full_response))
        return SimpleNamespace(choices=[SimpleNamespace(message=message)])

    except Exception as e:
        # The message is wrapped in Text (not markup) because exception text
        # may contain square brackets that rich would try to parse as tags.
        console.print(Text(f"\n❌ LLM 請求錯誤：{e}", style="bold red"))
        return None


def main():
    """Run the assistant: fetch the Safari page, summarise it, then chat.

    Flow: parse the CLI options, read the front Safari document, extract its
    main content, stream a summary, and then answer questions in a loop.
    Typing ``exit`` (or pressing Ctrl-C / Ctrl-D) ends the program; typing
    ``re`` starts over with whatever page Safari shows at that moment.
    Any failure while fetching, extracting or summarising prints an error
    and returns.
    """
    # Local import keeps this block self-contained; rich is already a
    # dependency of this file.
    from rich.markup import escape

    # Parse the command-line options
    args = parse_arguments()

    # Create the rich console
    console = Console()

    # Each pass of this loop is one full session.  "re" restarts by looping
    # rather than by calling main() recursively, so repeated restarts do not
    # grow the call stack.
    while True:
        # Program banner
        console.rule("[bold cyan]🚀 Safari 網頁助手 [/]", characters="═")

        with console.status("[bold yellow] 初始化中...[/]") as status:
            # Create the OpenAI client
            client = OpenAI(base_url=LLM_BASE_URL, api_key=args.api_key)

            status.update("[bold yellow] 正在獲取頁面內容...[/]")
            page_data = get_safari_content()
            if page_data is None:
                console.print("\n[bold red]❌ 無法獲取頁面數據，程序終止 [/]")
                return

            # Page information.  Title and URL come from the web page, so
            # they are escaped: unescaped square brackets would be parsed as
            # rich markup and could raise or silently disappear.
            console.print("\n[bold green]✅ 成功獲取頁面 [/]")
            console.print(f"📑 標題：[bold]{escape(page_data['title'])}[/]")
            console.print(f"🔗 網址：[blue underline]{escape(page_data['url'])}[/]")

            status.update("[bold yellow] 正在提取文本內容...[/]")
            title, extracted_text = extract_text(page_data["html"])
            if extracted_text is None:
                console.print("\n[bold red]❌ 無法從頁面提取文本，程序終止 [/]")
                return

            console.print("\n[bold green]✅ 文本提取完成 [/]")

        # The spinner is closed before streaming starts: summarize_text()
        # drives its own live display, and two live displays redrawing the
        # same terminal at once garble the output.
        console.print("[bold yellow] 正在生成摘要...[/]")
        summary_response = summarize_text(client, extracted_text, title)

        if not summary_response:
            console.print("\n[bold red]❌ 無法生成摘要 [/]")
            return

        # Enter chat mode
        console.rule("[bold cyan]💬 對話模式 [/]", characters="─")
        console.print("[dim] 您可以詢問任何關於該網頁內容的問題。輸入 'exit' 退出，輸入 're' 重新開始。[/]")

        restart = False
        while True:
            try:
                # console.print renders the styled prompt; input() reads the reply
                console.print("\n您的問題", style="bold purple", end=" > ")
                user_input = input().strip()
                if not user_input:
                    # Nothing was typed: ask again instead of sending an
                    # empty question to the model.
                    continue

                user_command = user_input.lower()
                if user_command == "exit":
                    console.rule("[bold cyan]👋 感謝使用 [/]", characters="─")
                    break
                if user_command == "re":
                    console.print("\n[bold yellow]🔄 重新啟動程序...[/]")
                    restart = True
                    break

                # The same summarize_text function also answers questions
                chat_response = summarize_text(
                    client,
                    summary_response.choices[0].message.content,
                    title,
                    user_input,
                )

                if not chat_response:
                    console.print("\n[bold red]❌ 無法生成回應 [/]")
            except (KeyboardInterrupt, EOFError):
                # Ctrl-C and end-of-input (Ctrl-D) both end the session cleanly
                console.print("\n\n[bold cyan]👋 感謝使用 [/]")
                break

        if not restart:
            return

# Run the assistant only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()