Skip to content

Commit f1f61ed

Browse files
committed
fix
1 parent a694865 commit f1f61ed

File tree

7 files changed

+175
-47
lines changed

7 files changed

+175
-47
lines changed

plugins/golang-filter/mcp-server/servers/rag/READMD.md

Lines changed: 106 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -23,40 +23,36 @@
2323

2424
### 配置结构
2525

26-
```yaml
27-
rag:
28-
# RAG系统基础配置
29-
splitter:
30-
type: "recursive" # 递归分块器 recursive 和 nosplitter
31-
chunk_size: 500
32-
chunk_overlap: 50
33-
top_k: 5 # 搜索返回的知识块数量
34-
threshold: 0.5 # 搜索阈值
35-
36-
llm:
37-
provider: "openai" # openai
38-
api_key: "your-llm-api-key"
39-
base_url: "https://api.openai.com/v1" # 可选
40-
model: "gpt-3.5-turbo" # LLM模型
41-
max_tokens: 2048 # 最大令牌数
42-
temperature: 0.5 # 温度参数
43-
44-
embedding:
45-
provider: "openai" # openai, dashscope
46-
api_key: "your-embedding-api-key"
47-
base_url: "https://api.openai.com/v1" # 可选
48-
model: "text-embedding-ada-002" # 嵌入模型
49-
50-
vectordb:
51-
provider: "milvus" # milvus
52-
host: "localhost"
53-
port: 19530
54-
database: "default"
55-
collection: "test_collection"
56-
username: "" # 可选
57-
password: "" # 可选
26+
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
27+
|----------------------------|----------|-----------|---------|--------|
28+
| **rag** | object | 必填 | - | RAG系统基础配置 |
29+
| rag.splitter.provider | string | 必填 | recursive | 分块器类型:recursive或nosplitter |
30+
| rag.splitter.chunk_size | integer | 可选 | 500 | 块大小 |
31+
| rag.splitter.chunk_overlap | integer | 可选 | 50 | 块重叠大小 |
32+
| rag.top_k | integer | 可选 | 10 | 搜索返回的知识块数量 |
33+
| rag.threshold | float | 可选 | 0.5 | 搜索阈值 |
34+
| **llm** | object | 可选 | - | LLM配置 |
35+
| llm.provider | string | 可选 | openai | LLM提供商 |
36+
| llm.api_key | string | 可选 | - | LLM API密钥 |
37+
| llm.base_url | string | 可选 | - | LLM API基础URL |
38+
| llm.model | string | 可选 | gpt-4o | LLM模型名称 |
39+
| llm.max_tokens | integer | 可选 | 2048 | 最大令牌数 |
40+
| llm.temperature | float | 可选 | 0.5 | 温度参数 |
41+
| **embedding** | object | 必填 | - | 嵌入配置 |
42+
| embedding.provider | string | 必填 | dashscope | 嵌入提供商:openai或dashscope |
43+
| embedding.api_key | string | 必填 | - | 嵌入API密钥 |
44+
| embedding.base_url | string | 可选 | - | 嵌入API基础URL |
45+
| embedding.model | string | 必填 | text-embedding-v4 | 嵌入模型名称 |
46+
| **vectordb** | object | 必填 | - | 向量数据库配置 |
47+
| vectordb.provider | string | 必填 | milvus | 向量数据库提供商 |
48+
| vectordb.host | string | 必填 | localhost | 数据库主机地址 |
49+
| vectordb.port | integer | 必填 | 19530 | 数据库端口 |
50+
| vectordb.database | string | 必填 | default | 数据库名称 |
51+
| vectordb.collection | string | 必填 | test_collection | 集合名称 |
52+
| vectordb.username | string | 可选 | - | 数据库用户名 |
53+
| vectordb.password | string | 可选 | - | 数据库密码 |
54+
5855

59-
```
6056
### higress-config 配置样例
6157

6258
```yaml
@@ -124,6 +120,83 @@ data:
124120
#### LLM
125121
- **OpenAI**
126122
123+
## 如何测试数据集的效果
124+
125+
测试数据集的效果分两步,第一步导入数据集语料,第二步测试Chat效果。
126+
127+
### 导入数据集语料
128+
129+
使用 `RAGClient.CreateChunkFromText` 工具导入数据集语料,比如数据集语料格式为 JSON,每个 JSON 对象包含 `body`、`title` 和 `url` 等字段。样例代码如下:
130+
131+
```golang
132+
func TestRAGClient_LoadChunks(t *testing.T) {
133+
t.Logf("TestRAGClient_LoadChunks")
134+
ragClient, err := getRAGClient()
135+
if err != nil {
136+
t.Errorf("getRAGClient() error = %v", err)
137+
return
138+
}
139+
// load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
140+
file, err := os.Open("/dataset/corpus.json")
141+
if err != nil {
142+
t.Errorf("LoadData() error = %v", err)
143+
return
144+
}
145+
defer file.Close()
146+
decoder := json.NewDecoder(file)
147+
var data []struct {
148+
Body string `json:"body"`
149+
Title string `json:"title"`
150+
Url string `json:"url"`
151+
}
152+
if err := decoder.Decode(&data); err != nil {
153+
t.Errorf("LoadData() error = %v", err)
154+
return
155+
}
156+
157+
for _, item := range data {
158+
t.Logf("LoadData() url = %s", item.Url)
159+
t.Logf("LoadData() title = %s", item.Title)
160+
t.Logf("LoadData() len body = %d", len(item.Body))
161+
chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
162+
if err != nil {
163+
t.Errorf("LoadData() error = %v", err)
164+
continue
165+
} else {
166+
t.Logf("LoadData() chunks len = %d", len(chunks))
167+
}
168+
}
169+
t.Logf("TestRAGClient_LoadChunks done")
170+
}
171+
```
172+
173+
### 测试Chat效果
174+
175+
使用 `RAGClient.Chat` 工具测试 Chat 效果。样例代码如下:
176+
177+
```golang
178+
func TestRAGClient_Chat(t *testing.T) {
179+
ragClient, err := getRAGClient()
180+
if err != nil {
181+
t.Errorf("getRAGClient() error = %v", err)
182+
return
183+
}
184+
query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
185+
resp, err := ragClient.Chat(query)
186+
if err != nil {
187+
t.Errorf("Chat() error = %v", err)
188+
return
189+
}
190+
if resp == "" {
191+
t.Errorf("Chat() resp = %s, want not empty", resp)
192+
return
193+
}
194+
t.Logf("Chat() resp = %s", resp)
195+
}
196+
```
197+
198+
199+
127200

128201
## Milvus 安装
129202

plugins/golang-filter/mcp-server/servers/rag/llm/openai.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212

1313
const (
1414
OPENAI_CHAT_ENDPOINT = "/chat/completions"
15-
OPENAI_DEFAULT_MODEL = "gpt-3.5-turbo"
15+
OPENAI_DEFAULT_MODEL = "gpt-4o"
1616
)
1717

1818
// openAI specific configuration captured after initialization.

plugins/golang-filter/mcp-server/servers/rag/llm/prompt.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import (
44
"strings"
55
)
66

7-
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide accurate, complete, and strictly relevant answers based on the user's question and retrieved context.
7+
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide direct and concise answers based on the user's question and retrieved context.
88
99
Retrieved relevant context (may be empty, multiple segments separated by line breaks):
1010
{contexts}
@@ -13,9 +13,11 @@ User question:
1313
{query}
1414
1515
Requirements:
16-
1. If the context provides sufficient information, answer directly based on the context. You may use domain knowledge to supplement, but do not fabricate facts beyond the context.
17-
2. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
18-
3. Your response must correctly answer the user's question and must not contain any irrelevant or unrelated content.`
16+
1. Provide ONLY the direct answer without any explanation, reasoning, or additional context.
17+
2. If the context provides sufficient information, output the answer in the most concise form possible.
18+
3. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
19+
4. Do not include any phrases like "The answer is", "Based on the context", etc. Just output the answer directly.
20+
`
1921

2022
func BuildPrompt(query string, contexts []string, join string) string {
2123
rendered := strings.ReplaceAll(RAGPromptTemplate, "{query}", query)

plugins/golang-filter/mcp-server/servers/rag/rag_client.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,15 @@ func NewRAGClient(config *config.Config) (*RAGClient, error) {
4646
}
4747
ragclient.embeddingProvider = embeddingProvider
4848

49-
llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
50-
if err != nil {
51-
return nil, fmt.Errorf("create llm provider failed, err: %w", err)
49+
if ragclient.config.LLM.Provider == "" {
50+
ragclient.llmProvider = nil
51+
} else {
52+
llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
53+
if err != nil {
54+
return nil, fmt.Errorf("create llm provider failed, err: %w", err)
55+
}
56+
ragclient.llmProvider = llmProvider
5257
}
53-
ragclient.llmProvider = llmProvider
5458

5559
demoVector, err := embeddingProvider.GetEmbedding(context.Background(), "initialization")
5660
if err != nil {

plugins/golang-filter/mcp-server/servers/rag/rag_client_test.go

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package rag
22

33
import (
4+
"encoding/json"
5+
"os"
46
"testing"
57

68
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
@@ -137,7 +139,11 @@ func TestRAGClient_Chat(t *testing.T) {
137139
t.Errorf("getRAGClient() error = %v", err)
138140
return
139141
}
140-
query := "what is the competition about?"
142+
// query := "Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?"
143+
// query := "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?"
144+
// query := "Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory suggesting a lack of full truthfulness with the board as reported by TechCrunch?"
145+
// query := "Do the TechCrunch article on software companies and the Hacker News article on The Epoch Times both report an increase in revenue related to payment and subscription models, respectively?"
146+
query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
141147
resp, err := ragClient.Chat(query)
142148
if err != nil {
143149
t.Errorf("Chat() error = %v", err)
@@ -147,4 +153,45 @@ func TestRAGClient_Chat(t *testing.T) {
147153
t.Errorf("Chat() resp = %s, want not empty", resp)
148154
return
149155
}
156+
t.Logf("Chat() resp = %s", resp)
157+
}
158+
159+
func TestRAGClient_LoadChunks(t *testing.T) {
160+
t.Logf("TestRAGClient_LoadChunks")
161+
ragClient, err := getRAGClient()
162+
if err != nil {
163+
t.Errorf("getRAGClient() error = %v", err)
164+
return
165+
}
166+
// load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
167+
file, err := os.Open("/dataset/corpus.json")
168+
if err != nil {
169+
t.Errorf("LoadData() error = %v", err)
170+
return
171+
}
172+
defer file.Close()
173+
decoder := json.NewDecoder(file)
174+
var data []struct {
175+
Body string `json:"body"`
176+
Title string `json:"title"`
177+
Url string `json:"url"`
178+
}
179+
if err := decoder.Decode(&data); err != nil {
180+
t.Errorf("LoadData() error = %v", err)
181+
return
182+
}
183+
184+
for _, item := range data {
185+
t.Logf("LoadData() url = %s", item.Url)
186+
t.Logf("LoadData() title = %s", item.Title)
187+
t.Logf("LoadData() len body = %d", len(item.Body))
188+
chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
189+
if err != nil {
190+
t.Errorf("LoadData() error = %v", err)
191+
continue
192+
} else {
193+
t.Logf("LoadData() chunks len = %d", len(chunks))
194+
}
195+
}
196+
t.Logf("TestRAGClient_LoadChunks done")
150197
}

plugins/golang-filter/mcp-server/servers/rag/server.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func init() {
2828
TopK: 10,
2929
},
3030
LLM: config.LLMConfig{
31-
Provider: "openai",
31+
Provider: "",
3232
APIKey: "",
3333
BaseURL: "",
3434
Model: "gpt-4o",
@@ -103,8 +103,6 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
103103
if llmConfig, ok := config["llm"].(map[string]any); ok {
104104
if provider, exists := llmConfig["provider"].(string); exists {
105105
c.config.LLM.Provider = provider
106-
} else {
107-
return errors.New("missing llm provider")
108106
}
109107
if apiKey, exists := llmConfig["api_key"].(string); exists {
110108
c.config.LLM.APIKey = apiKey
@@ -190,7 +188,7 @@ func (c *RAGConfig) NewServer(serverName string) (*common.MCPServer, error) {
190188

191189
// Intelligent Q&A Tool
192190
mcpServer.AddTool(
193-
mcp.NewToolWithRawSchema("chat", "Generate contextually relevant responses using RAG system with LLM integration", GetChatSchema()),
191+
mcp.NewToolWithRawSchema("chat", "Answer user questions by retrieving relevant knowledge from the database and generating responses using RAG-enhanced LLM", GetChatSchema()),
194192
HandleChat(ragClient),
195193
)
196194

plugins/golang-filter/mcp-server/servers/rag/tools.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ func HandleChat(ragClient *RAGClient) common.ToolHandlerFunc {
169169
if !ok {
170170
return nil, fmt.Errorf("invalid query argument")
171171
}
172+
// check llm provider
173+
if ragClient.llmProvider == nil {
174+
return nil, fmt.Errorf("llm provider is empty, please check the llm configuration")
175+
}
172176
// Generate response using RAGClient's LLM
173177
reply, err := ragClient.Chat(query)
174178
if err != nil {

0 commit comments

Comments
 (0)