Skip to content

Commit 135f5bc

Browse files
authored
feat(search): add --path filter with backend pushdown (#141)
* feat(search): add path filter with SearchOptions and MCP support
* feat(search): normalize absolute --path filters across CLI and MCP
* fix(store): guard qdrant fetch limit conversion for gosec G115
1 parent c075e19 commit 135f5bc

22 files changed

+1576
-50
lines changed

cli/search.go

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ var (
2323
searchCompact bool
2424
searchWorkspace string
2525
searchProjects []string
26+
searchPath string
2627
)
2728

2829
// SearchResultJSON is a lightweight struct for JSON output (excludes vector, hash, updated_at)
@@ -66,6 +67,7 @@ func init() {
6667
searchCmd.Flags().BoolVarP(&searchCompact, "compact", "c", false, "Output minimal format without content (requires --json or --toon)")
6768
searchCmd.Flags().StringVar(&searchWorkspace, "workspace", "", "Workspace name for cross-project search")
6869
searchCmd.Flags().StringArrayVar(&searchProjects, "project", nil, "Project name(s) to search (requires --workspace, can be repeated)")
70+
searchCmd.Flags().StringVar(&searchPath, "path", "", "Path prefix to filter search results")
6971
searchCmd.MarkFlagsMutuallyExclusive("json", "toon")
7072
}
7173

@@ -128,7 +130,7 @@ func runSearch(cmd *cobra.Command, args []string) error {
128130

129131
// Workspace mode
130132
if searchWorkspace != "" {
131-
return runWorkspaceSearch(ctx, query)
133+
return runWorkspaceSearch(ctx, query, searchProjects, searchPath)
132134
}
133135

134136
// Find project root
@@ -184,8 +186,13 @@ func runSearch(cmd *cobra.Command, args []string) error {
184186
// Create searcher with boost config
185187
searcher := search.NewSearcher(st, emb, cfg.Search)
186188

189+
normalizedPath, err := search.NormalizeProjectPathPrefix(searchPath, projectRoot)
190+
if err != nil {
191+
return fmt.Errorf("invalid --path value: %w", err)
192+
}
193+
187194
// Search with boosting
188-
results, err := searcher.Search(ctx, query, searchLimit)
195+
results, err := searcher.Search(ctx, query, searchLimit, normalizedPath)
189196
if err != nil {
190197
if searchJSON {
191198
return outputSearchErrorJSON(err)
@@ -387,7 +394,7 @@ func SearchJSON(projectRoot string, query string, limit int) ([]store.SearchResu
387394
// Create searcher with boost config
388395
searcher := search.NewSearcher(st, emb, cfg.Search)
389396

390-
return searcher.Search(ctx, query, limit)
397+
return searcher.Search(ctx, query, limit, "")
391398
}
392399

393400
func init() {
@@ -396,7 +403,7 @@ func init() {
396403
}
397404

398405
// runWorkspaceSearch handles workspace-level search operations
399-
func runWorkspaceSearch(ctx context.Context, query string) error {
406+
func runWorkspaceSearch(ctx context.Context, query string, projects []string, pathOpt string) error {
400407
// Load workspace config
401408
wsCfg, err := config.LoadWorkspaceConfig()
402409
if err != nil {
@@ -411,6 +418,11 @@ func runWorkspaceSearch(ctx context.Context, query string) error {
411418
return err
412419
}
413420

421+
normalizedPath, resolvedProjects, err := search.NormalizeWorkspacePathPrefix(pathOpt, ws, projects)
422+
if err != nil {
423+
return fmt.Errorf("invalid --path value: %w", err)
424+
}
425+
414426
// Validate backend
415427
if err := config.ValidateWorkspaceBackend(ws); err != nil {
416428
return err
@@ -454,8 +466,21 @@ func runWorkspaceSearch(ctx context.Context, query string) error {
454466
}
455467
searcher := search.NewSearcher(st, emb, searchCfg)
456468

469+
// Construct full path prefix for database query
470+
// Database stores paths as: workspaceName/projectName/relativePath
471+
// When a single project is specified, include it in the path prefix to push filtering to database level
472+
fullPathPrefix := ws.Name + "/"
473+
if len(resolvedProjects) == 1 {
474+
// If exactly one project specified, include it in the path prefix for database-level filtering
475+
// This ensures file_path LIKE 'workspace/project/%' filter is applied
476+
fullPathPrefix += resolvedProjects[0] + "/"
477+
}
478+
if normalizedPath != "" {
479+
fullPathPrefix += normalizedPath
480+
}
481+
457482
// Search
458-
results, err := searcher.Search(ctx, query, searchLimit)
483+
results, err := searcher.Search(ctx, query, searchLimit, fullPathPrefix)
459484
if err != nil {
460485
if searchJSON {
461486
return outputSearchErrorJSON(err)
@@ -466,12 +491,12 @@ func runWorkspaceSearch(ctx context.Context, query string) error {
466491
return fmt.Errorf("search failed: %w", err)
467492
}
468493

469-
// Filter by projects if specified
494+
// Filter by projects if specified (additional client-side filtering for multiple projects)
470495
// File paths are stored as: workspaceName/projectName/relativePath
471-
if len(searchProjects) > 0 {
496+
if len(resolvedProjects) > 0 {
472497
filteredResults := make([]store.SearchResult, 0)
473498
for _, r := range results {
474-
for _, projectName := range searchProjects {
499+
for _, projectName := range resolvedProjects {
475500
// Match workspace/project/ prefix
476501
expectedPrefix := ws.Name + "/" + projectName + "/"
477502
if strings.HasPrefix(r.Chunk.FilePath, expectedPrefix) {

cli/search_path_filter_test.go

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
package cli
2+
3+
import (
4+
"context"
5+
"testing"
6+
"time"
7+
8+
"github.com/yoanbernabeu/grepai/config"
9+
"github.com/yoanbernabeu/grepai/search"
10+
"github.com/yoanbernabeu/grepai/store"
11+
)
12+
13+
// MockStore is a test mock for VectorStore
14+
type MockStore struct {
15+
chunks map[string]store.Chunk
16+
}
17+
18+
func NewMockStore() *MockStore {
19+
return &MockStore{
20+
chunks: make(map[string]store.Chunk),
21+
}
22+
}
23+
24+
func (m *MockStore) SaveChunks(ctx context.Context, chunks []store.Chunk) error {
25+
for _, chunk := range chunks {
26+
m.chunks[chunk.ID] = chunk
27+
}
28+
return nil
29+
}
30+
31+
func (m *MockStore) DeleteByFile(ctx context.Context, filePath string) error {
32+
for id, chunk := range m.chunks {
33+
if chunk.FilePath == filePath {
34+
delete(m.chunks, id)
35+
}
36+
}
37+
return nil
38+
}
39+
40+
func (m *MockStore) Search(ctx context.Context, queryVector []float32, limit int, opts store.SearchOptions) ([]store.SearchResult, error) {
41+
results := make([]store.SearchResult, 0)
42+
for _, chunk := range m.chunks {
43+
// Filter by path prefix if provided
44+
if opts.PathPrefix != "" && len(chunk.FilePath) < len(opts.PathPrefix) {
45+
continue
46+
}
47+
if opts.PathPrefix != "" && chunk.FilePath[:len(opts.PathPrefix)] != opts.PathPrefix {
48+
continue
49+
}
50+
51+
// Simple similarity score based on vector
52+
score := float32(0)
53+
for i := range queryVector {
54+
if i < len(chunk.Vector) {
55+
score += queryVector[i] * chunk.Vector[i]
56+
}
57+
}
58+
59+
results = append(results, store.SearchResult{
60+
Chunk: chunk,
61+
Score: score,
62+
})
63+
64+
if len(results) >= limit && limit > 0 {
65+
break
66+
}
67+
}
68+
return results, nil
69+
}
70+
71+
func (m *MockStore) GetDocument(ctx context.Context, filePath string) (*store.Document, error) {
72+
return nil, nil
73+
}
74+
75+
func (m *MockStore) SaveDocument(ctx context.Context, doc store.Document) error {
76+
return nil
77+
}
78+
79+
func (m *MockStore) DeleteDocument(ctx context.Context, filePath string) error {
80+
return nil
81+
}
82+
83+
func (m *MockStore) ListDocuments(ctx context.Context) ([]string, error) {
84+
return nil, nil
85+
}
86+
87+
func (m *MockStore) Load(ctx context.Context) error {
88+
return nil
89+
}
90+
91+
func (m *MockStore) Persist(ctx context.Context) error {
92+
return nil
93+
}
94+
95+
func (m *MockStore) Close() error {
96+
return nil
97+
}
98+
99+
func (m *MockStore) GetStats(ctx context.Context) (*store.IndexStats, error) {
100+
return nil, nil
101+
}
102+
103+
func (m *MockStore) ListFilesWithStats(ctx context.Context) ([]store.FileStats, error) {
104+
return nil, nil
105+
}
106+
107+
func (m *MockStore) GetChunksForFile(ctx context.Context, filePath string) ([]store.Chunk, error) {
108+
return nil, nil
109+
}
110+
111+
func (m *MockStore) GetAllChunks(ctx context.Context) ([]store.Chunk, error) {
112+
chunks := make([]store.Chunk, 0, len(m.chunks))
113+
for _, chunk := range m.chunks {
114+
chunks = append(chunks, chunk)
115+
}
116+
return chunks, nil
117+
}
118+
119+
// MockEmbedder is a test mock for Embedder. It ignores its input and maps
// every text to the same fixed vector.
type MockEmbedder struct{}

// vector returns a fresh copy of the constant test embedding. It is the single
// source of truth for Embed, EmbedBatch and Dimensions, so the reported
// dimension count can never drift from the vectors actually returned. A new
// slice is allocated on every call so callers may mutate the result freely.
func (m *MockEmbedder) vector() []float32 {
	return []float32{0.9, 0.1, 0.0}
}

// Embed returns the constant test vector for any text. The error is always nil.
func (m *MockEmbedder) Embed(ctx context.Context, text string) ([]float32, error) {
	return m.vector(), nil
}

// EmbedBatch returns one constant test vector per input text, in input order.
// Each entry is an independent slice. The result has len(texts) entries (an
// empty, non-nil slice for no texts) and the error is always nil.
func (m *MockEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	result := make([][]float32, len(texts))
	for i := range texts {
		result[i] = m.vector()
	}
	return result, nil
}

// Dimensions reports the length of the vectors produced by Embed and
// EmbedBatch.
func (m *MockEmbedder) Dimensions() int {
	return len(m.vector())
}

// Close is a no-op; it always returns nil.
func (m *MockEmbedder) Close() error {
	return nil
}
143+
144+
// TestSearcherWithPathPrefix tests the searcher integration with path prefix filtering
145+
func TestSearcherWithPathPrefix(t *testing.T) {
146+
ctx := context.Background()
147+
mockStore := NewMockStore()
148+
mockEmbedder := &MockEmbedder{}
149+
150+
// Add test chunks
151+
chunks := []store.Chunk{
152+
{
153+
ID: "1",
154+
FilePath: "src/handlers/user.go",
155+
StartLine: 1,
156+
EndLine: 10,
157+
Content: "func HandleUser() {}",
158+
Vector: []float32{0.9, 0.1, 0.0},
159+
Hash: "hash1",
160+
UpdatedAt: time.Now(),
161+
},
162+
{
163+
ID: "2",
164+
FilePath: "src/models/user.go",
165+
StartLine: 1,
166+
EndLine: 15,
167+
Content: "type User struct {}",
168+
Vector: []float32{0.8, 0.2, 0.0},
169+
Hash: "hash2",
170+
UpdatedAt: time.Now(),
171+
},
172+
{
173+
ID: "3",
174+
FilePath: "test/user_test.go",
175+
StartLine: 1,
176+
EndLine: 20,
177+
Content: "func TestUser() {}",
178+
Vector: []float32{0.85, 0.15, 0.0},
179+
Hash: "hash3",
180+
UpdatedAt: time.Now(),
181+
},
182+
}
183+
184+
if err := mockStore.SaveChunks(ctx, chunks); err != nil {
185+
t.Fatalf("failed to save chunks: %v", err)
186+
}
187+
188+
// Create searcher
189+
cfg := config.SearchConfig{
190+
Boost: config.BoostConfig{},
191+
Hybrid: config.HybridConfig{Enabled: false},
192+
}
193+
searcher := search.NewSearcher(mockStore, mockEmbedder, cfg)
194+
195+
tests := []struct {
196+
name string
197+
pathPrefix string
198+
wantCount int
199+
}{
200+
{
201+
name: "no path filter returns all results",
202+
pathPrefix: "",
203+
wantCount: 3,
204+
},
205+
{
206+
name: "filter by src/ directory",
207+
pathPrefix: "src/",
208+
wantCount: 2,
209+
},
210+
{
211+
name: "filter by src/handlers/ subdirectory",
212+
pathPrefix: "src/handlers/",
213+
wantCount: 1,
214+
},
215+
{
216+
name: "filter by test/ directory",
217+
pathPrefix: "test/",
218+
wantCount: 1,
219+
},
220+
{
221+
name: "filter with non-existent path",
222+
pathPrefix: "nonexistent/",
223+
wantCount: 0,
224+
},
225+
}
226+
227+
for _, tt := range tests {
228+
t.Run(tt.name, func(t *testing.T) {
229+
results, err := searcher.Search(ctx, "test query", 10, tt.pathPrefix)
230+
if err != nil {
231+
t.Fatalf("search failed: %v", err)
232+
}
233+
234+
if len(results) != tt.wantCount {
235+
t.Errorf("got %d results, want %d", len(results), tt.wantCount)
236+
}
237+
238+
// Verify all results match the path prefix
239+
for _, result := range results {
240+
if tt.pathPrefix != "" {
241+
if len(result.Chunk.FilePath) < len(tt.pathPrefix) ||
242+
result.Chunk.FilePath[:len(tt.pathPrefix)] != tt.pathPrefix {
243+
t.Errorf("result %s doesn't start with prefix %s", result.Chunk.FilePath, tt.pathPrefix)
244+
}
245+
}
246+
}
247+
})
248+
}
249+
}

cli/watch.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2038,8 +2038,8 @@ func (p *projectPrefixStore) DeleteByFile(ctx context.Context, filePath string)
20382038
return p.store.DeleteByFile(ctx, prefixedPath)
20392039
}
20402040

2041-
func (p *projectPrefixStore) Search(ctx context.Context, queryVector []float32, limit int) ([]store.SearchResult, error) {
2042-
return p.store.Search(ctx, queryVector, limit)
2041+
func (p *projectPrefixStore) Search(ctx context.Context, queryVector []float32, limit int, opts store.SearchOptions) ([]store.SearchResult, error) {
2042+
return p.store.Search(ctx, queryVector, limit, opts)
20432043
}
20442044

20452045
func (p *projectPrefixStore) GetDocument(ctx context.Context, filePath string) (*store.Document, error) {

cli/watch_prefix_store_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ type mockVectorStore struct {
1414
deletedByFilePath string
1515
searchVector []float32
1616
searchLimit int
17+
searchPathPrefix string
1718
searchResults []store.SearchResult
1819
getDocumentPath string
1920
getDocumentResult *store.Document
@@ -37,9 +38,10 @@ func (m *mockVectorStore) DeleteByFile(_ context.Context, filePath string) error
3738
return nil
3839
}
3940

40-
func (m *mockVectorStore) Search(_ context.Context, queryVector []float32, limit int) ([]store.SearchResult, error) {
41+
func (m *mockVectorStore) Search(_ context.Context, queryVector []float32, limit int, opts store.SearchOptions) ([]store.SearchResult, error) {
4142
m.searchVector = queryVector
4243
m.searchLimit = limit
44+
m.searchPathPrefix = opts.PathPrefix
4345
return m.searchResults, nil
4446
}
4547

@@ -229,7 +231,7 @@ func TestProjectPrefixStore_PassThroughAndGetChunks(t *testing.T) {
229231
projectPath: projectRoot,
230232
}
231233

232-
results, err := wrapped.Search(ctx, []float32{1, 2}, 5)
234+
results, err := wrapped.Search(ctx, []float32{1, 2}, 5, store.SearchOptions{})
233235
if err != nil {
234236
t.Fatalf("Search failed: %v", err)
235237
}

0 commit comments

Comments
 (0)