Skip to content

Commit fb66d0f

Browse files
Add custom synonym_analyzer
Signed-off-by: Prudhvi Godithi <[email protected]>
1 parent 6f1b59e commit fb66d0f

File tree

3 files changed, with 40 additions (+40) and 5 deletions (-5) in total.

3 files changed

+40
-5
lines changed

modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,7 @@
146146
import org.opensearch.index.analysis.PreConfiguredTokenizer;
147147
import org.opensearch.index.analysis.TokenFilterFactory;
148148
import org.opensearch.index.analysis.TokenizerFactory;
149+
import org.opensearch.indices.analysis.AnalysisModule;
149150
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
150151
import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
151152
import org.opensearch.plugins.AnalysisPlugin;
@@ -157,6 +158,7 @@
157158
import org.opensearch.threadpool.ThreadPool;
158159
import org.opensearch.watcher.ResourceWatcherService;
159160

161+
import java.io.IOException;
160162
import java.util.ArrayList;
161163
import java.util.Collection;
162164
import java.util.Collections;
@@ -176,6 +178,9 @@ public class CommonAnalysisModulePlugin extends Plugin implements AnalysisPlugin
176178

177179
private final SetOnce<ScriptService> scriptService = new SetOnce<>();
178180

181+
private AnalysisModule analysisModule;
182+
183+
179184
@Override
180185
public Collection<Object> createComponents(
181186
Client client,
@@ -191,9 +196,16 @@ public Collection<Object> createComponents(
191196
Supplier<RepositoriesService> repositoriesServiceSupplier
192197
) {
193198
this.scriptService.set(scriptService);
199+
try {
200+
this.analysisModule = new AnalysisModule(environment,
201+
List.of(this));
202+
} catch (IOException e) {
203+
throw new RuntimeException(e);
204+
}
194205
return Collections.emptyList();
195206
}
196207

208+
197209
@Override
198210
public List<ScriptContext<?>> getContexts() {
199211
return Collections.singletonList(AnalysisPredicateScript.CONTEXT);
@@ -332,8 +344,13 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
332344
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
333345
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
334346
filters.put("stemmer", StemmerTokenFilterFactory::new);
335-
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
336-
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
347+
filters.put("synonym", (indexSettings, environment, name, settings) ->
348+
new SynonymTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry())
349+
);
350+
filters.put("synonym_graph", (indexSettings, environment, name, settings) ->
351+
new SynonymGraphTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry())
352+
);
353+
337354
filters.put("trim", TrimTokenFilterFactory::new);
338355
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
339356
filters.put("unique", UniqueTokenFilterFactory::new);

modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
import org.opensearch.env.Environment;
4141
import org.opensearch.index.IndexSettings;
4242
import org.opensearch.index.analysis.AnalysisMode;
43+
import org.opensearch.index.analysis.AnalysisRegistry;
4344
import org.opensearch.index.analysis.CharFilterFactory;
4445
import org.opensearch.index.analysis.TokenFilterFactory;
4546
import org.opensearch.index.analysis.TokenizerFactory;
@@ -49,8 +50,8 @@
4950

5051
public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
5152

52-
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
53-
super(indexSettings, env, name, settings);
53+
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
54+
super(indexSettings, env, name, settings, analysisRegistry);
5455
}
5556

5657
@Override

modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,9 @@
4848
import org.opensearch.index.analysis.CustomAnalyzer;
4949
import org.opensearch.index.analysis.TokenFilterFactory;
5050
import org.opensearch.index.analysis.TokenizerFactory;
51+
import org.opensearch.index.analysis.AnalysisRegistry;
5152

53+
import java.io.IOException;
5254
import java.io.Reader;
5355
import java.io.StringReader;
5456
import java.util.List;
@@ -64,8 +66,10 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
6466
protected final Settings settings;
6567
protected final Environment environment;
6668
protected final AnalysisMode analysisMode;
69+
private final String synonymAnalyzer;
70+
private final AnalysisRegistry analysisRegistry;
6771

68-
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
72+
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
6973
super(indexSettings, name, settings);
7074
this.settings = settings;
7175

@@ -83,6 +87,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
8387
boolean updateable = settings.getAsBoolean("updateable", false);
8488
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
8589
this.environment = env;
90+
this.synonymAnalyzer = settings.get("synonym_analyzer", null);
91+
this.analysisRegistry = analysisRegistry;
8692
}
8793

8894
@Override
@@ -137,6 +143,17 @@ Analyzer buildSynonymAnalyzer(
137143
List<TokenFilterFactory> tokenFilters,
138144
Function<String, TokenFilterFactory> allFilters
139145
) {
146+
if (synonymAnalyzer != null) {
147+
Analyzer customSynonymAnalyzer;
148+
try {
149+
customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer);
150+
} catch (IOException e) {
151+
throw new RuntimeException(e);
152+
}
153+
if (customSynonymAnalyzer != null) {
154+
return customSynonymAnalyzer;
155+
}
156+
}
140157
return new CustomAnalyzer(
141158
tokenizer,
142159
charFilters.toArray(new CharFilterFactory[0]),

0 commit comments

Comments
 (0)