Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public function __construct(
* Create the fulltext search query.
*
* @param ContainerConfigurationInterface $containerConfig Search request container configuration.
* @param string $queryText The text query.
* @param string|array $queryText The text query.
* @param string $spellingType The type of spellcheck applied.
* @param float $boost Boost of the created query.
* @param int $depth Call depth of the create method. Can be used to avoid/prevent cycles.
Expand Down
52 changes: 49 additions & 3 deletions src/module-elasticsuite-thesaurus/Plugin/QueryRewrite.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
use Smile\ElasticsuiteThesaurus\Model\Index;
use Smile\ElasticsuiteCore\Api\Search\SpellcheckerInterface;
use Smile\ElasticsuiteCore\Search\Request\QueryInterface;
use Smile\ElasticsuiteCore\Helper\Text as TextHelper;

/**
* Plugin that handles query rewriting (synonym substitution) during fulltext query building phase.
Expand All @@ -47,6 +48,11 @@ class QueryRewrite
*/
private $index;

/**
* @var TextHelper
*/
private $textHelper;

/**
* @var array
*/
Expand All @@ -58,15 +64,18 @@ class QueryRewrite
* @param QueryFactory $queryFactory Search request query factory.
* @param ThesaurusConfigFactory $thesaurusConfigFactory Thesaurus configuration factory.
* @param Index $index Synonym index.
* @param TextHelper $textHelper Text helper.
*/
public function __construct(
QueryFactory $queryFactory,
ThesaurusConfigFactory $thesaurusConfigFactory,
Index $index
Index $index,
TextHelper $textHelper
) {
$this->queryFactory = $queryFactory;
$this->thesaurusConfigFactory = $thesaurusConfigFactory;
$this->index = $index;
$this->textHelper = $textHelper;
}

/**
Expand All @@ -77,7 +86,7 @@ public function __construct(
* @param QueryBuilder $subject Original query builder.
* @param \Closure $proceed Original create func.
* @param ContainerConfigurationInterface $containerConfig Search request container config.
* @param string $queryText Current query text.
* @param string|array $queryText Current query text.
* @param string $spellingType Spelling type of the query.
* @param float $boost Original query boost.
* @param int $depth Call depth of the create method. Can be used to avoid/prevent cycles.
Expand Down Expand Up @@ -109,13 +118,16 @@ public function aroundCreate(
if ($depth === 0) {
$rewrites = $this->getWeightedRewrites($queryText, $containerConfig, $boost);
}
$originalSpellingType = $spellingType;
// Set base query as SPELLING_TYPE_EXACT if synonyms/expansions are found.
// This is to prevent possible fuzzy matches on that original query's terms and doing so, prioritize the rewritten queries.
$spellingType = empty($rewrites) ? $spellingType : SpellcheckerInterface::SPELLING_TYPE_EXACT;
$query = $proceed($containerConfig, $queryText, $spellingType, $boost, $depth);

if (!empty($rewrites)) {
$synonymQueries = [$query];
$synonymQueriesSpellcheck = SpellcheckerInterface::SPELLING_TYPE_EXACT;
// Do not enforce SPELLING_TYPE_EXACT systematically for alternative queries.
$synonymQueriesSpellcheck = $this->getRewritesSpellingType($queryText, $originalSpellingType);

foreach ($rewrites as $rewrittenQuery => $weight) {
$synonymQueries[] = $proceed($containerConfig, $rewrittenQuery, $synonymQueriesSpellcheck, $weight, $depth + 1);
Expand Down Expand Up @@ -161,6 +173,40 @@ private function getWeightedRewrites($queryText, $containerConfig, $originalBoos
return $rewrites;
}

/**
 * Pick the spelling type that rewritten (synonym/expansion) queries are built with.
 *
 * Decision order:
 *  - array query text: the original spelling type is passed through untouched;
 *  - single-word query text: SPELLING_TYPE_EXACT is always used;
 *  - multi-word query text: at least one term was replaced by the rewrite, but another
 *    term may still be mistyped and left as-is, so the spelling type is "elevated":
 *    FUZZY becomes MOST_FUZZY and PURE_STOPWORDS becomes MOST_EXACT, while any other
 *    value is returned unchanged.
 *
 * Ideally a new spellcheck query would be run against each rewritten query instead.
 *
 * @param string|array $originalQueryText   The original query text.
 * @param int          $originalSpellingType The original spelling type.
 *
 * @return int
 */
private function getRewritesSpellingType($originalQueryText, $originalSpellingType)
{
    // Array query texts: the spelling type is expected to already be SPELLING_TYPE_EXACT
    // (enforced by the request builder), so it is simply forwarded.
    if (is_array($originalQueryText)) {
        return $originalSpellingType;
    }

    // A lone term that got rewritten leaves nothing else to be fuzzy about.
    $isSingleTerm = (1 === $this->textHelper->mbWordCount($originalQueryText));
    if ($isSingleTerm) {
        return SpellcheckerInterface::SPELLING_TYPE_EXACT;
    }

    if ($originalSpellingType === SpellcheckerInterface::SPELLING_TYPE_FUZZY) {
        return SpellcheckerInterface::SPELLING_TYPE_MOST_FUZZY;
    }

    if ($originalSpellingType === SpellcheckerInterface::SPELLING_TYPE_PURE_STOPWORDS) {
        return SpellcheckerInterface::SPELLING_TYPE_MOST_EXACT;
    }

    // Every other spelling type is kept as it was for the original query.
    return $originalSpellingType;
}

/**
* Return thesaurus/relevance configuration.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ public function testMultipleSearchQueryDepthBuilder()
$maxRewrittenQueries = 0;

$thesaurusConfigFactory = $this->getThesaurusConfigFactoryMock($maxRewrittenQueries);

$thesaurusIndex = $this->getMockBuilder(ThesaurusIndex::class)
->disableOriginalConstructor()
->getMock();
$textHelper = $this->getRealTextHelper();

$queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusConfigFactory, $thesaurusIndex);
$queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusConfigFactory, $thesaurusIndex, $textHelper);
$queryBuilderInterceptor = $this->getQueryBuilderWithPlugin($queryFactory, $queryRewritePlugin);

/*
Expand Down Expand Up @@ -139,12 +139,12 @@ public function testMultipleSearchQueryDepthBuilderWithRewrites()
$maxRewrittenQueries = 0;

$thesaurusConfigFactory = $this->getThesaurusConfigFactoryMock($maxRewrittenQueries);

$thesaurusIndex = $this->getMockBuilder(ThesaurusIndex::class)
->disableOriginalConstructor()
->getMock();
$textHelper = $this->getRealTextHelper();

$queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusConfigFactory, $thesaurusIndex);
$queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusConfigFactory, $thesaurusIndex, $textHelper);
$queryBuilderInterceptor = $this->getQueryBuilderWithPlugin($queryFactory, $queryRewritePlugin);

$thesaurusIndex->expects($this->exactly(2))->method('getQueryRewrites')->withConsecutive(
Expand Down Expand Up @@ -178,13 +178,13 @@ public function testSingleSearchQueryLimitedRewrites()
$maxRewrittenQueries = 1;

$thesaurusConfigFactory = $this->getThesaurusConfigFactoryMock($maxRewrittenQueries);

$thesaurusIndex = $this->getMockBuilder(ThesaurusIndex::class)
->disableOriginalConstructor()
->getMock();
$textHelper = $this->getRealTextHelper();

// Passing the mock Query Factory to the plugin to count the occurrences of calls to 'create'.
$queryRewritePlugin = new QueryRewrite($queryFactoryFullMock, $thesaurusConfigFactory, $thesaurusIndex);
$queryRewritePlugin = new QueryRewrite($queryFactoryFullMock, $thesaurusConfigFactory, $thesaurusIndex, $textHelper);
// But passing the real Query Factory (with mocked factories) to the query builder itself.
$queryBuilderInterceptor = $this->getQueryBuilderWithPlugin($queryFactory, $queryRewritePlugin);

Expand Down