|
3 | 3 | * Copyright © Magento, Inc. All rights reserved.
|
4 | 4 | * See COPYING.txt for license details.
|
5 | 5 | */
|
| 6 | + |
| 7 | +declare(strict_types=1); |
6 | 8 | namespace Magento\Search\Model;
|
7 | 9 |
|
8 | 10 | use Magento\Search\Api\SynonymAnalyzerInterface;
|
9 | 11 |
|
| 12 | +/** |
| 13 | + * SynonymAnalyzer responsible for search of synonyms matching a word or a phrase. |
| 14 | + */ |
10 | 15 | class SynonymAnalyzer implements SynonymAnalyzerInterface
|
11 | 16 | {
|
12 | 17 | /**
|
@@ -42,55 +47,119 @@ public function __construct(SynonymReader $synReader)
|
42 | 47 | */
|
public function getSynonymsForPhrase($phrase)
{
    // null is treated like an empty phrase; calling trim(null) under strict_types would raise a TypeError.
    if ($phrase === null) {
        return [];
    }

    // Normalise only the copy used for word splitting: strip surrounding whitespace and collapse runs of
    // spaces into one, so explode() never yields empty "words" (e.g. for " red  shoes ").
    $normalizedPhrase = preg_replace('/ {2,}/', ' ', trim($phrase));

    // Strict comparison instead of empty(): the phrase "0" is a legitimate search term.
    if ($normalizedPhrase === '') {
        return [];
    }

    // The reader still receives the phrase exactly as the caller passed it.
    $synonymGroups = $this->getSynonymGroupsByPhrase($phrase);

    // Go through every word of the phrase looking for a synonym group that matches the phrase part
    // starting at that word. If no group matches, the word itself is returned in that position.
    $words = explode(' ', $normalizedPhrase);
    $result = [];

    foreach ($words as $offset => $word) {
        // Default: a word without a matching synonym group stands for itself.
        $synonyms = [$word];

        if ($synonymGroups) {
            $pattern = $this->getSearchPattern(array_slice($words, $offset));
            $position = $this->findInArray($pattern, $synonymGroups);
            if ($position !== null) {
                // Each group is stored as one comma-separated string.
                $synonyms = explode(',', $synonymGroups[$position]);
            }
        }

        $result[] = $synonyms;
    }

    return $result;
}
|
71 | 81 |
|
/**
 * Locates the first synonym group that matches the given regular expression.
 *
 * Groups are tested in iteration order. The zero-based ordinal position of the first group for which
 * preg_match() reports a match is returned; when no group matches, null is returned.
 *
 * @param string $pattern
 * @param array $synonymGroupsToExamine
 * @return int|null
 */
private function findInArray(string $pattern, array $synonymGroupsToExamine)
{
    // array_values() re-indexes the groups so that the loop key is the ordinal position.
    foreach (array_values($synonymGroupsToExamine) as $index => $group) {
        if (preg_match($pattern, $group) === 1) {
            return $index;
        }
    }

    return null;
}
| 103 | + |
/**
 * Returns a regular expression to search for synonyms of the phrase represented as the list of words.
 *
 * The returned pattern looks for every part of the phrase that starts at its first word.
 *
 * For example, for the phrase "Elizabeth is the English queen" taken from the very first word,
 * the method builds an expression which looks for synonyms of all these phrase parts:
 * - Elizabeth is the English queen
 * - Elizabeth is the English
 * - Elizabeth is the
 * - Elizabeth is
 * - Elizabeth
 *
 * For the same phrase on the second iteration, starting with the word "is", it matches these parts:
 * - is the English queen
 * - is the English
 * - is the
 * - is
 *
 * The pattern looks for an exact (case-insensitive) match of a whole comma-separated item, so it will
 * not find these phrases as synonyms:
 * - Is there anybody in the room?
 * - Is the English is most popular language?
 * - Is the English queen Elizabeth?
 *
 * Every word is escaped with preg_quote(), so regular expression metacharacters and the "/" delimiter
 * inside a word are matched literally instead of breaking or altering the pattern.
 *
 * Take into account that the returned pattern expects the data to be a comma-separated value.
 *
 * @param array $words
 * @return string
 */
private function getSearchPattern(array $words): string
{
    // Words originate from the search phrase, so they must never be interpreted as regex syntax.
    $quotedWords = array_map(
        function ($word) {
            return preg_quote((string) $word, '/');
        },
        $words
    );

    $patterns = [];
    for ($length = count($quotedWords); $length > 0; $length--) {
        // Words of a phrase part may be separated by any amount of whitespace in the stored synonyms.
        $phrase = implode('\s+', array_slice($quotedWords, 0, $length));
        $patterns[] = '^' . $phrase . ',';
        $patterns[] = ',' . $phrase . ',';
        $patterns[] = ',' . $phrase . '$';
    }

    return '/' . implode('|', $patterns) . '/i';
}
| 146 | + |
/**
 * Loads the synonym groups that the synonym reader finds for the phrase.
 *
 * Each returned item is the "synonyms" value of one loaded row; the calling code treats it as a
 * comma-separated list of synonyms.
 *
 * @param string $phrase
 * @return string[]
 */
private function getSynonymGroupsByPhrase(string $phrase): array
{
    $rows = $this->synReaderModel->loadByPhrase($phrase)->getData();

    $groups = [];
    foreach ($rows as $row) {
        $groups[] = $row['synonyms'];
    }

    return $groups;
}
|
96 | 165 | }
|
0 commit comments