Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
54 / 54 |
|
100.00% |
7 / 7 |
CRAP | |
100.00% |
1 / 1 |
| SearchEngineService | |
100.00% |
54 / 54 |
|
100.00% |
7 / 7 |
32 | |
100.00% |
1 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| match | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
6 | |||
| getSearchEngines | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| matchVariant | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
6 | |||
| matchHostPattern | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
| matchesHiddenKeyword | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
7 | |||
| extractKeyword | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
8 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Drupal\visitors\Service; |
| 6 | |
| 7 | use Drupal\Core\Config\ConfigFactoryInterface; |
| 8 | use Drupal\visitors\VisitorsSearchEngineInterface; |
| 9 | |
/**
 * Service for matching URLs to search engines.
 *
 * Search engine definitions are read from the 'sites' key of the
 * 'visitors.search_engines' config object. Each definition is expected to
 * carry a 'label' and a list of 'variants'; a variant may define 'urls'
 * (host patterns), 'params' (keyword sources) and 'hiddenkeyword' (patterns
 * that veto a match).
 */
final class SearchEngineService implements VisitorsSearchEngineInterface {

  /**
   * Recognises configuration values that are written as '/.../' regexes.
   */
  private const REGEX_SHAPE = '/^\/.*\/$/';

  /**
   * Hidden keyword pattern that gets special treatment for real searches.
   */
  private const SEARCH_PATH_PATTERN = '/\/search(\?.*)?/';

  /**
   * Character class substituted for each '{}' placeholder in a host pattern.
   */
  private const HOST_WILDCARD = '[a-z0-9.-]+';

  /**
   * The config factory service.
   *
   * @var \Drupal\Core\Config\ConfigFactoryInterface
   */
  private ConfigFactoryInterface $configFactory;

  /**
   * Cached search engines configuration.
   *
   * NULL until the configuration has been read for the first time.
   *
   * @var array|null
   */
  private ?array $searchEngines = NULL;

  /**
   * Constructs a new SearchEngineService.
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory service.
   */
  public function __construct(ConfigFactoryInterface $config_factory) {
    $this->configFactory = $config_factory;
  }

  /**
   * {@inheritdoc}
   *
   * Returns NULL when the URL cannot be parsed, has no host, or no configured
   * variant matches. Otherwise returns an array with the keys 'name' (the
   * engine label), 'variant' (the matching variant definition) and 'keyword'
   * (the extracted search keyword, or NULL).
   */
  public function match(string $url): ?array {
    $parsed_url = parse_url($url);

    // parse_url() yields FALSE on seriously malformed input, and relative
    // URLs have no host; neither can be attributed to a search engine.
    if (!is_array($parsed_url) || !isset($parsed_url['host'])) {
      return NULL;
    }

    $host = strtolower($parsed_url['host']);
    $query_string = $parsed_url['query'] ?? '';
    $path = $parsed_url['path'] ?? '';

    foreach ($this->getSearchEngines() as $engine) {
      // An engine without variants simply cannot match, same as a variant
      // without 'urls'.
      foreach ($engine['variants'] ?? [] as $variant) {
        if ($this->matchVariant($host, $query_string, $path, $variant)) {
          return [
            'name' => $engine['label'],
            'variant' => $variant,
            'keyword' => $this->extractKeyword($query_string, $path, $variant),
          ];
        }
      }
    }

    return NULL;
  }

  /**
   * Gets the search engines configuration.
   *
   * The configuration is read once per service instance and then served from
   * the $searchEngines property.
   *
   * @return array
   *   The search engines configuration, or an empty array if none is set.
   */
  private function getSearchEngines(): array {
    if ($this->searchEngines === NULL) {
      $config = $this->configFactory->get('visitors.search_engines');
      $this->searchEngines = $config->get('sites') ?? [];
    }

    return $this->searchEngines;
  }

  /**
   * Matches a URL against a search engine variant.
   *
   * A variant matches when at least one of its 'urls' host patterns matches
   * the host and none of its 'hiddenkeyword' patterns matches the path or
   * query string.
   *
   * @param string $host
   *   The lower-cased hostname from the URL.
   * @param string $query_string
   *   The query string from the URL.
   * @param string $path
   *   The path from the URL.
   * @param array $variant
   *   The search engine variant configuration.
   *
   * @return bool
   *   TRUE if the variant matches, FALSE otherwise.
   */
  private function matchVariant(string $host, string $query_string, string $path, array $variant): bool {
    $host_matches = FALSE;
    foreach ($variant['urls'] ?? [] as $host_pattern) {
      if ($this->matchHostPattern($host, $host_pattern)) {
        $host_matches = TRUE;
        break;
      }
    }

    if (!$host_matches) {
      return FALSE;
    }

    // Any hidden keyword hit excludes this variant.
    foreach ($variant['hiddenkeyword'] ?? [] as $hidden_pattern) {
      if ($this->matchesHiddenKeyword($query_string, $path, $hidden_pattern)) {
        return FALSE;
      }
    }

    return TRUE;
  }

  /**
   * Matches a hostname against a pattern.
   *
   * Patterns are compared case-insensitively (the pattern is lower-cased
   * here, the host is lower-cased by the caller). A '{}' placeholder, as in
   * 'google.{}' or '{}.google.com', stands for one or more characters out of
   * letters, digits, dots and hyphens.
   *
   * @param string $host
   *   The hostname to match.
   * @param string $pattern
   *   The pattern to match against.
   *
   * @return bool
   *   TRUE if the pattern matches, FALSE otherwise.
   */
  private function matchHostPattern(string $host, string $pattern): bool {
    $pattern = strtolower($pattern);

    // No placeholder: plain string equality.
    if (strpos($pattern, '{}') === FALSE) {
      return $host === $pattern;
    }

    // Quote the literal pieces around each placeholder and glue them back
    // together with the wildcard character class.
    $literal_parts = array_map(
      static function (string $part): string {
        return preg_quote($part, '/');
      },
      explode('{}', $pattern)
    );

    // 'D' keeps '$' from matching before a trailing newline.
    $regex = '/^' . implode(self::HOST_WILDCARD, $literal_parts) . '$/D';

    return preg_match($regex, $host) === 1;
  }

  /**
   * Checks if the URL matches a hidden keyword pattern.
   *
   * A pattern of the form '/.../' is run as a regular expression against the
   * path followed by '?' and the query string (when there is one). Any other
   * pattern must equal the query string or the path exactly.
   *
   * @param string $query_string
   *   The query string from the URL.
   * @param string $path
   *   The path from the URL.
   * @param string $pattern
   *   The hidden keyword pattern.
   *
   * @return bool
   *   TRUE if matches hidden keyword, FALSE otherwise.
   */
  private function matchesHiddenKeyword(string $query_string, string $path, string $pattern): bool {
    // Plain patterns are compared literally.
    if (!preg_match(self::REGEX_SHAPE, $pattern)) {
      return $query_string === $pattern || $path === $pattern;
    }

    // A '/search' URL that carries a 'q' parameter is an actual search and
    // must not be excluded by the generic '/search' pattern. The check is
    // anchored to a parameter boundary so that 'oq=', 'aq=' and similar
    // names are not mistaken for 'q='.
    if ($pattern === self::SEARCH_PATH_PATTERN
      && $path === '/search'
      && preg_match('/(?:^|&)q=/', $query_string)) {
      return FALSE;
    }

    return (bool) preg_match($pattern, $this->buildUrlPart($path, $query_string));
  }

  /**
   * Extracts the search keyword from the URL.
   *
   * The variant's 'params' are tried in order. A '/.../' entry is a regular
   * expression run against the path and query string; once it matches, its
   * first capture group (URL-decoded) is the result, or NULL when the
   * expression has no such group. Any other entry names a query parameter
   * whose non-empty string value is the result.
   *
   * @param string $query_string
   *   The query string from the URL.
   * @param string $path
   *   The path from the URL.
   * @param array $variant
   *   The search engine variant configuration.
   *
   * @return string|null
   *   The extracted keyword, or NULL if not found.
   */
  private function extractKeyword(string $query_string, string $path, array $variant): ?string {
    $params = $variant['params'] ?? [];

    // Both inputs are the same for every candidate parameter, so prepare
    // them once instead of per iteration.
    parse_str($query_string, $query_params);
    $url_part = $this->buildUrlPart($path, $query_string);

    foreach ($params as $param) {
      if (preg_match(self::REGEX_SHAPE, $param)) {
        if (preg_match($param, $url_part, $matches)) {
          // The capture comes from the raw URL and still needs decoding.
          return isset($matches[1]) ? urldecode($matches[1]) : NULL;
        }
        continue;
      }

      $value = $query_params[$param] ?? NULL;

      // parse_str() has already URL-decoded the value; decoding again would
      // corrupt keywords such as 'c++'. Array values ('q[]=x') are skipped
      // rather than stringified, and '0' is accepted as a keyword.
      if (is_string($value) && $value !== '') {
        return $value;
      }
    }

    return NULL;
  }

  /**
   * Joins path and query string into the subject used for regex matching.
   *
   * @param string $path
   *   The path from the URL.
   * @param string $query_string
   *   The query string from the URL, without the leading '?'.
   *
   * @return string
   *   The path, followed by '?' and the query string when it is not empty.
   */
  private function buildUrlPart(string $path, string $query_string): string {
    return $query_string === '' ? $path : $path . '?' . $query_string;
  }

}