?
Path : /home/admin/public_html/old/happy2/administrator/components/com_finder/helpers/indexer/ |
Current File : /home/admin/public_html/old/happy2/administrator/components/com_finder/helpers/indexer/query.php |
<?php
/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
 * @license     GNU General Public License version 2 or later; see LICENSE
 */

defined('_JEXEC') or die;

// Register dependent classes.
JLoader::register('FinderIndexerHelper', dirname(__FILE__) . '/helper.php');
JLoader::register('FinderIndexerTaxonomy', dirname(__FILE__) . '/taxonomy.php');
JLoader::register('FinderHelperRoute', JPATH_SITE . '/components/com_finder/helpers/route.php');
JLoader::register('FinderHelperLanguage', JPATH_ADMINISTRATOR . '/components/com_finder/helpers/language.php');

/**
 * Query class for the Finder indexer package.
 *
 * An instance is fully populated by its constructor: the raw search options
 * are turned into included/excluded/ignored tokens, taxonomy filters and date
 * filters, and the $search flag tells the caller whether running the query
 * makes sense at all.
 *
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 * @since       2.5
 */
class FinderIndexerQuery
{
	/**
	 * Flag to show whether the query can return results.
	 *
	 * @var    boolean
	 * @since  2.5
	 */
	public $search;

	/**
	 * The query input string.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $input;

	/**
	 * Whether a query without an input string may still be searched when
	 * at least one filter is set.
	 *
	 * @var    boolean
	 */
	public $empty;

	/**
	 * Temporary date storage (a JRegistry). It only exists while the
	 * constructor runs and is unset before the constructor returns.
	 *
	 * @var    JRegistry
	 */
	public $dates;

	/**
	 * The language of the query.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $language;

	/**
	 * The query string matching mode.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $mode;

	/**
	 * The included tokens.
	 *
	 * @var    array
	 * @since  2.5
	 */
	public $included = array();

	/**
	 * The excluded tokens.
	 *
	 * @var    array
	 * @since  2.5
	 */
	public $excluded = array();

	/**
	 * The tokens to ignore because no matches exist.
	 *
	 * @var    array
	 * @since  2.5
	 */
	public $ignored = array();

	/**
	 * The operators used in the query input string.
	 *
	 * @var    array
	 * @since  2.5
	 */
	public $operators = array();

	/**
	 * The terms to highlight as matches.
	 *
	 * @var    array
	 * @since  2.5
	 */
	public $highlight = array();

	/**
	 * The number of matching terms for the query input.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	public $terms;

	/**
	 * The static filter id.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	public $filter;

	/**
	 * The taxonomy filters. This is a multi-dimensional array with the
	 * taxonomy branch titles as the first level and, per branch, the node
	 * ids keyed by node title.
	 *
	 * For example:
	 * $filters = array(
	 *     'Type'  => array('Article' => 10, 'Contact' => 32, ...),
	 *     'Label' => array('News' => 20, 'Sports' => 314, ...),
	 *     ...
	 * );
	 *
	 * @var    array
	 * @since  2.5
	 */
	public $filters = array();

	/**
	 * The start date filter.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $date1;

	/**
	 * The end date filter.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $date2;

	/**
	 * The start date filter modifier.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $when1;

	/**
	 * The end date filter modifier.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $when2;

	/**
	 * Method to instantiate the query object.
	 *
	 * Recognised option keys: input, empty, language, date1, date2, when1,
	 * when2, filter (static filter id) and filters (dynamic node ids).
	 *
	 * @param   array  $options  An array of query options.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public function __construct($options)
	{
		// Get the input string.
		$this->input = isset($options['input']) ? $options['input'] : null;

		// Get the empty query setting.
		$this->empty = isset($options['empty']) ? (bool) $options['empty'] : false;

		// Get the input language and reduce it to its primary language.
		$this->language = !empty($options['language']) ? $options['language'] : FinderIndexerHelper::getDefaultLanguage();
		$this->language = FinderIndexerHelper::getPrimaryLanguage($this->language);

		// Get the matching mode.
		$this->mode = 'AND';

		// Initialize the temporary date storage.
		$this->dates = new JRegistry;

		/*
		 * Populate the temporary date storage. Every option is checked
		 * against its own key; previously date2/when1/when2 were all gated
		 * on date1 and were silently dropped when date1 was empty.
		 */
		foreach (array('date1', 'date2', 'when1', 'when2') as $key)
		{
			if (!empty($options[$key]))
			{
				$this->dates->set($key, $options[$key]);
			}
		}

		// Process the static taxonomy filters.
		if (!empty($options['filter']))
		{
			$this->processStaticTaxonomy($options['filter']);
		}

		// Process the dynamic taxonomy filters.
		if (!empty($options['filters']))
		{
			$this->processDynamicTaxonomy($options['filters']);
		}

		// Get the date filters.
		$d1 = $this->dates->get('date1');
		$d2 = $this->dates->get('date2');
		$w1 = $this->dates->get('when1');
		$w2 = $this->dates->get('when2');

		// Process the date filters.
		if (!empty($d1) || !empty($d2))
		{
			$this->processDates($d1, $d2, $w1, $w2);
		}

		// Process the input string.
		$this->processString($this->input, $this->language, $this->mode);

		// Get the number of matching terms.
		foreach ($this->included as $token)
		{
			$this->terms += count($token->matches);
		}

		// Remove the temporary date storage.
		unset($this->dates);

		/*
		 * Lastly, determine whether this query can return a result set:
		 * either there is a query string, or empty searches are allowed and
		 * at least one filter narrows the result.
		 */
		$hasFilter = !empty($this->filter) || !empty($this->filters) || !empty($this->date1) || !empty($this->date2);

		if (!empty($this->input))
		{
			$this->search = true;
		}
		elseif ($this->empty && $hasFilter)
		{
			$this->search = true;
		}
		else
		{
			$this->search = false;
		}
	}

	/**
	 * Method to convert the query object into a URI string.
	 *
	 * @param   string  $base  The base URI. [optional]
	 *
	 * @return  string  The complete query URI.
	 *
	 * @since   2.5
	 */
	public function toURI($base = null)
	{
		// Set the base if not specified.
		if (empty($base))
		{
			$base = 'index.php?option=com_finder&view=search';
		}

		// Get the base URI.
		$uri = JURI::getInstance($base);

		// Add the static taxonomy filter if present.
		if (!empty($this->filter))
		{
			$uri->setVar('f', $this->filter);
		}

		// Get the filters in the request.
		$input = JFactory::getApplication()->input;
		$t = $input->request->get('t', array(), 'array');

		// Add the dynamic taxonomy filters if present, but only those that
		// were part of the current request.
		if (!empty($this->filters))
		{
			foreach ($this->filters as $nodes)
			{
				foreach ($nodes as $node)
				{
					if (!in_array($node, $t))
					{
						continue;
					}

					// NOTE(review): every node is written under the same 't[]'
					// name; if setVar() replaces an existing value only the
					// last node survives - confirm against JURI.
					$uri->setVar('t[]', $node);
				}
			}
		}

		// Add the input string and the date filters/modifiers if present.
		$optional = array(
			'q'  => $this->input,
			'd1' => $this->date1,
			'd2' => $this->date2,
			'w1' => $this->when1,
			'w2' => $this->when2
		);

		foreach ($optional as $name => $value)
		{
			if (!empty($value))
			{
				$uri->setVar($name, $value);
			}
		}

		// Add a menu item id if one is not present.
		if (!$uri->getVar('Itemid'))
		{
			// Get the menu item id.
			$query = array(
				'view' => $uri->getVar('view'),
				'f'    => $uri->getVar('f'),
				'q'    => $uri->getVar('q')
			);

			$item = FinderHelperRoute::getItemid($query);

			// Add the menu item id if present.
			if ($item !== null)
			{
				$uri->setVar('Itemid', $item);
			}
		}

		return $uri->toString(array('path', 'query'));
	}

	/**
	 * Method to get a list of excluded search term ids.
	 *
	 * @return  array  An array of excluded term ids.
	 *
	 * @since   2.5
	 */
	public function getExcludedTermIds()
	{
		$results = array();

		// Iterate through the excluded tokens and compile the matching terms.
		foreach ($this->excluded as $token)
		{
			$results = array_merge($results, $token->matches);
		}

		// Sanitize the terms. The de-duplicated list has to be kept; it used
		// to be assigned to an unused variable and thrown away.
		$results = array_unique($results);
		JArrayHelper::toInteger($results);

		return $results;
	}

	/**
	 * Method to get a list of included search term ids.
	 *
	 * @return  array  An array of included term ids, grouped by term.
	 *
	 * @since   2.5
	 */
	public function getIncludedTermIds()
	{
		$results = array();

		// Iterate through the included tokens and compile the matching terms.
		foreach ($this->included as $token)
		{
			// Tokens without any matching term contribute nothing.
			if (empty($token->matches))
			{
				continue;
			}

			// Prepare the container for the term if necessary.
			if (!array_key_exists($token->term, $results))
			{
				$results[$token->term] = array();
			}

			// Add the matches to the stack.
			$results[$token->term] = array_merge($results[$token->term], $token->matches);
		}

		// Sanitize the terms.
		foreach (array_keys($results) as $key)
		{
			$results[$key] = array_unique($results[$key]);
			JArrayHelper::toInteger($results[$key]);
		}

		return $results;
	}

	/**
	 * Method to get a list of required search term ids.
	 *
	 * @return  array  An array of required term ids, grouped by term.
	 *
	 * @since   2.5
	 */
	public function getRequiredTermIds()
	{
		$results = array();

		// Iterate through the included tokens and compile the matching terms.
		foreach ($this->included as $token)
		{
			// Only required tokens are of interest here.
			if (!$token->required)
			{
				continue;
			}

			// Prepare the container for the term if necessary.
			if (!array_key_exists($token->term, $results))
			{
				$results[$token->term] = array();
			}

			// Add the matches to the stack.
			$results[$token->term] = array_merge($results[$token->term], $token->matches);
		}

		// Sanitize the terms.
		foreach (array_keys($results) as $key)
		{
			$results[$key] = array_unique($results[$key]);
			JArrayHelper::toInteger($results[$key]);
		}

		return $results;
	}

	/**
	 * Method to process the static taxonomy input. The static taxonomy input
	 * comes in the form of a pre-defined search filter that is assigned to the
	 * search form.
	 *
	 * @param   integer  $filterId  The id of static filter.
	 *
	 * @return  boolean  True on success, false when the filter does not exist.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	protected function processStaticTaxonomy($filterId)
	{
		// Get the database object.
		$db = JFactory::getDBO();

		// Load the predefined filter.
		$query = $db->getQuery(true);
		$query->select('f.' . $db->quoteName('data') . ', f.' . $db->quoteName('params'));
		$query->from($db->quoteName('#__finder_filters') . ' AS f');
		$query->where('f.' . $db->quoteName('filter_id') . ' = ' . (int) $filterId);

		$db->setQuery($query);
		$return = $db->loadObject();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		// Check the returned filter.
		if (empty($return))
		{
			return false;
		}

		// Set the filter.
		$this->filter = (int) $filterId;

		// Get a parameter object for the filter date options.
		$params = new JRegistry;
		$params->loadString($return->params);

		// Set the dates if not already set.
		// NOTE(review): these defaults are stored under 'd1'/'d2'/'w1'/'w2'
		// while the constructor reads 'date1'/'date2'/'when1'/'when2' from the
		// same storage, so they appear to be unused - confirm the intended
		// keys (and the stored modifier values) before changing this.
		$this->dates->def('d1', $params->get('d1'));
		$this->dates->def('d2', $params->get('d2'));
		$this->dates->def('w1', $params->get('w1'));
		$this->dates->def('w2', $params->get('w2'));

		// Remove duplicates, sanitize and drop zero values.
		$filters = $this->sanitizeFilterIds(explode(',', $return->data));

		// Check if we have any real input.
		if (empty($filters))
		{
			return true;
		}

		// Sort the filter ids by branch.
		foreach ($this->loadTaxonomyNodes($filters) as $result)
		{
			$this->filters[$result->branch][$result->title] = (int) $result->id;
		}

		return true;
	}

	/**
	 * Method to process the dynamic taxonomy input. The dynamic taxonomy input
	 * comes in the form of select fields that the user chooses from. The
	 * dynamic taxonomy input is processed AFTER the static taxonomy input
	 * because the dynamic options can be used to further narrow a static
	 * taxonomy filter.
	 *
	 * @param   array  $filters  An array of taxonomy node ids.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	protected function processDynamicTaxonomy($filters)
	{
		// Remove duplicates, sanitize and drop zero values.
		$filters = $this->sanitizeFilterIds($filters);

		// Check if we have any real input.
		if (empty($filters))
		{
			return true;
		}

		// Cleared filter branches.
		$cleared = array();

		/*
		 * Sort the filter ids by branch. Because these filters are designed to
		 * override and further narrow the items selected in the static filter,
		 * we will clear the values from the static filter on a branch by
		 * branch basis before adding the dynamic filters. So, if the static
		 * filter defines a type filter of "articles" and three "category"
		 * filters but the user only limits the category further, the category
		 * filters will be flushed but the type filters will not.
		 */
		foreach ($this->loadTaxonomyNodes($filters) as $result)
		{
			// Clear the branch the first time it is seen.
			if (!in_array($result->branch, $cleared))
			{
				$this->filters[$result->branch] = array();
				$cleared[] = $result->branch;
			}

			// Add the filter to the list.
			$this->filters[$result->branch][$result->title] = (int) $result->id;
		}

		return true;
	}

	/**
	 * Normalises a list of taxonomy node ids: every value is cast to an
	 * integer, zero values are dropped and duplicates are removed.
	 *
	 * The cast happens before de-duplication so that values which only
	 * become equal (or zero) after the cast are handled as well.
	 *
	 * @param   mixed  $filters  The raw node ids (a non-array is wrapped in an array).
	 *
	 * @return  array  The sanitized, re-indexed list of node ids.
	 */
	protected function sanitizeFilterIds($filters)
	{
		$filters = (array) $filters;
		JArrayHelper::toInteger($filters);

		return array_values(array_unique(array_diff($filters, array(0))));
	}

	/**
	 * Loads the published taxonomy nodes with the given ids, together with
	 * the title of their parent branch, limited to the view levels of the
	 * current user. We go through the database for two reasons: one, it
	 * ensures that the filters being used are real; two, we need to sort the
	 * filters by taxonomy branch.
	 *
	 * @param   array  $filters  A non-empty array of integer node ids.
	 *
	 * @return  array  A list of objects with the properties id, title and branch.
	 *
	 * @throws  Exception on database error.
	 */
	protected function loadTaxonomyNodes($filters)
	{
		// Get the database object and the view levels of the current user.
		$db     = JFactory::getDBO();
		$groups = implode(',', JFactory::getUser()->getAuthorisedViewLevels());

		$query = $db->getQuery(true);
		$query->select('t1.id, t1.title, t2.title AS branch');
		$query->from($db->quoteName('#__finder_taxonomy') . ' AS t1');
		$query->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id');
		$query->where('t1.state = 1');
		$query->where('t1.' . $db->quoteName('access') . ' IN (' . $groups . ')');
		$query->where('t1.id IN (' . implode(',', $filters) . ')');
		$query->where('t2.state = 1');
		$query->where('t2.' . $db->quoteName('access') . ' IN (' . $groups . ')');

		// Load the filters.
		$db->setQuery($query);
		$results = $db->loadObjectList();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		return $results;
	}

	/**
	 * Method to process the query date filters to determine start and end
	 * date limitations.
	 *
	 * @param   string  $date1  The first date filter.
	 * @param   string  $date2  The second date filter.
	 * @param   string  $when1  The first date modifier.
	 * @param   string  $when2  The second date modifier.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 */
	protected function processDates($date1, $date2, $when1, $when2)
	{
		// Clean up the inputs.
		$date1 = JString::trim(JString::strtolower($date1));
		$date2 = JString::trim(JString::strtolower($date2));
		$when1 = JString::trim(JString::strtolower($when1));
		$when2 = JString::trim(JString::strtolower($when2));

		// Array of allowed when values.
		$whens = array('before', 'after', 'exact');

		// NOTE(review): an empty date string is handed to the date factory
		// unchanged; confirm how JFactory::getDate() interprets it.
		$date = $this->parseDateFilter($date1);

		if ($date !== null)
		{
			// Set the start date filter.
			$this->date1 = $date->toSQL();
			$this->when1 = in_array($when1, $whens) ? $when1 : 'before';
		}

		$date = $this->parseDateFilter($date2);

		if ($date !== null)
		{
			// Set the end date filter.
			$this->date2 = $date->toSQL();
			$this->when2 = in_array($when2, $whens) ? $when2 : 'before';
		}

		return true;
	}

	/**
	 * Turns a date filter value into a date object using the configured
	 * time offset. The translated word for 'today' is a special case that is
	 * replaced by the current date first.
	 *
	 * @param   string  $value  The lower-cased, trimmed date filter value.
	 *
	 * @return  mixed  The date object, or null if no timestamp could be derived.
	 */
	protected function parseDateFilter($value)
	{
		// Get the time offset.
		$offset = JFactory::getApplication()->getCfg('offset');

		// The value of 'today' is a special case that we need to handle.
		if ($value === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY')))
		{
			// format() takes date()-style characters; the former strftime-style
			// '%Y-%m-%d' left literal percent signs in the result.
			$value = JFactory::getDate('now', $offset)->format('Y-m-d');
		}

		// Try to parse the date string.
		$date = JFactory::getDate($value, $offset);

		// Check if the date was parsed successfully.
		return $date->toUnix() !== null ? $date : null;
	}

	/**
	 * Method to process the query input string and extract required, optional,
	 * and excluded tokens; taxonomy filters; and date filters.
	 *
	 * @param   string  $input  The query input string.
	 * @param   string  $lang   The query input language.
	 * @param   string  $mode   The query matching mode.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	protected function processString($input, $lang, $mode)
	{
		// Clean up the input string.
		$input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
		$input = JString::strtolower($input);
		$input = preg_replace('#\s+#mi', ' ', $input);
		$input = JString::trim($input);
		$debug = JFactory::getConfig()->get('debug_lang');

		/*
		 * First, we need to handle string based modifiers. String based
		 * modifiers could potentially include things like "category:blah" or
		 * "before:2009-10-21" or "type:article", etc.
		 */
		$patterns = array(
			'before' => JText::_('COM_FINDER_FILTER_WHEN_BEFORE'),
			'after'  => JText::_('COM_FINDER_FILTER_WHEN_AFTER')
		);

		// Add the taxonomy branch titles to the possible patterns.
		foreach (FinderIndexerTaxonomy::getBranchTitles() as $branch)
		{
			$patterns[$branch] = JString::strtolower(JText::_(FinderHelperLanguage::branchSingular($branch)));
		}

		// Container for search terms and phrases.
		$terms = array();
		$phrases = array();

		// Cleared filter branches.
		$cleared = array();

		/*
		 * Compile the suffix pattern. This is used to match the values of the
		 * filter input string. Single words can be input directly, multi-word
		 * values have to be wrapped in double quotes. The quote characters are
		 * written as entities so the source file stays plain ASCII.
		 */
		$quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');
		$suffix = '(([\w\d' . $quotes . '-]+)|\"([\w\d\s' . $quotes . '-]+)\")';

		/*
		 * Iterate through the possible filter patterns and search for matches.
		 * We need to match the key, colon, and a value pattern for the match
		 * to be valid.
		 */
		foreach ($patterns as $modifier => $pattern)
		{
			$matches = array();

			// With language debugging on, drop the two marker characters that
			// wrap the translated string on either side.
			if ($debug)
			{
				$pattern = substr($pattern, 2, -2);
			}

			// The translated label is literal text, so escape it for the regex.
			$regex = '#' . preg_quote($pattern, '#') . '\s*:\s*' . $suffix . '#mi';

			// Check if the filter pattern is in the input string.
			if (!preg_match($regex, $input, $matches))
			{
				continue;
			}

			// Get the value given to the modifier (group 3 is the quoted form).
			$value = isset($matches[3]) ? $matches[3] : $matches[1];

			if ($modifier === 'before' || $modifier === 'after')
			{
				// Handle a before or after date filter.
				// NOTE(review): both modifiers write to date1/when1, so a later
				// match replaces an earlier one - confirm this is intended.
				$date = $this->parseDateFilter($value);

				if ($date !== null)
				{
					$this->date1 = $date->toSQL();
					$this->when1 = $modifier;
				}
			}
			else
			{
				// Handle a taxonomy branch filter: try to find the node id.
				$return = FinderIndexerTaxonomy::getNodeByTitle($modifier, $value);

				if ($return)
				{
					// Clear the branch the first time it is seen.
					if (!in_array($modifier, $cleared))
					{
						$this->filters[$modifier] = array();
						$cleared[] = $modifier;
					}

					// Add the filter to the list.
					$this->filters[$modifier][$return->title] = (int) $return->id;
				}
			}

			// Remove the modifier from the input and clean it up again.
			$input = str_replace($matches[0], '', $input);
			$input = preg_replace('#\s+#mi', ' ', $input);
			$input = JString::trim($input);
		}

		/*
		 * Extract the tokens enclosed in double quotes so that we can handle
		 * them as phrases.
		 */
		if (JString::strpos($input, '"') !== false)
		{
			$matches = array();

			// Extract the tokens enclosed in double quotes.
			if (preg_match_all('#\"([^"]+)\"#mi', $input, $matches))
			{
				/*
				 * One or more phrases were found so we need to iterate through
				 * them, tokenize them as phrases, and remove them from the raw
				 * input string before we move on to the next processing step.
				 */
				foreach ($matches[1] as $key => $match)
				{
					// Find the complete phrase in the input string.
					$pos = JString::strpos($input, $matches[0][$key]);
					$len = JString::strlen($matches[0][$key]);

					// Add any terms that are before this phrase to the stack.
					$before = JString::trim(JString::substr($input, 0, $pos));

					if ($before)
					{
						$terms = array_merge($terms, explode(' ', $before));
					}

					// Strip out everything up to and including the phrase.
					$input = JString::substr($input, $pos + $len);

					// Clean up the input string again.
					$input = preg_replace('#\s+#mi', ' ', $input);
					$input = JString::trim($input);

					// Get the words in the phrase.
					$parts = explode(' ', $match);
					$c = count($parts);

					if ($c <= 3)
					{
						// The phrase is <= 3 words so we can use it as is.
						$phrases[] = $match;
						$terms[] = $match;

						continue;
					}

					/*
					 * If the phrase is longer than three words, we need to
					 * break it down into smaller chunks of phrases that are
					 * less than or equal to three words. We overlap the chunks
					 * (step of two, length of three) so that we can ensure
					 * that a match is found for the complete phrase and not
					 * just portions of it.
					 */
					for ($i = 0; $i < $c; $i += 2)
					{
						// If only one word is left we are done, because it was
						// already used at the end of the previous chunk.
						if ($c - $i === 1)
						{
							break;
						}

						// Take three words, or the last two if only two remain.
						$chunk = implode(' ', array_slice($parts, $i, 3));

						$phrases[] = $chunk;
						$terms[] = $chunk;
					}
				}
			}
		}

		// Add the remaining terms if present.
		if (!empty($input))
		{
			$terms = array_merge($terms, explode(' ', $input));
		}

		// An array of our boolean operators. $operator => $translation
		$operators = array(
			'AND' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_AND')),
			'OR'  => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_OR')),
			'NOT' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_NOT'))
		);

		// If language debugging is enabled you need to ignore the debug
		// strings in matching. JDEBUG is still honoured for compatibility.
		if ($debug || JDEBUG)
		{
			$debugStrings = array('**', '??');
			$operators = str_replace($debugStrings, '', $operators);
		}

		/*
		 * Iterate through the terms and perform any sorting that needs to be
		 * done based on boolean search operators. Terms that are before an
		 * and/or/not modifier have to be handled in relation to their operator.
		 */
		for ($i = 0, $c = count($terms); $i < $c; $i++)
		{
			// Check if the term is followed by an operator that we understand.
			if (isset($terms[$i + 1]) && in_array($terms[$i + 1], $operators))
			{
				// Get the operator mode.
				$op = array_search($terms[$i + 1], $operators);

				// "a AND b" / "a OR b": both operands are consumed at once.
				if (($op === 'AND' || $op === 'OR') && isset($terms[$i + 2]))
				{
					// AND makes both operands required, OR makes them optional.
					$required = ($op === 'AND');

					// Tokenize and stack the current term.
					$this->stackToken($this->tokenizeTerm($terms[$i], $lang), $required);

					// Record the operator itself.
					$this->operators[] = $terms[$i + 1];

					// Tokenize and stack the term after the operator.
					$this->stackToken($this->tokenizeTerm($terms[$i + 2], $lang), $required);

					// Remove the processed phrases if possible.
					foreach (array($terms[$i], $terms[$i + 2]) as $done)
					{
						if (($pk = array_search($done, $phrases)) !== false)
						{
							unset($phrases[$pk]);
						}
					}

					// Remove the processed terms.
					unset($terms[$i], $terms[$i + 1], $terms[$i + 2]);

					// Adjust the loop.
					$i += 2;
				}

				continue;
			}

			// From here on the current term itself has to be the operator.
			if (!isset($terms[$i + 1]))
			{
				continue;
			}

			$op = array_search($terms[$i], $operators);

			if ($op !== 'OR' && $op !== 'NOT')
			{
				continue;
			}

			// Record the operator.
			$this->operators[] = $terms[$i];

			// Tokenize the next term (current plus one).
			$other = $this->tokenizeTerm($terms[$i + 1], $lang);

			if ($op === 'OR')
			{
				// Orphaned OR: the following term is optional.
				$this->stackToken($other, false);
			}
			else
			{
				// NOT: the following term is excluded if it matches anything.
				$other->required = false;

				if (count($other->matches))
				{
					$this->excluded[] = $other;
				}
				else
				{
					$this->ignored[] = $other;
				}
			}

			// Remove the processed phrase if possible.
			if (($pk = array_search($terms[$i + 1], $phrases)) !== false)
			{
				unset($phrases[$pk]);
			}

			// Remove the processed terms.
			unset($terms[$i], $terms[$i + 1]);

			// Adjust the loop.
			$i += 1;
		}

		/*
		 * Iterate through any search phrases and tokenize them. We handle
		 * phrases as autonomous units and do not break them down into two and
		 * three word combinations. The list may have gaps in its keys after
		 * the operator handling above, so it is walked with foreach.
		 */
		foreach ($phrases as $phrase)
		{
			// Tokenize the phrase and add it as a required token.
			$this->stackToken($this->tokenizeTerm($phrase, $lang), true);

			// Remove the processed term if possible.
			if (($pk = array_search($phrase, $terms)) !== false)
			{
				unset($terms[$pk]);
			}
		}

		$phrases = array();

		/*
		 * Handle any remaining tokens using the standard processing mechanism.
		 */
		if (!empty($terms))
		{
			// Tokenize the terms.
			$terms = implode(' ', $terms);
			$tokens = FinderIndexerHelper::tokenize($terms, $lang, false);

			// Make sure we are working with an array.
			$tokens = is_array($tokens) ? $tokens : array($tokens);

			// Get the token data and required state for all the tokens.
			foreach ($tokens as $token)
			{
				$token = $this->getTokenData($token);

				// In AND mode single words are required, phrases are not.
				$required = $mode === 'AND' ? ($token->phrase ? false : true) : false;

				// Add the token to the appropriate stack.
				$this->stackToken($token, $required);
			}
		}

		return true;
	}

	/**
	 * Tokenizes a single term or phrase as one unit and loads its matching
	 * term ids from the database.
	 *
	 * @param   string  $term  The term or phrase to tokenize.
	 * @param   string  $lang  The query input language.
	 *
	 * @return  FinderIndexerToken  The token including its matches.
	 *
	 * @throws  Exception on database error.
	 */
	protected function tokenizeTerm($term, $lang)
	{
		return $this->getTokenData(FinderIndexerHelper::tokenize($term, $lang, true));
	}

	/**
	 * Sets the required flag of a token and files the token: required tokens
	 * and tokens with at least one match are included (and their matched
	 * terms highlighted), everything else is ignored.
	 *
	 * @param   FinderIndexerToken  $token     The token with its matches loaded.
	 * @param   boolean             $required  Whether the token is required.
	 *
	 * @return  void
	 */
	protected function stackToken($token, $required)
	{
		$token->required = $required;

		if ($required || count($token->matches))
		{
			$this->included[] = $token;
			$this->highlight = array_merge($this->highlight, array_keys($token->matches));
		}
		else
		{
			$this->ignored[] = $token;
		}
	}

	/**
	 * Method to get the base and similar term ids and, if necessary, suggested
	 * term data from the database. The terms ids are identified based on a
	 * 'like' match in MySQL and/or a common stem. If no term ids could be
	 * found, then we know that we will not be able to return any results for
	 * that term and we should try to find a similar term to use that we can
	 * match so that we can suggest the alternative search query to the user.
	 *
	 * @param   FinderIndexerToken  $token  A FinderIndexerToken object.
	 *
	 * @return  FinderIndexerToken  A FinderIndexerToken object.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	protected function getTokenData($token)
	{
		// Get the database object.
		$db = JFactory::getDBO();

		// Create a database query to match the token.
		$query = $db->getQuery(true);
		$query->select('t.term, t.term_id');
		$query->from('#__finder_terms AS t');

		/*
		 * If the token is a phrase, the lookup process is fairly simple. If
		 * the token is a word, it is a little more complicated. We have to
		 * create two queries to lookup the term and the stem respectively,
		 * then union the result sets together. This is MUCH faster than using
		 * an or condition in the database query.
		 */
		if ($token->phrase)
		{
			// Add the phrase to the query.
			$query->where('t.term = ' . $db->quote($token->term));
			$query->where('t.phrase = 1');
		}
		else
		{
			// Add the term to the query.
			$query->where('t.term = ' . $db->quote($token->term));
			$query->where('t.phrase = 0');

			// Clone the query, replace the WHERE clause.
			$sub = clone($query);
			$sub->clear('where');
			$sub->where('t.stem = '.$db->quote($token->stem));
			$sub->where('t.phrase = 0');

			// Union the two queries.
			$query->union($sub);
		}

		// Get the terms.
		$db->setQuery($query);
		$matches = $db->loadObjectList();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		// Setup the container and add the matches to the token, keyed by term.
		$token->matches = array();

		if (!empty($matches))
		{
			foreach ($matches as $match)
			{
				$token->matches[$match->term] = (int) $match->term_id;
			}
		}

		// With at least one match there is no need for a suggestion.
		if (!empty($token->matches))
		{
			return $token;
		}

		// No matches were found, try to find a similar but better token.
		//@TODO: PostgreSQL doesn't support SOUNDEX out of the box
		$query->clear();
		$query->select('DISTINCT t.term_id AS id, t.term AS term');
		$query->from('#__finder_terms AS t');
		$query->where('t.soundex = SOUNDEX(' . $db->quote($token->term) . ')');
		$query->where('t.phrase = ' . (int) $token->phrase);

		// Get the terms.
		$db->setQuery($query);
		$results = $db->loadObjectList();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		// Check if any similar terms were found.
		if (empty($results))
		{
			return $token;
		}

		// Stack for sorting the similar terms: result key => distance.
		$suggestions = array();

		foreach ($results as $sk => $st)
		{
			// Get the levenshtein distance between terms.
			$distance = levenshtein($st->term, $token->term);

			// Make sure the levenshtein distance isn't over 50.
			if ($distance < 50)
			{
				$suggestions[$sk] = $distance;
			}
		}

		// Every candidate may have been rejected by the distance check, in
		// which case there is nothing to suggest.
		if (!empty($suggestions))
		{
			// Sort the suggestions and take the closest match.
			asort($suggestions, SORT_NUMERIC);
			$keys = array_keys($suggestions);

			// Add the suggested term.
			$token->suggestion = $results[$keys[0]]->term;
		}

		return $token;
	}
}