Files
wordpress-preseed/wp-content/plugins/jetpack/_inc/lib/jetpack-wpes-query-builder/jetpack-wpes-query-builder.php
2019-10-24 00:12:05 +02:00

342 lines
8.2 KiB
PHP

<?php
/**
* Provides an interface for easily building a complex search query that
* combines multiple ranking signals.
*
*
* $bldr = new Jetpack_WPES_Query_Builder();
* $bldr->add_filter( ... );
* $bldr->add_filter( ... );
* $bldr->add_query( ... );
* $es_query = $bldr->build_query();
*
*
* All ES queries take a standard form with main query (with some filters),
* wrapped in a function_score
*
* Bucketed queries use an aggregation to diversify results. eg a bunch
* of separate filters where to get different sets of results.
*
*/
class Jetpack_WPES_Query_Builder {
protected $es_filters = array();
// Custom boosting with function_score
protected $functions = array();
protected $decays = array();
protected $scripts = array();
protected $functions_max_boost = 2.0;
protected $functions_score_mode = 'multiply';
protected $query_bool_boost = null;
// General aggregations for buckets and metrics
protected $aggs_query = false;
protected $aggs = array();
// The set of top level text queries to combine
protected $must_queries = array();
protected $should_queries = array();
protected $dis_max_queries = array();
protected $diverse_buckets_query = false;
protected $bucket_filters = array();
protected $bucket_sub_aggs = array();
////////////////////////////////////
// Methods for building a query
public function add_filter( $filter ) {
$this->es_filters[] = $filter;
}
public function add_query( $query, $type = 'must' ) {
switch ( $type ) {
case 'dis_max':
$this->dis_max_queries[] = $query;
break;
case 'should':
$this->should_queries[] = $query;
break;
case 'must':
default:
$this->must_queries[] = $query;
break;
}
}
/**
* Add a scoring function to the query
*
* NOTE: For decays (linear, exp, or gauss), use Jetpack_WPES_Query_Builder::add_decay() instead
*
* @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
*
* @param $function string name of the function
* @param $params array functions parameters
*
* @return void
*/
public function add_function( $function, $params ) {
$this->functions[ $function ][] = $params;
}
/**
* Add a decay function to score results
*
* This method should be used instead of Jetpack_WPES_Query_Builder::add_function() for decays, as the internal ES structure
* is slightly different for them.
*
* @see https://www.elastic.co/guide/en/elasticsearch/guide/current/decay-functions.html
*
* @param $function string name of the decay function - linear, exp, or gauss
* @param $params array The decay functions parameters, passed to ES directly
*
* @return void
*/
public function add_decay( $function, $params ) {
$this->decays[ $function ][] = $params;
}
/**
* Add a scoring mode to the query
*
* @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
*
* @param $mode string name of how to score
*
* @return void
*/
public function add_score_mode_to_functions( $mode='multiply' ) {
$this->functions_score_mode = $mode;
}
public function add_max_boost_to_functions( $boost ) {
$this->functions_max_boost = $boost;
}
public function add_boost_to_query_bool( $boost ) {
$this->query_bool_boost = $boost;
}
public function add_aggs( $aggs_name, $aggs ) {
$this->aggs_query = true;
$this->aggs[$aggs_name] = $aggs;
}
public function add_aggs_sub_aggs( $aggs_name, $sub_aggs ) {
if ( ! array_key_exists( 'aggs', $this->aggs[$aggs_name] ) ) {
$this->aggs[$aggs_name]['aggs'] = array();
}
$this->aggs[$aggs_name]['aggs'] = $sub_aggs;
}
public function add_bucketed_query( $name, $query ) {
$this->_add_bucket_filter( $name, $query );
$this->add_query( $query, 'dis_max' );
}
public function add_bucketed_terms( $name, $field, $terms, $boost = 1 ) {
if ( ! is_array( $terms ) ) {
$terms = array( $terms );
}
$this->_add_bucket_filter( $name, array(
'terms' => array(
$field => $terms,
),
));
$this->add_query( array(
'constant_score' => array(
'filter' => array(
'terms' => array(
$field => $terms,
),
),
'boost' => $boost,
),
), 'dis_max' );
}
public function add_bucket_sub_aggs( $agg ) {
$this->bucket_sub_aggs = array_merge( $this->bucket_sub_aggs, $agg );
}
protected function _add_bucket_filter( $name, $filter ) {
$this->diverse_buckets_query = true;
$this->bucket_filters[ $name ] = $filter;
}
////////////////////////////////////
// Building Final Query
/**
* Combine all the queries, functions, decays, scripts, and max_boost into an ES query
*
* @return array Array representation of the built ES query
*/
public function build_query() {
$query = array();
//dis_max queries just become a single must query
if ( ! empty( $this->dis_max_queries ) ) {
$this->must_queries[] = array(
'dis_max' => array(
'queries' => $this->dis_max_queries,
),
);
}
if ( empty( $this->must_queries ) ) {
$this->must_queries = array(
array(
'match_all' => array(),
),
);
}
if ( empty( $this->should_queries ) ) {
if ( 1 == count( $this->must_queries ) ) {
$query = $this->must_queries[0];
} else {
$query = array(
'bool' => array(
'must' => $this->must_queries,
),
);
}
} else {
$query = array(
'bool' => array(
'must' => $this->must_queries,
'should' => $this->should_queries,
),
);
}
if ( ! is_null( $this->query_bool_boost ) && isset( $query['bool'] ) ) {
$query['bool']['boost'] = $this->query_bool_boost;
}
// If there are any function score adjustments, then combine those
if ( $this->functions || $this->decays || $this->scripts ) {
$weighting_functions = array();
if ( $this->functions ) {
foreach ( $this->functions as $function_type => $configs ) {
foreach ( $configs as $config ) {
foreach ( $config as $field => $params ) {
$func_arr = $params;
$func_arr['field'] = $field;
$weighting_functions[] = array(
$function_type => $func_arr,
);
}
}
}
}
if ( $this->decays ) {
foreach ( $this->decays as $decay_type => $configs ) {
foreach ( $configs as $config ) {
foreach ( $config as $field => $params ) {
$weighting_functions[] = array(
$decay_type => array(
$field => $params,
),
);
}
}
}
}
if ( $this->scripts ) {
foreach ( $this->scripts as $script ) {
$weighting_functions[] = array(
'script_score' => array(
'script' => $script,
),
);
}
}
$query = array(
'function_score' => array(
'query' => $query,
'functions' => $weighting_functions,
'max_boost' => $this->functions_max_boost,
'score_mode' => $this->functions_score_mode,
),
);
} // End if().
return $query;
}
/**
* Assemble the 'filter' portion of an ES query, from all registered filters
*
* @return array|null Combined ES filters, or null if none have been defined
*/
public function build_filter() {
if ( empty( $this->es_filters ) ) {
$filter = null;
} elseif ( 1 == count( $this->es_filters ) ) {
$filter = $this->es_filters[0];
} else {
$filter = array(
'and' => $this->es_filters,
);
}
return $filter;
}
/**
* Assemble the 'aggregation' portion of an ES query, from all general aggregations.
*
* @return array An aggregation query as an array of topics, filters, and bucket names
*/
public function build_aggregation() {
if ( empty( $this->bucket_sub_aggs ) && empty( $this->aggs_query ) ) {
return array();
}
if ( ! $this->diverse_buckets_query && empty( $this->aggs_query ) ) {
return $this->bucket_sub_aggs;
}
$aggregations = array(
'topics' => array(
'filters' => array(
'filters' => array(),
),
),
);
if ( ! empty( $this->bucket_sub_aggs ) ) {
$aggregations['topics']['aggs'] = $this->bucket_sub_aggs;
}
foreach ( $this->bucket_filters as $bucket_name => $filter ) {
$aggregations['topics']['filters']['filters'][ $bucket_name ] = $filter;
}
if ( ! empty( $this->aggs_query ) ) {
$aggregations = $this->aggs;
}
return $aggregations;
}
}