Source code for django_elasticsearch_dsl_drf.filter_backends.faceted_search

"""
Faceted search backend.
"""
import copy
from collections import defaultdict

from elasticsearch_dsl import TermsFacet
from elasticsearch_dsl.query import Q

from rest_framework.filters import BaseFilterBackend

from six import string_types, iteritems

__title__ = 'django_elasticsearch_dsl_drf.faceted_search'
__author__ = 'Artur Barseghyan <artur.barseghyan@gmail.com>'
__copyright__ = '2017-2020 Artur Barseghyan'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('FacetedSearchFilterBackend', 'FacetedFilterSearchFilterBackend')

from django_elasticsearch_dsl_drf.filter_backends.filtering import FilteringFilterBackend


[docs]class FacetedSearchFilterBackend(BaseFilterBackend): """Faceted search backend. Example: >>> from django_elasticsearch_dsl_drf.filter_backends import ( >>> FacetedSearchFilterBackend >>> ) >>> from elasticsearch_dsl import TermsFacet, DateHistogramFacet >>> from django_elasticsearch_dsl_drf.viewsets import ( >>> BaseDocumentViewSet, >>> ) >>> >>> # Local article document definition >>> from .documents import ArticleDocument >>> >>> # Local article document serializer >>> from .serializers import ArticleDocumentSerializer >>> >>> class ArticleDocumentView(BaseDocumentViewSet): >>> >>> document = ArticleDocument >>> serializer_class = ArticleDocumentSerializer >>> filter_backends = [FacetedSearchFilterBackend,] >>> faceted_search_fields = { >>> 'title': 'title.raw', # Uses `TermsFacet` by default >>> 'state': { >>> 'field': 'state.raw', >>> 'facet': TermsFacet, >>> }, >>> 'publisher': { >>> 'field': 'publisher.raw', >>> 'facet': TermsFacet, >>> 'enabled': False, >>> }, >>> 'date_published': { >>> 'field': 'date_published.raw', >>> 'facet': DateHistogramFacet, >>> 'options': { >>> 'interval': 'month', >>> }, >>> 'enabled': True, >>> }, >>> >>> } Facets make queries to be more heavy. That's why by default all facets are disabled and enabled only explicitly either in the filter options (`enabled` set to True) or via query params `?facet=state&facet=date_published`. """ faceted_search_param = 'facet'
[docs] @classmethod def prepare_faceted_search_fields(cls, view): """Prepare faceted search fields. Prepares the following structure: >>> { >>> 'publisher': { >>> 'field': 'publisher.raw', >>> 'facet': TermsFacet, >>> 'enabled': False, >>> } >>> 'date_published': { >>> 'field': 'date_published.raw', >>> 'facet': DateHistogramFacet, >>> 'options': { >>> 'interval': 'month', >>> }, >>> 'enabled': True, >>> }, >>> } :param view: :type view: rest_framework.viewsets.ReadOnlyModelViewSet :return: Faceted search fields options. :rtype: dict """ faceted_search_fields = copy.deepcopy(view.faceted_search_fields) for field, options in faceted_search_fields.items(): if options is None or isinstance(options, string_types): faceted_search_fields[field] = { 'field': options or field } elif 'field' not in faceted_search_fields[field]: faceted_search_fields[field]['field'] = field if 'enabled' not in faceted_search_fields[field]: faceted_search_fields[field]['enabled'] = False if 'facet' not in faceted_search_fields[field]: faceted_search_fields[field]['facet'] = TermsFacet if 'options' not in faceted_search_fields[field]: faceted_search_fields[field]['options'] = {} faceted_search_fields[field]['global'] = \ faceted_search_fields[field].get('global', False) return faceted_search_fields
[docs] def get_faceted_search_query_params(self, request): """Get faceted search query params. :param request: Django REST framework request. :type request: rest_framework.request.Request :return: List of search query params. :rtype: list """ query_params = request.query_params.copy() return query_params.getlist(self.faceted_search_param, [])
[docs] def construct_facets(self, request, view): """Construct facets. Turns the following structure: >>> { >>> 'publisher': { >>> 'field': 'publisher.raw', >>> 'facet': TermsFacet, >>> 'enabled': False, >>> } >>> 'date_published': { >>> 'field': 'date_published', >>> 'facet': DateHistogramFacet, >>> 'options': { >>> 'interval': 'month', >>> }, >>> 'enabled': True, >>> }, >>> } Into the following structure: >>> { >>> 'publisher': TermsFacet(field='publisher.raw'), >>> 'publishing_frequency': DateHistogramFacet( >>> field='date_published.raw', >>> interval='month' >>> ), >>> } """ __facets = {} faceted_search_query_params = self.get_faceted_search_query_params( request ) faceted_search_fields = self.prepare_faceted_search_fields(view) for __field, __options in faceted_search_fields.items(): if __field in faceted_search_query_params or __options['enabled']: __facets.update( { __field: { 'facet': faceted_search_fields[__field]['facet']( field=faceted_search_fields[__field]['field'], **faceted_search_fields[__field]['options'] ), 'global': faceted_search_fields[__field]['global'], } } ) return __facets
[docs] def aggregate(self, request, queryset, view): """Aggregate. :param request: :param queryset: :param view: :return: """ __facets = self.construct_facets(request, view) for __field, __facet in iteritems(__facets): agg = __facet['facet'].get_aggregation() agg_filter = Q('match_all') # TODO: Implement # for __filter_field, __filter in iteritems(self._filters): # if __field == __filter_field: # continue # agg_filter &= __filter if __facet['global']: queryset.aggs.bucket( '_filter_' + __field, 'global' ).bucket(__field, agg) else: queryset.aggs.bucket( '_filter_' + __field, 'filter', filter=agg_filter ).bucket(__field, agg) return queryset
[docs] def filter_queryset(self, request, queryset, view): """Filter the queryset. :param request: Django REST framework request. :param queryset: Base queryset. :param view: View. :type request: rest_framework.request.Request :type queryset: elasticsearch_dsl.search.Search :type view: rest_framework.viewsets.ReadOnlyModelViewSet :return: Updated queryset. :rtype: elasticsearch_dsl.search.Search """ return self.aggregate(request, queryset, view)
[docs]class FacetedFilterSearchFilterBackend(FilteringFilterBackend, FacetedSearchFilterBackend): """ Combined faceting and filtering backend similar to elasticsearch-dsl's FacetedSearch class. It combines the functionality of FilteringFilterBackend and FacetedSearchFilterBackend to take filters into account when creating facets. This backend uses the same configuration fields as FilteringFilterBackend and FacetedSearchFilterBackend. This backend replaces their functionality and should not be used together with either of those backends. Note that to work correctly, the actual elasticsearch field must be the same for a facet and its matching filter. For example, if a facet will aggregate on field `state.raw`, then the filter must also filter on `state.raw`, and not just `state`. When creating a facet, filters for faceted fields other than for the current facet are applied. Filters for faceted fields are then applied as post_filters. Filters on non-faceted fields are applied as normal filters. """
[docs] def filter_queryset(self, request, queryset, view): # the fact that apply_filter is a classmethod means we can't store state on self, # so we hitch it onto queryset queryset._facets = self.construct_facets(request, view) queryset._faceted_fields = set(f['facet']._params['field'] for f in queryset._facets.values()) queryset._filters = defaultdict(list) # apply filters queryset = FilteringFilterBackend.filter_queryset(self, request, queryset, view) # apply aggregations return self.aggregate(request, queryset, view)
[docs] @classmethod def apply_filter(cls, queryset, options=None, args=None, kwargs=None): if args is None: args = [] if kwargs is None: kwargs = {} facets = queryset._facets faceted_fields = queryset._faceted_fields filters = queryset._filters # if this field is faceted, then apply it as a post-filter if options['field'] in faceted_fields: queryset = queryset.post_filter(*args, **kwargs) else: queryset = queryset.filter(*args, **kwargs) filters[options['field']].append(Q(*args, **kwargs)) # ensure the new queryset object retains the helper variables queryset._facets = facets queryset._faceted_fields = faceted_fields queryset._filters = filters return queryset
[docs] def aggregate(self, request, queryset, view): facets = queryset._facets faceted_fields = queryset._faceted_fields filters = queryset._filters for field, facet in facets.items(): agg = facet['facet'].get_aggregation() if facet['global']: queryset.aggs.bucket( '_filter_' + field, 'global' ).bucket(field, agg) continue agg_filter = Q('match_all') for f, _filter in filters.items(): # apply filters for that are applicable for facets other than this one if agg.field == f or f not in faceted_fields: continue # combine with or q = _filter[0] for x in _filter[1:]: q = q | x agg_filter &= q queryset.aggs.bucket( '_filter_' + field, 'filter', filter=agg_filter ).bucket(field, agg) return queryset