Innovate anywhere, anytime with runcode.io. Your cloud-based dev studio.
Django

Autocomplete with Django-Haystack and Elasticsearch with Single Letter Querying

2022-07-20

Django Haystack provides autocomplete functionality. To do autocomplete effectively, the search backend (Elasticsearch in this case) uses n-grams (essentially a small window passed over the string). Because this alters the way your data needs to be stored, we had two choices of n-gram field in the search backend: NgramField and EdgeNgramField. The major drawback of n-grams is that they require a minimum of 3 letters in the search query. Even then, EdgeNgramField and NgramField fields won't produce consistent results. By customising Haystack's built-in Elasticsearch engine backend, we can achieve single-letter querying with a plain CharField.

In your search_indexes.py

from haystack import indexes
from myapp.models import Book

class BookIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index definition for the ``Book`` model."""

    # Primary document field; its content is rendered from a template.
    text = indexes.CharField(use_template=True, document=True)
    # Plain CharField (not an n-gram field) populated from ``Book.title``.
    title = indexes.CharField(model_attr="title")
    # Populated from ``Book.pub_date``.
    pub_date = indexes.DateTimeField(model_attr="pub_date")

    def get_model(self):
        """Return the model class this index is built for."""
        return Book

Create backends.py in your app. This file will contain the actual customized Elasticsearch search engine backend.

from django.conf import settings
from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend
from haystack.backends.elasticsearch_backend import ElasticsearchSearchEngine
from haystack.backends.elasticsearch_backend import ElasticsearchSearchQuery
from haystack.constants import VALID_FILTERS, FILTER_SEPARATOR, DEFAULT_ALIAS
from haystack.inputs import Clean, Exact, PythonData, Raw
from django.utils import six

class CustomElasticsearchSearchQuery(ElasticsearchSearchQuery):
    """Elasticsearch query builder whose ``contains`` filter is a wildcard match.

    ``build_query_fragment`` renders ``contains`` as ``*term*`` and
    ``startswith`` as ``term*``, so a query of any length (including a single
    letter) can match against a plain ``CharField``.
    """

    def __init__(self, using=DEFAULT_ALIAS):
        """Create the query bound to the connection alias ``using``."""
        # Forward the caller's alias. Passing DEFAULT_ALIAS unconditionally
        # would silently discard ``using`` and make ``self._using`` (read in
        # ``build_query_fragment``) always point at the default connection.
        super(CustomElasticsearchSearchQuery, self).__init__(using=using)

    def build_query_fragment(self, field, filter_type, value):
        """Build the query-string fragment for a single filter expression.

        :param field: Name of the index field being filtered; the reserved
            name ``'content'`` means "no specific field".
        :param filter_type: Lookup type, e.g. ``'contains'``, ``'startswith'``,
            ``'exact'``, ``'gt'``, ``'gte'``, ``'lt'``, ``'lte'``, ``'in'`` or
            ``'range'``.
        :param value: Either an input-type object (anything exposing
            ``input_type_name``) or a raw Python value, which is wrapped in
            ``Clean`` (strings) or ``PythonData`` (everything else).
        :returns: A unicode string of the form ``<fieldname>:<fragment>``, or
            just ``<fragment>`` when ``field`` is ``'content'``.
        :raises KeyError: If ``filter_type`` is not one of the types handled
            here.
        """
        # Imported locally, as in the original snippet.
        from haystack import connections
        query_frag = ''

        if not hasattr(value, 'input_type_name'):
            # Handle when we've got a ``ValuesListQuerySet``...
            if hasattr(value, 'values_list'):
                value = list(value)

            if isinstance(value, six.string_types):
                # It's not an ``InputType``. Assume ``Clean``.
                value = Clean(value)
            else:
                value = PythonData(value)

        # Prepare the query using the InputType.
        prepared_value = value.prepare(self)

        if not isinstance(prepared_value, (set, list, tuple)):
            # Convert scalar values to the representation the backend expects.
            prepared_value = self.backend._from_python(prepared_value)

        # 'content' is a special reserved word, much like 'pk' in
        # Django's ORM layer. It indicates 'no special field'.
        if field == 'content':
            index_fieldname = ''
        else:
            unified_index = connections[self._using].get_unified_index()
            index_fieldname = u'%s:' % unified_index.get_index_fieldname(field)

        # Query-string templates per lookup type. The leading and trailing
        # wildcards on 'contains' are what allow single-letter matching.
        filter_types = {
            'contains': u'*%s*',
            'startswith': u'%s*',
            'exact': u'%s',
            'gt': u'{%s TO *}',
            'gte': u'[%s TO *]',
            'lt': u'{* TO %s}',
            'lte': u'[* TO %s]',
        }

        if value.post_process is False:
            # The input type asked for its prepared value to be used verbatim.
            query_frag = prepared_value
        else:
            if filter_type in ['contains', 'startswith']:
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    # Apply the wildcard template to every whitespace-separated
                    # term; all terms must match (AND).
                    template = filter_types[filter_type]

                    if isinstance(prepared_value, six.string_types):
                        candidates = prepared_value.split(' ')
                    else:
                        candidates = [prepared_value]

                    terms = [
                        template % self.backend._from_python(candidate)
                        for candidate in candidates
                    ]

                    if len(terms) == 1:
                        query_frag = terms[0]
                    else:
                        query_frag = u"(%s)" % " AND ".join(terms)
            elif filter_type == 'in':
                # Any one of the quoted options may match (OR).
                in_options = [
                    u'"%s"' % self.backend._from_python(possible_value)
                    for possible_value in prepared_value
                ]

                query_frag = u"(%s)" % " OR ".join(in_options)
            elif filter_type == 'range':
                start = self.backend._from_python(prepared_value[0])
                end = self.backend._from_python(prepared_value[1])
                query_frag = u'["%s" TO "%s"]' % (start, end)
            elif filter_type == 'exact':
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    prepared_value = Exact(prepared_value).prepare(self)
                    query_frag = filter_types[filter_type] % prepared_value
            else:
                # Remaining comparison lookups (gt / gte / lt / lte).
                if value.input_type_name != 'exact':
                    prepared_value = Exact(prepared_value).prepare(self)

                query_frag = filter_types[filter_type] % prepared_value

        # Parenthesise the fragment unless it is a ``Raw`` input or already
        # starts or ends with a parenthesis.
        if len(query_frag) and not isinstance(value, Raw):
            if not query_frag.startswith('(') and not query_frag.endswith(')'):
                query_frag = "(%s)" % query_frag

        return u"%s%s" % (index_fieldname, query_frag)


class ConfigurableElasticSearchEngine(ElasticsearchSearchEngine):
    """Elasticsearch engine that uses ``CustomElasticsearchSearchQuery`` as its query class."""
    query = CustomElasticsearchSearchQuery

In the above snippet, the 'contains': u'*%s*' entry is what allows single-letter querying, which provides better results. Add the ConfigurableElasticSearchEngine to your settings.py:

# Haystack connection settings: the 'default' connection is served by the
# customized engine class instead of the stock Elasticsearch engine.
HAYSTACK_CONNECTIONS = {
    'default': {
        # Dotted path to the engine class defined in the app's backends.py.
        'ENGINE': 'myapp.backends.ConfigurableElasticSearchEngine',
        # NOTE(review): presumably the address of the Elasticsearch server;
        # adjust for your deployment.
        'URL': 'http://localhost:9200/',
        # NOTE(review): presumably the name of the Elasticsearch index to use.
        'INDEX_NAME': 'my_index',
    },
}

Now we are done with the customization. An example search using the customised search engine looks like this:

SearchQuerySet().models(Book).filter_and(title=<query_string>)

Eg1: SearchQuerySet().models(Book).filter_and(title='m')
# Will return books with the titles 'mcrop', 'micropyramid', 'mycropyramid' and all other titles that contain the letter 'm'.

Eg2: SearchQuerySet().models(Book).filter_and(title='mi')
# Will give results of books with title 'micropyramid'.

Note: Here __contains is optional, as Haystack 2.x uses __contains as the default lookup when filtering.