Django cmsplugin-blog search indexes for Haystack with Whoosh - python

I want to add a search function for a simple django cms homepage with cmsplugin-blog.
However, the only search indexes I can find are meant for use with django-cms-facetsearch. Facetsearch needs Solr, and I don't want to run a Solr server just for a few CMS pages and blog entries. I just want to use Haystack with Whoosh, because it is really simple to configure.
Are there multilingual search indexes for cmsplugin-blog models? Or do I have to write them myself?
Thank you for helping...

I had the same problem for a setup using haystack, django-cms, cmsplugin-blog and some other apps as well.
I just created a custom search index for cmsplugin-blog and haystack, inspired by the index used for regular CMS pages in django-cms-search. Take a look at it, it may help you to create your own.
from haystack import indexes
from haystack import site
from cmsplugin_blog.models import Entry, EntryTitle
from cms.models.pluginmodel import CMSPlugin
from django.utils.encoding import force_unicode
import re
def _strip_tags(value):
    """
    Replace every HTML tag in ``value`` with a single space.

    Behaves like django.utils.html.strip_tags, except that each removed tag
    leaves whitespace behind so the words on either side of it are not
    accidentally run together.
    """
    as_text = force_unicode(value)
    return re.sub(r'<[^>]*?>', ' ', as_text)
class BlogIndex(indexes.SearchIndex):
    """Haystack search index for published cmsplugin-blog ``Entry`` objects.

    The indexed document consists of the entry's title followed by the
    searchable text of every CMS plugin found in the entry's placeholders.
    """

    text = indexes.CharField(document=True)
    url = indexes.CharField(stored=True, indexed=False, model_attr='get_absolute_url')
    title = indexes.CharField(stored=True, indexed=False)
    pub_date = indexes.DateTimeField(model_attr='pub_date', null=True)

    def get_model(self):
        """Return the model class handled by this index."""
        return Entry

    def index_queryset(self):
        """Return the entries to index: only those flagged as published."""
        return self.get_model().objects.filter(is_published=True)

    def _title_text(self, obj):
        """Return the entry's first title as unicode, or u'' if it has none."""
        # Slicing instead of [0] avoids an IndexError aborting the whole
        # index run when an entry has no EntryTitle yet.
        for entry_title in EntryTitle.objects.filter(entry=obj)[:1]:
            return force_unicode(entry_title)
        return u''

    def prepare_title(self, obj):
        """Return the text stored in the ``title`` field for ``obj``."""
        return self._title_text(obj)

    def prepare_text(self, obj):
        """Build the document text for ``obj``.

        The result is the entry title plus, for each plugin in the entry's
        placeholders, the tag-stripped content of its ``search_fields`` and,
        when the plugin (or its plugin class) sets ``search_fulltext``, its
        tag-stripped rendered output. All parts are joined with single
        spaces so that adjacent words never run together.
        """
        # Imported here so this class does not depend on edits to the
        # module's import block.
        from django.template import RequestContext
        from django.test.client import RequestFactory

        chunks = [self._title_text(obj)]
        request = None  # created lazily; only full-text plugins need it
        plugins = CMSPlugin.objects.filter(placeholder__in=obj.placeholders.all())
        for base_plugin in plugins:
            instance, plugin_type = base_plugin.get_plugin_instance()
            if instance is None:
                # this is an empty plugin
                continue
            if hasattr(instance, 'search_fields'):
                chunks.extend(
                    force_unicode(_strip_tags(getattr(instance, field, '')))
                    for field in instance.search_fields
                )
            if getattr(instance, 'search_fulltext', False) or getattr(plugin_type, 'search_fulltext', False):
                if request is None:
                    # Indexing runs outside the request cycle, so a dummy
                    # request is fabricated for rendering.
                    request = RequestFactory().get('/')
                    # NOTE(review): some context processors may expect a
                    # session on the request -- confirm for your project.
                    request.session = {}
                rendered = instance.render_plugin(context=RequestContext(request))
                chunks.append(_strip_tags(rendered))
        return u' '.join(chunks)
site.register(Entry, BlogIndex)
I will consider putting a fork of cmsplugin-blog with a bulletproof version of this search index on github later. Feel free to use it wherever helpful.

Related

Using Django-modeltranslation in combination with PostgreSQL SearchVector

I'm using django-modeltranslation to translate model fields. And suppose I have the following model (where the name is translated in the DB):
# Example model from the question; `name` is the field that is translated
# in the database (per the surrounding text).
class Book(models.Model):
    name = models.CharField(max_length=90)
Then, in a DRF view, I have an endpoint that takes a query text and searches through the book names using this code:
from django.contrib.postgres.search import SearchVector, SearchQuery, SearchRank
class BookView(APIView):
    """Endpoint that ranks books by full-text match against the ``q`` parameter."""

    def get(self, request):
        # A DRF Request exposes query-string values through `query_params`;
        # it has no `get()` method of its own. Default to '' so a missing
        # parameter does not pass None into SearchQuery.
        q = request.query_params.get('q', '')
        vector = SearchVector('name')  # this is where the issue is
        query = SearchQuery(q)
        matches = (
            Book.objects.annotate(rank=SearchRank(vector, query))
            .filter(rank__gt=0.1)
            .order_by('-rank')
        )
        # etc.
This worked great while I was working with English only. But now I have added a new language, and all aspects of the localisation are working fine except this search: it only looks at the name_en field values.
If the target language is German for example, and I explicitly change the following line from:
vector = SearchVector('name')
to:
vector = SearchVector('name_de')
Then the search works over the correct field. Is there a way to pass in the correct field to SearchVector?
IIUC, you can just use get_language():
from django.utils.translation import get_language
from django.contrib.postgres.search import SearchVector, SearchQuery, SearchRank
class BookView(APIView):
    """Endpoint that ranks books by full-text match in the active language."""

    def get(self, request):
        # Imported locally so the snippet's import block stays untouched.
        from django.conf import settings

        # A DRF Request exposes query-string values through `query_params`;
        # it has no `get()` method of its own.
        q = request.query_params.get('q', '')
        # get_language() can return None (translations deactivated) or a
        # hyphenated code such as 'en-us'; neither forms a valid field
        # suffix, so fall back to LANGUAGE_CODE and use underscores.
        # NOTE(review): assumes a translated column exists for the resulting
        # code (e.g. `name_de`) -- confirm against the configured languages.
        lang = (get_language() or settings.LANGUAGE_CODE).replace('-', '_')
        vector = SearchVector(f'name_{lang}')
        query = SearchQuery(q)
        matches = (
            Book.objects.annotate(rank=SearchRank(vector, query))
            .filter(rank__gt=0.1)
            .order_by('-rank')
        )

Why is index_queryset called every time from search view in django-Haystack?

I have followed the Getting Starting - Django Haystack example, swapping out their model for mine.
In search_indexes.py, the method index_queryset has the comment "Used when the entire index for model is updated." however it is called every time I do a search from the view search/search.html
The method itself gets all the objects from the database and is very slow, so I assume this isn't the intended behavior.
search_indexes.py
import datetime
from haystack import indexes
from article.models import Article
class ArticleIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack index over ``Article`` objects that are already published."""

    text = indexes.CharField(document=True, use_template=True)
    article_id = indexes.IntegerField(model_attr='id')
    title = indexes.CharField(model_attr='title')
    summary = indexes.CharField(model_attr='summary')
    content = indexes.CharField(model_attr='content')
    published_at = indexes.DateTimeField(model_attr='published_at')

    def get_model(self):
        """Return the model class this index covers."""
        return Article

    def index_queryset(self, using=None):
        """Return the articles whose ``published_at`` is not in the future."""
        manager = self.get_model().objects
        cutoff = datetime.datetime.now()
        return manager.filter(published_at__lte=cutoff)
I captured the stack trace to see where it was being called from:
python2.7/site-packages/haystack/views.py(53)__call__()
-> return self.create_response()
python2.7/site-packages/haystack/views.py(133)create_response()
-> (paginator, page) = self.build_page()
python2.7/site-packages/haystack/views.py(110)build_page()
-> self.results[start_offset:start_offset + self.results_per_page]
python2.7/site-packages/haystack/query.py(272)__getitem__()
-> self._fill_cache(start, bound)
python2.7/site-packages/haystack/query.py(191)_fill_cache()
-> to_cache = self.post_process_results(results)
python2.7/site-packages/haystack/query.py(214)post_process_results()
-> objects = index.read_queryset(using=self.query._using)
python2.7/site-packages/haystack/indexes.py(144)read_queryset()
-> return self.index_queryset(using=using)
myApplication/article/search_indexes.py(20)index_queryset()
-> return self.get_model().objects.filter(
Note: I am using django version 1.7.10 and django-haystack version 2.4.1
This is intended behaviour - it is the docstring that is wrong. The function index_queryset basically returns the queryset that Haystack will use to obtain the search results (as well as to index documents).
You say:
The method itself gets all the objects from the database and is very slow
Actually it doesn't. All the method does is return a queryset. Querysets are lazy, so the method doesn't hit the database. The database only gets hit when something tries to access the results of the queryset.
This will happen after your search has been executed and Haystack returns the results. At this point the queryset will be further filtered to return the objects that matched the search. If this is slow then it may indicate a more fundamental performance issue with your model structure.

Create a url with an article's title

I have articles in MongoDB. I want the URLs for the articles to be readable. If I have an article named "How to Use Flask and MongoDB Seamlessly with Heroku", I want the URL to be something like localhost:5000/blog/how-to-use-flask-and-mongodb-seamlessly-with-heroku.
What is the best way to accomplish this? Any pointers in the right direction are appreciated. I wasn't sure exactly where to start on this one.
You are looking for a way to generate a "slug" and use that to identify the post.
If you want to use just a slug, all post titles will have to have a unique slug (which approximately means a unique title). This also means that if you change the post's title, the url could change, which would invalidate bookmarks and other outside links.
A better method is to do something like what Stack Overflow does for questions. If you look at this question's URL, you'll notice it has a unique id and a slug. In fact, the slug is optional, you can still get to this page by removing it from the url.
You'll need a way to generate slugs, and a custom url converter. The inflection library provides a nice way to slugify strings with the parameterize method. The following url converter takes an object and returns a url with the_object.id and the_object.title as a slug. When parsing a url, it will just return the object's id, since the slug is optional.
from inflection import parameterize
from werkzeug.routing import BaseConverter
class IDSlugConverter(BaseConverter):
    """URL converter for an integer id with an optional "/slug" suffix.

    :param attr: name of field to slugify, or None for default of str(instance)
    :param length: max length of slug when building url
    """

    regex = r'-?\d+(?:/[\w\-]*)?'

    def __init__(self, map, attr='title', length=80):
        self.attr = attr
        self.length = int(length)
        super(IDSlugConverter, self).__init__(map)

    def to_python(self, value):
        """Return only the integer id; any slug after the first "/" is ignored."""
        id_part = value.partition('/')[0]
        return int(id_part)

    def to_url(self, value):
        """Build "<id>/<slug>" for ``value``, or just "<id>" when the slug is empty."""
        if self.attr is None:
            raw = str(value)
        else:
            raw = getattr(value, self.attr, '')
        # Truncate first, then drop any dash left dangling by the cut.
        slug = parameterize(raw)[:self.length].rstrip('-')
        url = '{}/{}'.format(value.id, slug)
        return url.rstrip('/')
Register the converter so it can be used in routes:
app.url_map.converters['id_slug'] = IDSlugConverter
Use it in a route:
@app.route('/blog/<id_slug:id>')
def blog_post(id):
    """Show one blog post; ``id`` is the integer parsed by the ``id_slug`` converter."""
    # get post by id, do stuff
Generate a URL for a post. Note that you pass the object ('post'), not just the id, to the id parameter:
url_for('blog_post', id=post)
# /blog/1234/the-post-title
Converter written by me for the Stack Overflow Python chat room site.

Django Search: Setting Haystack_Default_Operator = 'OR' has no effect

I'm using Haystack and Whoosh to do search with a django site I'm building. I'd like to use an OR operator on search terms (e.g. "Search String" will find objects with text "Search" OR "String" instead of "Search" AND "String")
This seems pretty straight forward as haystack allows you to override the default "AND" operator by setting HAYSTACK_DEFAULT_OPERATOR = 'OR' in your settings.py file.
Unfortunately, adding this to my settings.py has had no effect. I've found a couple of tangential references to this behavior on stackoverflow, but no solution. I've also found an issue posted on github, but it's been there since last year with no comments or classification.
I may be doing something wrong, so figured I'd post here and see if there's a solution. I'm kinda stuck without one!
My haystack settings in my settings.py:
# Single Haystack connection using the Whoosh engine; the index path is a
# `whoosh_index` directory next to this settings module.
HAYSTACK_CONNECTIONS = {
    'default': {
        'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
        'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'),
    },
}
# Intended to make multi-word queries match ANY term rather than ALL terms.
HAYSTACK_DEFAULT_OPERATOR = 'OR'
# NOTE(review): the class name suggests the index is updated on every
# save/delete -- confirm against the Haystack signal processor docs.
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
My view:
from haystack import views as hsviews
def search_test(request):
    """Hand the request straight to Haystack's ``basic_search`` view."""
    response = hsviews.basic_search(request)
    return response
My search_indexes.py file:
import datetime
from haystack import indexes
from myApp.models import MyModel
from django.contrib.auth.models import User
class MyModelIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack index for ``MyModel``; only publicly visible rows are included."""

    text = indexes.NgramField(document=True, use_template=True)
    isPublic = indexes.BooleanField(model_attr='isPubliclyVisible')
    brand = indexes.CharField(model_attr='brand')
    model = indexes.CharField(model_attr='model')
    owner = indexes.CharField(model_attr='owner')
    owner_username = indexes.CharField()
    obj_type = indexes.CharField()

    def get_model(self):
        """Return the model class this index covers."""
        return MyModel

    def index_queryset(self, using=None):
        """Return the ``MyModel`` objects whose ``isPubliclyVisible`` flag is set."""
        visible_only = {'isPubliclyVisible': True}
        return self.get_model().objects.filter(**visible_only)

    def prepare_owner_username(self, obj):
        """Return the username reached through ``obj.owner.user``."""
        account = obj.owner.user
        return account.username

    def prepare_obj_type(self, obj):
        """Return the constant type label stored with every document."""
        return 'MyModel'
I did find this workaround (which I haven't tested/thought through for my solution yet), but I figured this warranted its own question in case I/we are doing something wrong.
Instead of using the Haystack built-in basic_search function, I would suggest writing your own view so you would have more control of how the search queries are performed. That way, you can process more complex searches by extending your view or custom search query function, plus it would be easier to test.
For example, you can build a separate SearchQuerySet filter for each of the keywords you're searching for, then "OR" them together, like this:
def get_query(request):
    """
    Extract the search terms (e.g. q=search+term) from the request.

    :param request: request object
    :returns: the terms as a list (split on whitespace), or None when the
        ``q`` parameter is missing or contains only whitespace
    """
    param = 'q'
    if param not in request.GET:
        return None
    raw_value = request.GET[param]
    if not raw_value.strip():
        return None
    return raw_value.split()
def perform_query(request):
    """
    This is a helper function to perform the actual query.
    You can extend this to handle more complicated searches using AND,
    OR, boolean qualifiers, etc.

    :param request: request object
    :returns: SearchQuerySet matching ANY of the query terms, or an
        EmptySearchQuerySet when the request carries no terms
    """
    # Imported locally so the snippet is self-contained.
    from haystack.query import EmptySearchQuerySet, SearchQuerySet

    query = get_query(request)
    if not query:
        return EmptySearchQuerySet()

    results = SearchQuerySet()
    for search_term in query:
        # filter_or() ORs the new term onto the accumulated filter.
        # `results |= results.filter(...)` would first AND the term onto the
        # previous filter, giving "t1 OR (t1 AND t2)", which is just "t1".
        results = results.filter_or(content=search_term)
    return results
def your_search_view(request, *args, **kwargs):
    """
    This is your search view to process the query and display your results.
    """
    # call "perform_query" to do the actual search
    results = perform_query(request)
    # do the rest of your view processing ...
    # NOTE(review): `etc.` is a placeholder from the original answer, not
    # valid Python -- substitute your template name and context.
    return render_to_response(etc.)

Does Django Have a Way to Auto-Sort Model Fields?

So basically, I've got a rather large Django project going. It's a private web portal that allows users to manage various phone-related tasks.
Several pages of the portal provide a listing of Model objects to users, and list all of their attributes in a HTML table (so that users can visually look through a list of these items).
The problem I'm having is: I cannot find a Django-ish or pythonic way to handle the sorting of these Model objects by field name. As an example of what I'm talking about, here is one of my views which lists all Partyline Model objects:
def list_partylines(request):
    """
    Render the list of `Partyline`s that we own, sorted by the field named
    in the ``sortby`` request parameter (default ``did``).
    """
    # A leading '-' on the requested field flips the toggle that is handed
    # to the template.
    sort_field = request.REQUEST.get('sortby', 'did').strip()
    descending = sort_field.startswith('-')
    search = sort_field[1:] if descending else sort_field
    sort_toggle = '' if descending else '-'

    # Fall back to the default when the name is not a field on the model.
    if search not in Partyline._meta.get_all_field_names():
        sort_field = 'did'

    if is_user_type(request.user, ['admin']):
        partylines = Partyline.objects.all().order_by(sort_field)
    else:
        partylines = get_my_partylines(request.user, sort_field)

    context = {
        'partylines': partylines,
        'sort_toggle': sort_toggle
    }
    return render_to_response('portal/partylines/list.html', RequestContext(request, context))
The sorting code basically allows users to specify a /url/?sortby=model_field_name parameter which will then return a sorted listing of objects whenever users click on the HTML table name displayed on the page.
Since I have various views in various apps which all show a listing of Model objects, and require sorting, I'm wondering if there is a generic way to do this sorting so that I don't have to?
I'm sorry if this question is a bit unclear, I'm struggling to find the right way to phrase this question.
Thanks.
The way that I'd look at doing this is through a custom QuerySet. In your model, you can define the class QuerySet and add your sorting there. In order to maintain all the logic in the model object, I'd also move the contents of get_my_partylines into the QuerySet, too.
## This class is used to replicate QuerySet methods into a manager.
## This way: Partyline.objects.for_user(foo) works the same as
## Partyline.objects.filter(date=today).for_user(foo)
class CustomQuerySetManager(models.Manager):
    """Manager that forwards unknown attributes to the model's own ``QuerySet``."""

    def get_query_set(self):
        """Return an instance of the ``QuerySet`` class nested in the model."""
        queryset_cls = self.model.QuerySet
        return queryset_cls(self.model)

    def __getattr__(self, attr, *args):
        """Look ``attr`` up on the manager class first, then on the queryset."""
        try:
            found = getattr(self.__class__, attr, *args)
        except AttributeError:
            found = getattr(self.get_query_set(), attr, *args)
        return found
class Partyline(models.Model):
    """Model whose queryset carries the per-request sorting and per-user filtering."""

    ## Define fields, blah blah.
    objects = CustomQuerySetManager()

    class QuerySet(QuerySet):
        """Chainable queryset helpers, also reachable via ``Partyline.objects``."""

        def sort_for_request(self, request):
            """Order by the ``sortby`` request parameter (default ``did``).

            A leading '-' requests descending order. Names that are not
            fields of the model fall back to ``did``.
            """
            sort_field = request.REQUEST.get('sortby', 'did').strip()
            if sort_field.startswith('-'):
                search = sort_field[1:]
            else:
                search = sort_field
            # Check to see if the sort term is valid.
            if search not in Partyline._meta.get_all_field_names():
                sort_field = 'did'
            # order_by() already honours the '-' prefix. The former extra
            # `partylines.reverse()` call discarded its result (queryset
            # methods return new querysets) and its flag was set for the
            # ascending case, so it has been removed.
            return self.all().order_by(sort_field)

        def for_user(self, user):
            """Return the partylines visible to ``user``."""
            # Use the `user` argument; there is no `request` in this scope.
            if is_user_type(user, ['admin']):
                return self.all()
            ## Code from get_my_partylines goes here.
            return self.all()  ## Temporary.
views.py:
def list_partylines(request):
    """
    List all `Partyline`s that we own.
    """
    # The model is `Partyline` (singular); `Partylines` would raise NameError.
    partylines = Partyline.objects.for_user(request.user).sort_for_request(request)
There's a great example of how this is done in a generic way in django.contrib.admin.views.main.ChangeList. Although that class does much more than sorting, you can browse its code for some hints and ideas. You may also want to look at django.contrib.admin.options.ModelAdmin, the changelist method in particular, to get more context.

Categories