我正在使用 Django REST Framework 构建我们的 Web API,并使用 Solr 提供搜索功能。目前,在 ListAPIView 的子类中,我重写了 get_queryset(),使其返回一个包含 Solr 搜索结果的 QuerySet:
class ClipList(generics.ListAPIView):
    """
    List all Clips.
    Permissions: IsAuthenticatedOrReadOnly
    Parameters:
    query -- Search for Clips. EX: clips/?query=aaron%20rodgers
    """
    model = Clip
    serializer_class = ClipSerializer
    permission_classes = (permissions.IsAuthenticatedOrReadOnly,)

    def get_queryset(self):
        """
        Return every Clip, narrowed to the Solr hits when ``query`` is given.

        Without a ``query`` URL parameter the unfiltered queryset is returned.
        With one, the query is sent to the Solr ``/select`` handler and the
        queryset is filtered down to the ids found in the response.
        """
        # The request is an attribute of the view instance; a bare
        # ``request`` name is not defined inside this method.
        query = self.request.GET.get('query', None)
        queryset = Clip.objects.all()
        if query is None:
            return queryset

        conn = solr.Solr(settings.SOLR_URL)
        sh = solr.SearchHandler(conn, "/select")
        response = sh(query)
        ids = [result['id'] for result in response.results]

        # filter the initial queryset based on the Clip identifiers from the Solr response
        # PROBLEM: This does not preserve the order of the results as it equates to
        # `SELECT * FROM clips WHERE id in (75, 89, 106, 45)`.
        # SQL is not guaranteed to return the results in the same order used in the WHERE clause.
        # There is no way (that I'm aware of) to do this in SQL.
        return queryset.filter(pk__in=ids)
但是,如注释中所述,这样做不会保留结果的顺序。我知道可以自己构造一个由 Clip 对象组成的 Python 集合,但那样就会失去 Django QuerySet 的惰性求值,而结果集可能很大,并且需要分页。
我研究了 Haystack;按我的理解,用 Haystack 改写上面的代码大致如下:
def get_queryset(self):
    """
    Return the Haystack search results for the ``query`` URL parameter.

    The parameter value (``None`` when absent) is passed unchanged as the
    ``content`` filter of a fresh ``SearchQuerySet``.
    """
    search_term = self.request.GET.get('query', None)
    return SearchQuerySet().filter(content=search_term)
这种写法非常简单,而且能保持结果的顺序,但 Django REST Framework 无法序列化 SearchQuerySet。
REST Framework 中是否有可以重写的方法,使其能够序列化 SearchQuerySet?或者,有没有不使用 Haystack 或 Python 集合也能保持搜索结果排名顺序的办法?
我们最终采用了下面这个解决方案,它沿用了 Django 和 REST Framework 的惯用写法。
这里先简要说明一下(希望代码和注释本身能给出更深入的解释 :))。我们没有使用常规的 Django Page 来支撑分页,而是定义了一个 FakePage,它可以用一个与对象列表不耦合的总数来初始化。这就是所谓的 hack。是的,它确实是一个“hack”,但它是一个非常简单的解决方案。对我们来说,另一种选择是重新实现一个 QuerySet,而在我们这里,简单的方案总是胜出。它虽然简单,却可以复用,而且性能良好。
在这个伪造页面的基础上,我们有一个抽象的 SearchListModelMixin 类,它知道如何获取使用该伪造页面的序列化器。这个 mixin 应用于具体的视图类,例如 SearchListCreateAPIView。代码如下。如果其他人有需要,我可以再做更详尽的解释。
class FakePage(object):
    """
    Fake page used by Django paginator.
    Required for wrapping responses from Solr.

    Offers the same methods as a regular paginator page, but is given the
    total hit count directly instead of deriving it from a complete object
    list, because only the objects of the current page are available.
    """

    def __init__(self, object_list, number, total_count, per_page=None):
        """
        Create fake page instance.

        Args:
            object_list: list of objects, represented by a page
            number: 1-based page number
            total_count: total count of objects (in all pages)
            per_page: optional page size used for the request. When omitted
                it falls back to ``len(object_list)``, which equals the real
                page size only for a full page; pass it explicitly so that a
                shorter last page is still described correctly.
        """
        self.object_list = object_list
        self.number = number
        # Exposed as a plain attribute (not a method).
        self.total_count = total_count
        if per_page is None:
            per_page = len(object_list)
        self.per_page = per_page
        if self.per_page > 0:
            # Ceiling division: a trailing partial page still counts as a
            # page (25 hits at 10 per page is 3 pages, not 2).
            self.num_pages = -(-total_count // self.per_page)
        else:
            self.num_pages = 0

    def __repr__(self):
        return '<Page %s of %s>' % (self.number, self.num_pages)

    def __len__(self):
        return len(self.object_list)

    def __getitem__(self, index):
        if not isinstance(index, (slice,) + six.integer_types):
            raise TypeError
        # The object_list is converted to a list so that if it was a QuerySet
        # it won't be a database hit per __getitem__.
        if not isinstance(self.object_list, list):
            self.object_list = list(self.object_list)
        return self.object_list[index]

    def has_next(self):
        """Return True when a page follows this one."""
        return self.number < self.num_pages

    def has_previous(self):
        """Return True when a page precedes this one."""
        return self.number > 1

    def has_other_pages(self):
        """Return True when this is not the only page."""
        return self.has_previous() or self.has_next()

    def next_page_number(self):
        """Return the next page number, or raise EmptyPage on the last page."""
        if self.has_next():
            return self.number + 1
        raise EmptyPage('Next page does not exist')

    def previous_page_number(self):
        """Return the previous page number, or raise EmptyPage on the first page."""
        if self.has_previous():
            return self.number - 1
        raise EmptyPage('Previous page does not exist')

    def start_index(self):
        """
        Returns the 1-based index of the first object on this page,
        relative to total objects in the paginator.
        """
        # Special case, return zero if no items.
        if self.total_count == 0:
            return 0
        return (self.per_page * (self.number - 1)) + 1

    def end_index(self):
        """
        Returns the 1-based index of the last object on this page,
        relative to total objects found (hits).
        """
        # Special case for the last page because there can be orphans.
        if self.number == self.num_pages:
            return self.total_count
        return self.number * self.per_page
class SearchListModelMixin(object):
    """
    List search results or a queryset.

    Intended to be mixed into a REST Framework generic list view. The
    concrete view must define ``get_search_results()`` returning a
    ``(list, int)`` pair: the objects of the requested page and the total
    number of hits.
    """
    # Set this attribute to make a custom URL parameter for the query.
    # EX: ../widgets?query=something
    query_param = 'query'
    # Set this attribute to define the type of search.
    # This determines the source of the query value.
    # For example, for regular text search,
    # the query comes from a URL param. However, for a related search,
    # the query is a unique identifier in the URL itself.
    search_type = SearchType.QUERY

    def _get_page_number(self):
        """
        Return the requested 1-based page number (1 when not given).

        Raises:
            Http404: if the page parameter is not a positive integer.
        """
        from django.http import Http404

        raw_page = self.request.GET.get(self.page_kwarg, 1)
        try:
            page_num = int(raw_page)
        except (TypeError, ValueError):
            raise Http404("Invalid page number.")
        # A page below 1 would produce a negative Solr offset.
        if page_num < 1:
            raise Http404("Invalid page number.")
        return page_num

    def search_list(self, request, *args, **kwargs):
        """
        Respond with a page of search results, or the regular list.

        Falls back to ``self.list()`` when the search type is QUERY and no
        query parameter was supplied.

        Raises:
            ImproperlyConfigured: if the view lacks ``get_search_results()``
                or that method does not return ``(list, int)``.
            Http404: if the page parameter is not a positive integer.
        """
        import numbers

        # Get the query from the URL parameters dictionary.
        query = self.request.GET.get(self.query_param, None)
        # If there is no query use default REST Framework behavior.
        if query is None and self.search_type == SearchType.QUERY:
            return self.list(request, *args, **kwargs)

        if not hasattr(self, 'get_search_results'):
            raise ImproperlyConfigured("'%s' must define 'get_search_results()'"
                                       % self.__class__.__name__)

        # Get the page of objects and the total count of the results.
        self.object_list, total_count = self.get_search_results()
        # Reject the result when EITHER element has the wrong type. Any
        # integral count is accepted so a Python 2 ``long`` still passes.
        if (not isinstance(self.object_list, list)
                or not isinstance(total_count, numbers.Integral)):
            raise ImproperlyConfigured("'%s.get_search_results()' must return (list, int)"
                                       % self.__class__.__name__)

        # Normally, we would be serializing a QuerySet,
        # which is lazily evaluated and has the entire result set.
        # Here, we just have a Python list containing only the elements for the
        # requested page. Thus, we must generate a fake page,
        # simulating a Django page in order to fully support pagination.
        # Otherwise, the `count` field would be equal to the page size.
        page = FakePage(self.object_list, self._get_page_number(), total_count)
        # Prepare a SearchPaginationSerializer
        # with the object_serializer_class
        # set to the serializer_class of the APIView.
        serializer = self.get_search_pagination_serializer(page)
        return Response(serializer.data)

    def get_search_pagination_serializer(self, page):
        """
        Return a serializer instance to use with paginated search data.
        """
        class SerializerClass(SearchPaginationSerializer):
            class Meta:
                object_serializer_class = self.get_serializer_class()

        pagination_serializer_class = SerializerClass
        context = self.get_serializer_context()
        return pagination_serializer_class(instance=page, context=context)

    def get_solr_results(self, sort=None):
        """
        This method is optional. It encapsulates logic for a Solr request
        for a list of ids using pagination. Another method could be provided to
        do the search request.

        Args:
            sort: optional value forwarded to Solr as the ``sort`` argument.

        Returns:
            A ``(objects, total)`` tuple: the objects of the requested page in
            the order Solr returned their ids, and ``response.numFound``.

        Raises:
            ImproperlyConfigured: if ``search_type`` is not a known SearchType.
            Http404: if the page parameter is not a positive integer.
        """
        # Use the view's own get_queryset() so that an override on the
        # concrete view is honored; ``super(self.__class__, self)`` would
        # skip it.
        queryset = self.get_queryset()
        conn = solr.Solr(settings.SOLR_URL)
        # Create a SearchHandler and get the query with respect to
        # the type of search.
        if self.search_type == SearchType.QUERY:
            query = self.request.GET.get(self.query_param, None)
            sh = solr.SearchHandler(conn, "/select")
        elif self.search_type == SearchType.RELATED:
            query = str(self.kwargs[self.lookup_field])
            sh = solr.SearchHandler(conn, "/morelikethis")
        else:
            # Without this branch ``query`` and ``sh`` would be unbound below.
            raise ImproperlyConfigured(
                "'%s.search_type' must be SearchType.QUERY or SearchType.RELATED"
                % self.__class__.__name__)
        # Get the pagination information and populate kwargs.
        page_num = self._get_page_number()
        per_page = self.get_paginate_by()
        offset = (page_num - 1) * per_page
        kwargs = {'rows': per_page, 'start': offset}
        if sort:
            kwargs['sort'] = sort
        # Perform the Solr request and build a list of results.
        # For now, this only gets the id field, but this could be
        # customized later.
        response = sh(query, 'id', **kwargs)
        results = [int(r['id']) for r in response.results]
        # Fetch the objects of this page in one query, as a dict keyed by pk.
        object_dict = queryset.in_bulk(results)
        # Rebuild the Solr ordering, dropping ids that have no matching
        # object in the queryset.
        sorted_objects = [object_dict[pk] for pk in results if pk in object_dict]
        return sorted_objects, response.numFound
class SearchType(object):
    """
    Enum-like namespace listing the kinds of Solr search
    an APIView can perform.
    """
    # QUERY: the search text comes from a URL parameter.
    # RELATED: the search is keyed on an identifier taken from the URL itself.
    QUERY, RELATED = 1, 2
class SearchPaginationSerializer(pagination.BasePaginationSerializer):
    """
    Pagination serializer for search results.

    Exposes ``count``, ``next`` and ``previous`` for a page object.
    """
    # Read from the page's ``total_count`` rather than from the length of
    # the page's object list.
    count = serializers.Field(source='total_count')
    # source='*' -- presumably hands the whole page object to the field;
    # confirm against the REST Framework version in use.
    next = pagination.NextPageField(source='*')
    previous = pagination.PreviousPageField(source='*')
class SearchListCreateAPIView(SearchListModelMixin, generics.ListCreateAPIView):
    """
    Concrete view for listing search results, a queryset, or creating a model instance.
    """
    def get(self, request, *args, **kwargs):
        # Route GET through the mixin's search_list() with the arguments
        # passed through unchanged.
        return self.search_list(request, *args, **kwargs)
class SearchListAPIView(SearchListModelMixin, generics.ListAPIView):
    """
    Concrete view for listing search results or a queryset.
    """
    def get(self, request, *args, **kwargs):
        # Route GET through the mixin's search_list() with the arguments
        # passed through unchanged.
        return self.search_list(request, *args, **kwargs)
class WidgetList(SearchListCreateAPIView):
    """
    List all Widgets or create a new Widget.
    """
    model = Widget
    queryset = Widget.objects.all()
    serializer_class = WidgetSerializer
    permission_classes = (permissions.IsAuthenticatedOrReadOnly,)
    search_type = SearchType.QUERY  # this is default, but explicitly declared here

    def get_queryset(self):
        """
        The method implemented for database powered results.
        I'm simply illustrating the default behavior here.
        """
        return super(WidgetList, self).get_queryset()

    def get_search_results(self):
        """
        The method implemented for search engine powered results.
        """
        # NOTE(review): ``solr_sort`` is not defined in the visible source;
        # presumably a module-level sort specification -- confirm it exists.
        sort_spec = solr_sort
        return self.get_solr_results(sort_spec)
class RelatedList(SearchListAPIView):
    """
    List all related Widgets.
    """
    model = Widget
    queryset = Widget.objects.all()
    # Same serializer as the Widget list view (the original name was
    # misspelled and would raise NameError).
    serializer_class = WidgetSerializer
    permission_classes = (permissions.IsAuthenticatedOrReadOnly,)
    search_type = SearchType.RELATED

    def get_search_results(self):
        """Return the page of related Widgets and the total hit count from Solr."""
        return self.get_solr_results()
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句