Django - a complex request

Question

Django - a complex request

Assuming I have two models:

class Profile(models.Model):
    """Profile model; its fields are omitted in the question."""
    # some fields here


class Ratings(models.Model):
    """One rating row: `points` given to a `profile` in a `category`."""

    # Reference the model class `Profile`; the lowercase name `profile`
    # is not defined at this point and would raise NameError.
    # NOTE: Django 2.0+ additionally requires an explicit `on_delete`.
    profile = models.ForeignKey(Profile)
    category = models.IntegerField()
    points = models.IntegerField()

Assuming the following MySQL rating table example:

 profile | category | points
       1 |        1 |     10
       1 |        1 |      4
       1 |        2 |     10
       1 |        3 |      0
       1 |        4 |     10
       1 |        4 |     10
       1 |        4 |     10
       1 |        5 |      0

I have the following values in my POST data, as well as other field values:

 category_1_avg_val = 7
 category_2_avg_val = 5
 category_3_avg_val = 5
 category_4_avg_val = 7
 category_5_avg_val = 9

I want to filter profiles whose average rating in each category is greater than or equal to the required value for that category.

Some filters are applied first as:

 # Lookups that must ALL match. A (lookup, value) pair is skipped when its
 # value is falsy, i.e. when the corresponding form field was left empty.
 q1 = [
     ('associated_with', search_for),
     ('profile_type__slug__exact', profile_type),
     ('gender__in', gender),
     ('rank__in', rank),
     ('styles__style__in', styles),
     ('age__gte', age_from),
     ('age__lte', age_to),
 ]
 q1_list = [Q(x) for x in q1 if x[1]]

 # Free-text search: ANY of these lookups may match the search term.
 q2 = [
     ('user__first_name__icontains', search_term),
     ('user__last_name__icontains', search_term),
     ('profile_type__name__icontains', search_term),
     ('styles__style__icontains', search_term),
     ('rank__icontains', search_term),
 ]
 q2_list = [Q(x) for x in q2 if x[1]]

 # Start from the unfiltered queryset so that `objects` is always bound,
 # even when no filter was supplied. Testing `if objects:` instead would
 # execute the query early and, when the AND filters match nothing, fall
 # back to searching ALL profiles for the search term.
 objects = Profile.objects.all()
 if q1_list:
     objects = objects.filter(reduce(operator.and_, q1_list))
 if q2_list:
     objects = objects.filter(reduce(operator.or_, q2_list))

 # distinct() removes duplicate profiles produced by the joins on
 # multi-valued lookups such as `styles__style`.
 if order_by_ranking_level == 'desc':
     objects = objects.order_by('-ranking_level').distinct()
 else:
     objects = objects.order_by('ranking_level').distinct()

Now I want to keep only the profiles for which avg(points), grouped by category, is >= the corresponding category average value sent in the POST data.

I tried to do it one by one as

 # Attempt from the question: handle one category at a time.
 # Category 1: keep rows joined to category-1 ratings, annotate the average
 # of the joined ratings' points, then keep averages at or above the
 # required value.
 objects = (
     objects
     .filter(ratings__category=1)
     .annotate(avg_points=Avg('ratings__points'))
     .filter(avg_points__gte=category_1_avg_val)
 )
 # Category 2: the same chain applied on top of the previous result,
 # reusing the annotation name `avg_points`.
 objects = (
     objects
     .filter(ratings__category=2)
     .annotate(avg_points=Avg('ratings__points'))
     .filter(avg_points__gte=category_2_avg_val)
 )

But this is wrong, I think. Please help me. If the result is a QuerySet, that would be great.

Edit: Using the answer posted by hynekcer, I came up with a slightly different solution, since I already have a queryset of profiles that needs to be filtered further based on the ratings.

 def check_ratings_avg(pr, rtd):
     """Return True if profile `pr` reaches every required average in `rtd`.

     pr  -- object whose `id` selects its Ratings rows.
     rtd -- dict mapping category -> minimum required average of points.

     A category in `rtd` for which the profile has no ratings counts as a
     failure instead of raising KeyError.
     """
     rows = (Ratings.objects.filter(profile__id=pr.id)
             .values('category')
             .annotate(points_avg=Avg('points')))
     averages = {row['category']: row['points_avg'] for row in rows}
     for cat, required in rtd.items():
         average = averages.get(cat)
         # Missing category -> no average to compare -> requirement not met.
         if average is None or average < required:
             return False
     return True


 rtd = {1: category_1_avg_val, 2: category_2_avg_val, 3: category_3_avg_val,
        4: category_4_avg_val, 5: category_5_avg_val}
 # Evaluates the queryset and runs one aggregate query per profile.
 objects = [i for i in objects if check_ratings_avg(i, rtd)]

+4

python django

Aamir adnan Nov 05 '12 at 18:51

source share

2 answers

You can add methods to the manager.

 # Untested code
 class ProfileManager(models.Manager):
     """Manager with helpers that filter on per-category rating averages."""

     def with_category_average(self, cat, avg):
         """Filter on `ratings__category=cat`, annotate the average of
         `ratings__points`, and keep rows whose average is >= `avg`."""
         # Give each filter a unique annotation key
         alias = 'avg_pts_' + str(cat)
         in_category = self.filter(ratings__category=cat)
         annotated = in_category.annotate(**{alias: Avg('ratings__points')})
         return annotated.filter(**{alias + '__gte': avg})

     def filter_by_averages(self, avg_dict):
         """Combine `with_category_average` for every pair in `avg_dict`.

         Expects a dict of `cat: avg` pairs.
         """
         combined = self.get_query_set()
         for cat, avg in avg_dict.items():
             # `&` intersects the accumulated queryset with the next one.
             combined &= self.with_category_average(cat, avg)
         return combined

0

Jesse the game Nov 05 '12 at 19:29

source share

hynekcer · Accepted Answer · 2012-11-09T00:04:17+0000

Your complex query requires a subquery in principle. Possible solutions:

A subquery written with the 'extra' queryset method or with raw SQL. This is NOT DRY, and it is not supported by some database backends (e.g. some versions of MySQL); however, subqueries have been usable in a limited way since Django 1.1.
Saving intermediate results to a temporary table in the database. This is not nice in Django.
Emulating the subquery with a loop in Python. This is the best universal solution: a Python loop over data already aggregated by the first database query can aggregate and filter the data quickly enough.

A) Subquery emulated by Python

 from django.db.models import Q, Avg
 from itertools import groupby
 from myapp.models import Profile, Ratings


 def iterator_filtered_by_average(dictionary):
     """Yield the profiles that reach the required average in every category.

     dictionary -- maps category -> minimum required average of points.

     Each yielded item is the groupby key, i.e. a row of
     `.values('profile')` (a `{'profile': <id>}` mapping).
     """
     # Group by (profile, category) only. Listing 'points' in values() as
     # well would add it to the grouping, so every "average" would be taken
     # over rows sharing one points value instead of over the category.
     qr = Ratings.objects.values('profile', 'category').order_by(
         'profile', 'category').annotate(points_avg=Avg('points'))
     # One OR-ed condition per requested category.
     f = Q()
     for k, v in dictionary.items():
         f |= Q(category=k, points_avg__gte=v)
     # Rows are ordered by profile, so groupby sees each profile's rows
     # consecutively. A profile qualifies when the number of its matching
     # (category) rows equals the number of requested categories.
     for profile, grp in groupby(qr.filter(f).values('profile')):
         if len(list(grp)) == len(dictionary):
             yield profile


 # example
 FILTER_DATA = {1: category_1_avg_val, 2: category_2_avg_val,
                3: category_3_avg_val, 4: category_4_avg_val,
                5: category_5_avg_val}
 for row in iterator_filtered_by_average(FILTER_DATA):
     print(row)

This is a simple solution to the original question, without the additional requirements.

B) Solution with subqueries :
This is necessary for the more detailed version of the question, because the initial filters may be based on a field of type ManyToManyField, and also because the query contains a distinct clause:

 # objects: QuerySet that you get from your initial filters. Not yet executed. if rtd: # Method `as_nested_sql` removes the `order_by` clase, unlike `as_sql` subquery3 = objects.values('id').query \ .get_compiler(connection=connection).as_nested_sql() subquery2 = ("""SELECT profile_id, category, avg(points) AS points_avg FROM myapp_ratings WHERE profile_id in ( %s ) GROUP BY profile_id, category """ % subquery3[0], subquery3[1] ) where_sql = ' OR '.join( 'category = %d AND points_avg >= %%s' % cat for cat in rtd.keys() ) subquery = ( """SELECT profile_id FROM ( %s ) subquery2 WHERE %s GROUP BY profile_id HAVING count(*) = %s """ % (subquery2[0], where_sql, len(rtd)), subquery2[1] + tuple(rtd.values()) ) assert order_by_ranking_level in ('asc', 'desc') mainquery = ("""SELECT myapp_profile.* FROM myapp_profile INNER JOIN ( %s ) subquery ON subquery.profile_id=myapp_profile.id ORDER BY ranking_level %s""" % (subquery[0], order_by_ranking_level), subquery[1] ) objects = Profile.objects.raw(mainquery[0], params=mainquery[1]) return objects

Replace every occurrence of myapp with the name of your application.

SQL example generated by this code

 SELECT myapp_profile.*
 FROM myapp_profile
 INNER JOIN (
     SELECT profile_id
     FROM (
         SELECT profile_id, category, avg(points) AS points_avg
         FROM myapp_ratings
         WHERE profile_id IN (
             SELECT U0.`id` FROM `myapp_profile` U0 WHERE U0.`ranking_level` >= 4
         )
         GROUP BY profile_id, category
     ) subquery2
     WHERE category = 1 AND points_avg >= 7
        OR category = 2 AND points_avg >= 5
        OR category = 3 AND points_avg >= 5
        OR category = 4 AND points_avg >= 7
        OR category = 5 AND points_avg >= 9
     GROUP BY profile_id
     HAVING count(*) = 5
 ) subquery ON subquery.profile_id=myapp_profile.id
 ORDER BY ranking_level asc

(This SQL was edited by hand for readability: the %s placeholders have been replaced with their parameter values. In the real query the parameters are passed to the database engine separately, for security reasons.)

Your problem is caused by Django's limited support for generating subqueries. Only the examples of more complex queries from the documentation create a subquery (for example, aggregate after annotate, count after annotate, or aggregate after distinct, but not annotate after distinct or annotate after annotate). Complex nested aggregations are simplified to a single query, which is unexpected.

All other solutions that execute a new individual SQL query for each object returned by the first query are discouraged for production, although they can be very useful for testing the results of any better solution.

Django - a complex request

More articles: