Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
PromotionWeights: speed up queries by using distinct.
The queries are only used to find the ids of PromoCampaign or Link objects,
so we don't need to load the many PromotionWeights objects themselves
(there is one per campaign per subreddit target per day).
  • Loading branch information
bsimpson63 committed Oct 1, 2014
1 parent df77799 commit eb9f0ae
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 25 deletions.
8 changes: 3 additions & 5 deletions r2/r2/controllers/promotecontroller.py
Expand Up @@ -224,9 +224,8 @@ def GET_report(self, start, end, link_text=None, owner=None):
owner_name = owner.name if owner else ''

if owner:
promo_weights = PromotionWeights.get_campaigns(start, end,
author_id=owner._id)
campaign_ids = [pw.promo_idx for pw in promo_weights]
campaign_ids = PromotionWeights.get_campaign_ids(
start, end, author_id=owner._id)
campaigns = PromoCampaign._byID(campaign_ids, data=True)
link_ids = {camp.link_id for camp in campaigns.itervalues()}
links.extend(Link._byID(link_ids, data=True, return_dict=False))
Expand Down Expand Up @@ -412,8 +411,7 @@ def live_by_subreddit(cls, sr):
@memoize('house_link_names', time=60)
def get_house_link_names(cls):
now = promote.promo_datetime_now()
pws = PromotionWeights.get_campaigns(now)
campaign_ids = {pw.promo_idx for pw in pws}
campaign_ids = PromotionWeights.get_campaign_ids(now)
q = PromoCampaign._query(PromoCampaign.c._id.in_(campaign_ids),
PromoCampaign.c.priority_name == 'house',
data=True)
Expand Down
4 changes: 2 additions & 2 deletions r2/r2/lib/inventory.py
Expand Up @@ -100,8 +100,8 @@ def get_date_range(start, end):
def get_campaigns_by_date(srs, start, end, ignore=None):
srs = tup(srs)
sr_names = [sr.name for sr in srs]
q = PromotionWeights.get_campaigns(start, end=end, sr_names=sr_names)
campaign_ids = {pw.promo_idx for pw in q}
campaign_ids = PromotionWeights.get_campaign_ids(
start, end=end, sr_names=sr_names)
if ignore:
campaign_ids.discard(ignore._id)
campaigns = PromoCampaign._byID(campaign_ids, data=True, return_dict=False)
Expand Down
4 changes: 2 additions & 2 deletions r2/r2/lib/promote.py
Expand Up @@ -504,8 +504,8 @@ def is_geotargeted_promo(link):


def get_promos(date, sr_names=None, link=None):
pws = PromotionWeights.get_campaigns(date, sr_names=sr_names, link=link)
campaign_ids = {pw.promo_idx for pw in pws}
campaign_ids = PromotionWeights.get_campaign_ids(
date, sr_names=sr_names, link=link)
campaigns = PromoCampaign._byID(campaign_ids, data=True, return_dict=False)
link_ids = {camp.link_id for camp in campaigns}
links = Link._byID(link_ids, data=True)
Expand Down
33 changes: 24 additions & 9 deletions r2/r2/models/bidding.py
Expand Up @@ -30,6 +30,7 @@
Column,
DateTime,
Date,
distinct,
Float,
func as safunc,
Integer,
Expand Down Expand Up @@ -423,27 +424,41 @@ def delete_unfinished(cls, thing, idx):
item._delete()

@classmethod
def get_campaigns(cls, start, end=None, link=None, author_id=None,
sr_names=None):
def _filter_query(cls, query, start, end=None, link=None,
author_id=None, sr_names=None):
start = to_date(start)
q = cls.query()

if end:
end = to_date(end)
q = q.filter(and_(cls.date >= start, cls.date < end))
query = query.filter(and_(cls.date >= start, cls.date < end))
else:
q = q.filter(cls.date == start)
query = query.filter(cls.date == start)

if link:
q = q.filter(cls.thing_name == link._fullname)
query = query.filter(cls.thing_name == link._fullname)

if author_id:
q = q.filter(cls.account_id == author_id)
query = query.filter(cls.account_id == author_id)

if sr_names:
sr_names = [cls.filter_sr_name(sr_name) for sr_name in sr_names]
q = q.filter(cls.sr_name.in_(sr_names))
query = query.filter(cls.sr_name.in_(sr_names))

return query

return list(q)
@classmethod
def get_campaign_ids(cls, start, end=None, link=None, author_id=None,
                     sr_names=None):
    """Return the set of distinct ``promo_idx`` values matching the filters.

    Only the distinct ``promo_idx`` column is selected through
    ``cls.session``; ``start``, ``end``, ``link``, ``author_id`` and
    ``sr_names`` are handed unchanged to ``cls._filter_query``, which
    applies the actual filtering.
    """
    id_query = cls._filter_query(
        cls.session.query(distinct(cls.promo_idx)),
        start, end, link, author_id, sr_names)
    # Each result row is indexed at 0 to pull out the single selected column.
    return set(row[0] for row in id_query)

@classmethod
def get_link_names(cls, start, end=None, link=None, author_id=None,
                   sr_names=None):
    """Return the set of distinct ``thing_name`` values matching the filters.

    Only the distinct ``thing_name`` column is selected through
    ``cls.session``; ``start``, ``end``, ``link``, ``author_id`` and
    ``sr_names`` are handed unchanged to ``cls._filter_query``, which
    applies the actual filtering.
    """
    name_query = cls._filter_query(
        cls.session.query(distinct(cls.thing_name)),
        start, end, link, author_id, sr_names)
    # Each result row is indexed at 0 to pull out the single selected column.
    return set(row[0] for row in name_query)


# do all the leg work of creating/connecting to tables
Expand Down
11 changes: 4 additions & 7 deletions scripts/promoted_links.py
Expand Up @@ -59,11 +59,9 @@ def error_statistics(errors):


def get_scheduled(date, sr_name=''):
all_promotions = PromotionWeights.get_campaigns(date)
fp_promotions = [p for p in all_promotions if p.sr_name == sr_name]
campaigns = PromoCampaign._byID([i.promo_idx for i in fp_promotions],
return_dict=False, data=True)
links = Link._by_fullname([i.thing_name for i in fp_promotions],
campaign_ids = PromotionWeights.get_campaign_ids(date, sr_names=[sr_name])
campaigns = PromoCampaign._byID(campaign_ids, return_dict=False, data=True)
links = Link._by_fullname({camp.link_id for camp in campaigns},
return_dict=False, data=True)
links = {l._id: l for l in links}
kept = []
Expand All @@ -77,8 +75,7 @@ def get_scheduled(date, sr_name=''):

kept.append(camp._id)

return [('%s_%s' % (PC_PREFIX, to36(p.promo_idx)), p.thing_name, p.bid)
for p in fp_promotions if p.promo_idx in kept]
return [(camp._fullname, camp.link_id, camp.bid) for camp in kept]


def get_campaign_pageviews(date, sr_name=''):
Expand Down

0 comments on commit eb9f0ae

Please sign in to comment.