wiki:Complex DB Reports

Version 9 (modified by 231035, 8 days ago) ( diff )

--

Advanced Reports

Finding the most active users on the app during a given time period

This SQL query wants to find the most active users. Active are users who create listings, write reviews, handle appointments and save favorites.
This query:

  • Counts each type of activity
  • Combines everything per user
  • Calculates the final score
  • Ranks the users from most to least active

SQL

{{{WITH params AS (

SELECT

CAST(:start_ts AS timestamp) AS start_ts, CAST(:end_ts AS timestamp) AS end_ts

),

listings_by_user AS (

SELECT l.owner_id AS user_id, COUNT(*) AS listings_created FROM listings l

JOIN params p ON l.created_at >= p.start_ts AND l.created_at < p.end_ts

GROUP BY l.owner_id

), reviews_by_user AS (

SELECT r.reviewer_id AS user_id,

COUNT(*) AS reviews_left, AVG(r.rating)::numeric(10,2) AS avg_rating_left

FROM reviews r

JOIN params p ON r.created_at >= p.start_ts AND r.created_at < p.end_ts

GROUP BY r.reviewer_id

), appointments_by_user AS (

SELECT a.responsible_owner_id AS user_id,

COUNT(*) AS appointments_total, COUNT(*) FILTER (WHERE a.status = 'DONE') AS appointments_done, COUNT(*) FILTER (WHERE a.status = 'NO_SHOW') AS appointments_no_show, COUNT(*) FILTER (WHERE a.status = 'CANCELLED') AS appointments_cancelled

FROM appointments a

JOIN params p ON a.date_time >= p.start_ts AND a.date_time < p.end_ts

GROUP BY a.responsible_owner_id

), favorites_by_user AS (

SELECT f.client_id AS user_id, COUNT(*) AS favorites_saved_all_time FROM favorite_listings f GROUP BY f.client_id

)

SELECT

u.user_id, u.username, u.email, u.name, u.surname, COALESCE(l.listings_created, 0) AS listings_created, COALESCE(rv.reviews_left, 0) AS reviews_left, COALESCE(rv.avg_rating_left, 0) AS avg_rating_left, COALESCE(ap.appointments_total, 0) AS appointments_total, COALESCE(ap.appointments_done, 0) AS appointments_done, COALESCE(ap.appointments_no_show, 0) AS appointments_no_show, COALESCE(ap.appointments_cancelled, 0) AS appointments_cancelled, COALESCE(fv.favorites_saved_all_time, 0) AS favorites_saved_all_time, (

COALESCE(l.listings_created, 0) * 5

+ COALESCE(rv.reviews_left, 0) * 3 + COALESCE(ap.appointments_done, 0) * 2 + COALESCE(fv.favorites_saved_all_time, 0)

  • COALESCE(ap.appointments_no_show, 0) * 2

) AS activity_score,

DENSE_RANK() OVER (

ORDER BY

(

COALESCE(l.listings_created, 0) * 5

+ COALESCE(rv.reviews_left, 0) * 3 + COALESCE(ap.appointments_done, 0) * 2 + COALESCE(fv.favorites_saved_all_time, 0)

  • COALESCE(ap.appointments_no_show, 0) * 2

) DESC,

COALESCE(l.listings_created, 0) DESC, COALESCE(rv.reviews_left, 0) DESC

) AS activity_rank

FROM users u

LEFT JOIN listings_by_user l ON l.user_id = u.user_id LEFT JOIN reviews_by_user rv ON rv.user_id = u.user_id LEFT JOIN appointments_by_user ap ON ap.user_id = u.user_id LEFT JOIN favorites_by_user fv ON fv.user_id = u.user_id

WHERE COALESCE(l.listings_created, 0)

+ COALESCE(rv.reviews_left, 0) + COALESCE(ap.appointments_total, 0) + COALESCE(fv.favorites_saved_all_time, 0) > 0

ORDER BY activity_rank LIMIT 10; }}}

Relation Algebra

Period <-
{(start_ts, end_ts)}

ListingsByUser <-
γ
  user_id := l.owner_id;
  listings_created := COUNT(*)
(
  listings l ⨝
  (l.created_at ≥ p.start_ts ∧ l.created_at < p.end_ts)
  Period p
)

ReviewsByUser <-
γ
  user_id := r.reviewer_id;
  reviews_left := COUNT(*);
  avg_rating_left := AVG(r.rating)
(
  reviews r ⨝
  (r.created_at ≥ p.start_ts ∧ r.created_at < p.end_ts)
  Period p
)

AppointmentsByUser <-
γ
  user_id := a.responsible_owner_id;
  appointments_total := COUNT(*);
  appointments_done := COUNT(a.status = 'DONE');
  appointments_no_show := COUNT(a.status = 'NO_SHOW');
  appointments_cancelled := COUNT(a.status = 'CANCELLED')
(
  appointments a ⨝
  (a.date_time ≥ p.start_ts ∧ a.date_time < p.end_ts)
  Period p
)

FavoritesByUser <-
γ
  user_id := f.client_id;
  favorites_saved_all_time := COUNT(*)
(favorite_listings f)

UserActivity <-
((((
     users u
     ⟕ (u.user_id = l.user_id) ListingsByUser l
    )
   ⟕ (u.user_id = r.user_id) ReviewsByUser r
   )
  ⟕ (u.user_id = a.user_id) AppointmentsByUser a
 )
 ⟕ (u.user_id = f.user_id) FavoritesByUser f
)

ActivityWithDefaults <-
π
  u.user_id,
  u.username,
  u.email,
  u.name,
  u.surname,
  listings_created := COALESCE(l.listings_created, 0),
  reviews_left := COALESCE(r.reviews_left, 0),
  avg_rating_left := COALESCE(r.avg_rating_left, 0),
  appointments_total := COALESCE(a.appointments_total, 0),
  appointments_done := COALESCE(a.appointments_done, 0),
  appointments_no_show := COALESCE(a.appointments_no_show, 0),
  appointments_cancelled := COALESCE(a.appointments_cancelled, 0),
  favorites_saved_all_time := COALESCE(f.favorites_saved_all_time, 0)
(UserActivity)

ActiveUsers <-
σ
  listings_created +
  reviews_left +
  appointments_total +
  favorites_saved_all_time > 0
(ActivityWithDefaults)

ScoredUsers <-
π
  user_id,
  username,
  email,
  name,
  surname,
  listings_created,
  reviews_left,
  avg_rating_left,
  appointments_total,
  appointments_done,
  appointments_no_show,
  appointments_cancelled,
  favorites_saved_all_time,
  activity_score :=
    listings_created * 5 +
    reviews_left * 3 +
    appointments_done * 2 +
    favorites_saved_all_time * 1 -
    appointments_no_show * 2
(ActiveUsers)

RankedUsers <-
rank_dense
  activity_rank :=
    ORDER BY
      activity_score DESC,
      listings_created DESC,
      reviews_left DESC
(ScoredUsers)

Result <-
topK_{K := 10}
(
  τ activity_rank ASC
  (
    π
      user_id,
      username,
      email,
      name,
      surname,
      listings_created,
      reviews_left,
      avg_rating_left,
      appointments_total,
      appointments_done,
      appointments_no_show,
      appointments_cancelled,
      favorites_saved_all_time,
      activity_score,
      activity_rank
    (RankedUsers)
  )
)

Recommending listings to a user by similar users and liked listings

This SQL query wants to find the recommended listings for a user based on similar users and his liked listings.
This query:

  • Gets the recent likes of the user
  • Gets the similar users based on same liked listings
  • Gets listings liked by the similar users but NOT by me
  • Gets listings similar to my liked listings
  • Combines them both

SQL

WITH
my_likes AS (
  SELECT fl.listing_id
  FROM favorite_listings fl
  WHERE fl.client_id = :user_id
),

my_recent_likes AS (
  SELECT fl.listing_id
  FROM favorite_listings fl
  JOIN listings l ON l.listing_id = fl.listing_id
  WHERE fl.client_id = :user_id
  ORDER BY l.created_at DESC
  LIMIT :10
),

similar_users AS (
  SELECT
    fl2.client_id AS other_user_id,
    COUNT(*)      AS overlap_likes
  FROM favorite_listings fl2
  JOIN my_likes ml ON ml.listing_id = fl2.listing_id
  WHERE fl2.client_id <> :user_id
  GROUP BY fl2.client_id
  HAVING COUNT(*) > 0
),

cf_candidates AS (
  SELECT
    fl.listing_id,
    SUM(su.overlap_likes) AS cf_score,
    COUNT(DISTINCT su.other_user_id) AS liked_by_similar_users
  FROM similar_users su
  JOIN favorite_listings fl
    ON fl.client_id = su.other_user_id
  LEFT JOIN my_likes ml
    ON ml.listing_id = fl.listing_id
  WHERE ml.listing_id IS NULL  
  GROUP BY fl.listing_id
),

content_candidates AS (
  SELECT
    l2.listing_id,
    COUNT(*) AS content_score
  FROM my_recent_likes r
  JOIN listings l1 ON l1.listing_id = r.listing_id
  JOIN listings l2 ON
      l2.listing_id <> l1.listing_id
      AND (
           (l2.species = l1.species)
        OR (l2.breed   = l1.breed)
        OR (l2.location = l1.location)
      )
  LEFT JOIN my_likes ml ON ml.listing_id = l2.listing_id
  WHERE ml.listing_id IS NULL
  GROUP BY l2.listing_id
),


merged AS (
  SELECT
    COALESCE(cf.listing_id, cc.listing_id) AS listing_id,
    COALESCE(cf.cf_score, 0)              AS cf_score,
    COALESCE(cf.liked_by_similar_users, 0) AS liked_by_similar_users,
    COALESCE(cc.content_score, 0)         AS content_score
  FROM cf_candidates cf
  FULL OUTER JOIN content_candidates cc
    ON cc.listing_id = cf.listing_id
)

SELECT
  l.listing_id,
  l.title,
  l.species,
  l.breed,
  l.location,
  l.created_at,

  m.cf_score,
  m.liked_by_similar_users,
  m.content_score,

  (m.cf_score * 3 + m.content_score * 2) AS final_score

FROM merged m
JOIN listings l ON l.listing_id = m.listing_id
WHERE l.status = 'ACTIVE'
  AND l.owner_id <> :user_id
ORDER BY final_score DESC, l.created_at DESC
LIMIT 20;

Relation Algebra

Params <-
{(user_id := U, k_recent := 10, top_n := 20)}
MyLikes <-
π listing_id
(
  σ fl.client_id = p.user_id
  (
    favorite_listings fl × Params p
  )
)
MyRecentLikes <-
topK_{K := p.k_recent}
(
  τ l.created_at DESC
  (
    π fl.listing_id, l.created_at
    (
      σ fl.client_id = p.user_id
      (
        (favorite_listings fl ⨝ (fl.listing_id = l.listing_id) listings l)
        × Params p
      )
    )
  )
)
SimilarUsers <-
σ other_user_id ≠ p.user_id ∧ overlap_likes > 0
(
  γ
    other_user_id := fl2.client_id;
    overlap_likes := COUNT(*)
  (
    (
      favorite_listings fl2 ⨝ (fl2.listing_id = ml.listing_id) MyLikes ml
    )
    × Params p
  )
)
CFCandidates <-
γ
  listing_id := fl.listing_id;
  cf_score := SUM(su.overlap_likes);
  liked_by_similar_users := COUNT_DISTINCT(su.other_user_id)
(
  σ ml.listing_id IS NULL
  (
    (
      (SimilarUsers su ⨝ (su.other_user_id = fl.client_id) favorite_listings fl)
      ⟕ (fl.listing_id = ml.listing_id) MyLikes ml
    )
  )
)

ContentCandidates <-
γ
  listing_id := l2.listing_id;
  content_score := COUNT(*)
(
  σ ml.listing_id IS NULL
  (
    (
      (
        (MyRecentLikes r ⨝ (r.listing_id = l1.listing_id) listings l1)
        ⨝
        (
          l2.listing_id ≠ l1.listing_id ∧
          (l2.species = l1.species ∨ l2.breed = l1.breed ∨ l2.location = l1.location)
        )
        listings l2
      )
      ⟕ (l2.listing_id = ml.listing_id) MyLikes ml
    )
  )
)
Merged <-
π
  listing_id := COALESCE(cf.listing_id, cc.listing_id),
  cf_score := COALESCE(cf.cf_score, 0),
  liked_by_similar_users := COALESCE(cf.liked_by_similar_users, 0),
  content_score := COALESCE(cc.content_score, 0)
(CFCandidates cf ⟗ (cf.listing_id = cc.listing_id) ContentCandidates cc)
FinalWithListings <-
π
  l.listing_id,
  l.title,
  l.species,
  l.breed,
  l.location,
  l.created_at,
  m.cf_score,
  m.liked_by_similar_users,
  m.content_score,
  final_score := (m.cf_score * 3 + m.content_score * 2)
(
  σ (l.status = 'ACTIVE' ∧ l.owner_id ≠ p.user_id)
  (
    (Merged m ⨝ (m.listing_id = l.listing_id) listings l)
    × Params p
  )
)

Result <-
topK_{N := p.top_n}
(
  τ final_score DESC, created_at DESC
  (FinalWithListings × Params p)
)

Note: See TracWiki for help on using the wiki.