| Version 6 (modified by , 21 hours ago) ( diff ) |
|---|
Advanced Reports
Finding the most active users on the app during a given time period
This SQL query wants to find the most active users. Active are users who create listings, write reviews, handle appointments and save favorites.
This query:
- Counts each type of activity
- Combines everything per user
- Calculates the final score
- Ranks the users from most to least active
SQL
WITH
params AS (
SELECT
CAST(:start_ts AS timestamp) AS start_ts,
CAST(:end_ts AS timestamp) AS end_ts
),
listings_by_user AS (
SELECT
l.owner_id AS user_id,
COUNT(*) AS listings_created
FROM listings l
JOIN params p ON l.created_at >= p.start_ts AND l.created_at < p.end_ts
GROUP BY l.owner_id
),
reviews_by_user AS (
SELECT
r.reviewer_id AS user_id,
COUNT(*) AS reviews_left,
AVG(r.rating)::numeric(10,2) AS avg_rating_left
FROM reviews r
JOIN params p ON r.created_at >= p.start_ts AND r.created_at < p.end_ts
GROUP BY r.reviewer_id
),
appointments_by_user AS (
SELECT
a.responsible_owner_id AS user_id,
COUNT(*) AS appointments_total,
COUNT(*) FILTER (WHERE a.status = 'DONE') AS appointments_done,
COUNT(*) FILTER (WHERE a.status = 'NO_SHOW') AS appointments_no_show,
COUNT(*) FILTER (WHERE a.status = 'CANCELLED')AS appointments_cancelled
FROM appointments a
JOIN params p ON a.date_time >= p.start_ts AND a.date_time < p.end_ts
GROUP BY a.responsible_owner_id
),
favorites_by_user AS (
SELECT
f.client_id AS user_id,
COUNT(*) AS favorites_saved_all_time
FROM favorite_listings f
GROUP BY f.client_id
)
SELECT
u.user_id,
u.username,
u.email,
u.name,
u.surname,
COALESCE(l.listings_created, 0) AS listings_created,
COALESCE(rv.reviews_left, 0) AS reviews_left,
COALESCE(rv.avg_rating_left, 0) AS avg_rating_left,
COALESCE(ap.appointments_total, 0) AS appointments_total,
COALESCE(ap.appointments_done, 0) AS appointments_done,
COALESCE(ap.appointments_no_show, 0) AS appointments_no_show,
COALESCE(ap.appointments_cancelled, 0) AS appointments_cancelled,
COALESCE(fv.favorites_saved_all_time, 0) AS favorites_saved_all_time,
(
COALESCE(l.listings_created, 0) * 5
+ COALESCE(rv.reviews_left, 0) * 3
+ COALESCE(ap.appointments_done, 0) * 2
+ COALESCE(fv.favorites_saved_all_time, 0) * 1
- COALESCE(ap.appointments_no_show, 0) * 2
) AS activity_score,
DENSE_RANK() OVER (
ORDER BY
(
COALESCE(l.listings_created, 0) * 5
+ COALESCE(rv.reviews_left, 0) * 3
+ COALESCE(ap.appointments_done, 0) * 2
+ COALESCE(fv.favorites_saved_all_time, 0) * 1
- COALESCE(ap.appointments_no_show, 0) * 2
) DESC,
COALESCE(l.listings_created, 0) DESC,
COALESCE(rv.reviews_left, 0) DESC
) AS activity_rank
FROM users u
LEFT JOIN listings_by_user l ON l.user_id = u.user_id
LEFT JOIN reviews_by_user rv ON rv.user_id = u.user_id
LEFT JOIN appointments_by_user ap ON ap.user_id = u.user_id
LEFT JOIN favorites_by_user fv ON fv.user_id = u.user_id
WHERE
COALESCE(l.listings_created, 0)
+ COALESCE(rv.reviews_left, 0)
+ COALESCE(ap.appointments_total, 0)
+ COALESCE(fv.favorites_saved_all_time, 0) > 0
ORDER BY activity_rank;
Relation Algebra
Period <-
{(start_ts, end_ts)}
ListingsByUser <-
γ
user_id := l.owner_id;
listings_created := COUNT(*)
(
listings l ⨝
(l.created_at ≥ p.start_ts ∧ l.created_at < p.end_ts)
Period p
)
ReviewsByUser <-
γ
user_id := r.reviewer_id;
reviews_left := COUNT(*);
avg_rating_left := AVG(r.rating)
(
reviews r ⨝
(r.created_at ≥ p.start_ts ∧ r.created_at < p.end_ts)
Period p
)
AppointmentsByUser <-
γ
user_id := a.responsible_owner_id;
appointments_total := COUNT(*);
appointments_done := COUNT(a.status = 'DONE');
appointments_no_show := COUNT(a.status = 'NO_SHOW');
appointments_cancelled := COUNT(a.status = 'CANCELLED')
(
appointments a ⨝
(a.date_time ≥ p.start_ts ∧ a.date_time < p.end_ts)
Period p
)
FavoritesByUser <-
γ
user_id := f.client_id;
favorites_saved_all_time := COUNT(*)
(favorite_listings f)
UserActivity <-
((((
users u
⟕ (u.user_id = l.user_id) ListingsByUser l
)
⟕ (u.user_id = r.user_id) ReviewsByUser r
)
⟕ (u.user_id = a.user_id) AppointmentsByUser a
)
⟕ (u.user_id = f.user_id) FavoritesByUser f
)
ActivityWithDefaults <-
π
u.user_id,
u.username,
u.email,
u.name,
u.surname,
listings_created := COALESCE(l.listings_created, 0),
reviews_left := COALESCE(r.reviews_left, 0),
avg_rating_left := COALESCE(r.avg_rating_left, 0),
appointments_total := COALESCE(a.appointments_total, 0),
appointments_done := COALESCE(a.appointments_done, 0),
appointments_no_show := COALESCE(a.appointments_no_show, 0),
appointments_cancelled := COALESCE(a.appointments_cancelled, 0),
favorites_saved_all_time := COALESCE(f.favorites_saved_all_time, 0)
(UserActivity)
ActiveUsers <-
σ
listings_created +
reviews_left +
appointments_total +
favorites_saved_all_time > 0
(ActivityWithDefaults)
ScoredUsers <-
π
*,
activity_score :=
listings_created * 5 +
reviews_left * 3 +
appointments_done * 2 +
favorites_saved_all_time * 1 -
appointments_no_show * 2
(ActiveUsers)
RankedUsers <-
rank_dense
activity_rank :=
ORDER BY
activity_score DESC,
listings_created DESC,
reviews_left DESC
(ScoredUsers)
Result <-
τ activity_rank ASC
(
π
user_id,
username,
email,
name,
surname,
listings_created,
reviews_left,
avg_rating_left,
appointments_total,
appointments_done,
appointments_no_show,
appointments_cancelled,
favorites_saved_all_time,
activity_score,
activity_rank
(RankedUsers)
)
Recommending listings to a user by similar users and liked listings
This SQL query wants to find the recommended listings for a user based on similar users and his liked listings.
This query:
- Gets the recent likes of the user
- Gets the similar users based on same liked listings
- Gets listings liked by the similar users but NOT by me
- Gets listings similar to my liked listings
- Combines them both
SQL
WITH
my_likes AS (
SELECT fl.listing_id
FROM favorite_listings fl
WHERE fl.client_id = :user_id
),
my_recent_likes AS (
SELECT fl.listing_id
FROM favorite_listings fl
JOIN listings l ON l.listing_id = fl.listing_id
WHERE fl.client_id = :user_id
ORDER BY l.created_at DESC
LIMIT :10
),
similar_users AS (
SELECT
fl2.client_id AS other_user_id,
COUNT(*) AS overlap_likes
FROM favorite_listings fl2
JOIN my_likes ml ON ml.listing_id = fl2.listing_id
WHERE fl2.client_id <> :user_id
GROUP BY fl2.client_id
HAVING COUNT(*) > 0
),
cf_candidates AS (
SELECT
fl.listing_id,
SUM(su.overlap_likes) AS cf_score,
COUNT(DISTINCT su.other_user_id) AS liked_by_similar_users
FROM similar_users su
JOIN favorite_listings fl
ON fl.client_id = su.other_user_id
LEFT JOIN my_likes ml
ON ml.listing_id = fl.listing_id
WHERE ml.listing_id IS NULL
GROUP BY fl.listing_id
),
content_candidates AS (
SELECT
l2.listing_id,
COUNT(*) AS content_score
FROM my_recent_likes r
JOIN listings l1 ON l1.listing_id = r.listing_id
JOIN listings l2 ON
l2.listing_id <> l1.listing_id
AND (
(l2.species = l1.species)
OR (l2.breed = l1.breed)
OR (l2.location = l1.location)
)
LEFT JOIN my_likes ml ON ml.listing_id = l2.listing_id
WHERE ml.listing_id IS NULL
GROUP BY l2.listing_id
),
merged AS (
SELECT
COALESCE(cf.listing_id, cc.listing_id) AS listing_id,
COALESCE(cf.cf_score, 0) AS cf_score,
COALESCE(cf.liked_by_similar_users, 0) AS liked_by_similar_users,
COALESCE(cc.content_score, 0) AS content_score
FROM cf_candidates cf
FULL OUTER JOIN content_candidates cc
ON cc.listing_id = cf.listing_id
)
SELECT
l.listing_id,
l.title,
l.species,
l.breed,
l.location,
l.created_at,
m.cf_score,
m.liked_by_similar_users,
m.content_score,
(m.cf_score * 3 + m.content_score * 2) AS final_score
FROM merged m
JOIN listings l ON l.listing_id = m.listing_id
WHERE l.status = 'ACTIVE'
AND l.owner_id <> :user_id
ORDER BY final_score DESC, l.created_at DESC
LIMIT 20;
Relation Algebra
Params <-
{(user_id := U, k_recent := 10, top_n := 20)}
MyLikes <-
π listing_id
(
σ fl.client_id = p.user_id
(
favorite_listings fl × Params p
)
)
MyRecentLikes <-
topK_{K := p.k_recent}
(
τ l.created_at DESC
(
π fl.listing_id, l.created_at
(
σ fl.client_id = p.user_id
(
(favorite_listings fl ⨝ (fl.listing_id = l.listing_id) listings l)
× Params p
)
)
)
)
SimilarUsers <-
σ other_user_id ≠ p.user_id ∧ overlap_likes > 0
(
γ
other_user_id := fl2.client_id;
overlap_likes := COUNT(*)
(
(
favorite_listings fl2 ⨝ (fl2.listing_id = ml.listing_id) MyLikes ml
)
× Params p
)
)
CFCandidates <-
γ
listing_id := fl.listing_id;
cf_score := SUM(su.overlap_likes);
liked_by_similar_users := COUNT_DISTINCT(su.other_user_id)
(
σ ml.listing_id IS NULL
(
(
(SimilarUsers su ⨝ (su.other_user_id = fl.client_id) favorite_listings fl)
⟕ (fl.listing_id = ml.listing_id) MyLikes ml
)
)
)
ContentCandidates <-
γ
listing_id := l2.listing_id;
content_score := COUNT(*)
(
σ ml.listing_id IS NULL
(
(
(
(MyRecentLikes r ⨝ (r.listing_id = l1.listing_id) listings l1)
⨝
(
l2.listing_id ≠ l1.listing_id ∧
(l2.species = l1.species ∨ l2.breed = l1.breed ∨ l2.location = l1.location)
)
listings l2
)
⟕ (l2.listing_id = ml.listing_id) MyLikes ml
)
)
)
Merged <-
π
listing_id := COALESCE(cf.listing_id, cc.listing_id),
cf_score := COALESCE(cf.cf_score, 0),
liked_by_similar_users := COALESCE(cf.liked_by_similar_users, 0),
content_score := COALESCE(cc.content_score, 0)
(CFCandidates cf ⟗ (cf.listing_id = cc.listing_id) ContentCandidates cc)
FinalWithListings <-
π
l.listing_id,
l.title,
l.species,
l.breed,
l.location,
l.created_at,
m.cf_score,
m.liked_by_similar_users,
m.content_score,
final_score := (m.cf_score * 3 + m.content_score * 2)
(
σ (l.status = 'ACTIVE' ∧ l.owner_id ≠ p.user_id)
(
(Merged m ⨝ (m.listing_id = l.listing_id) listings l)
× Params p
)
)
Result <-
topK_{N := p.top_n}
(
τ final_score DESC, created_at DESC
(FinalWithListings × Params p)
)
Note:
See TracWiki
for help on using the wiki.
