= Advanced Reports == Finding the most active users on the app during a given time period This SQL query wants to find the most active users. Active are users who create listings, write reviews, handle appointments and save favorites. [[BR]] This query: * Counts each type of activity * Combines everything per user * Calculates the final score * Ranks the users from most to least active === SQL {{{ WITH params AS ( SELECT CAST(:start_ts AS timestamp) AS start_ts, CAST(:end_ts AS timestamp) AS end_ts ), listings_by_user AS ( SELECT l.owner_id AS user_id, COUNT(*) AS listings_created FROM listings l JOIN params p ON l.created_at >= p.start_ts AND l.created_at < p.end_ts GROUP BY l.owner_id ), reviews_by_user AS ( SELECT r.reviewer_id AS user_id, COUNT(*) AS reviews_left, AVG(r.rating)::numeric(10,2) AS avg_rating_left FROM reviews r JOIN params p ON r.created_at >= p.start_ts AND r.created_at < p.end_ts GROUP BY r.reviewer_id ), appointments_by_user AS ( SELECT a.responsible_owner_id AS user_id, COUNT(*) AS appointments_total, COUNT(*) FILTER (WHERE a.status = 'DONE') AS appointments_done, COUNT(*) FILTER (WHERE a.status = 'NO_SHOW') AS appointments_no_show, COUNT(*) FILTER (WHERE a.status = 'CANCELLED')AS appointments_cancelled FROM appointments a JOIN params p ON a.date_time >= p.start_ts AND a.date_time < p.end_ts GROUP BY a.responsible_owner_id ), favorites_by_user AS ( SELECT f.client_id AS user_id, COUNT(*) AS favorites_saved_all_time FROM favorite_listings f GROUP BY f.client_id ) SELECT u.user_id, u.username, u.email, u.name, u.surname, COALESCE(l.listings_created, 0) AS listings_created, COALESCE(rv.reviews_left, 0) AS reviews_left, COALESCE(rv.avg_rating_left, 0) AS avg_rating_left, COALESCE(ap.appointments_total, 0) AS appointments_total, COALESCE(ap.appointments_done, 0) AS appointments_done, COALESCE(ap.appointments_no_show, 0) AS appointments_no_show, COALESCE(ap.appointments_cancelled, 0) AS appointments_cancelled, COALESCE(fv.favorites_saved_all_time, 0) AS favorites_saved_all_time, ( COALESCE(l.listings_created, 0) * 5 + COALESCE(rv.reviews_left, 0) * 3 + COALESCE(ap.appointments_done, 0) * 2 + COALESCE(fv.favorites_saved_all_time, 0) * 1 - COALESCE(ap.appointments_no_show, 0) * 2 ) AS activity_score, DENSE_RANK() OVER ( ORDER BY ( COALESCE(l.listings_created, 0) * 5 + COALESCE(rv.reviews_left, 0) * 3 + COALESCE(ap.appointments_done, 0) * 2 + COALESCE(fv.favorites_saved_all_time, 0) * 1 - COALESCE(ap.appointments_no_show, 0) * 2 ) DESC, COALESCE(l.listings_created, 0) DESC, COALESCE(rv.reviews_left, 0) DESC ) AS activity_rank FROM users u LEFT JOIN listings_by_user l ON l.user_id = u.user_id LEFT JOIN reviews_by_user rv ON rv.user_id = u.user_id LEFT JOIN appointments_by_user ap ON ap.user_id = u.user_id LEFT JOIN favorites_by_user fv ON fv.user_id = u.user_id WHERE COALESCE(l.listings_created, 0) + COALESCE(rv.reviews_left, 0) + COALESCE(ap.appointments_total, 0) + COALESCE(fv.favorites_saved_all_time, 0) > 0 ORDER BY activity_rank; }}} === Relation Algebra {{{ Period <- {(start_ts, end_ts)} ListingsByUser <- γ user_id := l.owner_id; listings_created := COUNT(*) ( listings l ⨝ (l.created_at ≥ p.start_ts ∧ l.created_at < p.end_ts) Period p ) ReviewsByUser <- γ user_id := r.reviewer_id; reviews_left := COUNT(*); avg_rating_left := AVG(r.rating) ( reviews r ⨝ (r.created_at ≥ p.start_ts ∧ r.created_at < p.end_ts) Period p ) AppointmentsByUser <- γ user_id := a.responsible_owner_id; appointments_total := COUNT(*); appointments_done := COUNT(a.status = 'DONE'); appointments_no_show := COUNT(a.status = 'NO_SHOW'); appointments_cancelled := COUNT(a.status = 'CANCELLED') ( appointments a ⨝ (a.date_time ≥ p.start_ts ∧ a.date_time < p.end_ts) Period p ) FavoritesByUser <- γ user_id := f.client_id; favorites_saved_all_time := COUNT(*) (favorite_listings f) UserActivity <- (((( users u ⟕ (u.user_id = l.user_id) ListingsByUser l ) ⟕ (u.user_id = r.user_id) ReviewsByUser r ) ⟕ (u.user_id = a.user_id) AppointmentsByUser a ) ⟕ (u.user_id = f.user_id) FavoritesByUser f ) ActivityWithDefaults <- π u.user_id, u.username, u.email, u.name, u.surname, listings_created := COALESCE(l.listings_created, 0), reviews_left := COALESCE(r.reviews_left, 0), avg_rating_left := COALESCE(r.avg_rating_left, 0), appointments_total := COALESCE(a.appointments_total, 0), appointments_done := COALESCE(a.appointments_done, 0), appointments_no_show := COALESCE(a.appointments_no_show, 0), appointments_cancelled := COALESCE(a.appointments_cancelled, 0), favorites_saved_all_time := COALESCE(f.favorites_saved_all_time, 0) (UserActivity) ActiveUsers <- σ listings_created + reviews_left + appointments_total + favorites_saved_all_time > 0 (ActivityWithDefaults) ScoredUsers <- π *, activity_score := listings_created * 5 + reviews_left * 3 + appointments_done * 2 + favorites_saved_all_time * 1 - appointments_no_show * 2 (ActiveUsers) RankedUsers <- rank_dense activity_rank := ORDER BY activity_score DESC, listings_created DESC, reviews_left DESC (ScoredUsers) Result <- τ activity_rank ASC ( π user_id, username, email, name, surname, listings_created, reviews_left, avg_rating_left, appointments_total, appointments_done, appointments_no_show, appointments_cancelled, favorites_saved_all_time, activity_score, activity_rank (RankedUsers) ) }}} == Recommending listings to a user by similar users and liked listings This SQL query wants to find the recommended listings for a user based on similar users and his liked listings. [[BR]] This query: * Gets the recent likes of the user * Gets the similar users based on same liked listings * Gets listings liked by the similar users but NOT by me * Gets listings similar to my liked listings * Combines them both === SQL {{{ WITH my_likes AS ( SELECT fl.listing_id FROM favorite_listings fl WHERE fl.client_id = :user_id ), my_recent_likes AS ( SELECT fl.listing_id FROM favorite_listings fl JOIN listings l ON l.listing_id = fl.listing_id WHERE fl.client_id = :user_id ORDER BY l.created_at DESC LIMIT :10 ), similar_users AS ( SELECT fl2.client_id AS other_user_id, COUNT(*) AS overlap_likes FROM favorite_listings fl2 JOIN my_likes ml ON ml.listing_id = fl2.listing_id WHERE fl2.client_id <> :user_id GROUP BY fl2.client_id HAVING COUNT(*) > 0 ), cf_candidates AS ( SELECT fl.listing_id, SUM(su.overlap_likes) AS cf_score, COUNT(DISTINCT su.other_user_id) AS liked_by_similar_users FROM similar_users su JOIN favorite_listings fl ON fl.client_id = su.other_user_id LEFT JOIN my_likes ml ON ml.listing_id = fl.listing_id WHERE ml.listing_id IS NULL GROUP BY fl.listing_id ), content_candidates AS ( SELECT l2.listing_id, COUNT(*) AS content_score FROM my_recent_likes r JOIN listings l1 ON l1.listing_id = r.listing_id JOIN listings l2 ON l2.listing_id <> l1.listing_id AND ( (l2.species = l1.species) OR (l2.breed = l1.breed) OR (l2.location = l1.location) ) LEFT JOIN my_likes ml ON ml.listing_id = l2.listing_id WHERE ml.listing_id IS NULL GROUP BY l2.listing_id ), merged AS ( SELECT COALESCE(cf.listing_id, cc.listing_id) AS listing_id, COALESCE(cf.cf_score, 0) AS cf_score, COALESCE(cf.liked_by_similar_users, 0) AS liked_by_similar_users, COALESCE(cc.content_score, 0) AS content_score FROM cf_candidates cf FULL OUTER JOIN content_candidates cc ON cc.listing_id = cf.listing_id ) SELECT l.listing_id, l.title, l.species, l.breed, l.location, l.created_at, m.cf_score, m.liked_by_similar_users, m.content_score, (m.cf_score * 3 + m.content_score * 2) AS final_score FROM merged m JOIN listings l ON l.listing_id = m.listing_id WHERE l.status = 'ACTIVE' ORDER BY final_score DESC, l.created_at DESC LIMIT 20; }}} === Relation Algebra {{{ Params <- {(user_id := U, k_recent := 10, top_n := 20)} MyLikes <- π listing_id ( σ fl.client_id = p.user_id ( favorite_listings fl × Params p ) ) MyRecentLikes <- topK_{K := p.k_recent} ( τ l.created_at DESC ( π fl.listing_id, l.created_at ( σ fl.client_id = p.user_id ( (favorite_listings fl ⨝ (fl.listing_id = l.listing_id) listings l) × Params p ) ) ) ) SimilarUsers <- σ other_user_id ≠ p.user_id ∧ overlap_likes > 0 ( γ other_user_id := fl2.client_id; overlap_likes := COUNT(*) ( ( favorite_listings fl2 ⨝ (fl2.listing_id = ml.listing_id) MyLikes ml ) × Params p ) ) CFCandidates <- γ listing_id := fl.listing_id; cf_score := SUM(su.overlap_likes); liked_by_similar_users := COUNT_DISTINCT(su.other_user_id) ( σ ml.listing_id IS NULL ( ( (SimilarUsers su ⨝ (su.other_user_id = fl.client_id) favorite_listings fl) ⟕ (fl.listing_id = ml.listing_id) MyLikes ml ) ) ) ContentCandidates <- γ listing_id := l2.listing_id; content_score := COUNT(*) ( σ ml.listing_id IS NULL ( ( ( (MyRecentLikes r ⨝ (r.listing_id = l1.listing_id) listings l1) ⨝ ( l2.listing_id ≠ l1.listing_id ∧ (l2.species = l1.species ∨ l2.breed = l1.breed ∨ l2.location = l1.location) ) listings l2 ) ⟕ (l2.listing_id = ml.listing_id) MyLikes ml ) ) ) Merged <- π listing_id := COALESCE(cf.listing_id, cc.listing_id), cf_score := COALESCE(cf.cf_score, 0), liked_by_similar_users := COALESCE(cf.liked_by_similar_users, 0), content_score := COALESCE(cc.content_score, 0) (CFCandidates cf ⟗ (cf.listing_id = cc.listing_id) ContentCandidates cc) FinalWithListings <- π l.listing_id, l.title, l.species, l.breed, l.location, l.created_at, m.cf_score, m.liked_by_similar_users, m.content_score, final_score := (m.cf_score * 3 + m.content_score * 2) ( σ l.status = 'ACTIVE' ((Merged m ⨝ (m.listing_id = l.listing_id) listings l)) ) Result <- topK_{N := p.top_n} ( τ final_score DESC, created_at DESC (FinalWithListings × Params p) ) }}}