wiki:Complex DB Reports

Version 3 (modified by 231035, 43 hours ago) ( diff )

--

Advanced Reports

Finding the most active users on the app during a given time period

This SQL query wants to find the most active users. Active are users who create listings, write reviews, handle appointments and save favorites. This query:

  • Counts each type of activity
  • Combines everything per user
  • Calculates the final score
  • Ranks the users from most to least active

SQL

WITH
params AS (
  SELECT
    CAST(:start_ts AS timestamp) AS start_ts,
    CAST(:end_ts   AS timestamp) AS end_ts
),

listings_by_user AS (
  SELECT
    l.owner_id AS user_id,
    COUNT(*)   AS listings_created
  FROM listings l
  JOIN params p ON l.created_at >= p.start_ts AND l.created_at < p.end_ts
  GROUP BY l.owner_id
),

reviews_by_user AS (
  SELECT
    r.reviewer_id AS user_id,
    COUNT(*)      AS reviews_left,
    AVG(r.rating)::numeric(10,2) AS avg_rating_left
  FROM reviews r
  JOIN params p ON r.created_at >= p.start_ts AND r.created_at < p.end_ts
  GROUP BY r.reviewer_id
),

appointments_by_user AS (
  SELECT
    a.responsible_owner_id AS user_id,
    COUNT(*) AS appointments_total,
    COUNT(*) FILTER (WHERE a.status = 'DONE')     AS appointments_done,
    COUNT(*) FILTER (WHERE a.status = 'NO_SHOW')  AS appointments_no_show,
    COUNT(*) FILTER (WHERE a.status = 'CANCELLED')AS appointments_cancelled
  FROM appointments a
  JOIN params p ON a.date_time >= p.start_ts AND a.date_time < p.end_ts
  GROUP BY a.responsible_owner_id
),

favorites_by_user AS (
  SELECT
    f.client_id AS user_id,
    COUNT(*)    AS favorites_saved_all_time
  FROM favorite_listings f
  GROUP BY f.client_id
)

SELECT
  u.user_id,
  u.username,
  u.email,
  u.name,
  u.surname,

  COALESCE(l.listings_created, 0)          AS listings_created,
  COALESCE(rv.reviews_left, 0)             AS reviews_left,
  COALESCE(rv.avg_rating_left, 0)       AS avg_rating_left,
  COALESCE(ap.appointments_total, 0)       AS appointments_total,
  COALESCE(ap.appointments_done, 0)        AS appointments_done,
  COALESCE(ap.appointments_no_show, 0)     AS appointments_no_show,
  COALESCE(ap.appointments_cancelled, 0)   AS appointments_cancelled,
  COALESCE(fv.favorites_saved_all_time, 0) AS favorites_saved_all_time,

  (
    COALESCE(l.listings_created, 0) * 5
    + COALESCE(rv.reviews_left, 0) * 3
    + COALESCE(ap.appointments_done, 0) * 2
    + COALESCE(fv.favorites_saved_all_time, 0) * 1
    - COALESCE(ap.appointments_no_show, 0) * 2
  ) AS activity_score,

  DENSE_RANK() OVER (
    ORDER BY
      (
        COALESCE(l.listings_created, 0) * 5
        + COALESCE(rv.reviews_left, 0) * 3
        + COALESCE(ap.appointments_done, 0) * 2
        + COALESCE(fv.favorites_saved_all_time, 0) * 1
        - COALESCE(ap.appointments_no_show, 0) * 2
      ) DESC,
      COALESCE(l.listings_created, 0) DESC,
      COALESCE(rv.reviews_left, 0) DESC
  ) AS activity_rank

FROM users u
LEFT JOIN listings_by_user     l  ON l.user_id  = u.user_id
LEFT JOIN reviews_by_user      rv ON rv.user_id = u.user_id
LEFT JOIN appointments_by_user ap ON ap.user_id = u.user_id
LEFT JOIN favorites_by_user    fv ON fv.user_id = u.user_id

WHERE
  COALESCE(l.listings_created, 0)
  + COALESCE(rv.reviews_left, 0)
  + COALESCE(ap.appointments_total, 0)
  + COALESCE(fv.favorites_saved_all_time, 0) > 0

ORDER BY activity_rank;

Relation Algebra

Period <-
{ (start_ts, end_ts) }

ListingsByUser <-
γ
  user_id := l.owner_id;
  listings_created := COUNT(*)
(
  listings l ⨝
  (l.created_at ≥ p.start_ts ∧ l.created_at < p.end_ts)
  Period p
)
ReviewsByUser <-
γ
  user_id := r.reviewer_id;
  reviews_left := COUNT(*);
  avg_rating_left := AVG(r.rating)
(
  reviews r ⨝
  (r.created_at ≥ p.start_ts ∧ r.created_at < p.end_ts)
  Period p
)
AppointmentsByUser <-
γ
  user_id := a.responsible_owner_id;
  appointments_total := COUNT(*);
  appointments_done := COUNT(a.status = 'DONE');
  appointments_no_show := COUNT(a.status = 'NO_SHOW');
  appointments_cancelled := COUNT(a.status = 'CANCELLED')
(
  appointments a ⨝
  (a.date_time ≥ p.start_ts ∧ a.date_time < p.end_ts)
  Period p
)

FavoritesByUser <-
γ
  user_id := f.client_id;
  favorites_saved_all_time := COUNT(*)
(
  favorite_listings f
)

UserActivity <-
((((
     users u
     ⟕ (u.user_id = l.user_id) ListingsByUser l
    )
   ⟕ (u.user_id = r.user_id) ReviewsByUser r
   )
  ⟕ (u.user_id = a.user_id) AppointmentsByUser a
 )
 ⟕ (u.user_id = f.user_id) FavoritesByUser f
)
ActivityWithDefaults <-
π
  u.user_id,
  u.username,
  u.email,
  u.name,
  u.surname,
  listings_created := COALESCE(l.listings_created, 0),
  reviews_left := COALESCE(r.reviews_left, 0),
  avg_rating_left := COALESCE(r.avg_rating_left, 0),
  appointments_total := COALESCE(a.appointments_total, 0),
  appointments_done := COALESCE(a.appointments_done, 0),
  appointments_no_show := COALESCE(a.appointments_no_show, 0),
  appointments_cancelled := COALESCE(a.appointments_cancelled, 0),
  favorites_saved_all_time := COALESCE(f.favorites_saved_all_time, 0)
(
  UserActivity
)

ActiveUsers <-
σ
  listings_created +
  reviews_left +
  appointments_total +
  favorites_saved_all_time > 0
(
  ActivityWithDefaults
)
ScoredUsers <-
π
  *,
  activity_score :=
    listings_created * 5 +
    reviews_left * 3 +
    appointments_done * 2 +
    favorites_saved_all_time * 1 -
    appointments_no_show * 2
(
  ActiveUsers
)

RankedUsers <-
rank_dense
  activity_rank :=
    ORDER BY
      activity_score DESC,
      listings_created DESC,
      reviews_left DESC
(
  ScoredUsers
)
Result <-
τ activity_rank ASC
(
  π
    user_id,
    username,
    email,
    name,
    surname,
    listings_created,
    reviews_left,
    avg_rating_left,
    appointments_total,
    appointments_done,
    appointments_no_show,
    appointments_cancelled,
    favorites_saved_all_time,
    activity_score,
    activity_rank
  (
    RankedUsers
  )
)

Recommending listings to a user by similar users and liked listings

This SQL query wants to find the recommended listings for a user based on similar users and his liked listings. This query:

SQL

Relation Algebra

Note: See TracWiki for help on using the wiki.