| 210 | == 3. Top user-purchased routes == |
| 211 | |
| 212 | ==== Original Query |
| 213 | {{{ |
| 214 | WITH route_weighted_usage AS ( |
| 215 | SELECT |
| 216 | r.route_id, |
| 217 | l_from.name AS from_location, |
| 218 | l_to.name AS to_location, |
| 219 | to_.company_name AS transport_company, |
| 220 | SUM( |
| 221 | CASE |
| 222 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE) |
| 223 | THEN 0.5 |
| 224 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') |
| 225 | THEN 0.3 |
| 226 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months') |
| 227 | THEN 0.2 |
| 228 | ELSE 0 |
| 229 | END |
| 230 | ) AS weighted_score |
| 231 | FROM ticket t |
| 232 | JOIN trip tr ON t.trip_id = tr.trip_id |
| 233 | JOIN route r ON tr.route_id = r.route_id |
| 234 | JOIN location l_from ON r.from_location_id = l_from.location_id |
| 235 | JOIN location l_to ON r.to_location_id = l_to.location_id |
| 236 | JOIN transport_organizer to_ ON r.transport_organizer_id = to_.transport_organizer_id |
| 237 | WHERE t.account_id = 300 AND t.date_purchased >= CURRENT_DATE - INTERVAL '3 months' |
| 238 | GROUP BY r.route_id, l_from.name, l_to.name, to_.company_name |
| 239 | ) |
| 240 | SELECT |
| 241 | route_id, |
| 242 | from_location, |
| 243 | to_location, |
| 244 | transport_company, |
| 245 | weighted_score |
| 246 | FROM route_weighted_usage |
| 247 | WHERE weighted_score >= (SELECT AVG(weighted_score) FROM route_weighted_usage) |
| 248 | ORDER BY weighted_score DESC; |
| 249 | }}} |
| 250 | |
| 251 | == Results (Relational Algebra) == |
| 252 | {{{ |
| 253 | T1 = σ(account_id = 300 ∧ date_purchased ≥ CURRENT_DATE - 3m)(ticket) |
| 254 | J1 = T1 ⨝ trip |
| 255 | J2 = J1 ⨝ route |
| 256 | J3 = J2 ⨝ l_from |
| 257 | J4 = J3 ⨝ l_to |
| 258 | J5 = J4 ⨝ transport_organizer |
| 259 | |
| 260 | P1 = π(route_id, l_from.name, l_to.name, to_.company_name, weight)( |
| 261 | extended projection with CASE(date_purchased) → weight |
| 262 | )(J5) |
| 263 | |
| 264 | G1 = γ(route_id, from_location, to_location, transport_company; |
| 265 | SUM(weight) → weighted_score)(P1) |
| 266 | |
| 267 | A1 = γ(; AVG(weighted_score) → avg_score)(G1) |
| 268 | |
| 269 | Result = τ_desc(weighted_score)(σ(weighted_score ≥ avg_score)(G1)) |
| 270 | }}} |
| 271 | |
| 272 | == Analysis (Optimizations) == |
| 273 | * Push selection down: filter `ticket` by `account_id` and `date_purchased` before joins to reduce data early. |
| 274 | * Compute `weight` in a projection step before aggregation for clarity and efficiency. |
| 275 | * Replace correlated subquery for `AVG` with a window function or single aggregation to avoid repeated scans. |
| 276 | * Indexing opportunities: |
| 277 | - `ticket(account_id, date_purchased)` for fast filtering. |
| 278 | - Foreign key indexes on `ticket.trip_id`, `trip.route_id`, and `route.{from_location_id, to_location_id, transport_organizer_id}`. |
| 279 | |
| 280 | == Conclusion (Rewritten SQL) == |
| 281 | {{{ |
| 282 | WITH route_scores AS ( |
| 283 | SELECT |
| 284 | r.route_id, |
| 285 | l_from.name AS from_location, |
| 286 | l_to.name AS to_location, |
| 287 | to_.company_name AS transport_company, |
| 288 | SUM( |
| 289 | CASE |
| 290 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE) |
| 291 | THEN 0.5 |
| 292 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') |
| 293 | THEN 0.3 |
| 294 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months') |
| 295 | THEN 0.2 |
| 296 | ELSE 0 |
| 297 | END |
| 298 | ) AS weighted_score |
| 299 | FROM ticket t |
| 300 | JOIN trip tr ON t.trip_id = tr.trip_id |
| 301 | JOIN route r ON tr.route_id = r.route_id |
| 302 | JOIN location l_from ON r.from_location_id = l_from.location_id |
| 303 | JOIN location l_to ON r.to_location_id = l_to.location_id |
| 304 | JOIN transport_organizer to_ ON r.transport_organizer_id = to_.transport_organizer_id |
| 305 | WHERE t.account_id = 300 |
| 306 | AND t.date_purchased >= CURRENT_DATE - INTERVAL '3 months' |
| 307 | GROUP BY r.route_id, l_from.name, l_to.name, to_.company_name |
| 308 | ) |
| 309 | SELECT * |
| 310 | FROM ( |
| 311 | SELECT route_scores.*, AVG(weighted_score) OVER () AS avg_score |
| 312 | FROM route_scores |
| 313 | ) ranked |
| 314 | WHERE weighted_score >= avg_score |
| 315 | ORDER BY weighted_score DESC; |
| 316 | }}} |
| 317 | |
| 318 | |
| 319 | |
| 320 | == 5. Most popular destinations in the past year == |
| 321 | |
| 322 | ==== Original Query |
| 323 | {{{ |
| 324 | WITH destinations AS ( |
| 325 | SELECT |
| 326 | l.name AS destination, |
| 327 | COUNT(t.ticket_id) AS total_passengers, |
| 328 | COUNT(DISTINCT t.account_id) AS unique_customers, |
| 329 | SUM(t.price) AS total_revenue, |
| 330 | COUNT(st.student_ticket_id) AS student_tickets, |
| 331 | COUNT(ct.child_ticket_id) AS child_tickets, |
| 332 | l.latitude, |
| 333 | l.longitude |
| 334 | FROM location l |
| 335 | JOIN ticket t ON l.location_id = t.gets_off_location_id |
| 336 | JOIN trip tr ON t.trip_id = tr.trip_id |
| 337 | JOIN transport_organizer to_org ON tr.transport_organizer_id = to_org.transport_organizer_id |
| 338 | LEFT JOIN student_ticket st ON t.ticket_id = st.ticket_id |
| 339 | LEFT JOIN child_ticket ct ON t.ticket_id = ct.ticket_id |
| 340 | GROUP BY l.location_id, l.name, l.latitude, l.longitude |
| 341 | ), |
| 342 | stats AS ( |
| 343 | SELECT AVG(total_passengers) AS mean_passengers |
| 344 | FROM destinations |
| 345 | ) |
| 346 | SELECT |
| 347 | d.destination, |
| 348 | d.total_passengers, |
| 349 | d.unique_customers, |
| 350 | d.total_revenue, |
| 351 | d.student_tickets, |
| 352 | d.child_tickets |
| 353 | FROM destinations d, stats |
| 354 | WHERE d.total_passengers >= stats.mean_passengers |
| 355 | ORDER BY d.total_passengers DESC; |
| 356 | }}} |
| 357 | |
| 358 | == Results (Relational Algebra) == |
| 359 | {{{ |
| 360 | J1 = location ⨝ ticket (l.location_id = t.gets_off_location_id) |
| 361 | J2 = J1 ⨝ trip |
| 362 | J3 = J2 ⨝ transport_organizer |
| 363 | J4 = J3 ⟕ student_ticket |
| 364 | J5 = J4 ⟕ child_ticket |
| 365 | |
| 366 | G1 = γ(l.location_id, l.name, l.latitude, l.longitude; |
| 367 | COUNT(ticket_id) → total_passengers, |
| 368 | COUNT_DISTINCT(account_id) → unique_customers, |
| 369 | SUM(price) → total_revenue, |
| 370 | COUNT(student_ticket_id) → student_tickets, |
| 371 | COUNT(child_ticket_id) → child_tickets)(J5) |
| 372 | |
| 373 | A1 = γ(; AVG(total_passengers) → mean_passengers)(G1) |
| 374 | |
| 375 | Result = τ_desc(total_passengers)(σ(total_passengers ≥ mean_passengers)(G1)) |
| 376 | }}} |
| 377 | |
| 378 | == Analysis (Optimizations) == |
| 379 | * Push projections early: keep only `location_id`, `ticket_id`, `account_id`, `price`, and join keys before grouping. |
| 380 | * Apply the date filter (`past year`) explicitly in the `WHERE` clause on `ticket.date_purchased` (missing in original). |
| 381 | * Replace cross join with `stats` by using a window function (`AVG() OVER ()`) to avoid scanning `destinations` twice. |
| 382 | * Indexing opportunities: |
| 383 | - `ticket(gets_off_location_id, date_purchased)` |
| 384 | - `student_ticket.ticket_id`, `child_ticket.ticket_id` |
| 385 | - Foreign keys on `trip.route_id`, `trip.transport_organizer_id`. |
| 386 | |
| 387 | == Conclusion (Rewritten SQL) == |
| 388 | {{{ |
| 389 | WITH destinations AS ( |
| 390 | SELECT |
| 391 | l.name AS destination, |
| 392 | COUNT(t.ticket_id) AS total_passengers, |
| 393 | COUNT(DISTINCT t.account_id) AS unique_customers, |
| 394 | SUM(t.price) AS total_revenue, |
| 395 | COUNT(st.student_ticket_id) AS student_tickets, |
| 396 | COUNT(ct.child_ticket_id) AS child_tickets, |
| 397 | l.latitude, |
| 398 | l.longitude |
| 399 | FROM location l |
| 400 | JOIN ticket t ON l.location_id = t.gets_off_location_id |
| 401 | JOIN trip tr ON t.trip_id = tr.trip_id |
| 402 | JOIN transport_organizer to_org ON tr.transport_organizer_id = to_org.transport_organizer_id |
| 403 | LEFT JOIN student_ticket st ON t.ticket_id = st.ticket_id |
| 404 | LEFT JOIN child_ticket ct ON t.ticket_id = ct.ticket_id |
| 405 | WHERE t.date_purchased >= CURRENT_DATE - INTERVAL '1 year' |
| 406 | GROUP BY l.location_id, l.name, l.latitude, l.longitude |
| 407 | ) |
| 408 | SELECT * |
| 409 | FROM ( |
| 410 | SELECT d.*, |
| 411 | AVG(total_passengers) OVER () AS mean_passengers |
| 412 | FROM destinations d |
| 413 | ) ranked |
| 414 | WHERE total_passengers >= mean_passengers |
| 415 | ORDER BY total_passengers DESC; |
| 416 | }}} |