| | 210 | == 3. Top user-purchased routes == |
| | 211 | |
| | 212 | ==== Original Query |
| | 213 | {{{ |
| | 214 | WITH route_weighted_usage AS ( |
| | 215 | SELECT |
| | 216 | r.route_id, |
| | 217 | l_from.name AS from_location, |
| | 218 | l_to.name AS to_location, |
| | 219 | to_.company_name AS transport_company, |
| | 220 | SUM( |
| | 221 | CASE |
| | 222 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE) |
| | 223 | THEN 0.5 |
| | 224 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') |
| | 225 | THEN 0.3 |
| | 226 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months') |
| | 227 | THEN 0.2 |
| | 228 | ELSE 0 |
| | 229 | END |
| | 230 | ) AS weighted_score |
| | 231 | FROM ticket t |
| | 232 | JOIN trip tr ON t.trip_id = tr.trip_id |
| | 233 | JOIN route r ON tr.route_id = r.route_id |
| | 234 | JOIN location l_from ON r.from_location_id = l_from.location_id |
| | 235 | JOIN location l_to ON r.to_location_id = l_to.location_id |
| | 236 | JOIN transport_organizer to_ ON r.transport_organizer_id = to_.transport_organizer_id |
| | 237 | WHERE t.account_id = 300 AND t.date_purchased >= CURRENT_DATE - INTERVAL '3 months' |
| | 238 | GROUP BY r.route_id, l_from.name, l_to.name, to_.company_name |
| | 239 | ) |
| | 240 | SELECT |
| | 241 | route_id, |
| | 242 | from_location, |
| | 243 | to_location, |
| | 244 | transport_company, |
| | 245 | weighted_score |
| | 246 | FROM route_weighted_usage |
| | 247 | WHERE weighted_score >= (SELECT AVG(weighted_score) FROM route_weighted_usage) |
| | 248 | ORDER BY weighted_score DESC; |
| | 249 | }}} |
| | 250 | |
| | 251 | == Results (Relational Algebra) == |
| | 252 | {{{ |
| | 253 | T1 = σ(account_id = 300 ∧ date_purchased ≥ CURRENT_DATE - 3m)(ticket) |
| | 254 | J1 = T1 ⨝ trip |
| | 255 | J2 = J1 ⨝ route |
| | 256 | J3 = J2 ⨝ l_from |
| | 257 | J4 = J3 ⨝ l_to |
| | 258 | J5 = J4 ⨝ transport_organizer |
| | 259 | |
| | 260 | P1 = π(route_id, l_from.name, l_to.name, to_.company_name, weight)( |
| | 261 | extended projection with CASE(date_purchased) → weight |
| | 262 | )(J5) |
| | 263 | |
| | 264 | G1 = γ(route_id, from_location, to_location, transport_company; |
| | 265 | SUM(weight) → weighted_score)(P1) |
| | 266 | |
| | 267 | A1 = γ(; AVG(weighted_score) → avg_score)(G1) |
| | 268 | |
| | 269 | Result = τ_desc(weighted_score)(σ(weighted_score ≥ avg_score)(G1)) |
| | 270 | }}} |
| | 271 | |
| | 272 | == Analysis (Optimizations) == |
| | 273 | * Push selection down: filter `ticket` by `account_id` and `date_purchased` before joins to reduce data early. |
| | 274 | * Compute `weight` in a projection step before aggregation for clarity and efficiency. |
| | 275 | * Replace correlated subquery for `AVG` with a window function or single aggregation to avoid repeated scans. |
| | 276 | * Indexing opportunities: |
| | 277 | - `ticket(account_id, date_purchased)` for fast filtering. |
| | 278 | - Foreign key indexes on `ticket.trip_id`, `trip.route_id`, and `route.{from_location_id, to_location_id, transport_organizer_id}`. |
| | 279 | |
| | 280 | == Conclusion (Rewritten SQL) == |
| | 281 | {{{ |
| | 282 | WITH route_scores AS ( |
| | 283 | SELECT |
| | 284 | r.route_id, |
| | 285 | l_from.name AS from_location, |
| | 286 | l_to.name AS to_location, |
| | 287 | to_.company_name AS transport_company, |
| | 288 | SUM( |
| | 289 | CASE |
| | 290 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE) |
| | 291 | THEN 0.5 |
| | 292 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') |
| | 293 | THEN 0.3 |
| | 294 | WHEN DATE_TRUNC('month', t.date_purchased) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months') |
| | 295 | THEN 0.2 |
| | 296 | ELSE 0 |
| | 297 | END |
| | 298 | ) AS weighted_score |
| | 299 | FROM ticket t |
| | 300 | JOIN trip tr ON t.trip_id = tr.trip_id |
| | 301 | JOIN route r ON tr.route_id = r.route_id |
| | 302 | JOIN location l_from ON r.from_location_id = l_from.location_id |
| | 303 | JOIN location l_to ON r.to_location_id = l_to.location_id |
| | 304 | JOIN transport_organizer to_ ON r.transport_organizer_id = to_.transport_organizer_id |
| | 305 | WHERE t.account_id = 300 |
| | 306 | AND t.date_purchased >= CURRENT_DATE - INTERVAL '3 months' |
| | 307 | GROUP BY r.route_id, l_from.name, l_to.name, to_.company_name |
| | 308 | ) |
| | 309 | SELECT * |
| | 310 | FROM ( |
| | 311 | SELECT route_scores.*, AVG(weighted_score) OVER () AS avg_score |
| | 312 | FROM route_scores |
| | 313 | ) ranked |
| | 314 | WHERE weighted_score >= avg_score |
| | 315 | ORDER BY weighted_score DESC; |
| | 316 | }}} |
| | 317 | |
| | 318 | |
| | 319 | |
| | 320 | == 5. Most popular destinations in the past year == |
| | 321 | |
| | 322 | ==== Original Query |
| | 323 | {{{ |
| | 324 | WITH destinations AS ( |
| | 325 | SELECT |
| | 326 | l.name AS destination, |
| | 327 | COUNT(t.ticket_id) AS total_passengers, |
| | 328 | COUNT(DISTINCT t.account_id) AS unique_customers, |
| | 329 | SUM(t.price) AS total_revenue, |
| | 330 | COUNT(st.student_ticket_id) AS student_tickets, |
| | 331 | COUNT(ct.child_ticket_id) AS child_tickets, |
| | 332 | l.latitude, |
| | 333 | l.longitude |
| | 334 | FROM location l |
| | 335 | JOIN ticket t ON l.location_id = t.gets_off_location_id |
| | 336 | JOIN trip tr ON t.trip_id = tr.trip_id |
| | 337 | JOIN transport_organizer to_org ON tr.transport_organizer_id = to_org.transport_organizer_id |
| | 338 | LEFT JOIN student_ticket st ON t.ticket_id = st.ticket_id |
| | 339 | LEFT JOIN child_ticket ct ON t.ticket_id = ct.ticket_id |
| | 340 | GROUP BY l.location_id, l.name, l.latitude, l.longitude |
| | 341 | ), |
| | 342 | stats AS ( |
| | 343 | SELECT AVG(total_passengers) AS mean_passengers |
| | 344 | FROM destinations |
| | 345 | ) |
| | 346 | SELECT |
| | 347 | d.destination, |
| | 348 | d.total_passengers, |
| | 349 | d.unique_customers, |
| | 350 | d.total_revenue, |
| | 351 | d.student_tickets, |
| | 352 | d.child_tickets |
| | 353 | FROM destinations d, stats |
| | 354 | WHERE d.total_passengers >= stats.mean_passengers |
| | 355 | ORDER BY d.total_passengers DESC; |
| | 356 | }}} |
| | 357 | |
| | 358 | == Results (Relational Algebra) == |
| | 359 | {{{ |
| | 360 | J1 = location ⨝ ticket (l.location_id = t.gets_off_location_id) |
| | 361 | J2 = J1 ⨝ trip |
| | 362 | J3 = J2 ⨝ transport_organizer |
| | 363 | J4 = J3 ⟕ student_ticket |
| | 364 | J5 = J4 ⟕ child_ticket |
| | 365 | |
| | 366 | G1 = γ(l.location_id, l.name, l.latitude, l.longitude; |
| | 367 | COUNT(ticket_id) → total_passengers, |
| | 368 | COUNT_DISTINCT(account_id) → unique_customers, |
| | 369 | SUM(price) → total_revenue, |
| | 370 | COUNT(student_ticket_id) → student_tickets, |
| | 371 | COUNT(child_ticket_id) → child_tickets)(J5) |
| | 372 | |
| | 373 | A1 = γ(; AVG(total_passengers) → mean_passengers)(G1) |
| | 374 | |
| | 375 | Result = τ_desc(total_passengers)(σ(total_passengers ≥ mean_passengers)(G1)) |
| | 376 | }}} |
| | 377 | |
| | 378 | == Analysis (Optimizations) == |
| | 379 | * Push projections early: keep only `location_id`, `ticket_id`, `account_id`, `price`, and join keys before grouping. |
| | 380 | * Apply the date filter (`past year`) explicitly in the `WHERE` clause on `ticket.date_purchased` (missing in original). |
| | 381 | * Replace cross join with `stats` by using a window function (`AVG() OVER ()`) to avoid scanning `destinations` twice. |
| | 382 | * Indexing opportunities: |
| | 383 | - `ticket(gets_off_location_id, date_purchased)` |
| | 384 | - `student_ticket.ticket_id`, `child_ticket.ticket_id` |
| | 385 | - Foreign keys on `trip.route_id`, `trip.transport_organizer_id`. |
| | 386 | |
| | 387 | == Conclusion (Rewritten SQL) == |
| | 388 | {{{ |
| | 389 | WITH destinations AS ( |
| | 390 | SELECT |
| | 391 | l.name AS destination, |
| | 392 | COUNT(t.ticket_id) AS total_passengers, |
| | 393 | COUNT(DISTINCT t.account_id) AS unique_customers, |
| | 394 | SUM(t.price) AS total_revenue, |
| | 395 | COUNT(st.student_ticket_id) AS student_tickets, |
| | 396 | COUNT(ct.child_ticket_id) AS child_tickets, |
| | 397 | l.latitude, |
| | 398 | l.longitude |
| | 399 | FROM location l |
| | 400 | JOIN ticket t ON l.location_id = t.gets_off_location_id |
| | 401 | JOIN trip tr ON t.trip_id = tr.trip_id |
| | 402 | JOIN transport_organizer to_org ON tr.transport_organizer_id = to_org.transport_organizer_id |
| | 403 | LEFT JOIN student_ticket st ON t.ticket_id = st.ticket_id |
| | 404 | LEFT JOIN child_ticket ct ON t.ticket_id = ct.ticket_id |
| | 405 | WHERE t.date_purchased >= CURRENT_DATE - INTERVAL '1 year' |
| | 406 | GROUP BY l.location_id, l.name, l.latitude, l.longitude |
| | 407 | ) |
| | 408 | SELECT * |
| | 409 | FROM ( |
| | 410 | SELECT d.*, |
| | 411 | AVG(total_passengers) OVER () AS mean_passengers |
| | 412 | FROM destinations d |
| | 413 | ) ranked |
| | 414 | WHERE total_passengers >= mean_passengers |
| | 415 | ORDER BY total_passengers DESC; |
| | 416 | }}} |