wiki:AnalyticalStatisticalQuerying

Version 2 (modified by 221507, 12 hours ago) ( diff )

--

Analytical and statistical querying

1. Finding routes and subroutes for arbitrary locations

This SQL query retrieves trips that include both a specified start location and end location as stops, ensuring the start location comes before the end location in the stop sequence, excluding direct routes between them unless the WHERE clause is commented out.

WITH start_location AS (
    SELECT ts.trip_id, ts.stop_time
    FROM trip_stops ts
    WHERE ts.location_id = 100 -- ohrid
),
end_location AS (
    SELECT ts.trip_id, ts.stop_time
    FROM trip_stops ts
    WHERE ts.location_id = 300 -- skopje
),
trips AS (
    SELECT s.trip_id
    FROM start_location s
    JOIN end_location e ON s.trip_id = e.trip_id
    WHERE s.stop_time < e.stop_time
)
SELECT 
    t.trip_id,
    t.route_id,
    r.from_location_id,
    r.to_location_id,
    from_loc.name AS from_name,
    to_loc.name AS to_name,
    to_org.company_name AS transport_company,
    t.status
FROM trip t
JOIN trips tr ON t.trip_id = tr.trip_id
JOIN route r ON t.route_id = r.route_id
JOIN location from_loc ON r.from_location_id = from_loc.location_id
JOIN location to_loc ON r.to_location_id = to_loc.location_id
JOIN transport_organizer to_org ON r.transport_organizer_id = to_org.transport_organizer_id
WHERE NOT (r.from_location_id = 100 AND r.to_location_id = 300) -- commenting this line here gives ONLY SUBROUTES!

2. Transport company performance overview

This SQL query aggregates performance metrics for each transport organizer, including routes operated, trips organized, tickets sold, total revenue, average ticket price, unique customers, and average rating, sorted by total revenue in descending order.

WITH company_performance AS (
    SELECT 
        to_org.company_name,
        COUNT(DISTINCT r.route_id) as routes_operated,
        COUNT(DISTINCT t.trip_id) as trips_organized,
        COUNT(tk.ticket_id) as total_tickets_sold,
        SUM(COALESCE(tk.price, 0)) as total_revenue,
        AVG(COALESCE(tk.price, 0)) as avg_ticket_price,
        COUNT(DISTINCT tk.account_id) as unique_customers,
        AVG(COALESCE(rev.rating, 0)) as avg_rating
    FROM transport_organizer to_org
    JOIN route r ON to_org.transport_organizer_id = r.transport_organizer_id
    JOIN trip t ON r.route_id = t.route_id
    LEFT JOIN ticket tk ON t.trip_id = tk.trip_id
    LEFT JOIN review rev ON tk.ticket_id = rev.ticket_id
    GROUP BY to_org.transport_organizer_id, to_org.company_name
)
SELECT 
    company_name,
    routes_operated,
    trips_organized,
    total_tickets_sold,
    total_revenue,
    avg_ticket_price,
    unique_customers,
    ROUND(avg_rating, 2) as avg_rating
FROM company_performance
ORDER BY total_revenue DESC;

3. Top user-purchased routes

This SQL query retrieves the top three routes most frequently purchased by a specified user, based on ticket counts, including route details and transport company, sorted by tickets purchased in descending order. Useful for fast recommendation of routes per user.

SELECT 
    r.route_id,
    l_from.name AS from_location,
    l_to.name AS to_location,
    COUNT(t.ticket_id) AS tickets_purchased,
    to_.company_name AS transport_company
FROM ticket t
JOIN trip tr ON t.trip_id = tr.trip_id
JOIN route r ON tr.route_id = r.route_id
JOIN location l_from ON r.from_location_id = l_from.location_id
JOIN location l_to ON r.to_location_id = l_to.location_id
JOIN transport_organizer to_ ON r.transport_organizer_id = to_.transport_organizer_id
WHERE  t.account_id = 300 -- this can be changed arbitrarily 
GROUP BY r.route_id, l_from.name, l_to.name, to_.company_name
ORDER BY tickets_purchased DESC
LIMIT 3;

4. Ticket sales revenue by weekday and route

This query analyzes completed trips to show ticket volume, total and average revenue, and route details. Useful to check trends throughout the week.

SELECT 
    TO_CHAR(tk.date_purchased, 'Day') AS day_of_week,
    COUNT(tk.ticket_id) AS total_tickets,
    SUM(p.total_price) AS total_revenue,
    AVG(p.total_price) AS avg_ticket_price,
    r.from_location_id,
    r.to_location_id
FROM ticket tk
JOIN payment p ON tk.payment_id = p.payment_id
JOIN trip t ON tk.trip_id = t.trip_id
JOIN route r ON t.route_id = r.route_id
WHERE t.status = 'COMPLETED'
GROUP BY TO_CHAR(tk.date_purchased, 'Day'), r.from_location_id, r.to_location_id
ORDER BY total_tickets DESC;

5. Most popular destinations in the past year

This SQL query retrieves the top 5 most popular destinations from the last year, showing passenger volume, revenue, and breakdown of student and child ticket sales for each location.

SELECT 
    l.name AS destination,
    COUNT(t.ticket_id) AS total_passengers,
    COUNT(DISTINCT t.account_id) AS unique_customers,
    SUM(t.price) AS total_revenue,
    COUNT(st.student_ticket_id) AS student_tickets,
    COUNT(ct.child_ticket_id) AS child_tickets
FROM location l
JOIN ticket t ON l.location_id = t.gets_off_location_id
JOIN trip tr ON t.trip_id = tr.trip_id
JOIN transport_organizer TO_id ON tr.transport_organizer_id = TO_id.transport_organizer_id
LEFT JOIN student_ticket st ON t.ticket_id = st.ticket_id
LEFT JOIN child_ticket ct ON t.ticket_id = ct.ticket_id
WHERE t.date_purchased >= CURRENT_DATE - INTERVAL '1 year'
GROUP BY l.location_id, l.name, l.latitude, l.longitude
ORDER BY total_passengers DESC
LIMIT 5;
Note: See TracWiki for help on using the wiki.