Annual analysis of course enrollments, completions and certificates
SELECT
EXTRACT(YEAR FROM e.enroll_date) AS Year,
COUNT(DISTINCT e.user_id) AS Total_Students,
COUNT(DISTINCT e.enrollment_id) AS Total_Enrollments,
COUNT(DISTINCT CASE WHEN e.completion_status = 'COMPLETED' THEN e.enrollment_id END) AS Completed_Enrollments,
COUNT(DISTINCT cert.certificate_id) AS Total_Certificates,
ROUND(
COUNT(DISTINCT CASE WHEN e.completion_status = 'COMPLETED' THEN e.enrollment_id END)::DECIMAL
/ NULLIF(COUNT(DISTINCT e.enrollment_id), 0) * 100, 2) AS Completion_Rate_Percentage,
ROUND(AVG(e.progress_percentage), 2) AS Avg_Progress_Percentage
FROM enrollment e
LEFT JOIN certificate cert
ON cert.enrollment_id = e.enrollment_id
GROUP BY EXTRACT(YEAR FROM e.enroll_date)
ORDER BY Year;
Solution Relational Algebra
- E(enrollment_id, user_id, course_id, enroll_date, completion_status, progress_percentage)
- CERT(certificate_id, enrollment_id, issue_date, certificate_code, status)
JOIN on all tables
J1 ← E ⟕ E.enrollment_id = CERT.enrollment_id CERT
Projection with date transformation
E' ← π enrollment_id, user_id, completion_status, progress_percentage, certificate_id, YEAR(enroll_date) → Year (J1)
Grouping and aggregate calculations
G ← Year γ COUNT(DISTINCT user_id) → Total_Students, COUNT(DISTINCT enrollment_id) → Total_Enrollments, COUNT(DISTINCT [completion_status = 'COMPLETED'] enrollment_id) → Completed_Enrollments, COUNT(DISTINCT certificate_id) → Total_Certificates, AVG(progress_percentage) → Avg_Progress_Percentage (E')
Final projection and percentage calculation
P ← π Year, Total_Students, Total_Enrollments, Completed_Enrollments, Total_Certificates, (Completed_Enrollments / Total_Enrollments) * 100 → Completion_Rate_Percentage, Avg_Progress_Percentage (G)
Chronological ordering by year R_final ← τ Year (P)
Monthly analysis of instructor activity by courses, students, modules, and lessons
SELECT
EXTRACT(YEAR FROM e.enroll_date) AS Year,
EXTRACT(MONTH FROM e.enroll_date) AS Month,
COUNT(DISTINCT i.id) AS Total_Instructors,
COUNT(DISTINCT c.course_id) AS Total_Courses,
COUNT(DISTINCT e.user_id) AS Total_Students,
COUNT(DISTINCT m.module_id) AS Total_Modules,
COUNT(DISTINCT l.lesson_id) AS Total_Lessons,
ROUND(AVG(c.price), 2) AS Avg_Course_Price
FROM enrollment e
JOIN course c
ON c.course_id = e.course_id
JOIN instructors i
ON i.id = c.instructor_id
LEFT JOIN module m
ON m.course_id = c.course_id
LEFT JOIN lesson l
ON l.module_id = m.module_id
GROUP BY
EXTRACT(YEAR FROM e.enroll_date),
EXTRACT(MONTH FROM e.enroll_date)
ORDER BY
Year,
Month;
Solution Relational Algebra
- E(enrollment_id, user_id, course_id, enroll_date, completion_status, progress_percentage)
- C(course_id, name, price, status, instructor_id)
- I(id)
- M(module_id, course_id, title, description)
- L(lesson_id, module_id, title, material)
JOIN on all tables:
J1 ← E ⋈ E.course_id = C.course_id C
J2 ← J1 ⋈ C.instructor_id = I.id I
J3 ← J2 ⟕ C.course_id = M.course_id M
J4 ← J3 ⟕ M.module_id = L.module_id L
Projection with instructor full name
F1 ← π user_id, course_id, id, module_id, lesson_id, price, YEAR(enroll_date) → Year, MONTH(enroll_date) → Month (J4)
Grouping and aggregate calculations
G ← Year, Month γ COUNT(DISTINCT id) → Total_Instructors, COUNT(DISTINCT course_id) → Total_Courses, COUNT(DISTINCT user_id) → Total_Students, COUNT(DISTINCT module_id) → Total_Modules, COUNT(DISTINCT lesson_id) → Total_Lessons, AVG(price) → Avg_Course_Price (F1)
Ordering by number of students and courses
R_final ← τ Year, Month (G)
Annual subscription and revenue analysis by subscription plan
SELECT
EXTRACT(YEAR FROM us.start_date) AS Year,
sp.name AS Plan_Name,
COUNT(DISTINCT us.subscription_id) AS Total_Subscriptions,
COUNT(DISTINCT us.user_id) AS Total_Users,
COUNT(DISTINCT p.payment_id) AS Total_Payments,
ROUND(SUM(COALESCE(p.amount, 0)), 2) AS Total_Revenue,
ROUND(AVG(COALESCE(p.amount, 0)), 2) AS Avg_Payment_Amount
FROM user_subscription us
JOIN subscription_plan sp
ON sp.plan_id = us.plan_id
LEFT JOIN payment p
ON p.subscription_id = us.subscription_id
GROUP BY
EXTRACT(YEAR FROM us.start_date),
sp.name
ORDER BY
Year,
Total_Revenue DESC;
Solution Relational Algebra
- US(subscription_id, user_id, plan_id, start_date, end_date, status)
- SP(plan_id, name, price, duration_months, description, access_type)
- P(payment_id, user_id, subscription_id, amount)
JOIN on all tables
J1 ← US ⋈ US.plan_id = SP.plan_id SP
J2 ← J1 ⋈ US.subscription_id = P.subscription_id P
Projection with date transformation
F1 ← π subscription_id, user_id, payment_id, amount, name, YEAR(start_date) → Year (J2)
Renaming attribute
F2 ← ρ Plan_Name/name (F1)
Grouping and aggregate calculations
G ← Year, Plan_Name γ COUNT(DISTINCT subscription_id) → Total_Subscriptions, COUNT(DISTINCT user_id) → Total_Users, COUNT(DISTINCT payment_id) → Total_Payments, SUM(amount) → Total_Revenue, AVG(amount) → Avg_Payment_Amount (F2)
Chronological ordering and descending revenue ordering
R_final ← τ Year, Total_Revenue DESC (G)
Yearly category analysis by attempts, students, average score, and pass rate
SELECT
EXTRACT(YEAR FROM qa.attempt_date) AS Year,
cat.name AS Category_Name,
COUNT(DISTINCT qa.attempt_id) AS Total_Attempts,
COUNT(DISTINCT qa.user_id) AS Total_Students,
ROUND(AVG(qa.score), 2) AS Avg_Score,
COUNT(
DISTINCT CASE
WHEN qa.score >= q.passing_score
THEN qa.attempt_id
END
) AS Passed_Attempts,
ROUND(
COUNT(
DISTINCT CASE
WHEN qa.score >= q.passing_score
THEN qa.attempt_id
END
)::DECIMAL /
NULLIF(COUNT(DISTINCT qa.attempt_id),0) * 100,
2
) AS Pass_Rate_Percentage
FROM quiz_attempt qa
JOIN quiz q
ON q.quiz_id = qa.quiz_id
JOIN lesson l
ON l.lesson_id = q.lesson_id
JOIN module m
ON m.module_id = l.module_id
JOIN course c
ON c.course_id = m.course_id
JOIN course_category cc
ON cc.course_id = c.course_id
JOIN category cat
ON cat.category_id = cc.category_id
GROUP BY
EXTRACT(YEAR FROM qa.attempt_date),
cat.name
ORDER BY
Year,
Pass_Rate_Percentage DESC;
Solution Relational Algebra
- QA(attempt_id, score, attempt_date, user_id, quiz_id)
- Q(quiz_id, total_points, passing_score, lesson_id)
- L(lesson_id, module_id, title, material)
- M(module_id, course_id, title, description)
- C(course_id, name, price, status, instructor_id)
- CC(course_id, category_id)
- CAT(category_id, name, description)
JOIN on all tables:
J1 ← QA ⋈ QA.quiz_id = Q.quiz_id Q
J2 ← J1 ⋈ Q.lesson_id = L.lesson_id L
J3 ← J2 ⋈ L.module_id = M.module_id M
J4 ← J3 ⋈ M.course_id = C.course_id C
J5 ← J4 ⋈ C.course_id = CC.course_id CC
J6 ← J5 ⋈ CC.category_id = CAT.category_id CAT
Projection and renaming
F1 ← π attempt_id, user_id, score, passing_score, CAT.name → Category_Name, YEAR(attempt_date) → Year (J6)
Grouping and aggregate calculations
G ← Year, Category_Name γ
COUNT(DISTINCT attempt_id) → Total_Attempts,
COUNT(DISTINCT user_id) → Total_Students,
AVG(score) → Avg_Score,
COUNT(DISTINCT [score ≥ passing_score] attempt_id) → Passed_Attempts (F1)
Final projection and percentage calculation
P ← π Year, Category_Name, Total_Attempts, Total_Students, Avg_Score, Passed_Attempts,
(Passed_Attempts / Total_Attempts) * 100 → Pass_Rate_Percentage (G)
Chronological ordering by year
R_final ← τ Year, Pass_Rate_Percentage DESC (P)
