DatabaseCreation-AdvDb: shifter_dml.sql

File shifter_dml.sql, 54.8 KB (added by 231175, 2 days ago)
Line 
1-- ============================================================================
2-- SHIFTER COMPLETE DEMO DATA GENERATION
3-- ============================================================================
4-- Generates realistic demo data for ALL 49 tables with:
5-- - 1-5 million users
6-- - 50,000 courses (for index testing)
7-- - Multiple course versions per course
8-- - Enrollments (UNIQUE per user+course_version, some users with 100k-1M enrollments)
9-- - All related entities automatically scaled
10-- - Multi-language support (mk, en, es, it, de, sr, hr)
11-- ============================================================================
12
13SET client_min_messages = WARNING;
14
15-- Configuration
16DO $$
17BEGIN
18 RAISE NOTICE '============================================';
19 RAISE NOTICE 'SHIFTER DEMO DATA GENERATION';
20 RAISE NOTICE '============================================';
21 RAISE NOTICE 'Target Configuration:';
22 RAISE NOTICE ' - Users: 3,000,000';
23 RAISE NOTICE ' - Courses: 50,000';
24 RAISE NOTICE ' - Topics: 100';
25 RAISE NOTICE ' - Skills: 500';
26 RAISE NOTICE ' - Languages: 7';
27 RAISE NOTICE ' - Power Users: 50 (100k-1M enrollments each)';
28 RAISE NOTICE ' - Normal Users: 100k (1-50 enrollments each)';
29 RAISE NOTICE '============================================';
30END $$;
31
32-- ============================================================================
33-- 1. LANGUAGE (7 languages)
34-- ============================================================================
35
36INSERT INTO language (language) VALUES
37 ('mk'), ('en'), ('es'), ('it'), ('de'), ('sr'), ('hr')
38ON CONFLICT (language) DO NOTHING;
39
40RAISE NOTICE 'Languages inserted: 7';
41
42-- ============================================================================
43-- 2. ADMIN (10 admins)
44-- ============================================================================
45
46INSERT INTO admin (email, password_hash)
47SELECT
48 'admin' || i || '@shifter.mk',
49 '$2a$10$' || md5(random()::text || i::text)
50FROM generate_series(1, 10) i;
51
52DO $$ BEGIN RAISE NOTICE 'Admins inserted: %', (SELECT COUNT(*) FROM admin); END $$;
53
54-- ============================================================================
55-- 3. EXPERT (2000 experts for course creation)
56-- ============================================================================
57
58INSERT INTO expert (name, email, password_hash, login_provider)
59SELECT
60 'Expert ' || i,
61 'expert' || i || '@shifter.mk',
62 CASE WHEN random() < 0.3 THEN NULL ELSE '$2a$10$' || md5(random()::text || i::text) END,
63 CASE WHEN random() < 0.3 THEN 'GOOGLE' ELSE 'EMAIL' END
64FROM generate_series(1, 2000) i;
65
66DO $$ BEGIN RAISE NOTICE 'Experts inserted: %', (SELECT COUNT(*) FROM expert); END $$;
67
68-- ============================================================================
69-- 4. USERS (3 MILLION - Batched for performance)
70-- ============================================================================
71
72DO $$
73DECLARE
74 batch_size INTEGER := 100000;
75 total_users INTEGER := 3000000;
76 num_batches INTEGER := total_users / batch_size;
77 current_batch INTEGER;
78 start_id INTEGER;
79 end_id INTEGER;
80BEGIN
81 FOR current_batch IN 1..num_batches LOOP
82 start_id := (current_batch - 1) * batch_size + 1;
83 end_id := current_batch * batch_size;
84
85 INSERT INTO "user" (
86 name, email, password_hash, login_provider, verified, profile_completed,
87 deleted, used_free_consultation, company_size, work_position, points, created_at
88 )
89 SELECT
90 'User ' || i,
91 'user' || i || '@demo.mk',
92 CASE WHEN random() < 0.3 THEN NULL ELSE '$2a$10$' || md5(random()::text || i::text) END,
93 CASE WHEN random() < 0.35 THEN 'GOOGLE' ELSE 'EMAIL' END,
94 random() < 0.72,
95 random() < 0.65,
96 random() < 0.03,
97 random() < 0.18,
98 CASE
99 WHEN random() < 0.25 THEN 'FREELANCE'
100 WHEN random() < 0.45 THEN 'SMALL'
101 WHEN random() < 0.70 THEN 'MEDIUM'
102 WHEN random() < 0.90 THEN 'LARGE'
103 ELSE 'ENTERPRISE'
104 END,
105 (ARRAY['Software Engineer', 'Product Manager', 'Designer', 'Marketing Specialist',
106 'Business Analyst', 'Data Scientist', 'DevOps Engineer', 'Sales Manager',
107 'HR Manager', 'Financial Analyst', 'Content Creator', 'Operations Manager'])[1 + floor(random() * 12)::INTEGER],
108 (random() * 15000)::INTEGER,
109 CURRENT_TIMESTAMP - (random() * INTERVAL '3 years')
110 FROM generate_series(start_id, end_id) i;
111
112 IF current_batch % 5 = 0 THEN
113 RAISE NOTICE 'User batch % of % completed (% users)', current_batch, num_batches, end_id;
114 END IF;
115 END LOOP;
116END $$;
117
118DO $$ BEGIN RAISE NOTICE 'Users inserted: %', (SELECT COUNT(*) FROM "user"); END $$;
119
120-- ============================================================================
121-- 5. VERIFICATION_TOKEN (For unverified users)
122-- ============================================================================
123
124INSERT INTO verification_token (user_id, created_at, expires_at)
125SELECT
126 id,
127 created_at,
128 created_at + INTERVAL '24 hours'
129FROM "user"
130WHERE verified = FALSE
131LIMIT 500000; -- First 500k unverified users
132
133DO $$ BEGIN RAISE NOTICE 'Verification tokens inserted: %', (SELECT COUNT(*) FROM verification_token); END $$;
134
135-- ============================================================================
136-- 6. MEETING_EMAIL_REMINDER (For consultation meetings)
137-- ============================================================================
138
139INSERT INTO meeting_email_reminder (user_id, meeting_at, scheduled_at, sent, meeting_link)
140SELECT
141 id,
142 CURRENT_TIMESTAMP + (random() * INTERVAL '30 days'),
143 CURRENT_TIMESTAMP - (random() * INTERVAL '7 days'),
144 random() < 0.4,
145 CASE WHEN random() < 0.8 THEN 'https://meet.shifter.mk/' || md5(random()::text) ELSE NULL END
146FROM "user"
147WHERE used_free_consultation = TRUE OR random() < 0.15
148LIMIT 200000;
149
150DO $$ BEGIN RAISE NOTICE 'Meeting reminders inserted: %', (SELECT COUNT(*) FROM meeting_email_reminder); END $$;
151
152-- ============================================================================
153-- 7. TOPIC (100 topics)
154-- ============================================================================
155
156INSERT INTO topic (slug)
157SELECT
158 CASE i
159 WHEN 1 THEN 'programming'
160 WHEN 2 THEN 'web-development'
161 WHEN 3 THEN 'data-science'
162 WHEN 4 THEN 'machine-learning'
163 WHEN 5 THEN 'artificial-intelligence'
164 WHEN 6 THEN 'digital-marketing'
165 WHEN 7 THEN 'business-analytics'
166 WHEN 8 THEN 'project-management'
167 WHEN 9 THEN 'leadership'
168 WHEN 10 THEN 'finance'
169 WHEN 11 THEN 'design'
170 WHEN 12 THEN 'ux-ui'
171 WHEN 13 THEN 'mobile-development'
172 WHEN 14 THEN 'cloud-computing'
173 WHEN 15 THEN 'devops'
174 WHEN 16 THEN 'cybersecurity'
175 WHEN 17 THEN 'blockchain'
176 WHEN 18 THEN 'sales'
177 WHEN 19 THEN 'customer-success'
178 WHEN 20 THEN 'human-resources'
179 ELSE 'topic-' || i
180 END
181FROM generate_series(1, 100) i;
182
183-- Topic translations
184INSERT INTO topic_translate (topic_id, language_id, title, description)
185SELECT
186 t.id,
187 l.id,
188 CASE l.language
189 WHEN 'mk' THEN 'Тема ' || t.slug
190 WHEN 'en' THEN 'Topic ' || t.slug
191 WHEN 'es' THEN 'Tema ' || t.slug
192 WHEN 'it' THEN 'Argomento ' || t.slug
193 WHEN 'de' THEN 'Thema ' || t.slug
194 WHEN 'sr' THEN 'Тема ' || t.slug
195 WHEN 'hr' THEN 'Tema ' || t.slug
196 END,
197 CASE l.language
198 WHEN 'mk' THEN 'Опис за тема ' || t.slug
199 WHEN 'en' THEN 'Description for topic ' || t.slug
200 WHEN 'es' THEN 'Descripción del tema ' || t.slug
201 WHEN 'it' THEN 'Descrizione argomento ' || t.slug
202 WHEN 'de' THEN 'Themenbeschreibung ' || t.slug
203 WHEN 'sr' THEN 'Опис теме ' || t.slug
204 WHEN 'hr' THEN 'Opis teme ' || t.slug
205 END
206FROM topic t
207CROSS JOIN language l;
208
209DO $$ BEGIN RAISE NOTICE 'Topics inserted: %, Translations: %',
210 (SELECT COUNT(*) FROM topic), (SELECT COUNT(*) FROM topic_translate); END $$;
211
212-- ============================================================================
213-- 8. SKILL (500 skills)
214-- ============================================================================
215
216INSERT INTO skill (created_at)
217SELECT CURRENT_TIMESTAMP - (random() * INTERVAL '2 years')
218FROM generate_series(1, 500) i;
219
220-- Skill translations
221INSERT INTO skill_translate (skill_id, language_id, title, description)
222SELECT
223 s.id,
224 l.id,
225 CASE l.language
226 WHEN 'mk' THEN (ARRAY['Python', 'JavaScript', 'Java', 'SQL', 'React', 'Лидерство', 'Комуникација', 'Анализа'])[1 + (s.id % 8)] || ' ' || s.id
227 WHEN 'en' THEN (ARRAY['Python', 'JavaScript', 'Java', 'SQL', 'React', 'Leadership', 'Communication', 'Analysis'])[1 + (s.id % 8)] || ' ' || s.id
228 WHEN 'es' THEN (ARRAY['Python', 'JavaScript', 'Java', 'SQL', 'React', 'Liderazgo', 'Comunicación', 'Análisis'])[1 + (s.id % 8)] || ' ' || s.id
229 WHEN 'it' THEN (ARRAY['Python', 'JavaScript', 'Java', 'SQL', 'React', 'Leadership', 'Comunicazione', 'Analisi'])[1 + (s.id % 8)] || ' ' || s.id
230 WHEN 'de' THEN (ARRAY['Python', 'JavaScript', 'Java', 'SQL', 'React', 'Führung', 'Kommunikation', 'Analyse'])[1 + (s.id % 8)] || ' ' || s.id
231 WHEN 'sr' THEN (ARRAY['Python', 'JavaScript', 'Java', 'SQL', 'React', 'Лидерство', 'Комуникација', 'Анализа'])[1 + (s.id % 8)] || ' ' || s.id
232 WHEN 'hr' THEN (ARRAY['Python', 'JavaScript', 'Java', 'SQL', 'React', 'Vodstvo', 'Komunikacija', 'Analiza'])[1 + (s.id % 8)] || ' ' || s.id
233 END,
234 CASE l.language
235 WHEN 'mk' THEN 'Опис за вештина ' || s.id
236 WHEN 'en' THEN 'Skill description ' || s.id
237 WHEN 'es' THEN 'Descripción de habilidad ' || s.id
238 WHEN 'it' THEN 'Descrizione abilità ' || s.id
239 WHEN 'de' THEN 'Fähigkeitsbeschreibung ' || s.id
240 WHEN 'sr' THEN 'Опис вештине ' || s.id
241 WHEN 'hr' THEN 'Opis vještine ' || s.id
242 END
243FROM skill s
244CROSS JOIN language l;
245
246DO $$ BEGIN RAISE NOTICE 'Skills inserted: %, Translations: %',
247 (SELECT COUNT(*) FROM skill), (SELECT COUNT(*) FROM skill_translate); END $$;
248
249-- ============================================================================
250-- 9. COURSE (50,000 courses for index testing)
251-- ============================================================================
252
253DO $$
254DECLARE
255 batch_size INTEGER := 5000;
256 total_courses INTEGER := 50000;
257 num_batches INTEGER := total_courses / batch_size;
258 current_batch INTEGER;
259 start_id INTEGER;
260 end_id INTEGER;
261BEGIN
262 FOR current_batch IN 1..num_batches LOOP
263 start_id := (current_batch - 1) * batch_size + 1;
264 end_id := current_batch * batch_size;
265
266 INSERT INTO course (
267 expert_id, difficulty, estimated_duration, is_published,
268 is_featured, created_at, updated_at
269 )
270 SELECT
271 (SELECT id FROM expert ORDER BY random() LIMIT 1),
272 (ARRAY['BEGINNER', 'INTERMEDIATE', 'ADVANCED', 'EXPERT']::course_difficulty_enum[])[1 + floor(random() * 4)::INTEGER],
273 (1800 + random() * 43200)::INTEGER, -- 30min to 13 hours
274 random() < 0.92,
275 random() < 0.08,
276 CURRENT_TIMESTAMP - (random() * INTERVAL '3 years'),
277 CURRENT_TIMESTAMP - (random() * INTERVAL '6 months')
278 FROM generate_series(start_id, end_id) i;
279
280 IF current_batch % 2 = 0 THEN
281 RAISE NOTICE 'Course batch % of % completed (% courses)', current_batch, num_batches, end_id;
282 END IF;
283 END LOOP;
284END $$;
285
286DO $$ BEGIN RAISE NOTICE 'Courses inserted: %', (SELECT COUNT(*) FROM course); END $$;
287
288-- ============================================================================
289-- 10. COURSE_TRANSLATE (7 languages × 50k courses = 350k rows)
290-- ============================================================================
291
292DO $$
293DECLARE
294 batch_size INTEGER := 50000;
295 total_courses INTEGER := 50000;
296 num_batches INTEGER := CEIL(total_courses::NUMERIC / batch_size);
297 current_batch INTEGER;
298 start_id BIGINT;
299 end_id BIGINT;
300BEGIN
301 FOR current_batch IN 1..num_batches LOOP
302 SELECT MIN(id), MAX(id) INTO start_id, end_id
303 FROM (
304 SELECT id FROM course ORDER BY id LIMIT batch_size OFFSET (current_batch - 1) * batch_size
305 ) sub;
306
307 INSERT INTO course_translate (course_id, language_id, title, description, short_description)
308 SELECT
309 c.id,
310 l.id,
311 CASE l.language
312 WHEN 'mk' THEN (ARRAY['Програмирање', 'Дигитален Маркетинг', 'Дизајн', 'Бизнис Аналитика',
313 'Управување со Проекти', 'Финансии', 'Лидерство', 'Податочна Наука'])[1 + (c.id % 8)] || ' - Курс ' || c.id
314 WHEN 'en' THEN (ARRAY['Programming', 'Digital Marketing', 'Design', 'Business Analytics',
315 'Project Management', 'Finance', 'Leadership', 'Data Science'])[1 + (c.id % 8)] || ' - Course ' || c.id
316 WHEN 'es' THEN 'Curso ' || c.id || ': Programación'
317 WHEN 'it' THEN 'Corso ' || c.id || ': Programmazione'
318 WHEN 'de' THEN 'Kurs ' || c.id || ': Programmierung'
319 WHEN 'sr' THEN 'Курс ' || c.id || ': Програмирање'
320 WHEN 'hr' THEN 'Tečaj ' || c.id || ': Programiranje'
321 END,
322 CASE l.language
323 WHEN 'mk' THEN 'Детален опис за курс ' || c.id || '. Научете практични вештини.'
324 WHEN 'en' THEN 'Detailed description for course ' || c.id || '. Learn practical skills.'
325 WHEN 'es' THEN 'Descripción detallada del curso ' || c.id
326 WHEN 'it' THEN 'Descrizione dettagliata del corso ' || c.id
327 WHEN 'de' THEN 'Detaillierte Kursbeschreibung ' || c.id
328 WHEN 'sr' THEN 'Детаљан опис курса ' || c.id
329 WHEN 'hr' THEN 'Detaljan opis tečaja ' || c.id
330 END,
331 CASE l.language
332 WHEN 'mk' THEN 'Краток опис'
333 WHEN 'en' THEN 'Short description'
334 ELSE 'Short desc'
335 END
336 FROM course c
337 CROSS JOIN language l
338 WHERE c.id BETWEEN start_id AND end_id;
339
340 RAISE NOTICE 'Course translations batch % of % completed', current_batch, num_batches;
341 END LOOP;
342END $$;
343
344DO $$ BEGIN RAISE NOTICE 'Course translations inserted: %', (SELECT COUNT(*) FROM course_translate); END $$;
345
346-- ============================================================================
347-- 11. COURSE_TOPIC (Each course has 1-3 topics)
348-- ============================================================================
349
350INSERT INTO course_topic (course_id, topic_id)
351SELECT DISTINCT ON (c.id, t.id)
352 c.id,
353 t.id
354FROM course c
355CROSS JOIN LATERAL (
356 SELECT id FROM topic ORDER BY random() LIMIT (1 + floor(random() * 2)::INTEGER)
357) t;
358
359DO $$ BEGIN RAISE NOTICE 'Course topics inserted: %', (SELECT COUNT(*) FROM course_topic); END $$;
360
361-- ============================================================================
362-- 12. COURSE_VERSION (1-4 versions per course)
363-- ============================================================================
364
365INSERT INTO course_version (
366 course_id, version_number, is_current, published_at, created_at
367)
368SELECT
369 c.id,
370 ver,
371 ver = max_ver,
372 CURRENT_TIMESTAMP - (random() * INTERVAL '2 years'),
373 CURRENT_TIMESTAMP - (random() * INTERVAL '2 years')
374FROM course c
375CROSS JOIN LATERAL (
376 SELECT
377 generate_series(1, (1 + floor(random() * 3))::INTEGER) as ver,
378 (1 + floor(random() * 3))::INTEGER as max_ver
379) v;
380
381DO $$ BEGIN RAISE NOTICE 'Course versions inserted: %', (SELECT COUNT(*) FROM course_version); END $$;
382
383-- ============================================================================
384-- 13. COURSE_CONTENT (3-10 modules per version)
385-- ============================================================================
386
387INSERT INTO course_content (
388 course_version_id, parent_id, content_type,
389 order_index, duration, is_preview, created_at
390)
391SELECT
392 cv.id,
393 NULL,
394 (ARRAY['VIDEO', 'ARTICLE', 'INTERACTIVE', 'EXERCISE']::content_type_enum[])[1 + floor(random() * 4)::INTEGER],
395 ROW_NUMBER() OVER (PARTITION BY cv.id ORDER BY random()),
396 (300 + random() * 3300)::INTEGER,
397 random() < 0.18,
398 cv.created_at
399FROM course_version cv
400CROSS JOIN generate_series(1, (3 + floor(random() * 7))::INTEGER) m;
401
402DO $$ BEGIN RAISE NOTICE 'Course content modules inserted: %', (SELECT COUNT(*) FROM course_content); END $$;
403
404-- Course content translations
405INSERT INTO course_content_translate (course_content_id, language_id, title, description)
406SELECT
407 cc.id,
408 l.id,
409 CASE l.language
410 WHEN 'mk' THEN 'Модул ' || cc.order_index
411 WHEN 'en' THEN 'Module ' || cc.order_index
412 WHEN 'es' THEN 'Módulo ' || cc.order_index
413 WHEN 'it' THEN 'Modulo ' || cc.order_index
414 WHEN 'de' THEN 'Modul ' || cc.order_index
415 WHEN 'sr' THEN 'Модул ' || cc.order_index
416 WHEN 'hr' THEN 'Modul ' || cc.order_index
417 END,
418 'Description for module ' || cc.id
419FROM course_content cc
420CROSS JOIN language l;
421
422DO $$ BEGIN RAISE NOTICE 'Course content translations inserted: %', (SELECT COUNT(*) FROM course_content_translate); END $$;
423
424-- ============================================================================
425-- 14. COURSE_LECTURE (3-12 lectures per module)
426-- ============================================================================
427
428INSERT INTO course_lecture (
429 course_content_id, order_index, duration,
430 video_url, is_preview, created_at
431)
432SELECT
433 cc.id,
434 ROW_NUMBER() OVER (PARTITION BY cc.id ORDER BY random()),
435 (120 + random() * 1680)::INTEGER,
436 'https://cdn.shifter.mk/videos/' || md5(random()::text || cc.id::text) || '.mp4',
437 random() < 0.12,
438 cc.created_at
439FROM course_content cc
440CROSS JOIN generate_series(1, (3 + floor(random() * 9))::INTEGER) lec;
441
442DO $$ BEGIN RAISE NOTICE 'Course lectures inserted: %', (SELECT COUNT(*) FROM course_lecture); END $$;
443
444-- Lecture translations
445INSERT INTO course_lecture_translate (course_lecture_id, language_id, title, description, transcript)
446SELECT
447 cl.id,
448 l.id,
449 CASE l.language
450 WHEN 'mk' THEN 'Лекција ' || cl.order_index
451 WHEN 'en' THEN 'Lecture ' || cl.order_index
452 WHEN 'es' THEN 'Lección ' || cl.order_index
453 WHEN 'it' THEN 'Lezione ' || cl.order_index
454 WHEN 'de' THEN 'Vorlesung ' || cl.order_index
455 WHEN 'sr' THEN 'Лекција ' || cl.order_index
456 WHEN 'hr' THEN 'Predavanje ' || cl.order_index
457 END,
458 'Lecture description ' || cl.id,
459 'Transcript for lecture ' || cl.id
460FROM course_lecture cl
461CROSS JOIN language l;
462
463DO $$ BEGIN RAISE NOTICE 'Course lecture translations inserted: %', (SELECT COUNT(*) FROM course_lecture_translate); END $$;
464
465-- ============================================================================
466-- 15. COURSE_PRICE (Each course has 1-2 pricing tiers)
467-- ============================================================================
468
469INSERT INTO course_price (
470 course_id, price, currency, valid_from, valid_until, is_active
471)
472SELECT
473 c.id,
474 CASE
475 WHEN random() < 0.18 THEN 0
476 ELSE (9.99 + random() * 390)::NUMERIC(10, 2)
477 END,
478 (ARRAY['USD', 'EUR', 'MKD'])[1 + floor(random() * 3)::INTEGER],
479 CURRENT_TIMESTAMP - INTERVAL '1 year',
480 CASE WHEN random() < 0.75 THEN NULL ELSE CURRENT_TIMESTAMP + INTERVAL '1 year' END,
481 TRUE
482FROM course c;
483
484DO $$ BEGIN RAISE NOTICE 'Course prices inserted: %', (SELECT COUNT(*) FROM course_price); END $$;
485
486-- ============================================================================
487-- 16. BUNDLE (1000 bundles)
488-- ============================================================================
489
490INSERT INTO bundle (
491 bundle_type, discount_percent, is_active, valid_from, valid_until, created_at
492)
493SELECT
494 (ARRAY['MANUAL', 'PERSONALIZED']::bundle_type_enum[])[1 + floor(random() * 2)::INTEGER],
495 (10 + random() * 45)::INTEGER,
496 random() < 0.88,
497 CURRENT_TIMESTAMP - INTERVAL '6 months',
498 CASE WHEN random() < 0.65 THEN NULL ELSE CURRENT_TIMESTAMP + INTERVAL '1 year' END,
499 CURRENT_TIMESTAMP - (random() * INTERVAL '1 year')
500FROM generate_series(1, 1000) i;
501
502-- Manual bundles
503INSERT INTO manual_bundle (bundle_id)
504SELECT id FROM bundle WHERE bundle_type = 'MANUAL';
505
506-- Personalized bundles
507INSERT INTO personalized_bundle (bundle_id, user_id, personalized_type)
508SELECT
509 b.id,
510 (SELECT id FROM "user" WHERE NOT deleted ORDER BY random() LIMIT 1),
511 (ARRAY['SKILL_GAP', 'INTEREST_BASED', 'CAREER_PATH', 'TRENDING']::personalized_type_enum[])[1 + floor(random() * 4)::INTEGER]
512FROM bundle b
513WHERE b.bundle_type = 'PERSONALIZED';
514
515-- Bundle translations
516INSERT INTO bundle_translate (bundle_id, language_id, title, description)
517SELECT
518 b.id,
519 l.id,
520 CASE l.language
521 WHEN 'mk' THEN 'Пакет ' || b.id
522 WHEN 'en' THEN 'Bundle ' || b.id
523 WHEN 'es' THEN 'Paquete ' || b.id
524 WHEN 'it' THEN 'Pacchetto ' || b.id
525 WHEN 'de' THEN 'Paket ' || b.id
526 WHEN 'sr' THEN 'Пакет ' || b.id
527 WHEN 'hr' THEN 'Paket ' || b.id
528 END,
529 'Bundle description ' || b.id
530FROM bundle b
531CROSS JOIN language l;
532
533DO $$ BEGIN RAISE NOTICE 'Bundles inserted: %, Manual: %, Personalized: %, Translations: %',
534 (SELECT COUNT(*) FROM bundle),
535 (SELECT COUNT(*) FROM manual_bundle),
536 (SELECT COUNT(*) FROM personalized_bundle),
537 (SELECT COUNT(*) FROM bundle_translate); END $$;
538
539-- ============================================================================
540-- 17. LEARNING_PATH (500 learning paths)
541-- ============================================================================
542
543INSERT INTO learning_path (
544 learning_path_type, estimated_duration, is_active, created_at
545)
546SELECT
547 (ARRAY['MANUAL', 'PERSONALIZED']::bundle_type_enum[])[1 + floor(random() * 2)::INTEGER],
548 (7200 + random() * 100800)::INTEGER,
549 random() < 0.82,
550 CURRENT_TIMESTAMP - (random() * INTERVAL '18 months')
551FROM generate_series(1, 500) i;
552
553-- Manual learning paths
554INSERT INTO manual_learning_path (learning_path_id)
555SELECT id FROM learning_path WHERE learning_path_type = 'MANUAL';
556
557-- Personalized learning paths
558INSERT INTO personalized_learning_path (learning_path_id, user_id, personalized_type)
559SELECT
560 lp.id,
561 (SELECT id FROM "user" WHERE NOT deleted ORDER BY random() LIMIT 1),
562 (ARRAY['SKILL_GAP', 'INTEREST_BASED', 'CAREER_PATH', 'TRENDING']::personalized_type_enum[])[1 + floor(random() * 4)::INTEGER]
563FROM learning_path lp
564WHERE lp.learning_path_type = 'PERSONALIZED';
565
566-- Learning path translations
567INSERT INTO learning_path_translate (learning_path_id, language_id, title, description)
568SELECT
569 lp.id,
570 l.id,
571 CASE l.language
572 WHEN 'mk' THEN 'Патека за учење ' || lp.id
573 WHEN 'en' THEN 'Learning Path ' || lp.id
574 WHEN 'es' THEN 'Ruta de Aprendizaje ' || lp.id
575 WHEN 'it' THEN 'Percorso ' || lp.id
576 WHEN 'de' THEN 'Lernpfad ' || lp.id
577 WHEN 'sr' THEN 'Путања ' || lp.id
578 WHEN 'hr' THEN 'Put Učenja ' || lp.id
579 END,
580 'Learning path description ' || lp.id
581FROM learning_path lp
582CROSS JOIN language l;
583
584DO $$ BEGIN RAISE NOTICE 'Learning paths inserted: %, Translations: %',
585 (SELECT COUNT(*) FROM learning_path), (SELECT COUNT(*) FROM learning_path_translate); END $$;
586
587-- ============================================================================
588-- 18. LEARNING_PATH_COURSE (2-8 courses per path)
589-- ============================================================================
590
591INSERT INTO learning_path_course (learning_path_id, course_id, order_index)
592SELECT DISTINCT ON (lp.id, c.id)
593 lp.id,
594 c.id,
595 ROW_NUMBER() OVER (PARTITION BY lp.id ORDER BY random())
596FROM learning_path lp
597CROSS JOIN LATERAL (
598 SELECT id FROM course WHERE is_published = TRUE ORDER BY random() LIMIT (2 + floor(random() * 6)::INTEGER)
599) c;
600
601DO $$ BEGIN RAISE NOTICE 'Learning path courses inserted: %', (SELECT COUNT(*) FROM learning_path_course); END $$;
602
603-- ============================================================================
604-- 19. QUIZ (1-3 quizzes per course)
605-- ============================================================================
606
607INSERT INTO quiz (
608 course_id, quiz_type, passing_score, max_attempts,
609 time_limit, points_value, created_at
610)
611SELECT
612 c.id,
613 (ARRAY['PRE_DIAGNOSTIC', 'MODULE_CHECKPOINT', 'FINAL_CERTIFICATION', 'PRACTICE']::quiz_type_enum[])[1 + floor(random() * 4)::INTEGER],
614 (60 + random() * 30)::INTEGER,
615 CASE WHEN random() < 0.25 THEN NULL ELSE (2 + floor(random() * 4))::INTEGER END,
616 CASE WHEN random() < 0.35 THEN NULL ELSE (1800 + random() * 5400)::INTEGER END,
617 (50 + random() * 450)::INTEGER,
618 c.created_at + (random() * INTERVAL '30 days')
619FROM course c
620CROSS JOIN generate_series(1, (1 + floor(random() * 2))::INTEGER) q;
621
622DO $$ BEGIN RAISE NOTICE 'Quizzes inserted: %', (SELECT COUNT(*) FROM quiz); END $$;
623
624-- Quiz translations
625INSERT INTO quiz_translate (quiz_id, language_id, title, description, instructions)
626SELECT
627 q.id,
628 l.id,
629 CASE l.language
630 WHEN 'mk' THEN 'Квиз ' || q.id
631 WHEN 'en' THEN 'Quiz ' || q.id
632 WHEN 'es' THEN 'Cuestionario ' || q.id
633 WHEN 'it' THEN 'Quiz ' || q.id
634 WHEN 'de' THEN 'Quiz ' || q.id
635 WHEN 'sr' THEN 'Квиз ' || q.id
636 WHEN 'hr' THEN 'Kviz ' || q.id
637 END,
638 'Quiz description ' || q.id,
639 'Quiz instructions ' || q.id
640FROM quiz q
641CROSS JOIN language l;
642
643DO $$ BEGIN RAISE NOTICE 'Quiz translations inserted: %', (SELECT COUNT(*) FROM quiz_translate); END $$;
644
645-- ============================================================================
646-- 20. QUESTION (5-25 per quiz)
647-- ============================================================================
648
649INSERT INTO question (
650 quiz_id, question_type, difficulty, points,
651 order_index, created_at
652)
653SELECT
654 qz.id,
655 (ARRAY['MULTIPLE_CHOICE', 'TRUE_FALSE', 'SHORT_ANSWER', 'MULTI_SELECT']::question_type_enum[])[1 + floor(random() * 4)::INTEGER],
656 (ARRAY['BEGINNER', 'INTERMEDIATE', 'ADVANCED', 'EXPERT']::course_difficulty_enum[])[1 + floor(random() * 4)::INTEGER],
657 (5 + random() * 25)::INTEGER,
658 ROW_NUMBER() OVER (PARTITION BY qz.id ORDER BY random()),
659 qz.created_at
660FROM quiz qz
661CROSS JOIN generate_series(1, (5 + floor(random() * 20))::INTEGER) ques;
662
663DO $$ BEGIN RAISE NOTICE 'Questions inserted: %', (SELECT COUNT(*) FROM question); END $$;
664
665-- Question translations
666INSERT INTO question_translate (question_id, language_id, question_text, explanation)
667SELECT
668 ques.id,
669 l.id,
670 CASE l.language
671 WHEN 'mk' THEN 'Прашање ' || ques.id || ': Која е вистинската опција?'
672 WHEN 'en' THEN 'Question ' || ques.id || ': What is the correct option?'
673 WHEN 'es' THEN 'Pregunta ' || ques.id || ': ¿Cuál es correcta?'
674 WHEN 'it' THEN 'Domanda ' || ques.id || ': Quale è corretta?'
675 WHEN 'de' THEN 'Frage ' || ques.id || ': Welche ist richtig?'
676 WHEN 'sr' THEN 'Питање ' || ques.id || ': Која је тачна?'
677 WHEN 'hr' THEN 'Pitanje ' || ques.id || ': Koja je točna?'
678 END,
679 'Explanation for question ' || ques.id
680FROM question ques
681CROSS JOIN language l;
682
683DO $$ BEGIN RAISE NOTICE 'Question translations inserted: %', (SELECT COUNT(*) FROM question_translate); END $$;
684
685-- ============================================================================
686-- 21. ANSWER_OPTION (2-6 per question)
687-- ============================================================================
688
689INSERT INTO answer_option (
690 question_id, is_correct, order_index, created_at
691)
692SELECT
693 q.id,
694 ROW_NUMBER() OVER (PARTITION BY q.id ORDER BY random()) = 1,
695 ROW_NUMBER() OVER (PARTITION BY q.id ORDER BY random()),
696 q.created_at
697FROM question q
698CROSS JOIN generate_series(1, (2 + floor(random() * 4))::INTEGER) opt;
699
700DO $$ BEGIN RAISE NOTICE 'Answer options inserted: %', (SELECT COUNT(*) FROM answer_option); END $$;
701
702-- Answer option translations
703INSERT INTO answer_option_translate (answer_option_id, language_id, option_text)
704SELECT
705 ao.id,
706 l.id,
707 CASE l.language
708 WHEN 'mk' THEN 'Опција ' || ao.order_index || ' за прашање ' || ao.question_id
709 WHEN 'en' THEN 'Option ' || ao.order_index || ' for question ' || ao.question_id
710 WHEN 'es' THEN 'Opción ' || ao.order_index
711 WHEN 'it' THEN 'Opzione ' || ao.order_index
712 WHEN 'de' THEN 'Option ' || ao.order_index
713 WHEN 'sr' THEN 'Опција ' || ao.order_index
714 WHEN 'hr' THEN 'Opcija ' || ao.order_index
715 END
716FROM answer_option ao
717CROSS JOIN language l;
718
719DO $$ BEGIN RAISE NOTICE 'Answer option translations inserted: %', (SELECT COUNT(*) FROM answer_option_translate); END $$;
720
721-- ============================================================================
722-- 22. QUESTION_SKILL (Each question maps to 1-4 skills)
723-- ============================================================================
724
725INSERT INTO question_skill (question_id, skill_id, weight)
726SELECT DISTINCT ON (q.id, s.id)
727 q.id,
728 s.id,
729 (0.5 + random() * 2.5)::NUMERIC(5, 2)
730FROM question q
731CROSS JOIN LATERAL (
732 SELECT id FROM skill ORDER BY random() LIMIT (1 + floor(random() * 3)::INTEGER)
733) s;
734
735DO $$ BEGIN RAISE NOTICE 'Question skills inserted: %', (SELECT COUNT(*) FROM question_skill); END $$;
736
737-- ============================================================================
738-- 23. ENROLLMENTS - POWER USERS (50 users with 100k-1M enrollments each)
739-- ============================================================================
740-- UNIQUE constraint: (user_id, course_version_id) implied by business logic
741
742DO $$
743DECLARE
744 v_power_user_id BIGINT;
745 v_enrollment_count INTEGER;
746 v_batch_size INTEGER := 10000;
747 v_total_batches INTEGER;
748 v_batch INTEGER;
749 v_course_versions BIGINT[];
750 v_total_versions BIGINT;
751BEGIN
752 -- Get all available course versions
753 SELECT ARRAY_AGG(id) INTO v_course_versions FROM course_version;
754 v_total_versions := array_length(v_course_versions, 1);
755
756 RAISE NOTICE 'Starting power user enrollment generation...';
757 RAISE NOTICE 'Total course versions available: %', v_total_versions;
758
759 FOR v_power_user_id IN
760 SELECT id FROM "user" WHERE NOT deleted ORDER BY id LIMIT 50
761 LOOP
762 -- Random enrollment count between 100k and min(1M, total_versions)
763 v_enrollment_count := LEAST(100000 + (random() * 900000)::INTEGER, v_total_versions);
764 v_total_batches := CEIL(v_enrollment_count::NUMERIC / v_batch_size);
765
766 RAISE NOTICE 'User %: Creating % enrollments in % batches',
767 v_power_user_id, v_enrollment_count, v_total_batches;
768
769 FOR v_batch IN 1..v_total_batches LOOP
770 INSERT INTO enrollment (
771 user_id, course_id, course_version_id, enrollment_type, enrollment_status,
772 enrolled_at, expires_at, completed_at, progress_percent
773 )
774 SELECT DISTINCT ON (cv.id)
775 v_power_user_id,
776 cv.course_id,
777 cv.id,
778 (ARRAY['FREE', 'PAID', 'GIFTED', 'ADMIN_GRANTED']::enrollment_type_enum[])[1 + floor(random() * 4)::INTEGER],
779 (ARRAY['ACTIVE', 'COMPLETED', 'EXPIRED', 'SUSPENDED']::enrollment_status_enum[])[1 + floor(random() * 4)::INTEGER],
780 CURRENT_TIMESTAMP - (random() * INTERVAL '2 years'),
781 CASE WHEN random() < 0.3 THEN NULL
782 ELSE CURRENT_TIMESTAMP + (random() * INTERVAL '1 year') END,
783 CASE WHEN random() < 0.25 THEN CURRENT_TIMESTAMP - (random() * INTERVAL '6 months')
784 ELSE NULL END,
785 (random() * 100)::INTEGER
786 FROM (
787 SELECT * FROM course_version
788 WHERE id = ANY(v_course_versions)
789 ORDER BY random()
790 LIMIT LEAST(v_batch_size, v_enrollment_count - (v_batch - 1) * v_batch_size)
791 ) cv;
792
793 IF v_batch % 5 = 0 THEN
794 RAISE NOTICE ' User %: Batch % of % completed', v_power_user_id, v_batch, v_total_batches;
795 END IF;
796 END LOOP;
797 END LOOP;
798
799 RAISE NOTICE 'Power user enrollments completed';
800END $$;
801
802DO $$ BEGIN RAISE NOTICE 'Total enrollments after power users: %', (SELECT COUNT(*) FROM enrollment); END $$;
803
804-- ============================================================================
805-- 24. ENROLLMENTS - NORMAL USERS (100k users with 1-50 enrollments each)
806-- ============================================================================
807
808DO $$
809DECLARE
810 v_normal_user_id BIGINT;
811 v_enrollment_count INTEGER;
812 v_user_count INTEGER := 0;
813 v_total_normal_users INTEGER := 100000;
814BEGIN
815 RAISE NOTICE 'Starting normal user enrollment generation (% users)...', v_total_normal_users;
816
817 FOR v_normal_user_id IN
818 SELECT id FROM "user"
819 WHERE NOT deleted
820 AND id NOT IN (SELECT id FROM "user" ORDER BY id LIMIT 50)
821 ORDER BY random()
822 LIMIT v_total_normal_users
823 LOOP
824 v_enrollment_count := (1 + floor(random() * 49))::INTEGER;
825 v_user_count := v_user_count + 1;
826
827 INSERT INTO enrollment (
828 user_id, course_id, course_version_id, enrollment_type, enrollment_status,
829 enrolled_at, expires_at, completed_at, progress_percent
830 )
831 SELECT DISTINCT ON (cv.id)
832 v_normal_user_id,
833 cv.course_id,
834 cv.id,
835 (ARRAY['FREE', 'PAID', 'GIFTED', 'ADMIN_GRANTED']::enrollment_type_enum[])[1 + floor(random() * 4)::INTEGER],
836 (ARRAY['ACTIVE', 'COMPLETED', 'EXPIRED']::enrollment_status_enum[])[1 + floor(random() * 3)::INTEGER],
837 CURRENT_TIMESTAMP - (random() * INTERVAL '2 years'),
838 CASE WHEN random() < 0.4 THEN NULL
839 ELSE CURRENT_TIMESTAMP + (random() * INTERVAL '1 year') END,
840 CASE WHEN random() < 0.2 THEN CURRENT_TIMESTAMP - (random() * INTERVAL '6 months')
841 ELSE NULL END,
842 (random() * 100)::INTEGER
843 FROM (
844 SELECT * FROM course_version ORDER BY random() LIMIT v_enrollment_count
845 ) cv;
846
847 IF v_user_count % 5000 = 0 THEN
848 RAISE NOTICE 'Normal users processed: %/%', v_user_count, v_total_normal_users;
849 END IF;
850 END LOOP;
851
852 RAISE NOTICE 'Normal user enrollments completed';
853END $$;
854
855DO $$ BEGIN RAISE NOTICE 'Total enrollments: %', (SELECT COUNT(*) FROM enrollment); END $$;
856
857-- ============================================================================
858-- 25. USER_LEARNING_PATH (Users in learning paths)
859-- ============================================================================
860
861INSERT INTO user_learning_path (user_id, learning_path_id, status, started_at, completed_at)
862SELECT DISTINCT ON (u.id, lp.id)
863 u.id,
864 lp.id,
865 (ARRAY['NOT_STARTED', 'IN_PROGRESS', 'COMPLETED', 'ABANDONED']::learning_path_status_enum[])[1 + floor(random() * 4)::INTEGER],
866 CURRENT_TIMESTAMP - (random() * INTERVAL '1 year'),
867 CASE WHEN random() < 0.2 THEN CURRENT_TIMESTAMP - (random() * INTERVAL '6 months') ELSE NULL END
868FROM (SELECT id FROM "user" WHERE NOT deleted ORDER BY random() LIMIT 50000) u
869CROSS JOIN LATERAL (
870 SELECT id FROM learning_path WHERE is_active = TRUE ORDER BY random() LIMIT (1 + floor(random() * 2)::INTEGER)
871) lp;
872
873DO $$ BEGIN RAISE NOTICE 'User learning paths inserted: %', (SELECT COUNT(*) FROM user_learning_path); END $$;
874
875-- ============================================================================
876-- 26. ORDER & PAYMENT (For paid enrollments)
877-- ============================================================================
878
879-- Create orders for paid enrollments (sample)
880INSERT INTO "order" (
881 user_id, total_amount, currency, order_status, created_at
882)
883SELECT
884 e.user_id,
885 (cp.price * (1 - COALESCE(b.discount_percent, 0) / 100.0))::NUMERIC(10, 2),
886 cp.currency,
887 (ARRAY['PENDING', 'PROCESSING', 'COMPLETED', 'CANCELLED']::order_status_enum[])[1 + floor(random() * 4)::INTEGER],
888 e.enrolled_at
889FROM (
890 SELECT DISTINCT ON (user_id, course_id) *
891 FROM enrollment
892 WHERE enrollment_type = 'PAID'
893 ORDER BY user_id, course_id, id
894 LIMIT 1000000
895) e
896JOIN course c ON e.course_id = c.id
897LEFT JOIN course_price cp ON c.id = cp.course_id AND cp.is_active = TRUE
898LEFT JOIN bundle b ON random() < 0.15;
899
900DO $$ BEGIN RAISE NOTICE 'Orders inserted: %', (SELECT COUNT(*) FROM "order"); END $$;
901
902-- Order details
903INSERT INTO order_details (order_id, course_id, bundle_id, price, quantity)
904SELECT
905 o.id,
906 e.course_id,
907 CASE WHEN random() < 0.15 THEN (SELECT id FROM bundle WHERE is_active = TRUE ORDER BY random() LIMIT 1) ELSE NULL END,
908 o.total_amount,
909 1
910FROM "order" o
911JOIN enrollment e ON e.user_id = o.user_id
912WHERE e.enrollment_type = 'PAID'
913LIMIT 1000000;
914
915DO $$ BEGIN RAISE NOTICE 'Order details inserted: %', (SELECT COUNT(*) FROM order_details); END $$;
916
917-- Payments
918INSERT INTO payment (
919 order_id, amount, currency, payment_method, payment_status,
920 transaction_id, processed_at
921)
922SELECT
923 o.id,
924 o.total_amount,
925 o.currency,
926 (ARRAY['CREDIT_CARD', 'PAYPAL', 'BANK_TRANSFER', 'STRIPE']::payment_method_enum[])[1 + floor(random() * 4)::INTEGER],
927 CASE
928 WHEN o.order_status = 'COMPLETED' THEN 'COMPLETED'::payment_status_enum
929 WHEN o.order_status = 'CANCELLED' THEN 'CANCELLED'::payment_status_enum
930 ELSE 'PENDING'::payment_status_enum
931 END,
932 'TXN-' || md5(random()::text || o.id::text),
933 CASE WHEN o.order_status = 'COMPLETED' THEN o.created_at + INTERVAL '5 minutes' ELSE NULL END
934FROM "order" o;
935
936DO $$ BEGIN RAISE NOTICE 'Payments inserted: %', (SELECT COUNT(*) FROM payment); END $$;
937
938-- ============================================================================
939-- 27. REVIEW (15% of completed enrollments)
940-- ============================================================================
941
942INSERT INTO review (
943 enrollment_id, rating, created_at, updated_at
944)
945SELECT
946 e.id,
947 (3 + floor(random() * 3))::INTEGER,
948 e.completed_at + (random() * INTERVAL '30 days'),
949 e.completed_at + (random() * INTERVAL '30 days')
950FROM enrollment e
951WHERE e.enrollment_status = 'COMPLETED'
952 AND e.completed_at IS NOT NULL
953 AND random() < 0.15
954LIMIT 2000000;
955
956DO $$ BEGIN RAISE NOTICE 'Reviews inserted: %', (SELECT COUNT(*) FROM review); END $$;
957
958-- ============================================================================
959-- 28. CERTIFICATE (50% of completed enrollments)
960-- ============================================================================
961
962INSERT INTO certificate (
963 enrollment_id, certificate_number, issued_at, verification_code
964)
965SELECT
966 e.id,
967 'CERT-' || LPAD(e.id::TEXT, 12, '0'),
968 e.completed_at,
969 md5(random()::text || e.id::text)
970FROM enrollment e
971WHERE e.enrollment_status = 'COMPLETED'
972 AND e.completed_at IS NOT NULL
973 AND random() < 0.5
974LIMIT 5000000;
975
976DO $$ BEGIN RAISE NOTICE 'Certificates inserted: %', (SELECT COUNT(*) FROM certificate); END $$;
977
978-- ============================================================================
979-- 29. USER_COURSE_INTERACTION (10M interactions)
980-- ============================================================================
981
982DO $$
983DECLARE
984 batch_size INTEGER := 1000000;
985 total_interactions INTEGER := 10000000;
986 num_batches INTEGER := total_interactions / batch_size;
987 current_batch INTEGER;
988BEGIN
989 FOR current_batch IN 1..num_batches LOOP
990 INSERT INTO user_course_interaction (
991 user_id, course_id, interaction_type, interaction_source, created_at
992 )
993 SELECT
994 (SELECT id FROM "user" WHERE NOT deleted ORDER BY random() LIMIT 1),
995 (SELECT id FROM course WHERE is_published = TRUE ORDER BY random() LIMIT 1),
996 (ARRAY['VIEW', 'CLICK', 'SEARCH', 'PREVIEW', 'BOOKMARK', 'SHARE']::interaction_type_enum[])[1 + floor(random() * 6)::INTEGER],
997 (ARRAY['CATALOG', 'SEARCH', 'RECOMMENDATION', 'EMAIL', 'SOCIAL', 'DIRECT']::interaction_source_enum[])[1 + floor(random() * 6)::INTEGER],
998 CURRENT_TIMESTAMP - (random() * INTERVAL '1 year')
999 FROM generate_series(1, batch_size) i;
1000
1001 RAISE NOTICE 'User interactions batch % of % completed', current_batch, num_batches;
1002 END LOOP;
1003END $$;
1004
1005DO $$ BEGIN RAISE NOTICE 'User course interactions inserted: %', (SELECT COUNT(*) FROM user_course_interaction); END $$;
1006
1007-- ============================================================================
1008-- 30. COURSE_SIMILARITY (200k similarity pairs)
1009-- ============================================================================
1010
1011INSERT INTO course_similarity (
1012 course_id_a, course_id_b, similarity_type, similarity_score, created_at
1013)
1014SELECT DISTINCT ON (c1.id, c2.id)
1015 c1.id,
1016 c2.id,
1017 (ARRAY['CONTENT', 'COLLABORATIVE', 'HYBRID']::similarity_type_enum[])[1 + floor(random() * 3)::INTEGER],
1018 (0.3 + random() * 0.7)::NUMERIC(5, 4),
1019 CURRENT_TIMESTAMP - (random() * INTERVAL '6 months')
1020FROM (SELECT id FROM course ORDER BY random() LIMIT 10000) c1
1021CROSS JOIN LATERAL (
1022 SELECT id FROM course WHERE id != c1.id ORDER BY random() LIMIT 20
1023) c2;
1024
1025DO $$ BEGIN RAISE NOTICE 'Course similarities inserted: %', (SELECT COUNT(*) FROM course_similarity); END $$;
1026
1027-- ============================================================================
1028-- 31. LECTURE_PROGRESS (Sample from active enrollments)
1029-- ============================================================================
1030
1031INSERT INTO lecture_progress (
1032 enrollment_id, course_lecture_id, completed, last_position,
1033 completed_at, created_at, updated_at
1034)
1035SELECT
1036 e.id,
1037 cl.id,
1038 random() < 0.35,
1039 (random() * cl.duration)::INTEGER,
1040 CASE WHEN random() < 0.35 THEN CURRENT_TIMESTAMP - (random() * INTERVAL '30 days') ELSE NULL END,
1041 e.enrolled_at,
1042 CURRENT_TIMESTAMP - (random() * INTERVAL '7 days')
1043FROM (
1044 SELECT * FROM enrollment
1045 WHERE enrollment_status IN ('ACTIVE', 'COMPLETED')
1046 ORDER BY random()
1047 LIMIT 2000000
1048) e
1049CROSS JOIN LATERAL (
1050 SELECT cl.id, cl.duration
1051 FROM course_lecture cl
1052 JOIN course_content cc ON cl.course_content_id = cc.id
1053 WHERE cc.course_version_id = e.course_version_id
1054 ORDER BY random()
1055 LIMIT (1 + floor(random() * 9))::INTEGER
1056) cl;
1057
1058DO $$ BEGIN RAISE NOTICE 'Lecture progress inserted: %', (SELECT COUNT(*) FROM lecture_progress); END $$;
1059
1060-- ============================================================================
1061-- 32. QUIZ_ATTEMPT (25% of enrollments attempt quizzes)
1062-- ============================================================================
1063
1064INSERT INTO quiz_attempt (
1065 enrollment_id, quiz_id, attempt_number, started_at, submited_at,
1066 status, score, total_points, earned_points, passed
1067)
1068SELECT
1069 e.id,
1070 q.id,
1071 (1 + floor(random() * 3))::INTEGER,
1072 e.enrolled_at + (random() * INTERVAL '60 days'),
1073 CASE WHEN random() < 0.75
1074 THEN e.enrolled_at + (random() * INTERVAL '60 days') + INTERVAL '45 minutes'
1075 ELSE NULL END,
1076 (ARRAY['IN_PROGRESS', 'SUBMITTED', 'GRADED', 'ABANDONED']::quiz_attempt_status_enum[])[1 + floor(random() * 4)::INTEGER],
1077 CASE WHEN random() < 0.75 THEN (40 + floor(random() * 60))::INTEGER ELSE NULL END,
1078 q.points_value,
1079 CASE WHEN random() < 0.75 THEN (q.points_value * random())::INTEGER ELSE NULL END,
1080 random() < 0.62
1081FROM (
1082 SELECT * FROM enrollment ORDER BY random() LIMIT 5000000
1083) e
1084JOIN quiz q ON q.course_id = e.course_id
1085WHERE random() < 0.25;
1086
1087DO $$ BEGIN RAISE NOTICE 'Quiz attempts inserted: %', (SELECT COUNT(*) FROM quiz_attempt); END $$;
1088
1089-- ============================================================================
1090-- 33. QUIZ_ANSWER
1091-- ============================================================================
1092
1093INSERT INTO quiz_answer (
1094 quiz_attempt_id, question_id, answer_option_id,
1095 confidence_value, is_correct
1096)
1097SELECT
1098 qa.id,
1099 ques.id,
1100 (SELECT ao.id FROM answer_option ao WHERE ao.question_id = ques.id ORDER BY random() LIMIT 1),
1101 (50 + random() * 50)::NUMERIC(5, 2),
1102 random() < 0.62
1103FROM (
1104 SELECT * FROM quiz_attempt WHERE status IN ('SUBMITTED', 'GRADED') ORDER BY random() LIMIT 3000000
1105) qa
1106JOIN quiz q ON qa.quiz_id = q.id
1107CROSS JOIN LATERAL (
1108 SELECT id FROM question WHERE quiz_id = q.id ORDER BY random() LIMIT (5 + floor(random() * 10)::INTEGER)
1109) ques;
1110
1111DO $$ BEGIN RAISE NOTICE 'Quiz answers inserted: %', (SELECT COUNT(*) FROM quiz_answer); END $$;
1112
1113-- ============================================================================
1114-- 34. USER_SKILL (From quiz attempts)
1115-- ============================================================================
1116
1117INSERT INTO user_skill (
1118 user_id, skill_id, verified, proficiency, score_percent, created_at
1119)
1120SELECT DISTINCT ON (e.user_id, qs.skill_id)
1121 e.user_id,
1122 qs.skill_id,
1123 random() < 0.28,
1124 CASE
1125 WHEN AVG(qans.is_correct::INTEGER) > 0.8 THEN 'ADVANCED'::proficiency_level_enum
1126 WHEN AVG(qans.is_correct::INTEGER) > 0.6 THEN 'INTERMEDIATE'::proficiency_level_enum
1127 ELSE 'BEGINNER'::proficiency_level_enum
1128 END,
1129 (AVG(qans.is_correct::INTEGER) * 100)::NUMERIC(5, 2),
1130 MIN(e.enrolled_at)
1131FROM enrollment e
1132JOIN quiz_attempt qat ON qat.enrollment_id = e.id
1133JOIN quiz_answer qans ON qans.quiz_attempt_id = qat.id
1134JOIN question ques ON qans.question_id = ques.id
1135JOIN question_skill qs ON qs.question_id = ques.id
1136WHERE qat.status = 'GRADED'
1137GROUP BY e.user_id, qs.skill_id
1138LIMIT 10000000;
1139
1140DO $$ BEGIN RAISE NOTICE 'User skills inserted: %', (SELECT COUNT(*) FROM user_skill); END $$;
1141
1142-- ============================================================================
1143-- 35. USER_SKILL_SNAPSHOT (From quiz attempts)
1144-- ============================================================================
1145
1146INSERT INTO user_skill_snapshot (
1147 user_id, skill_id, quiz_attempt_id, weight_percent,
1148 confidence_avg, snapshot_type
1149)
1150SELECT
1151 e.user_id,
1152 qs.skill_id,
1153 qat.id,
1154 AVG(qs.weight)::NUMERIC(5, 2),
1155 AVG(qans.confidence_value)::NUMERIC(5, 2),
1156 CASE q.quiz_type
1157 WHEN 'PRE_DIAGNOSTIC' THEN 'PRE_DIAGNOSTIC'::snapshot_type_enum
1158 WHEN 'MODULE_CHECKPOINT' THEN 'CHECKPOINT'::snapshot_type_enum
1159 WHEN 'FINAL_CERTIFICATION' THEN 'FINAL'::snapshot_type_enum
1160 ELSE 'CHECKPOINT'::snapshot_type_enum
1161 END
1162FROM (SELECT * FROM quiz_attempt WHERE status = 'GRADED' ORDER BY random() LIMIT 2000000) qat
1163JOIN enrollment e ON qat.enrollment_id = e.id
1164JOIN quiz q ON qat.quiz_id = q.id
1165JOIN quiz_answer qans ON qans.quiz_attempt_id = qat.id
1166JOIN question ques ON qans.question_id = ques.id
1167JOIN question_skill qs ON qs.question_id = ques.id
1168GROUP BY e.user_id, qs.skill_id, qat.id, q.quiz_type;
1169
1170DO $$ BEGIN RAISE NOTICE 'User skill snapshots inserted: %', (SELECT COUNT(*) FROM user_skill_snapshot); END $$;
1171
1172-- ============================================================================
1173-- FINAL SUMMARY & ANALYSIS
1174-- ============================================================================
1175
1176DO $$
1177DECLARE
1178 rec RECORD;
1179BEGIN
1180 RAISE NOTICE '============================================';
1181 RAISE NOTICE 'DATA GENERATION COMPLETE - FINAL SUMMARY';
1182 RAISE NOTICE '============================================';
1183
1184 FOR rec IN
1185 SELECT
1186 'language' as table_name, COUNT(*) as row_count FROM language
1187 UNION ALL SELECT 'admin', COUNT(*) FROM admin
1188 UNION ALL SELECT 'expert', COUNT(*) FROM expert
1189 UNION ALL SELECT 'user', COUNT(*) FROM "user"
1190 UNION ALL SELECT 'verification_token', COUNT(*) FROM verification_token
1191 UNION ALL SELECT 'meeting_email_reminder', COUNT(*) FROM meeting_email_reminder
1192 UNION ALL SELECT 'topic', COUNT(*) FROM topic
1193 UNION ALL SELECT 'topic_translate', COUNT(*) FROM topic_translate
1194 UNION ALL SELECT 'skill', COUNT(*) FROM skill
1195 UNION ALL SELECT 'skill_translate', COUNT(*) FROM skill_translate
1196 UNION ALL SELECT 'course', COUNT(*) FROM course
1197 UNION ALL SELECT 'course_translate', COUNT(*) FROM course_translate
1198 UNION ALL SELECT 'course_topic', COUNT(*) FROM course_topic
1199 UNION ALL SELECT 'course_version', COUNT(*) FROM course_version
1200 UNION ALL SELECT 'course_content', COUNT(*) FROM course_content
1201 UNION ALL SELECT 'course_content_translate', COUNT(*) FROM course_content_translate
1202 UNION ALL SELECT 'course_lecture', COUNT(*) FROM course_lecture
1203 UNION ALL SELECT 'course_lecture_translate', COUNT(*) FROM course_lecture_translate
1204 UNION ALL SELECT 'course_price', COUNT(*) FROM course_price
1205 UNION ALL SELECT 'bundle', COUNT(*) FROM bundle
1206 UNION ALL SELECT 'manual_bundle', COUNT(*) FROM manual_bundle
1207 UNION ALL SELECT 'personalized_bundle', COUNT(*) FROM personalized_bundle
1208 UNION ALL SELECT 'bundle_translate', COUNT(*) FROM bundle_translate
1209 UNION ALL SELECT 'learning_path', COUNT(*) FROM learning_path
1210 UNION ALL SELECT 'manual_learning_path', COUNT(*) FROM manual_learning_path
1211 UNION ALL SELECT 'personalized_learning_path', COUNT(*) FROM personalized_learning_path
1212 UNION ALL SELECT 'learning_path_translate', COUNT(*) FROM learning_path_translate
1213 UNION ALL SELECT 'learning_path_course', COUNT(*) FROM learning_path_course
1214 UNION ALL SELECT 'user_learning_path', COUNT(*) FROM user_learning_path
1215 UNION ALL SELECT 'quiz', COUNT(*) FROM quiz
1216 UNION ALL SELECT 'quiz_translate', COUNT(*) FROM quiz_translate
1217 UNION ALL SELECT 'question', COUNT(*) FROM question
1218 UNION ALL SELECT 'question_translate', COUNT(*) FROM question_translate
1219 UNION ALL SELECT 'answer_option', COUNT(*) FROM answer_option
1220 UNION ALL SELECT 'answer_option_translate', COUNT(*) FROM answer_option_translate
1221 UNION ALL SELECT 'question_skill', COUNT(*) FROM question_skill
1222 UNION ALL SELECT 'enrollment', COUNT(*) FROM enrollment
1223 UNION ALL SELECT 'order', COUNT(*) FROM "order"
1224 UNION ALL SELECT 'order_details', COUNT(*) FROM order_details
1225 UNION ALL SELECT 'payment', COUNT(*) FROM payment
1226 UNION ALL SELECT 'review', COUNT(*) FROM review
1227 UNION ALL SELECT 'certificate', COUNT(*) FROM certificate
1228 UNION ALL SELECT 'user_course_interaction', COUNT(*) FROM user_course_interaction
1229 UNION ALL SELECT 'course_similarity', COUNT(*) FROM course_similarity
1230 UNION ALL SELECT 'lecture_progress', COUNT(*) FROM lecture_progress
1231 UNION ALL SELECT 'quiz_attempt', COUNT(*) FROM quiz_attempt
1232 UNION ALL SELECT 'quiz_answer', COUNT(*) FROM quiz_answer
1233 UNION ALL SELECT 'user_skill', COUNT(*) FROM user_skill
1234 UNION ALL SELECT 'user_skill_snapshot', COUNT(*) FROM user_skill_snapshot
1235 ORDER BY table_name
1236 LOOP
1237 RAISE NOTICE '% : %', RPAD(rec.table_name, 30), rec.row_count;
1238 END LOOP;
1239
1240 RAISE NOTICE '============================================';
1241 RAISE NOTICE 'Creating indexes for performance...';
1242END $$;
1243
1244-- ============================================================================
1245-- INDEX CREATION (In addition to DDL indexes)
1246-- ============================================================================
1247
1248CREATE INDEX IF NOT EXISTS idx_enrollment_user_id ON enrollment(user_id);
1249CREATE INDEX IF NOT EXISTS idx_enrollment_course_id ON enrollment(course_id);
1250CREATE INDEX IF NOT EXISTS idx_enrollment_course_version_id ON enrollment(course_version_id);
1251CREATE INDEX IF NOT EXISTS idx_enrollment_status ON enrollment(enrollment_status);
1252CREATE INDEX IF NOT EXISTS idx_enrollment_type ON enrollment(enrollment_type);
1253CREATE INDEX IF NOT EXISTS idx_enrollment_enrolled_at ON enrollment(enrolled_at);
1254
1255CREATE INDEX IF NOT EXISTS idx_quiz_attempt_enrollment_id ON quiz_attempt(enrollment_id);
1256CREATE INDEX IF NOT EXISTS idx_quiz_attempt_quiz_id ON quiz_attempt(quiz_id);
1257CREATE INDEX IF NOT EXISTS idx_quiz_attempt_status ON quiz_attempt(status);
1258
1259CREATE INDEX IF NOT EXISTS idx_quiz_answer_attempt_id ON quiz_answer(quiz_attempt_id);
1260CREATE INDEX IF NOT EXISTS idx_quiz_answer_question_id ON quiz_answer(question_id);
1261
1262CREATE INDEX IF NOT EXISTS idx_user_skill_user_id ON user_skill(user_id);
1263CREATE INDEX IF NOT EXISTS idx_user_skill_skill_id ON user_skill(skill_id);
1264
1265CREATE INDEX IF NOT EXISTS idx_lecture_progress_enrollment ON lecture_progress(enrollment_id);
1266CREATE INDEX IF NOT EXISTS idx_lecture_progress_lecture ON lecture_progress(course_lecture_id);
1267
1268CREATE INDEX IF NOT EXISTS idx_user_course_interaction_user ON user_course_interaction(user_id);
1269CREATE INDEX IF NOT EXISTS idx_user_course_interaction_course ON user_course_interaction(course_id);
1270CREATE INDEX IF NOT EXISTS idx_user_course_interaction_type ON user_course_interaction(interaction_type);
1271
1272CREATE INDEX IF NOT EXISTS idx_course_similarity_a ON course_similarity(course_id_a);
1273CREATE INDEX IF NOT EXISTS idx_course_similarity_b ON course_similarity(course_id_b);
1274
1275-- Composite indexes for common queries
1276CREATE INDEX IF NOT EXISTS idx_enrollment_user_status ON enrollment(user_id, enrollment_status);
1277CREATE INDEX IF NOT EXISTS idx_enrollment_course_status ON enrollment(course_id, enrollment_status);
1278CREATE INDEX IF NOT EXISTS idx_course_published_difficulty ON course(is_published, difficulty);
1279CREATE INDEX IF NOT EXISTS idx_course_featured ON course(is_featured) WHERE is_featured = TRUE;
1280
1281DO $$ BEGIN RAISE NOTICE 'Indexes created successfully'; END $$;
1282
1283-- ============================================================================
1284-- ANALYZE TABLES FOR QUERY OPTIMIZATION
1285-- ============================================================================
1286
1287ANALYZE language;
1288ANALYZE admin;
1289ANALYZE expert;
1290ANALYZE "user";
1291ANALYZE verification_token;
1292ANALYZE meeting_email_reminder;
1293ANALYZE topic;
1294ANALYZE topic_translate;
1295ANALYZE skill;
1296ANALYZE skill_translate;
1297ANALYZE course;
1298ANALYZE course_translate;
1299ANALYZE course_topic;
1300ANALYZE course_version;
1301ANALYZE course_content;
1302ANALYZE course_content_translate;
1303ANALYZE course_lecture;
1304ANALYZE course_lecture_translate;
1305ANALYZE course_price;
1306ANALYZE bundle;
1307ANALYZE manual_bundle;
1308ANALYZE personalized_bundle;
1309ANALYZE bundle_translate;
1310ANALYZE learning_path;
1311ANALYZE manual_learning_path;
1312ANALYZE personalized_learning_path;
1313ANALYZE learning_path_translate;
1314ANALYZE learning_path_course;
1315ANALYZE user_learning_path;
1316ANALYZE quiz;
1317ANALYZE quiz_translate;
1318ANALYZE question;
1319ANALYZE question_translate;
1320ANALYZE answer_option;
1321ANALYZE answer_option_translate;
1322ANALYZE question_skill;
1323ANALYZE enrollment;
1324ANALYZE "order";
1325ANALYZE order_details;
1326ANALYZE payment;
1327ANALYZE review;
1328ANALYZE certificate;
1329ANALYZE user_course_interaction;
1330ANALYZE course_similarity;
1331ANALYZE lecture_progress;
1332ANALYZE quiz_attempt;
1333ANALYZE quiz_answer;
1334ANALYZE user_skill;
1335ANALYZE user_skill_snapshot;
1336
1337DO $$ BEGIN
1338 RAISE NOTICE '============================================';
1339 RAISE NOTICE 'ALL TABLES ANALYZED';
1340 RAISE NOTICE 'Demo data generation completed successfully!';
1341 RAISE NOTICE '============================================';
1342END $$;