|| '''[[WikiStart| Home ]]''' || '''[[DatabaseCreation| Database Creation ]]'''|| {{{#!sql --++++++++++ helpers ++++++++++ CREATE TABLE female_names ( name VARCHAR(100) ); CREATE TABLE male_names ( name VARCHAR(100) ); CREATE TABLE male_surnames ( male_surname VARCHAR(100) ); CREATE TABLE female_surnames ( female_surname VARCHAR(100) ); CREATE TABLE cities ( city VARCHAR(25), latitude DECIMAL, longitude DECIMAL ); --+++++++++++ location ++++++++++ INSERT INTO cities (city, latitude, longitude) VALUES ('Skopje', 41.9981, 21.4254), ('Bitola', 41.0314, 21.3347), ('Kumanovo', 42.1322, 21.7144), ('Prilep', 41.3451, 21.5550), ('Tetovo', 42.0097, 20.9716), ('Veles', 41.7156, 21.7756), ('Stip', 41.7458, 22.1958), ('Ohrid', 41.1231, 20.8016), ('Gostivar', 41.8000, 20.9167), ('Strumica', 41.4378, 22.6427), ('Kavadarci', 41.4331, 22.0119), ('Kocani', 41.9164, 22.4128), ('Kicevo', 41.5136, 20.9586), ('Struga', 41.1770, 20.6770), ('Radovis', 41.6383, 22.4647), ('Gevgelija', 41.1417, 22.5014), ('Debar', 41.5244, 20.5242), ('Kriva Palanka', 42.2009, 22.3317), ('Sveti Nikole', 41.8696, 21.9527), ('Negotino', 41.4839, 22.0892), ('Resen', 41.0889, 21.0122), ('Delcevo', 41.9672, 22.7694), ('Vinica', 41.8828, 22.5092), ('Berovo', 41.7031, 22.8578), ('Kratovo', 42.0784, 22.1800), ('Krusevo', 41.3689, 21.2489), ('Bogdanci', 41.2031, 22.5756), ('Demir Hisar', 41.2214, 21.2031), ('Demir Kapija', 41.4053, 22.2467), ('Pehcevo', 41.7633, 22.8892), ('Makedonska Kamenica', 42.0208, 22.5876), ('Probistip', 41.9985, 22.1786), ('Valandovo', 41.3174, 22.5619) ON CONFLICT DO NOTHING; INSERT INTO Location (city, latitude, longitude) SELECT 'Skopje', ROUND((41.97 + random() * 0.06)::numeric, 6), ROUND((21.38 + random() * 0.08)::numeric, 6) FROM generate_series(1, 4000) ON CONFLICT DO NOTHING; INSERT INTO Location (city, latitude, longitude) SELECT c.city, ROUND((c.latitude - 0.03 + random() * 0.06)::numeric, 6), ROUND((c.longitude - 0.05 + random() * 0.10)::numeric, 6) FROM cities c CROSS JOIN generate_series(1, 200) WHERE c.city <> 'Skopje' ON CONFLICT DO NOTHING; -- ++++++++++ userAccount ++++++++++ CREATE OR REPLACE FUNCTION clean_text(input TEXT) RETURNS TEXT AS $$ BEGIN RETURN TRANSLATE(input, 'абвгдѓежзѕијклљмнњопрстќуфхцчџшАБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ', -- Cyrillic 'abvgdgezzsijkljmnjoprstkufhccdsABVGDGEZZSIJKLJMJNJOPRSTKUFHCCDS' -- Latin ); END; $$ LANGUAGE plpgsql IMMUTABLE; WITH offset_cte AS ( SELECT COUNT(*) AS offset_val FROM UserAccount ), all_data AS ( SELECT name, surname, ROW_NUMBER() OVER () AS rn FROM ( -- машки SELECT clean_text(TRIM(m.name)) AS name, clean_text(TRIM(s.surname)) AS surname FROM male_names m CROSS JOIN surnames s UNION ALL -- женски SELECT clean_text(TRIM(f.name)) AS name, clean_text(TRIM(s.female_surname)) AS surname FROM female_names f CROSS JOIN female_surnames s ) t ORDER BY random() LIMIT 350000 ) INSERT INTO UserAccount (name, surname, email, phone_number, created_at) SELECT a.name, a.surname, LOWER(REGEXP_REPLACE(a.name, '[^a-zA-Z0-9]', '', 'g') || '.' || REGEXP_REPLACE(a.surname, '[^a-zA-Z0-9]', '', 'g') || (a.rn + o.offset_val) || '@gmail.com'), '+3897' || (ARRAY['1','2','5','7','8'])[((a.rn + o.offset_val - 1) % 5) + 1] || LPAD((a.rn + o.offset_val)::text, 6, '0'), TIMESTAMP '2017-01-01' + (random() * (NOW() - '2017-01-01')) FROM all_data AS a CROSS JOIN offset_cte AS o; drop table female_names; drop table female_surnames; drop table male_names; drop table surnames; --++++++++++ client ++++++++++ INSERT INTO Client (user_id) SELECT id FROM UserAccount WHERE id NOT IN (SELECT user_id FROM Client) ORDER BY random() LIMIT 100000; --++++++++++ worker ++++++++++ WITH random_workers AS ( SELECT id, created_at, ROW_NUMBER() OVER (ORDER BY random()) AS rn FROM UserAccount WHERE id NOT IN (SELECT user_id FROM Client) LIMIT 250000 ), random_locations AS ( SELECT id, ROW_NUMBER() OVER (ORDER BY random()) AS rn FROM Location ), location_count AS ( SELECT COUNT(*) AS total FROM Location ) INSERT INTO Worker (user_id, work_mode, service_radius_km, location_id, created_at) SELECT w.id, (ARRAY['HYBRID','ONSITE','REMOTE'])[floor(random() * 3 + 1)::int], 15, l.id, w.created_at FROM random_workers w CROSS JOIN location_count lc JOIN random_locations l ON ((w.rn - 1) % lc.total + 1) = l.rn; --++++++++++ favourited ++++++++++ WITH worker_count AS ( SELECT COUNT(*) AS cnt FROM Worker ), client_sample AS ( SELECT id, row_number() OVER () AS rn FROM Client ORDER BY random() LIMIT 15000 ), worker_sample AS ( SELECT id, row_number() OVER () AS rn FROM Worker ORDER BY random() LIMIT 15000 ) INSERT INTO Favourite (worker_id, client_id) SELECT w.id AS worker_id, c.id AS client_id FROM client_sample c JOIN worker_sample w ON w.rn = ((c.rn * 1000003) % 15000) + 1 -- prime number shuffle ON CONFLICT DO NOTHING; --++++++++++ category ++++++++++ CREATE TABLE category_staging ( category_name VARCHAR(100), description VARCHAR(500), parent_category_name VARCHAR(100) ); INSERT INTO Category (category_name, description, parent_category_id) SELECT category_name, description, NULL FROM category_staging WHERE parent_category_name IS NULL OR parent_category_name = '' ON CONFLICT (category_name) DO NOTHING; INSERT INTO Category (category_name, description, parent_category_id) SELECT s.category_name, s.description, p.id FROM category_staging s JOIN Category p ON p.category_name = s.parent_category_name WHERE s.parent_category_name IS NOT NULL AND s.parent_category_name != '' ON CONFLICT (category_name) DO NOTHING; DROP TABLE category_staging; --++++++++++ worker_category ++++++++++ --dodeluvame samo child categories INSERT INTO WorkerCategory (worker_id, category_id) SELECT worker_id, category_id FROM ( SELECT w.id AS worker_id, c.id AS category_id, ROW_NUMBER() OVER ( PARTITION BY w.id ORDER BY random() ) AS rn FROM Worker w JOIN Category c ON c.parent_category_id IS NOT NULL ) t WHERE rn <= 3 ON CONFLICT DO NOTHING; -- ++++++++++ badges ++++++++++ WITH tier_definitions AS ( SELECT * FROM (VALUES (1, 'Beginner', 'Entry-level worker with basic experience in this category', 0.00, 0.20), (2, 'Apprentice', 'Developing worker familiar with standard tasks in this category', 0.20, 0.40), (3, 'Skilled', 'Competent worker with consistent positive ratings in this category', 0.40, 0.60), (4, 'Advanced', 'Experienced worker handling complex tasks in this category', 0.60, 0.80), (5, 'Expert', 'Top-rated professional trusted for high-demand work in this category', 0.80, 1.00) ) AS t(tier_level, tier_name, tier_description, min_factor, max_factor) ), category_price_ranges AS ( SELECT * FROM (VALUES ('Home Cleaning', 5, 150), ('Plumbing', 20, 300), ('Electrical', 25, 400), ('General Handyman', 10, 250), ('Painting', 15, 350), ('Outdoor & Lawn', 10, 280), ('Moving & Hauling', 15, 250), ('Tech & Smart Home', 20, 350), ('Organization & Decluttering',10, 220), ('Errands & Delivery', 5, 150), ('Pet Care', 10, 200), ('Childcare & Tutoring', 10, 200), ('Cooking & Meal Prep', 10, 300), ('Vehicle & Auto', 10, 250), ('Administrative & Business', 10, 220), ('Remodeling & Renovation', 25, 600), ('Event & Party Setup', 10, 300), ('Personal Assistance', 10, 220) ) AS p(category_name, base_min, base_max) ) INSERT INTO Badge (badge_name, description, category_id, min_price, max_price, tier_level) SELECT c.category_name || ' - ' || t.tier_name AS badge_name, t.tier_description || ' (Category: ' || c.category_name || ')' AS description, c.id AS category_id, FLOOR(cpr.base_min + (cpr.base_max - cpr.base_min) * t.min_factor)::INT AS min_price, FLOOR(cpr.base_min + (cpr.base_max - cpr.base_min) * t.max_factor)::INT AS max_price, t.tier_level FROM Category c JOIN category_price_ranges cpr ON cpr.category_name = c.category_name CROSS JOIN tier_definitions t WHERE c.parent_category_id IS NULL -- only parent categories get badges ORDER BY c.id, t.tier_level; --++++++++++ worker_badge ++++++++++ -- not valid yet, needs review for tasks to calculate properly --initially everyone gets begginer badge in their categories INSERT INTO WorkerBadge (worker_id, badge_id, date_acquired,is_active) SELECT DISTINCT wc.worker_id, b.id, w.created_at, true FROM WorkerCategory wc JOIN Worker w ON w.id = wc.worker_id JOIN Category child ON child.id = wc.category_id JOIN Badge b ON b.category_id = child.parent_category_id AND b.tier_level = 1 WHERE child.parent_category_id IS NOT NULL ON CONFLICT (worker_id, badge_id) DO UPDATE SET is_active = TRUE, date_acquired = EXCLUDED.date_acquired; --++++++++++ task requests ++++++++++ --5milioni taskrequests DO $$ DECLARE v_min_id BIGINT; v_max_id BIGINT; v_batch_size INT := 5000; -- clients per iteration v_rows_per_cli INT := 50; -- task rows per client v_offset BIGINT := 0; v_batch_min BIGINT; v_batch_max BIGINT; v_rows_inserted BIGINT := 0; BEGIN SELECT MIN(id), MAX(id) INTO v_min_id, v_max_id FROM Client; LOOP v_batch_min := v_min_id + v_offset; v_batch_max := v_min_id + v_offset + v_batch_size - 1; EXIT WHEN v_batch_min > v_max_id; INSERT INTO TaskRequest (client_id, description, work_mode, status, category_id, location_id, created_at) SELECT c.id, td.task_description, (ARRAY['HYBRID','ONSITE','REMOTE'])[floor(random()*3+1)::int], 'OPEN', cat.id, l.id, u.created_at + INTERVAL '1 minute' + (random() * (TIMESTAMP '2026-01-01' - u.created_at - INTERVAL '1 minute')) FROM Client c JOIN UserAccount u ON c.user_id = u.id -- only clients in this slice JOIN generate_series(1, v_rows_per_cli) gs(n) ON TRUE JOIN LATERAL ( SELECT task_description, category_name FROM temp_task_desc ORDER BY RANDOM()* gs.n LIMIT 1 ) td ON TRUE JOIN Category cat ON cat.category_name = td.category_name JOIN LATERAL ( SELECT id FROM Location ORDER BY RANDOM()* gs.n LIMIT 1 ) l ON true WHERE c.id BETWEEN v_batch_min AND v_batch_max; GET DIAGNOSTICS v_rows_inserted = ROW_COUNT; RAISE NOTICE 'Batch offset=% | clients %-% | rows inserted=%', v_offset, v_batch_min, v_batch_max, v_rows_inserted; v_offset := v_offset + v_batch_size; COMMIT; END LOOP; RAISE NOTICE 'Done. All batches complete.'; END; $$ LANGUAGE plpgsql; -- ================================================================ --helper procedura za badges CREATE OR REPLACE PROCEDURE recalc_worker_badges(p_as_of TIMESTAMP, p_from TIMESTAMP) LANGUAGE plpgsql AS $$ BEGIN -- Само работници со нови reviews во овој batch DROP TABLE IF EXISTS tmp_affected_workers; CREATE TEMP TABLE tmp_affected_workers AS SELECT DISTINCT o.worker_id FROM Review r JOIN Task t ON t.id = r.task_id JOIN Offer o ON o.id = t.offer_id WHERE r.created_at > p_from AND r.created_at <= p_as_of; CREATE INDEX ON tmp_affected_workers (worker_id); IF NOT EXISTS (SELECT 1 FROM tmp_affected_workers) THEN RETURN; END IF; -- ОПТ: GROUP BY оди директно — ист резултат, нема промена во логиката. -- Не скенираме повеќе Review отколку што треба бидејќи -- tmp_affected_workers е мал сет. DROP TABLE IF EXISTS tmp_avg_ratings; CREATE TEMP TABLE tmp_avg_ratings AS SELECT o.worker_id, parent.id AS parent_category_id, AVG(r.rating) AS avg_rating, COUNT(*) AS review_count FROM Review r JOIN Task t ON t.id = r.task_id JOIN Offer o ON o.id = t.offer_id JOIN TaskRequest tr ON tr.id = o.task_request_id JOIN Category child ON child.id = tr.category_id JOIN Category parent ON parent.id = child.parent_category_id WHERE o.worker_id IN (SELECT worker_id FROM tmp_affected_workers) AND r.created_at <= p_as_of GROUP BY o.worker_id, parent.id; CREATE INDEX ON tmp_avg_ratings (worker_id, parent_category_id); DROP TABLE IF EXISTS tmp_new_badges; CREATE TEMP TABLE tmp_new_badges AS SELECT DISTINCT ON (ar.worker_id, ar.parent_category_id) ar.worker_id, ar.parent_category_id, b.id AS badge_id FROM tmp_avg_ratings ar JOIN Badge b ON b.category_id = ar.parent_category_id ORDER BY ar.worker_id, ar.parent_category_id, ABS(b.tier_level - ROUND((ar.avg_rating - 1) / 4.0 * 4 + 1)); CREATE INDEX ON tmp_new_badges (worker_id); CREATE INDEX ON tmp_new_badges (badge_id); -- Деактивирај стари badges UPDATE WorkerBadge wb SET is_active = FALSE WHERE wb.is_active = TRUE AND wb.worker_id IN (SELECT worker_id FROM tmp_affected_workers) AND NOT EXISTS ( SELECT 1 FROM tmp_new_badges nb WHERE nb.worker_id = wb.worker_id AND nb.badge_id = wb.badge_id ); -- Вметни/ажурирај точни badges INSERT INTO WorkerBadge (worker_id, badge_id, date_acquired, is_active) SELECT worker_id, badge_id, p_as_of::DATE, TRUE FROM tmp_new_badges ON CONFLICT (worker_id, badge_id) DO UPDATE SET is_active = TRUE, date_acquired = EXCLUDED.date_acquired; END; $$; ------------------------------------------------------------------- DO $$ DECLARE v_batch_start TIMESTAMP; v_batch_end TIMESTAMP; v_origin TIMESTAMP := TIMESTAMP '2023-02-01'; v_finish TIMESTAMP := TIMESTAMP '2023-03-01'; -- прошири колку сакаш, најполовно на 3-4 месеци, v_cnt BIGINT; v_tables_ready BOOLEAN := FALSE; BEGIN v_batch_start := v_origin; LOOP v_batch_end := v_batch_start + INTERVAL '1 month'; EXIT WHEN v_batch_start >= v_finish; RAISE NOTICE '=== BATCH % → % ===', v_batch_start, v_batch_end; -- ---------------------------------------------------------------- -- ОПТ 1: Создај temp табели САМО еднаш, потоа TRUNCATE -- Избегнува каталошки lock + index rebuild на секоја итерација -- ---------------------------------------------------------------- IF NOT v_tables_ready THEN CREATE TEMP TABLE tmp_worker_sample ( category_id INT, worker_id INT, created_at TIMESTAMP ) ON COMMIT PRESERVE ROWS; CREATE INDEX ON tmp_worker_sample (category_id, worker_id); CREATE TEMP TABLE tmp_offers_to_make ( task_request_id INT, worker_id INT, worker_created TIMESTAMP, tr_created TIMESTAMP, client_created TIMESTAMP, initiated_by TEXT, price INT ) ON COMMIT PRESERVE ROWS; CREATE INDEX ON tmp_offers_to_make (task_request_id); CREATE TEMP TABLE tmp_batch_offers ( id INT, task_request_id INT, created_at TIMESTAMP ) ON COMMIT PRESERVE ROWS; CREATE INDEX ON tmp_batch_offers (task_request_id); CREATE INDEX ON tmp_batch_offers (id); CREATE TEMP TABLE tmp_batch_tasks ( task_id INT, updated_at TIMESTAMP, worker_id INT, worker_user_id INT, client_user_id INT ) ON COMMIT PRESERVE ROWS; CREATE INDEX ON tmp_batch_tasks (task_id); v_tables_ready := TRUE; ELSE TRUNCATE tmp_worker_sample; TRUNCATE tmp_offers_to_make; TRUNCATE tmp_batch_offers; TRUNCATE tmp_batch_tasks; END IF; -- ---------------------------------------------------------------- -- ОПТ 2: Земи max 20 работници по категорија ЕДНАШ -- Истата логика, но RANDOM() се вика еднаш, не за секој TaskRequest -- ---------------------------------------------------------------- INSERT INTO tmp_worker_sample (category_id, worker_id, created_at) WITH numbered AS ( SELECT wc.category_id, w.id AS worker_id, w.created_at, ROW_NUMBER() OVER ( PARTITION BY wc.category_id ORDER BY RANDOM() ) AS rn FROM Worker w JOIN WorkerCategory wc ON wc.worker_id = w.id WHERE w.created_at < v_batch_end ) SELECT category_id, worker_id, created_at FROM numbered WHERE rn <= 20; -- ---------------------------------------------------------------- -- ОПТ 3: Генерирај парови (TaskRequest, Worker) ЕДНАШ во temp табела -- Наместо да се прави lateral join двапати (worker + client offers) -- со ORDER BY RANDOM() на секој ред — ова е главниот bottleneck. -- Истиот број на offers (2–5 по TaskRequest), иста рандомност. -- ---------------------------------------------------------------- INSERT INTO tmp_offers_to_make (task_request_id, worker_id, worker_created, tr_created, client_created, initiated_by, price) SELECT tr.id, w.worker_id, w.created_at, tr.created_at, u.created_at, 'WORKER', COALESCE( FLOOR(RANDOM() * (b.max_price - b.min_price + 1) + b.min_price), FLOOR(RANDOM() * (300 - 100 + 1) + 100) )::INT FROM TaskRequest tr JOIN Client c ON c.id = tr.client_id JOIN UserAccount u ON u.id = c.user_id JOIN LATERAL ( -- ОПТ: tmp_worker_sample е веќе случаен → само земи 2–5 SELECT bw.worker_id, bw.created_at FROM tmp_worker_sample bw WHERE bw.category_id = tr.category_id LIMIT (FLOOR(RANDOM()*4)+2) -- нема ORDER BY RANDOM() овде! ) w ON TRUE JOIN Category child ON child.id = tr.category_id JOIN Category parent ON parent.id = child.parent_category_id LEFT JOIN WorkerBadge wb ON wb.worker_id = w.worker_id AND wb.is_active = TRUE LEFT JOIN Badge b ON b.id = wb.badge_id AND b.category_id = parent.id WHERE tr.status = 'OPEN' AND tr.created_at >= v_batch_start AND tr.created_at < v_batch_end; -- ------------------------------------------------ -- 1. WORKER-INITIATED OFFERS -- ------------------------------------------------ INSERT INTO Offer (worker_id, task_request_id, price, offer_status, initiated_by, created_at) SELECT worker_id, task_request_id, price, 'PENDING', 'WORKER', GREATEST( tr_created + INTERVAL '1 minute', worker_created + INTERVAL '1 hour', client_created + INTERVAL '1 minute', LEAST( tr_created + (RANDOM() * INTERVAL '12 hours'), v_batch_end - INTERVAL '1 hour' ) ) FROM tmp_offers_to_make; GET DIAGNOSTICS v_cnt = ROW_COUNT; RAISE NOTICE ' Worker offers inserted: %', v_cnt; -- ------------------------------------------------ -- 2. CLIENT-INITIATED OFFERS (~50% од истите парови) -- Ист ефект: RANDOM() < 0.5 филтер -- ------------------------------------------------ INSERT INTO Offer (worker_id, task_request_id, price, offer_status, initiated_by, created_at) SELECT worker_id, task_request_id, FLOOR(RANDOM() * (300 - 100 + 1) + 100)::INT, 'PENDING', 'CLIENT', GREATEST( tr_created + INTERVAL '1 minute', worker_created + INTERVAL '1 hour', client_created + INTERVAL '1 minute', LEAST( tr_created + (RANDOM() * INTERVAL '12 hours'), v_batch_end - INTERVAL '1 hour' ) ) FROM tmp_offers_to_make WHERE RANDOM() < 0.5; GET DIAGNOSTICS v_cnt = ROW_COUNT; RAISE NOTICE ' Client offers inserted: %', v_cnt; -- ------------------------------------------------ -- 3. ACCEPT ЕДЕН OFFER PО TASK, REJECT ОСТАНАТИТЕ -- ОПТ: Еден UPDATE со CASE наместо два одделни UPDATE -- Ист резултат: 1 ACCEPTED, останатите REJECTED по TaskRequest -- ------------------------------------------------ INSERT INTO tmp_batch_offers (id, task_request_id, created_at) SELECT o.id, o.task_request_id, o.created_at FROM Offer o WHERE o.offer_status = 'PENDING' AND o.created_at >= v_batch_start AND o.created_at < v_batch_end; WITH last_offer AS ( SELECT task_request_id, MAX(created_at) AS last_offer_time FROM tmp_batch_offers GROUP BY task_request_id ), ranked AS ( SELECT o.id, o.task_request_id, o.created_at AS offer_created_at, lo.last_offer_time, ROW_NUMBER() OVER ( PARTITION BY o.task_request_id ORDER BY RANDOM() ) AS rn FROM tmp_batch_offers o JOIN last_offer lo ON lo.task_request_id = o.task_request_id ) -- ОПТ: Еден UPDATE со CASE → половина I/O наспроти два UPDATE UPDATE Offer o SET offer_status = CASE WHEN r.rn = 1 THEN 'ACCEPTED' ELSE 'REJECTED' END, updated_at = CASE WHEN r.rn = 1 THEN GREATEST( r.offer_created_at + INTERVAL '1 minute', LEAST( GREATEST(r.offer_created_at, r.last_offer_time) + (RANDOM() * INTERVAL '1 hour'), v_batch_end - INTERVAL '1 hour' ) ) ELSE GREATEST(o.created_at, GREATEST( r.offer_created_at + INTERVAL '1 minute', LEAST( GREATEST(r.offer_created_at, r.last_offer_time) + (RANDOM() * INTERVAL '1 hour'), v_batch_end - INTERVAL '1 hour' ) ) ) END FROM ranked r WHERE o.id = r.id; -- ------------------------------------------------ -- 4. CLOSE ACCEPTED TASK REQUESTS -- ------------------------------------------------ WITH accepted_tasks AS ( SELECT DISTINCT task_request_id, updated_at FROM Offer WHERE offer_status = 'ACCEPTED' AND created_at >= v_batch_start AND created_at < v_batch_end ) UPDATE TaskRequest tr SET status = 'CLOSED', updated_at = GREATEST(at2.updated_at, tr.created_at + INTERVAL '1 minute') FROM accepted_tasks at2 WHERE tr.id = at2.task_request_id AND tr.status = 'OPEN'; -- ------------------------------------------------ -- 5. CREATE TASKS -- ------------------------------------------------ INSERT INTO Task (offer_id, status, created_at) SELECT o.id, 'ACTIVE', GREATEST( o.updated_at, o.created_at + INTERVAL '1 minute', u.created_at + INTERVAL '1 minute' ) FROM Offer o JOIN TaskRequest tr ON tr.id = o.task_request_id JOIN Client c ON c.id = tr.client_id JOIN UserAccount u ON u.id = c.user_id WHERE o.offer_status = 'ACCEPTED' AND o.created_at >= v_batch_start AND o.created_at < v_batch_end AND NOT EXISTS ( SELECT 1 FROM Task t WHERE t.offer_id = o.id ); GET DIAGNOSTICS v_cnt = ROW_COUNT; RAISE NOTICE ' Tasks created: %', v_cnt; -- ------------------------------------------------ -- 6. COMPLETE TASKS -- ------------------------------------------------ UPDATE Task t SET status = 'COMPLETED', updated_at = GREATEST( t.created_at + INTERVAL '1 hour', LEAST( t.created_at + (RANDOM() * INTERVAL '2 days'), v_batch_end - INTERVAL '1 hour' ) ) WHERE t.status = 'ACTIVE' AND t.created_at >= v_batch_start AND t.created_at < v_batch_end; -- ------------------------------------------------ -- 7. REVIEWS -- ------------------------------------------------ INSERT INTO tmp_batch_tasks (task_id, updated_at, worker_id, worker_user_id, client_user_id) SELECT t.id, t.updated_at, o.worker_id, w.user_id, c.user_id FROM Task t JOIN Offer o ON o.id = t.offer_id JOIN Worker w ON w.id = o.worker_id JOIN TaskRequest tr ON tr.id = o.task_request_id JOIN Client c ON c.id = tr.client_id WHERE t.status = 'COMPLETED' AND t.created_at >= v_batch_start AND t.created_at < v_batch_end; -- Client → Worker INSERT INTO Review (task_id, reviewer_id, reviewed_id, rating, created_at) SELECT bt.task_id, bt.client_user_id, bt.worker_user_id, LEAST(5, GREATEST(1, ROUND((random()+random()+random()+random())/4.0*4+1)::INT )), GREATEST( bt.updated_at + INTERVAL '1 minute', LEAST( bt.updated_at + (RANDOM() * INTERVAL '12 hours'), v_batch_end - INTERVAL '1 hour' ) ) FROM tmp_batch_tasks bt LEFT JOIN Review r ON r.task_id = bt.task_id AND r.reviewer_id = bt.client_user_id WHERE r.id IS NULL; GET DIAGNOSTICS v_cnt = ROW_COUNT; RAISE NOTICE ' Reviews client->worker: %', v_cnt; -- Worker → Client INSERT INTO Review (task_id, reviewer_id, reviewed_id, rating, created_at) SELECT bt.task_id, bt.worker_user_id, bt.client_user_id, LEAST(5, GREATEST(1, ROUND((random()+random()+random())/3.0*4+1)::INT )), GREATEST( bt.updated_at + INTERVAL '1 minute', LEAST( bt.updated_at + (RANDOM() * INTERVAL '12 hours'), v_batch_end - INTERVAL '1 hour' ) ) FROM tmp_batch_tasks bt LEFT JOIN Review r ON r.task_id = bt.task_id AND r.reviewer_id = bt.worker_user_id WHERE r.id IS NULL; GET DIAGNOSTICS v_cnt = ROW_COUNT; RAISE NOTICE ' Reviews worker->client: %', v_cnt; -- ------------------------------------------------ -- 8. BADGE RECALC -- ------------------------------------------------ CALL recalc_worker_badges(v_batch_end, v_batch_start); RAISE NOTICE ' Badges updated.'; COMMIT; v_batch_start := v_batch_end; END LOOP; RAISE NOTICE 'All batches done.'; END; $$ LANGUAGE plpgsql; --++++++++++ notification type +++++++++++ INSERT INTO NotificationType (name, description) VALUES ('New Task Created', 'Triggered when a new task is created and awaiting offers.'), ('Task Accepted', 'Triggered when a task is accepted by a worker.'), ('Task Started', 'Triggered when a worker begins a task.'), ('Task Completed', 'Triggered when a task is marked complete.'), ('Task Cancelled', 'Triggered when a task is cancelled.'), ('Offer Received', 'Triggered when a new offer is submitted on a task.'), ('Offer Accepted', 'Triggered when an offer is accepted by the client.'), ('Offer Rejected', 'Triggered when an offer is rejected.'), ('Offer Expired', 'Triggered when an offer expires without a response.'), ('Payment Pending', 'Triggered when a payment is being processed.'), ('Payment Successful', 'Triggered when a payment completes successfully.'), ('Payment Failed', 'Triggered when a payment attempt fails.'), ('Review Received', 'Triggered when a user receives a new review.'), ('Complaint Opened', 'Triggered when a complaint is filed.'), ('Complaint Resolved', 'Triggered when a complaint is resolved.'), ('Complaint Dismissed', 'Triggered when a complaint is dismissed.'), ('Message Received', 'Triggered when a new message is received.'), ('Badge Earned', 'Triggered when a worker earns a new badge.'), ('Profile Viewed', 'Triggered when a user profile is viewed.'), ('System Announcement', 'General system-wide announcements.'); INSERT INTO NotificationPreference (is_enabled, channel, user_id, notification_type_id) SELECT (random() > 0.25) AS is_enabled, channel AS channel, u AS user_id, nt AS notification_type_id FROM generate_series((SELECT min(id) FROM useraccount), (SELECT max(id) FROM useraccount)) AS u, generate_series(1, 20) AS nt, (VALUES ('EMAIL'), ('SMS'), ('PUSH')) AS c(channel) WHERE u IN (SELECT id FROM useraccount) -- only real user IDs LIMIT 7000000; --++++++++++ compalaints +++++++++++ DO $$ DECLARE batch_size INT := 100000; total_rows INT := 1000000; offset_val INT := 0; task_ids INT[]; client_ids INT[]; worker_ids INT[]; BEGIN -- Load matched task/client/worker combos from the actual join chain SELECT array_agg(t.id), array_agg(tr.client_id), array_agg(o.worker_id) INTO task_ids, client_ids, worker_ids FROM Task t JOIN Offer o ON o.id = t.offer_id JOIN TaskRequest tr ON tr.id = o.task_request_id; WHILE offset_val < total_rows LOOP INSERT INTO Complaint (reason, description, status, created_at, updated_at, task_id, client_id, worker_id) SELECT CASE WHEN is_worker_complaint THEN CASE floor(random() * 6)::INT WHEN 0 THEN 'Client: Payment not made as agreed' WHEN 1 THEN 'Client: Unsafe or hazardous work conditions' WHEN 2 THEN 'Client: Abusive or threatening behaviour' WHEN 3 THEN 'Client: Scope creep beyond agreed terms' WHEN 4 THEN 'Client: Access denied to work site' ELSE 'Client: False damage claim' END ELSE CASE floor(random() * 6)::INT WHEN 0 THEN 'Worker: Work not completed as agreed' WHEN 1 THEN 'Worker: Poor quality of service' WHEN 2 THEN 'Worker: No-show or late arrival' WHEN 3 THEN 'Worker: Unprofessional behaviour' WHEN 4 THEN 'Worker: Overcharged or billing dispute' ELSE 'Worker: Damaged property or belongings' END END AS reason, CASE WHEN is_worker_complaint THEN CASE floor(random() * 4)::INT WHEN 0 THEN 'The client did not fulfil the agreed payment terms.' WHEN 1 THEN 'The client created conditions that made work impossible.' WHEN 2 THEN 'The client behaved in an abusive or threatening manner.' ELSE NULL END ELSE CASE floor(random() * 4)::INT WHEN 0 THEN 'The worker did not follow the agreed specifications.' WHEN 1 THEN 'The quality of the delivered work was unsatisfactory.' WHEN 2 THEN 'The worker arrived significantly late without notice.' ELSE NULL END END AS description, CASE floor(random() * 3)::INT WHEN 0 THEN 'OPEN' WHEN 1 THEN 'RESOLVED' ELSE 'DISMISSED' END AS status, created_ts AS created_at, CASE WHEN floor(random() * 3)::INT > 0 THEN created_ts + (random() * INTERVAL '30 days') ELSE NULL END AS updated_at, task_ids [idx] AS task_id, client_ids[idx] AS client_id, worker_ids[idx] AS worker_id FROM ( SELECT i, '2017-01-15 19:44:09.798'::TIMESTAMP + random() * ('2026-04-29 23:45:45.249'::TIMESTAMP - '2017-01-15 19:44:09.798'::TIMESTAMP) AS created_ts, random() < 0.5 AS is_worker_complaint, 1 + floor(random() * array_length(task_ids, 1))::INT AS idx FROM generate_series(offset_val + 1, offset_val + batch_size) AS i ) sub; offset_val := offset_val + batch_size; RAISE NOTICE 'Inserted % / % rows', offset_val, total_rows; END LOOP; END; $$; CREATE TEMP TABLE complaints_ordered AS SELECT c.id AS cid, cl.user_id AS client_user_id, w.user_id AS worker_user_id, random() AS rnd FROM Complaint c JOIN Task t ON t.id = c.task_id JOIN Offer o ON o.id = t.offer_id JOIN TaskRequest tr ON tr.id = o.task_request_id JOIN Client cl ON cl.id = tr.client_id AND cl.id = c.client_id JOIN Worker w ON w.id = o.worker_id AND w.id = c.worker_id; CREATE INDEX ON complaints_ordered (rnd); INSERT INTO ComplaintAttachment (file_url, description, complaint_id, user_id) SELECT 'https://storage.example.com/complaints/' || cid || '/' || md5(random()::TEXT) || '.' || CASE floor(random() * 4)::INT WHEN 0 THEN 'jpg' WHEN 1 THEN 'png' WHEN 2 THEN 'pdf' ELSE 'mp4' END AS file_url, CASE floor(random() * 3)::INT WHEN 0 THEN 'Photo evidence of the issue' WHEN 1 THEN 'Screenshot of conversation' ELSE NULL END AS description, cid AS complaint_id, CASE WHEN random() < 0.5 THEN client_user_id ELSE worker_user_id END AS user_id FROM complaints_ordered ORDER BY rnd LIMIT 2000000; -- Your total desired rows DROP TABLE IF EXISTS complaints_ordered; --++++++++++ payments +++++++++++ CREATE TEMP TABLE payments_base AS SELECT t.id AS task_id, tr.client_id, o.worker_id, row_number() OVER (ORDER BY random()) AS rn FROM Task t JOIN Offer o ON o.id = t.offer_id JOIN TaskRequest tr ON tr.id = o.task_request_id CROSS JOIN generate_series(1, 10) AS multiplier LIMIT 10000000; truncate table Payment restart identity cascade; DO $$ DECLARE batch_size INT := 500000; max_rn INT := (SELECT MAX(rn) FROM payments_base); curr_rn INT := 1; BEGIN WHILE curr_rn <= max_rn LOOP INSERT INTO Payment (amount, payment_method, status, task_id, client_id, worker_id, created_at, updated_at) SELECT (floor(random() * 99000) + 1000)::INT AS amount, CASE floor(random() * 3)::INT WHEN 0 THEN 'CARD' WHEN 1 THEN 'PAYPAL' ELSE 'CASH' END AS payment_method, CASE floor(random() * 10)::INT WHEN 0 THEN 'FAILED' WHEN 1 THEN 'PENDING' ELSE 'PAID' END AS status, pb.task_id, pb.client_id, pb.worker_id, created_ts AS created_at, CASE WHEN random() > 0.2 THEN LEAST(created_ts + (random() * INTERVAL '30 days'), NOW()) ELSE NULL END AS updated_at FROM payments_base pb CROSS JOIN LATERAL ( SELECT '2017-01-15 19:44:09.798'::TIMESTAMP + random() * ('2026-04-29 23:45:45.249'::TIMESTAMP - '2017-01-15 19:44:09.798'::TIMESTAMP) AS created_ts ) ts WHERE pb.rn BETWEEN curr_rn AND curr_rn + batch_size - 1; curr_rn := curr_rn + batch_size; RAISE NOTICE 'Inserted up to row % / %', curr_rn - 1, max_rn; END LOOP; END; $$; DROP TABLE payments_base; -- ========================================================= -- HELPER TABLES -- ========================================================= CREATE TABLE message_template_client ( category TEXT, text TEXT ); CREATE TABLE message_template_worker ( category TEXT, text TEXT ); --add category_id from Category ALTER TABLE message_template_client ADD COLUMN category_id INT; UPDATE message_template_client mtc SET category_id = c.id FROM Category c WHERE c.category_name = mtc.category; ALTER TABLE message_template_worker ADD COLUMN category_id INT; UPDATE message_template_worker mtc SET category_id = c.id FROM Category c WHERE c.category_name = mtc.category; -- ========================================================= -- REQUIRED INDEXES -- ========================================================= CREATE INDEX IF NOT EXISTS idx_task_created_at ON Task(created_at); CREATE INDEX IF NOT EXISTS idx_template_worker_category_id ON message_template_worker(category_id); CREATE INDEX IF NOT EXISTS idx_template_client_category_id ON message_template_client(category_id); -- ========================================================= -- GENERATE MESSAGES DO $$ DECLARE v_batch_start TIMESTAMP := TIMESTAMP '2017-01-01'; v_batch_end TIMESTAMP; v_finish TIMESTAMP := TIMESTAMP '2026-04-30'; v_tables_ready BOOLEAN := FALSE; BEGIN LOOP v_batch_end := v_batch_start + INTERVAL '1 week'; EXIT WHEN v_batch_start >= v_finish; RAISE NOTICE '=== BATCH % -> % ===', v_batch_start, v_batch_end; -- ===================================================== -- 1. CREATE / RESET TEMP TABLES -- ===================================================== IF NOT v_tables_ready THEN CREATE TEMP TABLE tmp_batch_tasks ( task_id INT, created_at TIMESTAMP, updated_at TIMESTAMP, worker_user_id INT, client_user_id INT, category_id INT ) ON COMMIT PRESERVE ROWS; CREATE INDEX idx_tmp_batch_task_id ON tmp_batch_tasks(task_id); CREATE TEMP TABLE tmp_messages_to_insert ( task_id INT, sender_id INT, created_at TIMESTAMP, category_id INT ) ON COMMIT PRESERVE ROWS; CREATE INDEX idx_tmp_messages_task_id ON tmp_messages_to_insert(task_id); v_tables_ready := TRUE; ELSE TRUNCATE TABLE tmp_batch_tasks; TRUNCATE TABLE tmp_messages_to_insert; END IF; -- ===================================================== -- 2. LOAD TASK DATA -- ===================================================== INSERT INTO tmp_batch_tasks ( task_id, created_at, updated_at, worker_user_id, client_user_id, category_id ) SELECT t.id, t.created_at, t.updated_at, w.user_id, c.user_id, tr.category_id FROM Task t JOIN Offer o ON o.id = t.offer_id JOIN Worker w ON w.id = o.worker_id JOIN TaskRequest tr ON tr.id = o.task_request_id JOIN Client c ON c.id = tr.client_id WHERE t.created_at >= v_batch_start AND t.created_at < v_batch_end; RAISE NOTICE 'Tasks loaded: %', (SELECT COUNT(*) FROM tmp_batch_tasks); -- ===================================================== -- 3. GENERATE MESSAGE METADATA -- ===================================================== INSERT INTO tmp_messages_to_insert ( task_id, sender_id, created_at, category_id ) SELECT bt.task_id, CASE WHEN random() < 0.5 THEN bt.client_user_id ELSE bt.worker_user_id END AS sender_id, bt.created_at + ( random() * (bt.updated_at - bt.created_at) ) AS created_at, bt.category_id FROM tmp_batch_tasks bt JOIN LATERAL generate_series( 1, floor(random() * 4)::int + 3 ) gs ON true; RAISE NOTICE 'Messages generated: %', (SELECT COUNT(*) FROM tmp_messages_to_insert); -- ===================================================== -- 4. INSERT INTO MESSAGE -- ===================================================== INSERT INTO Message ( text, created_at, task_id, sender_id ) SELECT COALESCE( CASE WHEN tmi.sender_id = bt.worker_user_id THEN mtw.text ELSE mtc.text END, 'Default message' ) AS text, tmi.created_at, tmi.task_id, tmi.sender_id FROM tmp_messages_to_insert tmi JOIN tmp_batch_tasks bt ON bt.task_id = tmi.task_id LEFT JOIN LATERAL ( SELECT text FROM message_template_worker mtw WHERE mtw.category_id = tmi.category_id OFFSET floor( random() * ( SELECT count(*) FROM message_template_worker x WHERE x.category_id = tmi.category_id ) ) LIMIT 1 ) mtw ON tmi.sender_id = bt.worker_user_id LEFT JOIN LATERAL ( SELECT text FROM message_template_client mtc WHERE mtc.category_id = tmi.category_id OFFSET floor( random() * ( SELECT count(*) FROM message_template_client x WHERE x.category_id = tmi.category_id ) ) LIMIT 1 ) mtc ON tmi.sender_id = bt.client_user_id; RAISE NOTICE 'Inserted batch % -> %', v_batch_start, v_batch_end; v_batch_start := v_batch_end; END LOOP; RAISE NOTICE 'ALL BATCHES FINISHED'; END; $$; -- ========================================================= -- CLEANUP -- ========================================================= DROP TABLE IF EXISTS tmp_messages_to_insert; DROP TABLE IF EXISTS tmp_batch_tasks; DROP INDEX IF EXISTS idx_task_created_at; DROP INDEX IF EXISTS idx_template_worker_category_id; DROP INDEX IF EXISTS idx_template_client_category_id; -- ========================================================= -- DROP THE HELPER TABLES -- ========================================================= DROP TABLE IF EXISTS message_template_client ; DROP TABLE IF EXISTS message_template_worker; -- ========================================================= -- DROP DUPLICATE MESSAGE PER TASK -- ========================================================= DROP DELETE FROM Message m USING Message d WHERE m.id > d.id AND m.task_id = d.task_id AND m.text = d.text; }}}