| | 1 | This DML script simulates the real behavior of the application over time, rather than inserting all data at once. The key idea is that several parts of the system are interdependent, especially offer pricing, badges, and reviews. |
| | 2 | |
| | 3 | The price of each offer is not purely random—it depends on the worker’s badge in the corresponding category. The badge itself is calculated based on the worker’s average ratings derived from reviews accumulated over time. In other words, there is a clear dependency chain: |
| | 4 | reviews → badges → offer price |
| | 5 | |
| | 6 | Because of this dependency, the data cannot be generated in a single step. Instead, the script simulates the application timeline, where data is created in a realistic order: |
| | 7 | |
| | 8 | !TaskRequests are created first |
| | 9 | then Offers are generated |
| | 10 | one offer per task is accepted and Tasks are created |
| | 11 | Tasks are completed |
| | 12 | Reviews are added |
| | 13 | and finally, badges are recalculated |
| | 14 | |
| | 15 | This process runs in monthly batches (e.g., February → March), allowing reviews to accumulate gradually and workers to progress naturally through badge levels. |
| | 16 | |
| | 17 | For simplicity and performance reasons, badge recalculation is not triggered after every change (such as each new review). Instead, it is performed at the end of each month, which significantly reduces computational overhead while still preserving a realistic system behavior. |
| | 18 | {{{#!sql |
| | 19 | --++++++++++ helpers ++++++++++ |
| | 20 | |
| | 21 | CREATE TABLE female_names ( |
| | 22 | name VARCHAR(100) |
| | 23 | ); |
| | 24 | CREATE TABLE male_names ( |
| | 25 | name VARCHAR(100) |
| | 26 | ); |
| | 27 | |
| | 28 | CREATE TABLE male_surnames ( |
| | 29 | male_surname VARCHAR(100) |
| | 30 | ); |
| | 31 | |
| | 32 | CREATE TABLE female_surnames ( |
| | 33 | female_surname VARCHAR(100) |
| | 34 | ); |
| | 35 | CREATE TABLE cities ( |
| | 36 | city VARCHAR(25), |
| | 37 | latitude DECIMAL, |
| | 38 | longitude DECIMAL |
| | 39 | ); |
| | 40 | --+++++++++++ location ++++++++++ |
| | 41 | INSERT INTO cities (city, latitude, longitude) VALUES |
| | 42 | ('Skopje', 41.9981, 21.4254), |
| | 43 | ('Bitola', 41.0314, 21.3347), |
| | 44 | ('Kumanovo', 42.1322, 21.7144), |
| | 45 | ('Prilep', 41.3451, 21.5550), |
| | 46 | ('Tetovo', 42.0097, 20.9716), |
| | 47 | ('Veles', 41.7156, 21.7756), |
| | 48 | ('Stip', 41.7458, 22.1958), |
| | 49 | ('Ohrid', 41.1231, 20.8016), |
| | 50 | ('Gostivar', 41.8000, 20.9167), |
| | 51 | ('Strumica', 41.4378, 22.6427), |
| | 52 | ('Kavadarci', 41.4331, 22.0119), |
| | 53 | ('Kocani', 41.9164, 22.4128), |
| | 54 | ('Kicevo', 41.5136, 20.9586), |
| | 55 | ('Struga', 41.1770, 20.6770), |
| | 56 | ('Radovis', 41.6383, 22.4647), |
| | 57 | ('Gevgelija', 41.1417, 22.5014), |
| | 58 | ('Debar', 41.5244, 20.5242), |
| | 59 | ('Kriva Palanka', 42.2009, 22.3317), |
| | 60 | ('Sveti Nikole', 41.8696, 21.9527), |
| | 61 | ('Negotino', 41.4839, 22.0892), |
| | 62 | ('Resen', 41.0889, 21.0122), |
| | 63 | ('Delcevo', 41.9672, 22.7694), |
| | 64 | ('Vinica', 41.8828, 22.5092), |
| | 65 | ('Berovo', 41.7031, 22.8578), |
| | 66 | ('Kratovo', 42.0784, 22.1800), |
| | 67 | ('Krusevo', 41.3689, 21.2489), |
| | 68 | ('Bogdanci', 41.2031, 22.5756), |
| | 69 | ('Demir Hisar', 41.2214, 21.2031), |
| | 70 | ('Demir Kapija', 41.4053, 22.2467), |
| | 71 | ('Pehcevo', 41.7633, 22.8892), |
| | 72 | ('Makedonska Kamenica', 42.0208, 22.5876), |
| | 73 | ('Probistip', 41.9985, 22.1786), |
| | 74 | ('Valandovo', 41.3174, 22.5619) |
| | 75 | ON CONFLICT DO NOTHING; |
| | 76 | |
| | 77 | |
| | 78 | INSERT INTO Location (city, latitude, longitude) |
| | 79 | SELECT |
| | 80 | 'Skopje', |
| | 81 | |
| | 82 | ROUND((41.97 + random() * 0.06)::numeric, 6), |
| | 83 | ROUND((21.38 + random() * 0.08)::numeric, 6) |
| | 84 | |
| | 85 | FROM generate_series(1, 4000) |
| | 86 | ON CONFLICT DO NOTHING; |
| | 87 | |
| | 88 | INSERT INTO Location (city, latitude, longitude) |
| | 89 | SELECT |
| | 90 | c.city, |
| | 91 | |
| | 92 | ROUND((c.latitude - 0.03 + random() * 0.06)::numeric, 6), |
| | 93 | ROUND((c.longitude - 0.05 + random() * 0.10)::numeric, 6) |
| | 94 | |
| | 95 | FROM cities c |
| | 96 | CROSS JOIN generate_series(1, 200) |
| | 97 | WHERE c.city <> 'Skopje' |
| | 98 | ON CONFLICT DO NOTHING; |
| | 99 | |
| | 100 | |
| | 101 | -- ++++++++++ userAccount ++++++++++ |
| | 102 | |
| | 103 | CREATE OR REPLACE FUNCTION clean_text(input TEXT) |
| | 104 | RETURNS TEXT AS $$ |
| | 105 | BEGIN |
| | 106 | RETURN TRANSLATE(input, |
| | 107 | 'абвгдѓежзѕијклљмнњопрстќуфхцчџшАБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ', -- Cyrillic |
| | 108 | 'abvgdgezzsijkljmnjoprstkufhccdsABVGDGEZZSIJKLJMJNJOPRSTKUFHCCDS' -- Latin |
| | 109 | ); |
| | 110 | END; |
| | 111 | $$ LANGUAGE plpgsql IMMUTABLE; |
| | 112 | |
| | 113 | WITH offset_cte AS ( |
| | 114 | SELECT COUNT(*) AS offset_val FROM UserAccount |
| | 115 | ), |
| | 116 | all_data AS ( |
| | 117 | SELECT |
| | 118 | name, |
| | 119 | surname, |
| | 120 | ROW_NUMBER() OVER () AS rn |
| | 121 | FROM ( |
| | 122 | -- машки |
| | 123 | SELECT clean_text(TRIM(m.name)) AS name, |
| | 124 | clean_text(TRIM(s.surname)) AS surname |
| | 125 | FROM male_names m |
| | 126 | CROSS JOIN surnames s |
| | 127 | |
| | 128 | UNION ALL |
| | 129 | |
| | 130 | -- женски |
| | 131 | SELECT clean_text(TRIM(f.name)) AS name, clean_text(TRIM(s.female_surname)) AS surname |
| | 132 | FROM female_names f |
| | 133 | CROSS JOIN female_surnames s |
| | 134 | ) t |
| | 135 | ORDER BY random() |
| | 136 | LIMIT 350000 |
| | 137 | ) |
| | 138 | |
| | 139 | INSERT INTO UserAccount (name, surname, email, phone_number, created_at) |
| | 140 | SELECT |
| | 141 | a.name, |
| | 142 | a.surname, |
| | 143 | |
| | 144 | LOWER(REGEXP_REPLACE(a.name, '[^a-zA-Z0-9]', '', 'g') || '.' || |
| | 145 | REGEXP_REPLACE(a.surname, '[^a-zA-Z0-9]', '', 'g') || |
| | 146 | (a.rn + o.offset_val) || |
| | 147 | '@gmail.com'), |
| | 148 | |
| | 149 | '+3897' |
| | 150 | || (ARRAY['1','2','5','7','8'])[((a.rn + o.offset_val - 1) % 5) + 1] |
| | 151 | || LPAD((a.rn + o.offset_val)::text, 6, '0'), |
| | 152 | |
| | 153 | TIMESTAMP '2017-01-01' + (random() * (NOW() - '2017-01-01')) |
| | 154 | |
| | 155 | FROM all_data AS a |
| | 156 | CROSS JOIN offset_cte AS o; |
| | 157 | |
| | 158 | drop table female_names; |
| | 159 | drop table female_surnames; |
| | 160 | drop table male_names; |
| | 161 | drop table surnames; |
| | 162 | |
| | 163 | --++++++++++ client ++++++++++ |
| | 164 | INSERT INTO Client (user_id) |
| | 165 | SELECT id |
| | 166 | FROM UserAccount |
| | 167 | WHERE id NOT IN (SELECT user_id FROM Client) |
| | 168 | ORDER BY random() |
| | 169 | LIMIT 100000; |
| | 170 | |
| | 171 | --++++++++++ worker ++++++++++ |
| | 172 | WITH random_workers AS ( |
| | 173 | SELECT id, created_at, ROW_NUMBER() OVER (ORDER BY random()) AS rn |
| | 174 | FROM UserAccount |
| | 175 | WHERE id NOT IN (SELECT user_id FROM Client) |
| | 176 | LIMIT 250000 |
| | 177 | ), |
| | 178 | random_locations AS ( |
| | 179 | SELECT id, ROW_NUMBER() OVER (ORDER BY random()) AS rn |
| | 180 | FROM Location |
| | 181 | ), |
| | 182 | location_count AS ( |
| | 183 | SELECT COUNT(*) AS total FROM Location |
| | 184 | ) |
| | 185 | INSERT INTO Worker (user_id, work_mode, service_radius_km, location_id, created_at) |
| | 186 | SELECT |
| | 187 | w.id, |
| | 188 | (ARRAY['HYBRID','ONSITE','REMOTE'])[floor(random() * 3 + 1)::int], |
| | 189 | 15, |
| | 190 | l.id, |
| | 191 | w.created_at |
| | 192 | FROM random_workers w |
| | 193 | CROSS JOIN location_count lc |
| | 194 | JOIN random_locations l ON ((w.rn - 1) % lc.total + 1) = l.rn; |
| | 195 | |
| | 196 | --++++++++++ favourited ++++++++++ |
| | 197 | |
| | 198 | WITH worker_count AS ( |
| | 199 | SELECT COUNT(*) AS cnt FROM Worker |
| | 200 | ), |
| | 201 | client_sample AS ( |
| | 202 | SELECT id, row_number() OVER () AS rn |
| | 203 | FROM Client |
| | 204 | ORDER BY random() |
| | 205 | LIMIT 15000 |
| | 206 | ), |
| | 207 | worker_sample AS ( |
| | 208 | SELECT id, row_number() OVER () AS rn |
| | 209 | FROM Worker |
| | 210 | ORDER BY random() |
| | 211 | LIMIT 15000 |
| | 212 | ) |
| | 213 | INSERT INTO Favourite (worker_id, client_id) |
| | 214 | SELECT |
| | 215 | w.id AS worker_id, |
| | 216 | c.id AS client_id |
| | 217 | FROM client_sample c |
| | 218 | JOIN worker_sample w |
| | 219 | ON w.rn = ((c.rn * 1000003) % 15000) + 1 -- prime number shuffle |
| | 220 | ON CONFLICT DO NOTHING; |
| | 221 | |
| | 222 | --++++++++++ category ++++++++++ |
| | 223 | |
| | 224 | CREATE TABLE category_staging ( |
| | 225 | category_name VARCHAR(100), |
| | 226 | description VARCHAR(500), |
| | 227 | parent_category_name VARCHAR(100) |
| | 228 | ); |
| | 229 | |
| | 230 | INSERT INTO Category (category_name, description, parent_category_id) |
| | 231 | SELECT category_name, description, NULL |
| | 232 | FROM category_staging |
| | 233 | WHERE parent_category_name IS NULL OR parent_category_name = '' |
| | 234 | ON CONFLICT (category_name) DO NOTHING; |
| | 235 | |
| | 236 | INSERT INTO Category (category_name, description, parent_category_id) |
| | 237 | SELECT |
| | 238 | s.category_name, |
| | 239 | s.description, |
| | 240 | p.id |
| | 241 | FROM category_staging s |
| | 242 | JOIN Category p ON p.category_name = s.parent_category_name |
| | 243 | WHERE s.parent_category_name IS NOT NULL AND s.parent_category_name != '' |
| | 244 | ON CONFLICT (category_name) DO NOTHING; |
| | 245 | |
| | 246 | DROP TABLE category_staging; |
| | 247 | |
| | 248 | --++++++++++ worker_category ++++++++++ |
| | 249 | --dodeluvame samo child categories |
| | 250 | INSERT INTO WorkerCategory (worker_id, category_id) |
| | 251 | SELECT worker_id, category_id |
| | 252 | FROM ( |
| | 253 | SELECT |
| | 254 | w.id AS worker_id, |
| | 255 | c.id AS category_id, |
| | 256 | ROW_NUMBER() OVER ( |
| | 257 | PARTITION BY w.id |
| | 258 | ORDER BY random() |
| | 259 | ) AS rn |
| | 260 | FROM Worker w |
| | 261 | JOIN Category c |
| | 262 | ON c.parent_category_id IS NOT NULL |
| | 263 | ) t |
| | 264 | WHERE rn <= 3 |
| | 265 | ON CONFLICT DO NOTHING; |
| | 266 | |
| | 267 | |
| | 268 | -- ++++++++++ badges ++++++++++ |
| | 269 | |
| | 270 | WITH tier_definitions AS ( |
| | 271 | SELECT * FROM (VALUES |
| | 272 | (1, 'Beginner', 'Entry-level worker with basic experience in this category', 0.00, 0.20), |
| | 273 | (2, 'Apprentice', 'Developing worker familiar with standard tasks in this category', 0.20, 0.40), |
| | 274 | (3, 'Skilled', 'Competent worker with consistent positive ratings in this category', 0.40, 0.60), |
| | 275 | (4, 'Advanced', 'Experienced worker handling complex tasks in this category', 0.60, 0.80), |
| | 276 | (5, 'Expert', 'Top-rated professional trusted for high-demand work in this category', 0.80, 1.00) |
| | 277 | ) AS t(tier_level, tier_name, tier_description, min_factor, max_factor) |
| | 278 | ), |
| | 279 | category_price_ranges AS ( |
| | 280 | SELECT * FROM (VALUES |
| | 281 | ('Home Cleaning', 5, 150), |
| | 282 | ('Plumbing', 20, 300), |
| | 283 | ('Electrical', 25, 400), |
| | 284 | ('General Handyman', 10, 250), |
| | 285 | ('Painting', 15, 350), |
| | 286 | ('Outdoor & Lawn', 10, 280), |
| | 287 | ('Moving & Hauling', 15, 250), |
| | 288 | ('Tech & Smart Home', 20, 350), |
| | 289 | ('Organization & Decluttering',10, 220), |
| | 290 | ('Errands & Delivery', 5, 150), |
| | 291 | ('Pet Care', 10, 200), |
| | 292 | ('Childcare & Tutoring', 10, 200), |
| | 293 | ('Cooking & Meal Prep', 10, 300), |
| | 294 | ('Vehicle & Auto', 10, 250), |
| | 295 | ('Administrative & Business', 10, 220), |
| | 296 | ('Remodeling & Renovation', 25, 600), |
| | 297 | ('Event & Party Setup', 10, 300), |
| | 298 | ('Personal Assistance', 10, 220) |
| | 299 | ) AS p(category_name, base_min, base_max) |
| | 300 | ) |
| | 301 | INSERT INTO Badge (badge_name, description, category_id, min_price, max_price, tier_level) |
| | 302 | SELECT |
| | 303 | c.category_name || ' - ' || t.tier_name AS badge_name, |
| | 304 | t.tier_description || ' (Category: ' || c.category_name || ')' AS description, |
| | 305 | c.id AS category_id, |
| | 306 | FLOOR(cpr.base_min + (cpr.base_max - cpr.base_min) * t.min_factor)::INT AS min_price, |
| | 307 | FLOOR(cpr.base_min + (cpr.base_max - cpr.base_min) * t.max_factor)::INT AS max_price, |
| | 308 | t.tier_level |
| | 309 | FROM Category c |
| | 310 | JOIN category_price_ranges cpr ON cpr.category_name = c.category_name |
| | 311 | CROSS JOIN tier_definitions t |
| | 312 | WHERE c.parent_category_id IS NULL -- only parent categories get badges |
| | 313 | ORDER BY c.id, t.tier_level; |
| | 314 | |
| | 315 | --++++++++++ worker_badge ++++++++++ |
| | 316 | -- not valid yet, needs review for tasks to calculate properly |
| | 317 | --initially everyone gets begginer badge in their categories |
| | 318 | INSERT INTO WorkerBadge (worker_id, badge_id, date_acquired,is_active) |
| | 319 | SELECT DISTINCT |
| | 320 | wc.worker_id, |
| | 321 | b.id, |
| | 322 | w.created_at, |
| | 323 | true |
| | 324 | FROM WorkerCategory wc |
| | 325 | JOIN Worker w ON w.id = wc.worker_id |
| | 326 | JOIN Category child ON child.id = wc.category_id |
| | 327 | JOIN Badge b |
| | 328 | ON b.category_id = child.parent_category_id |
| | 329 | AND b.tier_level = 1 |
| | 330 | WHERE child.parent_category_id IS NOT NULL |
| | 331 | |
| | 332 | ON CONFLICT (worker_id, badge_id) DO UPDATE |
| | 333 | SET is_active = TRUE, |
| | 334 | date_acquired = EXCLUDED.date_acquired; |
| | 335 | |
| | 336 | |
| | 337 | --++++++++++ task requests ++++++++++ |
| | 338 | --5milioni taskrequests |
| | 339 | DO $$ |
| | 340 | DECLARE |
| | 341 | v_min_id BIGINT; |
| | 342 | v_max_id BIGINT; |
| | 343 | v_batch_size INT := 5000; -- clients per iteration |
| | 344 | v_rows_per_cli INT := 50; -- task rows per client |
| | 345 | v_offset BIGINT := 0; |
| | 346 | v_batch_min BIGINT; |
| | 347 | v_batch_max BIGINT; |
| | 348 | v_rows_inserted BIGINT := 0; |
| | 349 | BEGIN |
| | 350 | SELECT MIN(id), MAX(id) |
| | 351 | INTO v_min_id, v_max_id |
| | 352 | FROM Client; |
| | 353 | |
| | 354 | LOOP |
| | 355 | v_batch_min := v_min_id + v_offset; |
| | 356 | v_batch_max := v_min_id + v_offset + v_batch_size - 1; |
| | 357 | |
| | 358 | EXIT WHEN v_batch_min > v_max_id; |
| | 359 | |
| | 360 | INSERT INTO TaskRequest |
| | 361 | (client_id, description, work_mode, status, |
| | 362 | category_id, location_id, created_at) |
| | 363 | SELECT |
| | 364 | c.id, |
| | 365 | td.task_description, |
| | 366 | (ARRAY['HYBRID','ONSITE','REMOTE'])[floor(random()*3+1)::int], |
| | 367 | 'OPEN', |
| | 368 | cat.id, |
| | 369 | l.id, |
| | 370 | u.created_at + INTERVAL '1 minute' |
| | 371 | + (random() * (TIMESTAMP '2026-01-01' - u.created_at - INTERVAL '1 minute')) |
| | 372 | FROM Client c |
| | 373 | JOIN UserAccount u ON c.user_id = u.id |
| | 374 | -- only clients in this slice |
| | 375 | JOIN generate_series(1, v_rows_per_cli) gs(n) ON TRUE |
| | 376 | JOIN LATERAL ( |
| | 377 | SELECT task_description, category_name |
| | 378 | FROM temp_task_desc |
| | 379 | ORDER BY RANDOM()* gs.n |
| | 380 | LIMIT 1 |
| | 381 | ) td ON TRUE |
| | 382 | JOIN Category cat ON cat.category_name = td.category_name |
| | 383 | JOIN LATERAL ( |
| | 384 | SELECT id |
| | 385 | FROM Location |
| | 386 | ORDER BY RANDOM()* gs.n |
| | 387 | LIMIT 1 |
| | 388 | ) l ON true |
| | 389 | WHERE c.id BETWEEN v_batch_min AND v_batch_max; |
| | 390 | |
| | 391 | GET DIAGNOSTICS v_rows_inserted = ROW_COUNT; |
| | 392 | RAISE NOTICE 'Batch offset=% | clients %-% | rows inserted=%', |
| | 393 | v_offset, v_batch_min, v_batch_max, v_rows_inserted; |
| | 394 | |
| | 395 | v_offset := v_offset + v_batch_size; |
| | 396 | |
| | 397 | COMMIT; |
| | 398 | END LOOP; |
| | 399 | |
| | 400 | RAISE NOTICE 'Done. All batches complete.'; |
| | 401 | END; |
| | 402 | $$ LANGUAGE plpgsql; |
| | 403 | |
| | 404 | |
| | 405 | -- ================================================================ |
| | 406 | --helper procedura za badges |
| | 407 | |
| | 408 | CREATE OR REPLACE PROCEDURE recalc_worker_badges(p_as_of TIMESTAMP, p_from TIMESTAMP) |
| | 409 | LANGUAGE plpgsql |
| | 410 | AS $$ |
| | 411 | BEGIN |
| | 412 | |
| | 413 | -- Само работници со нови reviews во овој batch |
| | 414 | DROP TABLE IF EXISTS tmp_affected_workers; |
| | 415 | CREATE TEMP TABLE tmp_affected_workers AS |
| | 416 | SELECT DISTINCT o.worker_id |
| | 417 | FROM Review r |
| | 418 | JOIN Task t ON t.id = r.task_id |
| | 419 | JOIN Offer o ON o.id = t.offer_id |
| | 420 | WHERE r.created_at > p_from |
| | 421 | AND r.created_at <= p_as_of; |
| | 422 | |
| | 423 | CREATE INDEX ON tmp_affected_workers (worker_id); |
| | 424 | |
| | 425 | IF NOT EXISTS (SELECT 1 FROM tmp_affected_workers) THEN |
| | 426 | RETURN; |
| | 427 | END IF; |
| | 428 | |
| | 429 | -- ОПТ: GROUP BY оди директно — ист резултат, нема промена во логиката. |
| | 430 | -- Не скенираме повеќе Review отколку што треба бидејќи |
| | 431 | -- tmp_affected_workers е мал сет. |
| | 432 | DROP TABLE IF EXISTS tmp_avg_ratings; |
| | 433 | CREATE TEMP TABLE tmp_avg_ratings AS |
| | 434 | SELECT |
| | 435 | o.worker_id, |
| | 436 | parent.id AS parent_category_id, |
| | 437 | AVG(r.rating) AS avg_rating, |
| | 438 | COUNT(*) AS review_count |
| | 439 | FROM Review r |
| | 440 | JOIN Task t ON t.id = r.task_id |
| | 441 | JOIN Offer o ON o.id = t.offer_id |
| | 442 | JOIN TaskRequest tr ON tr.id = o.task_request_id |
| | 443 | JOIN Category child ON child.id = tr.category_id |
| | 444 | JOIN Category parent ON parent.id = child.parent_category_id |
| | 445 | WHERE o.worker_id IN (SELECT worker_id FROM tmp_affected_workers) |
| | 446 | AND r.created_at <= p_as_of |
| | 447 | GROUP BY o.worker_id, parent.id; |
| | 448 | |
| | 449 | CREATE INDEX ON tmp_avg_ratings (worker_id, parent_category_id); |
| | 450 | |
| | 451 | DROP TABLE IF EXISTS tmp_new_badges; |
| | 452 | CREATE TEMP TABLE tmp_new_badges AS |
| | 453 | SELECT DISTINCT ON (ar.worker_id, ar.parent_category_id) |
| | 454 | ar.worker_id, |
| | 455 | ar.parent_category_id, |
| | 456 | b.id AS badge_id |
| | 457 | FROM tmp_avg_ratings ar |
| | 458 | JOIN Badge b ON b.category_id = ar.parent_category_id |
| | 459 | ORDER BY |
| | 460 | ar.worker_id, |
| | 461 | ar.parent_category_id, |
| | 462 | ABS(b.tier_level - ROUND((ar.avg_rating - 1) / 4.0 * 4 + 1)); |
| | 463 | |
| | 464 | CREATE INDEX ON tmp_new_badges (worker_id); |
| | 465 | CREATE INDEX ON tmp_new_badges (badge_id); |
| | 466 | |
| | 467 | -- Деактивирај стари badges |
| | 468 | UPDATE WorkerBadge wb |
| | 469 | SET is_active = FALSE |
| | 470 | WHERE wb.is_active = TRUE |
| | 471 | AND wb.worker_id IN (SELECT worker_id FROM tmp_affected_workers) |
| | 472 | AND NOT EXISTS ( |
| | 473 | SELECT 1 FROM tmp_new_badges nb |
| | 474 | WHERE nb.worker_id = wb.worker_id |
| | 475 | AND nb.badge_id = wb.badge_id |
| | 476 | ); |
| | 477 | |
| | 478 | -- Вметни/ажурирај точни badges |
| | 479 | INSERT INTO WorkerBadge (worker_id, badge_id, date_acquired, is_active) |
| | 480 | SELECT worker_id, badge_id, p_as_of::DATE, TRUE |
| | 481 | FROM tmp_new_badges |
| | 482 | ON CONFLICT (worker_id, badge_id) DO UPDATE |
| | 483 | SET is_active = TRUE, |
| | 484 | date_acquired = EXCLUDED.date_acquired; |
| | 485 | |
| | 486 | END; |
| | 487 | $$; |
| | 488 | |
| | 489 | |
| | 490 | ------------------------------------------------------------------- |
| | 491 | |
| | 492 | DO $$ |
| | 493 | DECLARE |
| | 494 | v_batch_start TIMESTAMP; |
| | 495 | v_batch_end TIMESTAMP; |
| | 496 | v_origin TIMESTAMP := TIMESTAMP '2023-02-01'; |
| | 497 | v_finish TIMESTAMP := TIMESTAMP '2023-03-01'; -- прошири колку сакаш, најполовно на 3-4 месеци, |
| | 498 | v_cnt BIGINT; |
| | 499 | v_tables_ready BOOLEAN := FALSE; |
| | 500 | BEGIN |
| | 501 | v_batch_start := v_origin; |
| | 502 | |
| | 503 | LOOP |
| | 504 | v_batch_end := v_batch_start + INTERVAL '1 month'; |
| | 505 | EXIT WHEN v_batch_start >= v_finish; |
| | 506 | |
| | 507 | RAISE NOTICE '=== BATCH % → % ===', v_batch_start, v_batch_end; |
| | 508 | |
| | 509 | -- ---------------------------------------------------------------- |
| | 510 | -- ОПТ 1: Создај temp табели САМО еднаш, потоа TRUNCATE |
| | 511 | -- Избегнува каталошки lock + index rebuild на секоја итерација |
| | 512 | -- ---------------------------------------------------------------- |
| | 513 | IF NOT v_tables_ready THEN |
| | 514 | CREATE TEMP TABLE tmp_worker_sample ( |
| | 515 | category_id INT, |
| | 516 | worker_id INT, |
| | 517 | created_at TIMESTAMP |
| | 518 | ) ON COMMIT PRESERVE ROWS; |
| | 519 | CREATE INDEX ON tmp_worker_sample (category_id, worker_id); |
| | 520 | |
| | 521 | CREATE TEMP TABLE tmp_offers_to_make ( |
| | 522 | task_request_id INT, |
| | 523 | worker_id INT, |
| | 524 | worker_created TIMESTAMP, |
| | 525 | tr_created TIMESTAMP, |
| | 526 | client_created TIMESTAMP, |
| | 527 | initiated_by TEXT, |
| | 528 | price INT |
| | 529 | ) ON COMMIT PRESERVE ROWS; |
| | 530 | CREATE INDEX ON tmp_offers_to_make (task_request_id); |
| | 531 | |
| | 532 | CREATE TEMP TABLE tmp_batch_offers ( |
| | 533 | id INT, |
| | 534 | task_request_id INT, |
| | 535 | created_at TIMESTAMP |
| | 536 | ) ON COMMIT PRESERVE ROWS; |
| | 537 | CREATE INDEX ON tmp_batch_offers (task_request_id); |
| | 538 | CREATE INDEX ON tmp_batch_offers (id); |
| | 539 | |
| | 540 | CREATE TEMP TABLE tmp_batch_tasks ( |
| | 541 | task_id INT, |
| | 542 | updated_at TIMESTAMP, |
| | 543 | worker_id INT, |
| | 544 | worker_user_id INT, |
| | 545 | client_user_id INT |
| | 546 | ) ON COMMIT PRESERVE ROWS; |
| | 547 | CREATE INDEX ON tmp_batch_tasks (task_id); |
| | 548 | |
| | 549 | v_tables_ready := TRUE; |
| | 550 | ELSE |
| | 551 | TRUNCATE tmp_worker_sample; |
| | 552 | TRUNCATE tmp_offers_to_make; |
| | 553 | TRUNCATE tmp_batch_offers; |
| | 554 | TRUNCATE tmp_batch_tasks; |
| | 555 | END IF; |
| | 556 | |
| | 557 | -- ---------------------------------------------------------------- |
| | 558 | -- ОПТ 2: Земи max 20 работници по категорија ЕДНАШ |
| | 559 | -- Истата логика, но RANDOM() се вика еднаш, не за секој TaskRequest |
| | 560 | -- ---------------------------------------------------------------- |
| | 561 | INSERT INTO tmp_worker_sample (category_id, worker_id, created_at) |
| | 562 | WITH numbered AS ( |
| | 563 | SELECT |
| | 564 | wc.category_id, |
| | 565 | w.id AS worker_id, |
| | 566 | w.created_at, |
| | 567 | ROW_NUMBER() OVER ( |
| | 568 | PARTITION BY wc.category_id |
| | 569 | ORDER BY RANDOM() |
| | 570 | ) AS rn |
| | 571 | FROM Worker w |
| | 572 | JOIN WorkerCategory wc ON wc.worker_id = w.id |
| | 573 | WHERE w.created_at < v_batch_end |
| | 574 | ) |
| | 575 | SELECT category_id, worker_id, created_at |
| | 576 | FROM numbered |
| | 577 | WHERE rn <= 20; |
| | 578 | |
| | 579 | -- ---------------------------------------------------------------- |
| | 580 | -- ОПТ 3: Генерирај парови (TaskRequest, Worker) ЕДНАШ во temp табела |
| | 581 | -- Наместо да се прави lateral join двапати (worker + client offers) |
| | 582 | -- со ORDER BY RANDOM() на секој ред — ова е главниот bottleneck. |
| | 583 | -- Истиот број на offers (2–5 по TaskRequest), иста рандомност. |
| | 584 | -- ---------------------------------------------------------------- |
| | 585 | |
| | 586 | INSERT INTO tmp_offers_to_make |
| | 587 | (task_request_id, worker_id, worker_created, tr_created, client_created, initiated_by, price) |
| | 588 | SELECT |
| | 589 | tr.id, |
| | 590 | w.worker_id, |
| | 591 | w.created_at, |
| | 592 | tr.created_at, |
| | 593 | u.created_at, |
| | 594 | 'WORKER', |
| | 595 | COALESCE( |
| | 596 | FLOOR(RANDOM() * (b.max_price - b.min_price + 1) + b.min_price), |
| | 597 | FLOOR(RANDOM() * (300 - 100 + 1) + 100) |
| | 598 | )::INT |
| | 599 | FROM TaskRequest tr |
| | 600 | JOIN Client c ON c.id = tr.client_id |
| | 601 | JOIN UserAccount u ON u.id = c.user_id |
| | 602 | JOIN LATERAL ( |
| | 603 | |
| | 604 | -- ОПТ: tmp_worker_sample е веќе случаен → само земи 2–5 |
| | 605 | SELECT bw.worker_id, bw.created_at |
| | 606 | FROM tmp_worker_sample bw |
| | 607 | WHERE bw.category_id = tr.category_id |
| | 608 | LIMIT (FLOOR(RANDOM()*4)+2) -- нема ORDER BY RANDOM() овде! |
| | 609 | ) w ON TRUE |
| | 610 | JOIN Category child ON child.id = tr.category_id |
| | 611 | JOIN Category parent ON parent.id = child.parent_category_id |
| | 612 | LEFT JOIN WorkerBadge wb |
| | 613 | ON wb.worker_id = w.worker_id |
| | 614 | AND wb.is_active = TRUE |
| | 615 | LEFT JOIN Badge b |
| | 616 | ON b.id = wb.badge_id |
| | 617 | AND b.category_id = parent.id |
| | 618 | WHERE tr.status = 'OPEN' |
| | 619 | AND tr.created_at >= v_batch_start |
| | 620 | AND tr.created_at < v_batch_end; |
| | 621 | |
| | 622 | -- ------------------------------------------------ |
| | 623 | -- 1. WORKER-INITIATED OFFERS |
| | 624 | -- ------------------------------------------------ |
| | 625 | INSERT INTO Offer |
| | 626 | (worker_id, task_request_id, price, offer_status, initiated_by, created_at) |
| | 627 | SELECT |
| | 628 | worker_id, |
| | 629 | task_request_id, |
| | 630 | price, |
| | 631 | 'PENDING', |
| | 632 | 'WORKER', |
| | 633 | GREATEST( |
| | 634 | tr_created + INTERVAL '1 minute', |
| | 635 | worker_created + INTERVAL '1 hour', |
| | 636 | client_created + INTERVAL '1 minute', |
| | 637 | LEAST( |
| | 638 | tr_created + (RANDOM() * INTERVAL '12 hours'), |
| | 639 | v_batch_end - INTERVAL '1 hour' |
| | 640 | ) |
| | 641 | ) |
| | 642 | FROM tmp_offers_to_make; |
| | 643 | |
| | 644 | GET DIAGNOSTICS v_cnt = ROW_COUNT; |
| | 645 | RAISE NOTICE ' Worker offers inserted: %', v_cnt; |
| | 646 | |
| | 647 | -- ------------------------------------------------ |
| | 648 | -- 2. CLIENT-INITIATED OFFERS (~50% од истите парови) |
| | 649 | -- Ист ефект: RANDOM() < 0.5 филтер |
| | 650 | -- ------------------------------------------------ |
| | 651 | INSERT INTO Offer |
| | 652 | (worker_id, task_request_id, price, offer_status, initiated_by, created_at) |
| | 653 | SELECT |
| | 654 | worker_id, |
| | 655 | task_request_id, |
| | 656 | FLOOR(RANDOM() * (300 - 100 + 1) + 100)::INT, |
| | 657 | 'PENDING', |
| | 658 | 'CLIENT', |
| | 659 | GREATEST( |
| | 660 | tr_created + INTERVAL '1 minute', |
| | 661 | worker_created + INTERVAL '1 hour', |
| | 662 | client_created + INTERVAL '1 minute', |
| | 663 | LEAST( |
| | 664 | tr_created + (RANDOM() * INTERVAL '12 hours'), |
| | 665 | v_batch_end - INTERVAL '1 hour' |
| | 666 | ) |
| | 667 | ) |
| | 668 | FROM tmp_offers_to_make |
| | 669 | WHERE RANDOM() < 0.5; |
| | 670 | |
| | 671 | GET DIAGNOSTICS v_cnt = ROW_COUNT; |
| | 672 | RAISE NOTICE ' Client offers inserted: %', v_cnt; |
| | 673 | |
| | 674 | -- ------------------------------------------------ |
| | 675 | -- 3. ACCEPT ЕДЕН OFFER PО TASK, REJECT ОСТАНАТИТЕ |
| | 676 | -- ОПТ: Еден UPDATE со CASE наместо два одделни UPDATE |
| | 677 | -- Ист резултат: 1 ACCEPTED, останатите REJECTED по TaskRequest |
| | 678 | -- ------------------------------------------------ |
| | 679 | INSERT INTO tmp_batch_offers (id, task_request_id, created_at) |
| | 680 | SELECT o.id, o.task_request_id, o.created_at |
| | 681 | FROM Offer o |
| | 682 | WHERE o.offer_status = 'PENDING' |
| | 683 | AND o.created_at >= v_batch_start |
| | 684 | AND o.created_at < v_batch_end; |
| | 685 | |
| | 686 | WITH last_offer AS ( |
| | 687 | SELECT task_request_id, |
| | 688 | MAX(created_at) AS last_offer_time |
| | 689 | FROM tmp_batch_offers |
| | 690 | GROUP BY task_request_id |
| | 691 | ), |
| | 692 | ranked AS ( |
| | 693 | SELECT o.id, |
| | 694 | o.task_request_id, |
| | 695 | o.created_at AS offer_created_at, |
| | 696 | lo.last_offer_time, |
| | 697 | ROW_NUMBER() OVER ( |
| | 698 | PARTITION BY o.task_request_id |
| | 699 | ORDER BY RANDOM() |
| | 700 | ) AS rn |
| | 701 | FROM tmp_batch_offers o |
| | 702 | JOIN last_offer lo ON lo.task_request_id = o.task_request_id |
| | 703 | ) |
| | 704 | -- ОПТ: Еден UPDATE со CASE → половина I/O наспроти два UPDATE |
| | 705 | UPDATE Offer o |
| | 706 | SET |
| | 707 | offer_status = CASE WHEN r.rn = 1 THEN 'ACCEPTED' ELSE 'REJECTED' END, |
| | 708 | updated_at = CASE |
| | 709 | WHEN r.rn = 1 THEN |
| | 710 | GREATEST( |
| | 711 | r.offer_created_at + INTERVAL '1 minute', |
| | 712 | LEAST( |
| | 713 | GREATEST(r.offer_created_at, r.last_offer_time) |
| | 714 | + (RANDOM() * INTERVAL '1 hour'), |
| | 715 | v_batch_end - INTERVAL '1 hour' |
| | 716 | ) |
| | 717 | ) |
| | 718 | ELSE |
| | 719 | GREATEST(o.created_at, |
| | 720 | GREATEST( |
| | 721 | r.offer_created_at + INTERVAL '1 minute', |
| | 722 | LEAST( |
| | 723 | GREATEST(r.offer_created_at, r.last_offer_time) |
| | 724 | + (RANDOM() * INTERVAL '1 hour'), |
| | 725 | v_batch_end - INTERVAL '1 hour' |
| | 726 | ) |
| | 727 | ) |
| | 728 | ) |
| | 729 | END |
| | 730 | FROM ranked r |
| | 731 | WHERE o.id = r.id; |
| | 732 | |
| | 733 | -- ------------------------------------------------ |
| | 734 | -- 4. CLOSE ACCEPTED TASK REQUESTS |
| | 735 | -- ------------------------------------------------ |
| | 736 | WITH accepted_tasks AS ( |
| | 737 | SELECT DISTINCT task_request_id, updated_at |
| | 738 | FROM Offer |
| | 739 | WHERE offer_status = 'ACCEPTED' |
| | 740 | AND created_at >= v_batch_start |
| | 741 | AND created_at < v_batch_end |
| | 742 | ) |
| | 743 | UPDATE TaskRequest tr |
| | 744 | SET status = 'CLOSED', |
| | 745 | updated_at = GREATEST(at2.updated_at, tr.created_at + INTERVAL '1 minute') |
| | 746 | FROM accepted_tasks at2 |
| | 747 | WHERE tr.id = at2.task_request_id |
| | 748 | AND tr.status = 'OPEN'; |
| | 749 | |
| | 750 | -- ------------------------------------------------ |
| | 751 | -- 5. CREATE TASKS |
| | 752 | -- ------------------------------------------------ |
| | 753 | INSERT INTO Task (offer_id, status, created_at) |
| | 754 | SELECT o.id, |
| | 755 | 'ACTIVE', |
| | 756 | GREATEST( |
| | 757 | o.updated_at, |
| | 758 | o.created_at + INTERVAL '1 minute', |
| | 759 | u.created_at + INTERVAL '1 minute' |
| | 760 | ) |
| | 761 | FROM Offer o |
| | 762 | JOIN TaskRequest tr ON tr.id = o.task_request_id |
| | 763 | JOIN Client c ON c.id = tr.client_id |
| | 764 | JOIN UserAccount u ON u.id = c.user_id |
| | 765 | WHERE o.offer_status = 'ACCEPTED' |
| | 766 | AND o.created_at >= v_batch_start |
| | 767 | AND o.created_at < v_batch_end |
| | 768 | AND NOT EXISTS ( |
| | 769 | SELECT 1 FROM Task t WHERE t.offer_id = o.id |
| | 770 | ); |
| | 771 | |
| | 772 | GET DIAGNOSTICS v_cnt = ROW_COUNT; |
| | 773 | RAISE NOTICE ' Tasks created: %', v_cnt; |
| | 774 | |
| | 775 | -- ------------------------------------------------ |
| | 776 | -- 6. COMPLETE TASKS |
| | 777 | -- ------------------------------------------------ |
| | 778 | UPDATE Task t |
| | 779 | SET status = 'COMPLETED', |
| | 780 | updated_at = GREATEST( |
| | 781 | t.created_at + INTERVAL '1 hour', |
| | 782 | LEAST( |
| | 783 | t.created_at + (RANDOM() * INTERVAL '2 days'), |
| | 784 | v_batch_end - INTERVAL '1 hour' |
| | 785 | ) |
| | 786 | ) |
| | 787 | WHERE t.status = 'ACTIVE' |
| | 788 | AND t.created_at >= v_batch_start |
| | 789 | AND t.created_at < v_batch_end; |
| | 790 | |
| | 791 | -- ------------------------------------------------ |
| | 792 | -- 7. REVIEWS |
| | 793 | -- ------------------------------------------------ |
| | 794 | INSERT INTO tmp_batch_tasks |
| | 795 | (task_id, updated_at, worker_id, worker_user_id, client_user_id) |
| | 796 | SELECT |
| | 797 | t.id, |
| | 798 | t.updated_at, |
| | 799 | o.worker_id, |
| | 800 | w.user_id, |
| | 801 | c.user_id |
| | 802 | FROM Task t |
| | 803 | JOIN Offer o ON o.id = t.offer_id |
| | 804 | JOIN Worker w ON w.id = o.worker_id |
| | 805 | JOIN TaskRequest tr ON tr.id = o.task_request_id |
| | 806 | JOIN Client c ON c.id = tr.client_id |
| | 807 | WHERE t.status = 'COMPLETED' |
| | 808 | AND t.created_at >= v_batch_start |
| | 809 | AND t.created_at < v_batch_end; |
| | 810 | |
| | 811 | -- Client → Worker |
| | 812 | INSERT INTO Review |
| | 813 | (task_id, reviewer_id, reviewed_id, rating, created_at) |
| | 814 | SELECT |
| | 815 | bt.task_id, |
| | 816 | bt.client_user_id, |
| | 817 | bt.worker_user_id, |
| | 818 | LEAST(5, GREATEST(1, |
| | 819 | ROUND((random()+random()+random()+random())/4.0*4+1)::INT |
| | 820 | )), |
| | 821 | GREATEST( |
| | 822 | bt.updated_at + INTERVAL '1 minute', |
| | 823 | LEAST( |
| | 824 | bt.updated_at + (RANDOM() * INTERVAL '12 hours'), |
| | 825 | v_batch_end - INTERVAL '1 hour' |
| | 826 | ) |
| | 827 | ) |
| | 828 | FROM tmp_batch_tasks bt |
| | 829 | LEFT JOIN Review r ON r.task_id = bt.task_id |
| | 830 | AND r.reviewer_id = bt.client_user_id |
| | 831 | WHERE r.id IS NULL; |
| | 832 | |
| | 833 | GET DIAGNOSTICS v_cnt = ROW_COUNT; |
| | 834 | RAISE NOTICE ' Reviews client->worker: %', v_cnt; |
| | 835 | |
| | 836 | -- Worker → Client |
| | 837 | INSERT INTO Review |
| | 838 | (task_id, reviewer_id, reviewed_id, rating, created_at) |
| | 839 | SELECT |
| | 840 | bt.task_id, |
| | 841 | bt.worker_user_id, |
| | 842 | bt.client_user_id, |
| | 843 | LEAST(5, GREATEST(1, |
| | 844 | ROUND((random()+random()+random())/3.0*4+1)::INT |
| | 845 | )), |
| | 846 | GREATEST( |
| | 847 | bt.updated_at + INTERVAL '1 minute', |
| | 848 | LEAST( |
| | 849 | bt.updated_at + (RANDOM() * INTERVAL '12 hours'), |
| | 850 | v_batch_end - INTERVAL '1 hour' |
| | 851 | ) |
| | 852 | ) |
| | 853 | FROM tmp_batch_tasks bt |
| | 854 | LEFT JOIN Review r ON r.task_id = bt.task_id |
| | 855 | AND r.reviewer_id = bt.worker_user_id |
| | 856 | WHERE r.id IS NULL; |
| | 857 | |
| | 858 | GET DIAGNOSTICS v_cnt = ROW_COUNT; |
| | 859 | RAISE NOTICE ' Reviews worker->client: %', v_cnt; |
| | 860 | |
| | 861 | -- ------------------------------------------------ |
| | 862 | -- 8. BADGE RECALC |
| | 863 | -- ------------------------------------------------ |
| | 864 | CALL recalc_worker_badges(v_batch_end, v_batch_start); |
| | 865 | RAISE NOTICE ' Badges updated.'; |
| | 866 | |
| | 867 | COMMIT; |
| | 868 | v_batch_start := v_batch_end; |
| | 869 | END LOOP; |
| | 870 | |
| | 871 | RAISE NOTICE 'All batches done.'; |
| | 872 | END; |
| | 873 | $$ LANGUAGE plpgsql; |
| | 874 | |
| | 875 | |
| | 876 | |
| | 877 | }}} |