wiki:DmlScript

Version 6 (modified by 231141, 3 weeks ago) ( diff )

--

Home Database Creation
--++++++++++ helpers ++++++++++

CREATE TABLE female_names (
    name VARCHAR(100)
);
CREATE TABLE male_names (
    name VARCHAR(100)
);

CREATE TABLE male_surnames (
    male_surname VARCHAR(100)
);

CREATE TABLE female_surnames (
    female_surname VARCHAR(100)
);
CREATE TABLE cities (
    city VARCHAR(25),
    latitude DECIMAL,
    longitude DECIMAL
);
--+++++++++++ location ++++++++++
INSERT INTO cities (city, latitude, longitude) VALUES
('Skopje', 41.9981, 21.4254),
('Bitola', 41.0314, 21.3347),
('Kumanovo', 42.1322, 21.7144),
('Prilep', 41.3451, 21.5550),
('Tetovo', 42.0097, 20.9716),
('Veles', 41.7156, 21.7756),
('Stip', 41.7458, 22.1958),
('Ohrid', 41.1231, 20.8016),
('Gostivar', 41.8000, 20.9167),
('Strumica', 41.4378, 22.6427),
('Kavadarci', 41.4331, 22.0119),
('Kocani', 41.9164, 22.4128),
('Kicevo', 41.5136, 20.9586),
('Struga', 41.1770, 20.6770),
('Radovis', 41.6383, 22.4647),
('Gevgelija', 41.1417, 22.5014),
('Debar', 41.5244, 20.5242),
('Kriva Palanka', 42.2009, 22.3317),
('Sveti Nikole', 41.8696, 21.9527),
('Negotino', 41.4839, 22.0892),
('Resen', 41.0889, 21.0122),
('Delcevo', 41.9672, 22.7694),
('Vinica', 41.8828, 22.5092),
('Berovo', 41.7031, 22.8578),
('Kratovo', 42.0784, 22.1800),
('Krusevo', 41.3689, 21.2489),
('Bogdanci', 41.2031, 22.5756),
('Demir Hisar', 41.2214, 21.2031),
('Demir Kapija', 41.4053, 22.2467),
('Pehcevo', 41.7633, 22.8892),
('Makedonska Kamenica', 42.0208, 22.5876),
('Probistip', 41.9985, 22.1786),
('Valandovo', 41.3174, 22.5619)
ON CONFLICT DO NOTHING;


INSERT INTO Location (city, latitude, longitude)
SELECT
    'Skopje',

    ROUND((41.97 + random() * 0.06)::numeric, 6),
    ROUND((21.38 + random() * 0.08)::numeric, 6)

FROM generate_series(1, 4000)
ON CONFLICT DO NOTHING;

INSERT INTO Location (city, latitude, longitude)
SELECT
    c.city,

    ROUND((c.latitude - 0.03 + random() * 0.06)::numeric, 6),
    ROUND((c.longitude - 0.05 + random() * 0.10)::numeric, 6)

FROM cities c
CROSS JOIN generate_series(1, 200)
WHERE c.city <> 'Skopje'
ON CONFLICT DO NOTHING;


-- ++++++++++ userAccount ++++++++++

CREATE OR REPLACE FUNCTION clean_text(input TEXT)
RETURNS TEXT AS $$
BEGIN
    RETURN TRANSLATE(input,
        'абвгдѓежзѕијклљмнњопрстќуфхцчџшАБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ',  -- Cyrillic
        'abvgdgezzsijkljmnjoprstkufhccdsABVGDGEZZSIJKLJMJNJOPRSTKUFHCCDS'   -- Latin
    );
END;
$$ LANGUAGE plpgsql IMMUTABLE;

WITH offset_cte AS (
    SELECT COUNT(*) AS offset_val FROM UserAccount
),
all_data AS (
    SELECT
        name,
        surname,
        ROW_NUMBER() OVER () AS rn
    FROM (
        -- машки
        SELECT clean_text(TRIM(m.name)) AS name,
               clean_text(TRIM(s.surname)) AS surname
        FROM male_names m
        CROSS JOIN surnames s

        UNION ALL

        -- женски
        SELECT clean_text(TRIM(f.name)) AS name, clean_text(TRIM(s.female_surname)) AS surname
        FROM female_names f
        CROSS JOIN female_surnames s
    ) t
    ORDER BY random()
    LIMIT 350000
)

INSERT INTO UserAccount (name, surname, email, phone_number, created_at)
SELECT
    a.name,
    a.surname,

     LOWER(REGEXP_REPLACE(a.name, '[^a-zA-Z0-9]', '', 'g') || '.' ||
        REGEXP_REPLACE(a.surname, '[^a-zA-Z0-9]', '', 'g') ||
        (a.rn + o.offset_val) ||
        '@gmail.com'),

     '+3897'
         || (ARRAY['1','2','5','7','8'])[((a.rn + o.offset_val - 1) % 5) + 1]
         || LPAD((a.rn + o.offset_val)::text, 6, '0'),

    TIMESTAMP '2017-01-01' + (random() * (NOW() - '2017-01-01'))

FROM all_data AS a
CROSS JOIN offset_cte AS o;

drop table female_names;
drop table female_surnames;
drop table male_names;
drop table surnames;

--++++++++++ client ++++++++++
INSERT INTO Client (user_id)
SELECT id
FROM UserAccount
WHERE id NOT IN (SELECT user_id FROM Client)
ORDER BY random()
LIMIT 100000;

--++++++++++ worker ++++++++++
WITH random_workers AS (
    SELECT id, created_at, ROW_NUMBER() OVER (ORDER BY random()) AS rn
    FROM UserAccount
    WHERE id NOT IN (SELECT user_id FROM Client)
    LIMIT 250000
),
random_locations AS (
    SELECT id, ROW_NUMBER() OVER (ORDER BY random()) AS rn
    FROM Location
),
location_count AS (
    SELECT COUNT(*) AS total FROM Location
)
INSERT INTO Worker (user_id, work_mode, service_radius_km, location_id, created_at)
SELECT
    w.id,
    (ARRAY['HYBRID','ONSITE','REMOTE'])[floor(random() * 3 + 1)::int],
    15,
    l.id,
    w.created_at
FROM random_workers w
CROSS JOIN location_count lc
JOIN random_locations l ON ((w.rn - 1) % lc.total + 1) = l.rn;

--++++++++++ favourited ++++++++++

WITH worker_count AS (
    SELECT COUNT(*) AS cnt FROM Worker
),
client_sample AS (
    SELECT id, row_number() OVER () AS rn
    FROM Client
    ORDER BY random()
    LIMIT 15000
),
worker_sample AS (
    SELECT id, row_number() OVER () AS rn
    FROM Worker
    ORDER BY random()
    LIMIT 15000
)
INSERT INTO Favourite (worker_id, client_id)
SELECT
    w.id AS worker_id,
    c.id AS client_id
FROM client_sample c
JOIN worker_sample w
    ON w.rn = ((c.rn * 1000003) % 15000) + 1  -- prime number shuffle
ON CONFLICT DO NOTHING;

--++++++++++ category ++++++++++

CREATE TABLE category_staging (
    category_name        VARCHAR(100),
    description          VARCHAR(500),
    parent_category_name VARCHAR(100)
);

INSERT INTO Category (category_name, description, parent_category_id)
SELECT category_name, description, NULL
FROM category_staging
WHERE parent_category_name IS NULL OR parent_category_name = ''
ON CONFLICT (category_name) DO NOTHING;

INSERT INTO Category (category_name, description, parent_category_id)
SELECT
    s.category_name,
    s.description,
    p.id
FROM category_staging s
JOIN Category p ON p.category_name = s.parent_category_name
WHERE s.parent_category_name IS NOT NULL AND s.parent_category_name != ''
ON CONFLICT (category_name) DO NOTHING;

DROP TABLE category_staging;

--++++++++++ worker_category ++++++++++
--dodeluvame samo child categories
INSERT INTO WorkerCategory (worker_id, category_id)
SELECT worker_id, category_id
FROM (
    SELECT
        w.id AS worker_id,
        c.id AS category_id,
        ROW_NUMBER() OVER (
            PARTITION BY w.id
            ORDER BY random()
        ) AS rn
    FROM Worker w
    JOIN Category c
        ON c.parent_category_id IS NOT NULL
) t
WHERE rn <= 3
ON CONFLICT DO NOTHING;


-- ++++++++++ badges ++++++++++

WITH tier_definitions AS (
    SELECT * FROM (VALUES
        (1, 'Beginner',   'Entry-level worker with basic experience in this category',              0.00, 0.20),
        (2, 'Apprentice', 'Developing worker familiar with standard tasks in this category',        0.20, 0.40),
        (3, 'Skilled',    'Competent worker with consistent positive ratings in this category',     0.40, 0.60),
        (4, 'Advanced',   'Experienced worker handling complex tasks in this category',             0.60, 0.80),
        (5, 'Expert',     'Top-rated professional trusted for high-demand work in this category',   0.80, 1.00)
    ) AS t(tier_level, tier_name, tier_description, min_factor, max_factor)
),
category_price_ranges AS (
    SELECT * FROM (VALUES
        ('Home Cleaning',             5,   150),
        ('Plumbing',                  20,  300),
        ('Electrical',                25,  400),
        ('General Handyman',          10,  250),
        ('Painting',                  15,  350),
        ('Outdoor & Lawn',            10,  280),
        ('Moving & Hauling',          15,  250),
        ('Tech & Smart Home',         20,  350),
        ('Organization & Decluttering',10, 220),
        ('Errands & Delivery',         5,  150),
        ('Pet Care',                  10,  200),
        ('Childcare & Tutoring',      10,  200),
        ('Cooking & Meal Prep',       10,  300),
        ('Vehicle & Auto',            10,  250),
        ('Administrative & Business', 10,  220),
        ('Remodeling & Renovation',   25,  600),
        ('Event & Party Setup',       10,  300),
        ('Personal Assistance',       10,  220)
    ) AS p(category_name, base_min, base_max)
)
INSERT INTO Badge (badge_name, description, category_id, min_price, max_price, tier_level)
SELECT
    c.category_name || ' - ' || t.tier_name                          AS badge_name,
    t.tier_description || ' (Category: ' || c.category_name || ')'  AS description,
    c.id                                                             AS category_id,
    FLOOR(cpr.base_min + (cpr.base_max - cpr.base_min) * t.min_factor)::INT AS min_price,
    FLOOR(cpr.base_min + (cpr.base_max - cpr.base_min) * t.max_factor)::INT AS max_price,
    t.tier_level
FROM Category c
JOIN category_price_ranges cpr ON cpr.category_name = c.category_name
CROSS JOIN tier_definitions t
WHERE c.parent_category_id IS NULL  -- only parent categories get badges
ORDER BY c.id, t.tier_level;

--++++++++++ worker_badge ++++++++++
-- not valid yet, needs review for tasks to calculate properly
--initially everyone gets begginer badge in their categories
INSERT INTO WorkerBadge (worker_id, badge_id, date_acquired,is_active)
SELECT DISTINCT
    wc.worker_id,
    b.id,
    w.created_at,
    true
FROM WorkerCategory wc
JOIN Worker w ON w.id = wc.worker_id
JOIN Category child ON child.id = wc.category_id
JOIN Badge b
    ON b.category_id = child.parent_category_id
   AND b.tier_level = 1
WHERE child.parent_category_id IS NOT NULL

ON CONFLICT (worker_id, badge_id) DO UPDATE
SET is_active = TRUE,
    date_acquired = EXCLUDED.date_acquired;
    

--++++++++++ task requests ++++++++++
--5milioni taskrequests
DO $$
DECLARE
    v_min_id        BIGINT;
    v_max_id        BIGINT;
    v_batch_size    INT  := 5000;   -- clients per iteration
    v_rows_per_cli  INT  := 50;     -- task rows per client
    v_offset        BIGINT := 0;
    v_batch_min     BIGINT;
    v_batch_max     BIGINT;
    v_rows_inserted BIGINT := 0;
BEGIN
    SELECT MIN(id), MAX(id)
      INTO v_min_id, v_max_id
      FROM Client;

    LOOP
        v_batch_min := v_min_id + v_offset;
        v_batch_max := v_min_id + v_offset + v_batch_size - 1;

        EXIT WHEN v_batch_min > v_max_id;

        INSERT INTO TaskRequest
            (client_id, description, work_mode, status,
             category_id, location_id, created_at)
        SELECT
            c.id,
            td.task_description,
            (ARRAY['HYBRID','ONSITE','REMOTE'])[floor(random()*3+1)::int],
            'OPEN',
            cat.id,
            l.id,
            u.created_at + INTERVAL '1 minute'
                + (random() * (TIMESTAMP '2026-01-01' - u.created_at - INTERVAL '1 minute'))
        FROM Client c
        JOIN UserAccount u  ON c.user_id = u.id
        -- only clients in this slice
        JOIN generate_series(1, v_rows_per_cli) gs(n) ON TRUE
        JOIN LATERAL (
            SELECT task_description, category_name
              FROM temp_task_desc
             ORDER BY RANDOM()* gs.n
             LIMIT 1
        ) td ON TRUE
        JOIN Category cat ON cat.category_name = td.category_name
        JOIN LATERAL (
                SELECT id
                FROM Location
                ORDER BY RANDOM()* gs.n
                LIMIT 1
                ) l ON true
        WHERE c.id BETWEEN v_batch_min AND v_batch_max;

        GET DIAGNOSTICS v_rows_inserted = ROW_COUNT;
        RAISE NOTICE 'Batch offset=% | clients %-% | rows inserted=%',
            v_offset, v_batch_min, v_batch_max, v_rows_inserted;

        v_offset := v_offset + v_batch_size;

        COMMIT;
    END LOOP;

    RAISE NOTICE 'Done. All batches complete.';
END;
$$ LANGUAGE plpgsql;


-- ================================================================
--helper procedura za badges

CREATE OR REPLACE PROCEDURE recalc_worker_badges(p_as_of TIMESTAMP, p_from TIMESTAMP)
LANGUAGE plpgsql
AS $$
BEGIN

    -- Само работници со нови reviews во овој batch
    DROP TABLE IF EXISTS tmp_affected_workers;
    CREATE TEMP TABLE tmp_affected_workers AS
        SELECT DISTINCT o.worker_id
        FROM  Review r
        JOIN  Task  t ON t.id = r.task_id
        JOIN  Offer o ON o.id = t.offer_id
        WHERE r.created_at >  p_from
          AND r.created_at <= p_as_of;

    CREATE INDEX ON tmp_affected_workers (worker_id);

    IF NOT EXISTS (SELECT 1 FROM tmp_affected_workers) THEN
        RETURN;
    END IF;

    -- ОПТ: GROUP BY оди директно — ист резултат, нема промена во логиката.
    -- Не скенираме повеќе Review отколку што треба бидејќи
    -- tmp_affected_workers е мал сет.
    DROP TABLE IF EXISTS tmp_avg_ratings;
    CREATE TEMP TABLE tmp_avg_ratings AS
        SELECT
            o.worker_id,
            parent.id     AS parent_category_id,
            AVG(r.rating) AS avg_rating,
            COUNT(*)      AS review_count
        FROM  Review r
        JOIN  Task        t      ON t.id      = r.task_id
        JOIN  Offer       o      ON o.id      = t.offer_id
        JOIN  TaskRequest tr     ON tr.id     = o.task_request_id
        JOIN  Category    child  ON child.id  = tr.category_id
        JOIN  Category    parent ON parent.id = child.parent_category_id
        WHERE o.worker_id IN (SELECT worker_id FROM tmp_affected_workers)
          AND r.created_at <= p_as_of
        GROUP BY o.worker_id, parent.id;

    CREATE INDEX ON tmp_avg_ratings (worker_id, parent_category_id);

    DROP TABLE IF EXISTS tmp_new_badges;
    CREATE TEMP TABLE tmp_new_badges AS
        SELECT DISTINCT ON (ar.worker_id, ar.parent_category_id)
            ar.worker_id,
            ar.parent_category_id,
            b.id AS badge_id
        FROM  tmp_avg_ratings ar
        JOIN  Badge b ON b.category_id = ar.parent_category_id
        ORDER BY
            ar.worker_id,
            ar.parent_category_id,
            ABS(b.tier_level - ROUND((ar.avg_rating - 1) / 4.0 * 4 + 1));

    CREATE INDEX ON tmp_new_badges (worker_id);
    CREATE INDEX ON tmp_new_badges (badge_id);

    -- Деактивирај стари badges
    UPDATE WorkerBadge wb
    SET    is_active = FALSE
    WHERE  wb.is_active = TRUE
      AND  wb.worker_id IN (SELECT worker_id FROM tmp_affected_workers)
      AND  NOT EXISTS (
          SELECT 1 FROM tmp_new_badges nb
          WHERE nb.worker_id = wb.worker_id
            AND nb.badge_id  = wb.badge_id
      );

    -- Вметни/ажурирај точни badges
    INSERT INTO WorkerBadge (worker_id, badge_id, date_acquired, is_active)
    SELECT worker_id, badge_id, p_as_of::DATE, TRUE
    FROM   tmp_new_badges
    ON CONFLICT (worker_id, badge_id) DO UPDATE
        SET is_active     = TRUE,
            date_acquired = EXCLUDED.date_acquired;

END;
$$;


-------------------------------------------------------------------

DO $$
DECLARE
    v_batch_start   TIMESTAMP;
    v_batch_end     TIMESTAMP;
    v_origin        TIMESTAMP := TIMESTAMP '2023-02-01';
    v_finish        TIMESTAMP := TIMESTAMP '2023-03-01'; -- прошири колку сакаш, најполовно на 3-4 месеци,
    v_cnt           BIGINT;
    v_tables_ready  BOOLEAN   := FALSE;
BEGIN
    v_batch_start := v_origin;

    LOOP
        v_batch_end := v_batch_start + INTERVAL '1 month';
        EXIT WHEN v_batch_start >= v_finish;

        RAISE NOTICE '=== BATCH % → % ===', v_batch_start, v_batch_end;

        -- ----------------------------------------------------------------
        -- ОПТ 1: Создај temp табели САМО еднаш, потоа TRUNCATE
        -- Избегнува каталошки lock + index rebuild на секоја итерација
        -- ----------------------------------------------------------------
        IF NOT v_tables_ready THEN
            CREATE TEMP TABLE tmp_worker_sample (
                category_id  INT,
                worker_id    INT,
                created_at   TIMESTAMP
            ) ON COMMIT PRESERVE ROWS;
            CREATE INDEX ON tmp_worker_sample (category_id, worker_id);

            CREATE TEMP TABLE tmp_offers_to_make (
                task_request_id INT,
                worker_id       INT,
                worker_created  TIMESTAMP,
                tr_created      TIMESTAMP,
                client_created  TIMESTAMP,
                initiated_by    TEXT,
                price           INT
            ) ON COMMIT PRESERVE ROWS;
            CREATE INDEX ON tmp_offers_to_make (task_request_id);

            CREATE TEMP TABLE tmp_batch_offers (
                id              INT,
                task_request_id INT,
                created_at      TIMESTAMP
            ) ON COMMIT PRESERVE ROWS;
            CREATE INDEX ON tmp_batch_offers (task_request_id);
            CREATE INDEX ON tmp_batch_offers (id);

            CREATE TEMP TABLE tmp_batch_tasks (
                task_id         INT,
                updated_at      TIMESTAMP,
                worker_id       INT,
                worker_user_id  INT,
                client_user_id  INT
            ) ON COMMIT PRESERVE ROWS;
            CREATE INDEX ON tmp_batch_tasks (task_id);

            v_tables_ready := TRUE;
        ELSE
            TRUNCATE tmp_worker_sample;
            TRUNCATE tmp_offers_to_make;
            TRUNCATE tmp_batch_offers;
            TRUNCATE tmp_batch_tasks;
        END IF;

        -- ----------------------------------------------------------------
        -- ОПТ 2: Земи max 20 работници по категорија ЕДНАШ
        -- Истата логика, но RANDOM() се вика еднаш, не за секој TaskRequest
        -- ----------------------------------------------------------------
        INSERT INTO tmp_worker_sample (category_id, worker_id, created_at)
        WITH numbered AS (
            SELECT
                wc.category_id,
                w.id AS worker_id,
                w.created_at,
                ROW_NUMBER() OVER (
                    PARTITION BY wc.category_id
                    ORDER BY RANDOM()
                ) AS rn
            FROM Worker w
            JOIN WorkerCategory wc ON wc.worker_id = w.id
            WHERE w.created_at < v_batch_end
        )
        SELECT category_id, worker_id, created_at
        FROM numbered
        WHERE rn <= 20;

        -- ----------------------------------------------------------------
        -- ОПТ 3: Генерирај парови (TaskRequest, Worker) ЕДНАШ во temp табела
        -- Наместо да се прави lateral join двапати (worker + client offers)
        -- со ORDER BY RANDOM() на секој ред — ова е главниот bottleneck.
        -- Истиот број на offers (2–5 по TaskRequest), иста рандомност.
        -- ----------------------------------------------------------------

        INSERT INTO tmp_offers_to_make
            (task_request_id, worker_id, worker_created, tr_created, client_created, initiated_by, price)
        SELECT
            tr.id,
            w.worker_id,
            w.created_at,
            tr.created_at,
            u.created_at,
            'WORKER',
            COALESCE(
                FLOOR(RANDOM() * (b.max_price - b.min_price + 1) + b.min_price),
                FLOOR(RANDOM() * (300 - 100 + 1) + 100)
            )::INT
        FROM TaskRequest tr
        JOIN Client       c      ON c.id      = tr.client_id
        JOIN UserAccount  u      ON u.id      = c.user_id
        JOIN LATERAL (

            -- ОПТ: tmp_worker_sample е веќе случаен → само земи 2–5
            SELECT bw.worker_id, bw.created_at
            FROM   tmp_worker_sample bw
            WHERE  bw.category_id = tr.category_id
            LIMIT  (FLOOR(RANDOM()*4)+2)   -- нема ORDER BY RANDOM() овде!
        ) w ON TRUE
        JOIN Category child   ON child.id   = tr.category_id
        JOIN Category parent  ON parent.id  = child.parent_category_id
        LEFT JOIN WorkerBadge wb
            ON  wb.worker_id  = w.worker_id
            AND wb.is_active  = TRUE
        LEFT JOIN Badge b
            ON  b.id          = wb.badge_id
            AND b.category_id = parent.id
        WHERE tr.status     = 'OPEN'
          AND tr.created_at >= v_batch_start
          AND tr.created_at <  v_batch_end;

        -- ------------------------------------------------
        -- 1. WORKER-INITIATED OFFERS
        -- ------------------------------------------------
        INSERT INTO Offer
            (worker_id, task_request_id, price, offer_status, initiated_by, created_at)
        SELECT
            worker_id,
            task_request_id,
            price,
            'PENDING',
            'WORKER',
            GREATEST(
                tr_created     + INTERVAL '1 minute',
                worker_created + INTERVAL '1 hour',
                client_created + INTERVAL '1 minute',
                LEAST(
                    tr_created + (RANDOM() * INTERVAL '12 hours'),
                    v_batch_end - INTERVAL '1 hour'
                )
            )
        FROM tmp_offers_to_make;

        GET DIAGNOSTICS v_cnt = ROW_COUNT;
        RAISE NOTICE '  Worker offers inserted: %', v_cnt;

        -- ------------------------------------------------
        -- 2. CLIENT-INITIATED OFFERS (~50% од истите парови)
        -- Ист ефект: RANDOM() < 0.5 филтер
        -- ------------------------------------------------
        INSERT INTO Offer
            (worker_id, task_request_id, price, offer_status, initiated_by, created_at)
        SELECT
            worker_id,
            task_request_id,
            FLOOR(RANDOM() * (300 - 100 + 1) + 100)::INT,
            'PENDING',
            'CLIENT',
            GREATEST(
                tr_created     + INTERVAL '1 minute',
                worker_created + INTERVAL '1 hour',
                client_created + INTERVAL '1 minute',
                LEAST(
                    tr_created + (RANDOM() * INTERVAL '12 hours'),
                    v_batch_end - INTERVAL '1 hour'
                )
            )
        FROM tmp_offers_to_make
        WHERE RANDOM() < 0.5;

        GET DIAGNOSTICS v_cnt = ROW_COUNT;
        RAISE NOTICE '  Client offers inserted: %', v_cnt;

        -- ------------------------------------------------
        -- 3. ACCEPT ЕДЕН OFFER PО TASK, REJECT ОСТАНАТИТЕ
        -- ОПТ: Еден UPDATE со CASE наместо два одделни UPDATE
        -- Ист резултат: 1 ACCEPTED, останатите REJECTED по TaskRequest
        -- ------------------------------------------------
        INSERT INTO tmp_batch_offers (id, task_request_id, created_at)
        SELECT o.id, o.task_request_id, o.created_at
        FROM Offer o
        WHERE o.offer_status = 'PENDING'
          AND o.created_at  >= v_batch_start
          AND o.created_at  <  v_batch_end;

        WITH last_offer AS (
            SELECT task_request_id,
                   MAX(created_at) AS last_offer_time
            FROM   tmp_batch_offers
            GROUP BY task_request_id
        ),
        ranked AS (
            SELECT o.id,
                   o.task_request_id,
                   o.created_at      AS offer_created_at,
                   lo.last_offer_time,
                   ROW_NUMBER() OVER (
                       PARTITION BY o.task_request_id
                       ORDER BY RANDOM()
                   ) AS rn
            FROM  tmp_batch_offers o
            JOIN  last_offer lo ON lo.task_request_id = o.task_request_id
        )
        -- ОПТ: Еден UPDATE со CASE → половина I/O наспроти два UPDATE
        UPDATE Offer o
        SET
            offer_status = CASE WHEN r.rn = 1 THEN 'ACCEPTED' ELSE 'REJECTED' END,
            updated_at   = CASE
                WHEN r.rn = 1 THEN
                    GREATEST(
                        r.offer_created_at + INTERVAL '1 minute',
                        LEAST(
                            GREATEST(r.offer_created_at, r.last_offer_time)
                                + (RANDOM() * INTERVAL '1 hour'),
                            v_batch_end - INTERVAL '1 hour'
                        )
                    )
                ELSE
                    GREATEST(o.created_at,
                        GREATEST(
                            r.offer_created_at + INTERVAL '1 minute',
                            LEAST(
                                GREATEST(r.offer_created_at, r.last_offer_time)
                                    + (RANDOM() * INTERVAL '1 hour'),
                                v_batch_end - INTERVAL '1 hour'
                            )
                        )
                    )
                END
        FROM ranked r
        WHERE o.id = r.id;

        -- ------------------------------------------------
        -- 4. CLOSE ACCEPTED TASK REQUESTS
        -- ------------------------------------------------
        WITH accepted_tasks AS (
            SELECT DISTINCT task_request_id, updated_at
            FROM   Offer
            WHERE  offer_status = 'ACCEPTED'
              AND  created_at  >= v_batch_start
              AND  created_at  <  v_batch_end
        )
        UPDATE TaskRequest tr
        SET    status     = 'CLOSED',
               updated_at = GREATEST(at2.updated_at, tr.created_at + INTERVAL '1 minute')
        FROM   accepted_tasks at2
        WHERE  tr.id     = at2.task_request_id
          AND  tr.status = 'OPEN';

        -- ------------------------------------------------
        -- 5. CREATE TASKS
        -- ------------------------------------------------
        INSERT INTO Task (offer_id, status, created_at)
        SELECT o.id,
               'ACTIVE',
               GREATEST(
                   o.updated_at,
                   o.created_at  + INTERVAL '1 minute',
                   u.created_at  + INTERVAL '1 minute'
               )
        FROM   Offer o
        JOIN   TaskRequest  tr ON tr.id = o.task_request_id
        JOIN   Client        c ON c.id  = tr.client_id
        JOIN   UserAccount   u ON u.id  = c.user_id
        WHERE  o.offer_status = 'ACCEPTED'
          AND  o.created_at  >= v_batch_start
          AND  o.created_at  <  v_batch_end
          AND  NOT EXISTS (
              SELECT 1 FROM Task t WHERE t.offer_id = o.id
          );

        GET DIAGNOSTICS v_cnt = ROW_COUNT;
        RAISE NOTICE '  Tasks created: %', v_cnt;

        -- ------------------------------------------------
        -- 6. COMPLETE TASKS
        -- ------------------------------------------------
        UPDATE Task t
        SET    status     = 'COMPLETED',
               updated_at = GREATEST(
                   t.created_at + INTERVAL '1 hour',
                   LEAST(
                       t.created_at + (RANDOM() * INTERVAL '2 days'),
                       v_batch_end  - INTERVAL '1 hour'
                   )
               )
        WHERE  t.status     = 'ACTIVE'
          AND  t.created_at >= v_batch_start
          AND  t.created_at <  v_batch_end;

        -- ------------------------------------------------
        -- 7. REVIEWS
        -- ------------------------------------------------
        INSERT INTO tmp_batch_tasks
            (task_id, updated_at, worker_id, worker_user_id, client_user_id)
        SELECT
            t.id,
            t.updated_at,
            o.worker_id,
            w.user_id,
            c.user_id
        FROM  Task        t
        JOIN  Offer       o  ON o.id  = t.offer_id
        JOIN  Worker      w  ON w.id  = o.worker_id
        JOIN  TaskRequest tr ON tr.id = o.task_request_id
        JOIN  Client      c  ON c.id  = tr.client_id
        WHERE t.status     = 'COMPLETED'
          AND t.created_at >= v_batch_start
          AND t.created_at <  v_batch_end;

        -- Client → Worker
        INSERT INTO Review
            (task_id, reviewer_id, reviewed_id, rating, created_at)
        SELECT
            bt.task_id,
            bt.client_user_id,
            bt.worker_user_id,
            LEAST(5, GREATEST(1,
                ROUND((random()+random()+random()+random())/4.0*4+1)::INT
            )),
            GREATEST(
                bt.updated_at + INTERVAL '1 minute',
                LEAST(
                    bt.updated_at + (RANDOM() * INTERVAL '12 hours'),
                    v_batch_end   - INTERVAL '1 hour'
                )
            )
        FROM  tmp_batch_tasks bt
        LEFT JOIN Review r ON r.task_id    = bt.task_id
                          AND r.reviewer_id = bt.client_user_id
        WHERE r.id IS NULL;

        GET DIAGNOSTICS v_cnt = ROW_COUNT;
        RAISE NOTICE '  Reviews client->worker: %', v_cnt;

        -- Worker → Client
        INSERT INTO Review
            (task_id, reviewer_id, reviewed_id, rating, created_at)
        SELECT
            bt.task_id,
            bt.worker_user_id,
            bt.client_user_id,
            LEAST(5, GREATEST(1,
                ROUND((random()+random()+random())/3.0*4+1)::INT
            )),
            GREATEST(
                bt.updated_at + INTERVAL '1 minute',
                LEAST(
                    bt.updated_at + (RANDOM() * INTERVAL '12 hours'),
                    v_batch_end   - INTERVAL '1 hour'
                )
            )
        FROM  tmp_batch_tasks bt
        LEFT JOIN Review r ON r.task_id    = bt.task_id
                          AND r.reviewer_id = bt.worker_user_id
        WHERE r.id IS NULL;

        GET DIAGNOSTICS v_cnt = ROW_COUNT;
        RAISE NOTICE '  Reviews worker->client: %', v_cnt;

        -- ------------------------------------------------
        -- 8. BADGE RECALC
        -- ------------------------------------------------
        CALL recalc_worker_badges(v_batch_end, v_batch_start);
        RAISE NOTICE '  Badges updated.';

        COMMIT;
        v_batch_start := v_batch_end;
    END LOOP;

    RAISE NOTICE 'All batches done.';
END;
$$ LANGUAGE plpgsql;


--++++++++++ notification type +++++++++++
  
INSERT INTO NotificationType (name, description)
VALUES
    ('New Task Created',      'Triggered when a new task is created and awaiting offers.'),
    ('Task Accepted',         'Triggered when a task is accepted by a worker.'),
    ('Task Started',          'Triggered when a worker begins a task.'),
    ('Task Completed',        'Triggered when a task is marked complete.'),
    ('Task Cancelled',        'Triggered when a task is cancelled.'),
    ('Offer Received',        'Triggered when a new offer is submitted on a task.'),
    ('Offer Accepted',        'Triggered when an offer is accepted by the client.'),
    ('Offer Rejected',        'Triggered when an offer is rejected.'),
    ('Offer Expired',         'Triggered when an offer expires without a response.'),
    ('Payment Pending',       'Triggered when a payment is being processed.'),
    ('Payment Successful',    'Triggered when a payment completes successfully.'),
    ('Payment Failed',        'Triggered when a payment attempt fails.'),
    ('Review Received',       'Triggered when a user receives a new review.'),
    ('Complaint Opened',      'Triggered when a complaint is filed.'),
    ('Complaint Resolved',    'Triggered when a complaint is resolved.'),
    ('Complaint Dismissed',   'Triggered when a complaint is dismissed.'),
    ('Message Received',      'Triggered when a new message is received.'),
    ('Badge Earned',          'Triggered when a worker earns a new badge.'),
    ('Profile Viewed',        'Triggered when a user profile is viewed.'),
    ('System Announcement',   'General system-wide announcements.');


INSERT INTO NotificationPreference
(is_enabled, channel, user_id, notification_type_id)
SELECT
    (random() > 0.25)                     AS is_enabled,
    channel                               AS channel,
    u                                     AS user_id,
    nt                                    AS notification_type_id
FROM
    generate_series((SELECT min(id) FROM useraccount), (SELECT max(id) FROM useraccount)) AS u,
    generate_series(1, 20)                AS nt,
    (VALUES ('EMAIL'), ('SMS'), ('PUSH')) AS c(channel)
WHERE u IN (SELECT id FROM useraccount)  -- only real user IDs
LIMIT 7000000;  

--++++++++++ compalaints +++++++++++

DO $$
DECLARE
    batch_size  INT := 100000;
    total_rows  INT := 1000000;
    offset_val  INT := 0;

    task_ids    INT[];
    client_ids  INT[];
    worker_ids  INT[];
BEGIN
    -- Load matched task/client/worker combos from the actual join chain
    SELECT
        array_agg(t.id),
        array_agg(tr.client_id),
        array_agg(o.worker_id)
    INTO task_ids, client_ids, worker_ids
    FROM Task         t
    JOIN Offer        o  ON o.id          = t.offer_id
    JOIN TaskRequest  tr ON tr.id         = o.task_request_id;

    WHILE offset_val < total_rows LOOP
        INSERT INTO Complaint
            (reason, description, status, created_at, updated_at,
             task_id, client_id, worker_id)
        SELECT
            CASE
                WHEN is_worker_complaint THEN
                    CASE floor(random() * 6)::INT
                        WHEN 0 THEN 'Client: Payment not made as agreed'
                        WHEN 1 THEN 'Client: Unsafe or hazardous work conditions'
                        WHEN 2 THEN 'Client: Abusive or threatening behaviour'
                        WHEN 3 THEN 'Client: Scope creep beyond agreed terms'
                        WHEN 4 THEN 'Client: Access denied to work site'
                        ELSE        'Client: False damage claim'
                    END
                ELSE
                    CASE floor(random() * 6)::INT
                        WHEN 0 THEN 'Worker: Work not completed as agreed'
                        WHEN 1 THEN 'Worker: Poor quality of service'
                        WHEN 2 THEN 'Worker: No-show or late arrival'
                        WHEN 3 THEN 'Worker: Unprofessional behaviour'
                        WHEN 4 THEN 'Worker: Overcharged or billing dispute'
                        ELSE        'Worker: Damaged property or belongings'
                    END
            END                                                                  AS reason,

            CASE
                WHEN is_worker_complaint THEN
                    CASE floor(random() * 4)::INT
                        WHEN 0 THEN 'The client did not fulfil the agreed payment terms.'
                        WHEN 1 THEN 'The client created conditions that made work impossible.'
                        WHEN 2 THEN 'The client behaved in an abusive or threatening manner.'
                        ELSE        NULL
                    END
                ELSE
                    CASE floor(random() * 4)::INT
                        WHEN 0 THEN 'The worker did not follow the agreed specifications.'
                        WHEN 1 THEN 'The quality of the delivered work was unsatisfactory.'
                        WHEN 2 THEN 'The worker arrived significantly late without notice.'
                        ELSE        NULL
                    END
            END                                                                  AS description,

            CASE floor(random() * 3)::INT
                WHEN 0 THEN 'OPEN'
                WHEN 1 THEN 'RESOLVED'
                ELSE        'DISMISSED'
            END                                                                  AS status,

            created_ts                                                           AS created_at,

            CASE WHEN floor(random() * 3)::INT > 0
                     THEN created_ts + (random() * INTERVAL '30 days')
                 ELSE NULL
            END                                                                  AS updated_at,

            task_ids  [idx]                                                      AS task_id,
            client_ids[idx]                                                      AS client_id,
            worker_ids[idx]                                                      AS worker_id

        FROM (
            SELECT
                i,
                '2017-01-15 19:44:09.798'::TIMESTAMP
                    + random() * ('2026-04-29 23:45:45.249'::TIMESTAMP - '2017-01-15 19:44:09.798'::TIMESTAMP) AS created_ts,
                random() < 0.5                                                                                  AS is_worker_complaint,
                1 + floor(random() * array_length(task_ids, 1))::INT                                           AS idx
            FROM generate_series(offset_val + 1, offset_val + batch_size) AS i
        ) sub;

        offset_val := offset_val + batch_size;
        RAISE NOTICE 'Inserted % / % rows', offset_val, total_rows;
    END LOOP;
END;
$$;


CREATE TEMP TABLE complaints_ordered AS
SELECT 
    c.id AS cid,
    cl.user_id AS client_user_id,
    w.user_id AS worker_user_id,
    random() AS rnd
FROM Complaint c
JOIN Task t ON t.id = c.task_id
JOIN Offer o ON o.id = t.offer_id
JOIN TaskRequest tr ON tr.id = o.task_request_id
JOIN Client cl ON cl.id = tr.client_id AND cl.id = c.client_id
JOIN Worker w ON w.id = o.worker_id AND w.id = c.worker_id;

CREATE INDEX ON complaints_ordered (rnd);

INSERT INTO ComplaintAttachment (file_url, description, complaint_id, user_id)
SELECT
    'https://storage.example.com/complaints/'
        || cid || '/'
        || md5(random()::TEXT) || '.'
        || CASE floor(random() * 4)::INT
               WHEN 0 THEN 'jpg'
               WHEN 1 THEN 'png'
               WHEN 2 THEN 'pdf'
               ELSE 'mp4'
           END AS file_url,
    CASE floor(random() * 3)::INT
        WHEN 0 THEN 'Photo evidence of the issue'
        WHEN 1 THEN 'Screenshot of conversation'
        ELSE NULL
    END AS description,
    cid AS complaint_id,
    CASE WHEN random() < 0.5 THEN client_user_id ELSE worker_user_id END AS user_id
FROM complaints_ordered
ORDER BY rnd
LIMIT 2000000;  -- Your total desired rows

DROP TABLE IF EXISTS complaints_ordered;


--++++++++++ payments +++++++++++
CREATE TEMP TABLE payments_base AS
SELECT
    t.id         AS task_id,
    tr.client_id,
    o.worker_id,
    row_number() OVER (ORDER BY random()) AS rn
FROM Task        t
JOIN Offer       o  ON o.id  = t.offer_id
JOIN TaskRequest tr ON tr.id = o.task_request_id
CROSS JOIN generate_series(1, 10) AS multiplier
LIMIT 10000000;


truncate table Payment restart identity cascade;

DO $$
DECLARE
    batch_size INT := 500000;
    max_rn     INT := (SELECT MAX(rn) FROM payments_base);
    curr_rn    INT := 1;
BEGIN
    WHILE curr_rn <= max_rn LOOP
        INSERT INTO Payment
            (amount, payment_method, status,
             task_id, client_id, worker_id, created_at, updated_at)
        SELECT
            (floor(random() * 99000) + 1000)::INT                           AS amount,

            CASE floor(random() * 3)::INT
                WHEN 0 THEN 'CARD'
                WHEN 1 THEN 'PAYPAL'
                ELSE        'CASH'
            END                                                              AS payment_method,

            CASE floor(random() * 10)::INT
                WHEN 0 THEN 'FAILED'
                WHEN 1 THEN 'PENDING'
                ELSE        'PAID'
            END                                                              AS status,

            pb.task_id,
            pb.client_id,
            pb.worker_id,

            created_ts                                                       AS created_at,

            CASE WHEN random() > 0.2
                     THEN LEAST(created_ts + (random() * INTERVAL '30 days'), NOW())
                 ELSE NULL
            END                                                              AS updated_at

        FROM payments_base pb
        CROSS JOIN LATERAL (
            SELECT '2017-01-15 19:44:09.798'::TIMESTAMP
                + random() * ('2026-04-29 23:45:45.249'::TIMESTAMP - '2017-01-15 19:44:09.798'::TIMESTAMP) AS created_ts
        ) ts
        WHERE pb.rn BETWEEN curr_rn AND curr_rn + batch_size - 1;

        curr_rn := curr_rn + batch_size;
        RAISE NOTICE 'Inserted up to row % / %', curr_rn - 1, max_rn;
    END LOOP;
END;
$$;

DROP TABLE payments_base;


Note: See TracWiki for help on using the wiki.