import csv
import random
from datetime import datetime, timedelta

from mk_gragjani_lists import maski_iminja, zenski_iminja, preziminja, ulici

# Number of data rows written to the CSV file (the header row is extra).
N = 2_000_000
# Path of the CSV file produced by this script.
OUTPUT_FILE = "gragjanin.csv"

# City name -> two-digit code that generate_unique_embg() inserts between the
# date part and the serial number of an EMBG.  Several cities share one code.
# The key order matters: `cities` below is derived from it.
region_codes = {
    # 45
    "Skopje": "45",
    # 41
    "Bitola": "41",
    "Resen": "41",
    # 42
    "Kumanovo": "42",
    "Kriva Palanka": "42",
    "Kratovo": "42",
    # 43
    "Ohrid": "43",
    "Struga": "43",
    "Debar": "43",
    # 44
    "Prilep": "44",
    "Krushevo": "44",
    "Makedonski Brod": "44",
    # 46
    "Strumica": "46",
    "Radovish": "46",
    "Gevgelija": "46",
    "Valandovo": "46",
    # 47
    "Tetovo": "47",
    "Gostivar": "47",
    # 48
    "Veles": "48",
    "Kavadarci": "48",
    "Negotino": "48",
    # 49
    "Shtip": "49",
    "Kochani": "49",
    "Berovo": "49",
    "Delchevo": "49",
    "Vinica": "49",
    "Probishtip": "49",
}

# Cities a generated record may be assigned to, in region_codes key order.
cities = list(region_codes)
# Every EMBG issued so far; generate_unique_embg() checks and extends this set.
used_embg = set()


def random_date(start_year=1950, end_year=2025):
    """Return a random midnight ``datetime`` within a span of whole years.

    The result lies between January 1 of ``start_year`` and December 31 of
    ``end_year``, both inclusive; every day in that span is equally likely.
    """
    first_day = datetime(start_year, 1, 1)
    last_day = datetime(end_year, 12, 31)
    span_days = (last_day - first_day).days
    # randint is inclusive on both ends, so last_day itself can be returned.
    offset = random.randint(0, span_days)
    return first_day + timedelta(days=offset)


def random_phone():
    """Return a random phone number string shaped like ``07X-XXX-XXX``.

    The digit after ``07`` is 0-9; each three-digit group is drawn from
    100-999, so neither group starts with a zero.
    """
    operator_digit = random.randint(0, 9)
    middle_group = random.randint(100, 999)
    last_group = random.randint(100, 999)
    return f"07{operator_digit}-{middle_group}-{last_group}"


def random_address():
    """Return a random street address: a street from ``ulici`` plus a number.

    The house number is drawn from 1-200 inclusive.
    """
    street = random.choice(ulici)
    house_number = random.randint(1, 200)
    return f"ул. {street} бр. {house_number}"


def calculate_control_digit(first_12_digits):
    """Compute the modulo-11 control digit for the first 12 EMBG digits.

    Each digit is multiplied by its positional weight (7, 6, 5, 4, 3, 2,
    repeated twice) and the products are summed.  The control value is
    ``11 - (sum % 11)``.

    Returns:
        ``0`` when the control value is 11, ``None`` when it is 10 (no
        single digit can represent it, so the caller must pick another
        number), otherwise the control value itself (1-9).
    """
    multipliers = (7, 6, 5, 4, 3, 2) * 2

    weighted_sum = 0
    for char, multiplier in zip(first_12_digits, multipliers):
        weighted_sum += int(char) * multiplier

    # weighted_sum % 11 is in 0..10, so candidate is in 1..11.
    candidate = 11 - weighted_sum % 11

    if candidate == 11:
        return 0
    return None if candidate == 10 else candidate


def _candidate_serials(low, high, random_tries):
    """Yield serials to try: random draws first, then each value exactly once.

    The first ``random_tries`` values are independent
    ``random.randint(low, high)`` draws.  After those, the whole inclusive
    range ``low..high`` is swept in ascending order, so a caller that rejects
    every candidate is guaranteed to exhaust the generator.
    """
    for _ in range(random_tries):
        yield random.randint(low, high)
    yield from range(low, high + 1)


def generate_unique_embg(date_of_birth, city, gender):
    """Create an EMBG that is not yet in ``used_embg`` and record it there.

    The 13-character result is built as: day and month (``DDMM``), the last
    three digits of the year, the two-digit code of ``city`` from
    ``region_codes``, a three-digit serial, and the control digit returned by
    ``calculate_control_digit``.

    Args:
        date_of_birth: object providing ``day``/``month``/``year`` and
            ``strftime``-style formatting (``datetime`` or ``date``).
        city: key of ``region_codes``.
        gender: ``"M"`` draws the serial from 000-499; any other value
            draws it from 500-999.

    Returns:
        The newly registered EMBG string.

    Raises:
        KeyError: if ``city`` is not a key of ``region_codes``.
        RuntimeError: if every serial for this date, region and gender is
            either already used or has no valid control digit.  (Previously
            this situation made the function loop forever.)
    """
    # Year part is computed arithmetically so it is always exactly 3 digits.
    date_part = f"{date_of_birth:%d%m}{date_of_birth.year % 1000:03d}"
    region = region_codes[city]
    low, high = (0, 499) if gender == "M" else (500, 999)

    # A handful of random draws almost always succeeds while the bucket is
    # sparsely filled; the ordered sweep that follows only matters when the
    # bucket is close to full and guarantees termination.
    max_random_tries = 64

    for serial in _candidate_serials(low, high, max_random_tries):
        first_12 = f"{date_part}{region}{serial:03d}"
        control = calculate_control_digit(first_12)

        # No single-digit control value exists for this serial; try another.
        if control is None:
            continue

        embg = first_12 + str(control)

        if embg not in used_embg:
            used_embg.add(embg)
            return embg

    raise RuntimeError(
        f"no unused EMBG left for date part {date_part}, region {region}, "
        f"serial range {low:03d}-{high:03d}"
    )


# Generate N random citizen rows and stream them straight into the CSV file.
with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as csv_file:
    rows = csv.writer(csv_file)

    # Header row; column order must match the data rows written below.
    rows.writerow([
        "embg", "ime", "prezime", "adresa",
        "grad", "telefonski_broj", "datum_ragjanje", "pol",
    ])

    for row_number in range(1, N + 1):
        birth_date = random_date()
        city = random.choice(cities)
        gender = random.choice(["M", "F"])

        # First name comes from the list matching the chosen gender.
        first_names = maski_iminja if gender == "M" else zenski_iminja
        ime = random.choice(first_names)
        embg = generate_unique_embg(birth_date, city, gender)

        # Remaining random fields, drawn in the order they appear in the row.
        prezime = random.choice(preziminja)
        adresa = random_address()
        telefon = random_phone()

        rows.writerow([
            embg,
            ime,
            prezime,
            adresa,
            city,
            telefon,
            f"{birth_date:%Y-%m-%d}",
            gender,
        ])

        # Progress message every 100,000 rows.
        if not row_number % 100_000:
            print(f"{row_number} rows generated...")

print("CSV file generated:", OUTPUT_FILE)
print("Total unique EMBG:", len(used_embg))