"""Generate a CSV file of synthetic citizen records with unique EMBG numbers.

Each row holds an EMBG (13-digit personal number), a first name, a surname,
an address, a city, a phone number, a birth date and a gender.  Names and
street names are drawn at random from the lists in ``mk_gragjani_lists``.
"""

import csv
import random
from datetime import datetime, timedelta

from mk_gragjani_lists import maski_iminja, zenski_iminja, preziminja, ulici

# Number of rows written when the script is run with its defaults.
N = 2_000_000
OUTPUT_FILE = "gragjanin.csv"

# City -> two-digit region code placed in the EMBG right after the birth date.
# Several cities share one code, so they also share the same serial space.
region_codes = {
    "Skopje": "45",
    "Bitola": "41",
    "Resen": "41",
    "Kumanovo": "42",
    "Kriva Palanka": "42",
    "Kratovo": "42",
    "Ohrid": "43",
    "Struga": "43",
    "Debar": "43",
    "Prilep": "44",
    "Krushevo": "44",
    "Makedonski Brod": "44",
    "Strumica": "46",
    "Radovish": "46",
    "Gevgelija": "46",
    "Valandovo": "46",
    "Tetovo": "47",
    "Gostivar": "47",
    "Veles": "48",
    "Kavadarci": "48",
    "Negotino": "48",
    "Shtip": "49",
    "Kochani": "49",
    "Berovo": "49",
    "Delchevo": "49",
    "Vinica": "49",
    "Probishtip": "49",
}

cities = list(region_codes.keys())

# Every EMBG handed out so far in this process; guarantees uniqueness.
used_embg = set()

# Per-position weights applied to the 12 leading digits in the mod-11 checksum.
_CONTROL_WEIGHTS = (7, 6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2)

# Random serial draws attempted before falling back to a linear scan.
_MAX_RANDOM_ATTEMPTS = 1_000


def random_date(start_year=1950, end_year=2025):
    """Return a random date between 1 Jan ``start_year`` and 31 Dec ``end_year``.

    Both bounds are inclusive.  The result is a ``datetime`` at midnight.
    """
    start = datetime(start_year, 1, 1)
    end = datetime(end_year, 12, 31)
    return start + timedelta(days=random.randint(0, (end - start).days))


def random_phone():
    """Return a random phone number string of the form ``07D-DDD-DDD``.

    The two three-digit groups are each drawn from 100..999, so neither
    group starts with a zero.
    """
    return f"07{random.randint(0, 9)}-{random.randint(100, 999)}-{random.randint(100, 999)}"


def random_address():
    """Return a random street address: a street from ``ulici`` and a number 1..200."""
    return f"ул. {random.choice(ulici)} бр. {random.randint(1, 200)}"


def calculate_control_digit(first_12_digits):
    """Compute the 13th (control) digit for the 12 leading EMBG digits.

    Args:
        first_12_digits: String of 12 decimal digits (date, region, serial).

    Returns:
        The control digit as an ``int`` in 0..9, or ``None`` when the
        checksum works out to 10.  No single digit can represent 10, so the
        caller has to pick a different serial in that case.
    """
    total = sum(
        int(digit) * weight
        for digit, weight in zip(first_12_digits, _CONTROL_WEIGHTS)
    )
    control = 11 - total % 11
    if control == 10:
        return None
    if control == 11:
        return 0
    return control


def _build_embg(prefix, serial):
    """Return the full 13-digit EMBG for ``prefix`` + ``serial``, or ``None`` if invalid."""
    first_12 = f"{prefix}{serial:03d}"
    control = calculate_control_digit(first_12)
    if control is None:
        return None
    return first_12 + str(control)


def generate_unique_embg(date_of_birth, city, gender):
    """Generate an EMBG that has not been issued before in this process.

    The number is laid out as ``DDMMYYY`` (birth date with the last three
    digits of the year), the two-digit region code of ``city``, a three-digit
    serial and one control digit.  Serials 000..499 are used when ``gender``
    is ``"M"`` and 500..999 otherwise.

    Args:
        date_of_birth: ``datetime`` (or ``date``) of birth.
        city: Key of ``region_codes``.
        gender: ``"M"`` for male; any other value is treated as female.

    Returns:
        The 13-character EMBG string.  It is also recorded in ``used_embg``.

    Raises:
        KeyError: If ``city`` is not in ``region_codes``.
        RuntimeError: If every usable serial for this date, region and gender
            has already been issued.
    """
    date_part = date_of_birth.strftime("%d%m") + date_of_birth.strftime("%Y")[1:]
    prefix = f"{date_part}{region_codes[city]}"
    low, high = (0, 499) if gender == "M" else (500, 999)

    # Fast path: random serials.  Bounded so that a nearly full (or full)
    # serial range cannot make this loop spin forever.
    for _ in range(_MAX_RANDOM_ATTEMPTS):
        embg = _build_embg(prefix, random.randint(low, high))
        if embg is not None and embg not in used_embg:
            used_embg.add(embg)
            return embg

    # Slow path: look at every serial once so a free one is found if it exists.
    for serial in range(low, high + 1):
        embg = _build_embg(prefix, serial)
        if embg is not None and embg not in used_embg:
            used_embg.add(embg)
            return embg

    raise RuntimeError(
        f"No free EMBG serial left for date {date_part}, region "
        f"{region_codes[city]}, gender {gender!r}"
    )


def main(n=N, output_file=OUTPUT_FILE):
    """Write ``n`` random citizen rows (plus a header row) to ``output_file``.

    Progress is printed every 100,000 rows, followed by a short summary.
    """
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "embg",
            "ime",
            "prezime",
            "adresa",
            "grad",
            "telefonski_broj",
            "datum_ragjanje",
            "pol",
        ])

        for i in range(1, n + 1):
            date = random_date()
            city = random.choice(cities)
            gender = random.choice(["M", "F"])
            # First name comes from the list matching the chosen gender.
            ime = random.choice(maski_iminja) if gender == "M" else random.choice(zenski_iminja)
            embg = generate_unique_embg(date, city, gender)

            writer.writerow([
                embg,
                ime,
                random.choice(preziminja),
                random_address(),
                city,
                random_phone(),
                date.strftime("%Y-%m-%d"),
                gender,
            ])

            if i % 100_000 == 0:
                print(f"{i} rows generated...")

    print("CSV file generated:", output_file)
    print("Total unique EMBG:", len(used_embg))


if __name__ == "__main__":
    main()