DatabaseCreation: gragjanin.txt

File gragjanin.txt, 3.0 KB (added by 231018, 16 hours ago)
Line 
1import csv
2import random
3from datetime import datetime, timedelta
4
5from mk_gragjani_lists import maski_iminja, zenski_iminja, preziminja, ulici
6
# Destination CSV file and the number of citizen rows written to it.
OUTPUT_FILE = "gragjanin.csv"
N = 2_000_000

# Cities grouped by their two-digit region code. The groups are listed in
# the order in which their cities must appear in `region_codes`, because
# that order also becomes the order of `cities`.
_cities_by_region = (
    ("45", ("Skopje",)),
    ("41", ("Bitola", "Resen")),
    ("42", ("Kumanovo", "Kriva Palanka", "Kratovo")),
    ("43", ("Ohrid", "Struga", "Debar")),
    ("44", ("Prilep", "Krushevo", "Makedonski Brod")),
    ("46", ("Strumica", "Radovish", "Gevgelija", "Valandovo")),
    ("47", ("Tetovo", "Gostivar")),
    ("48", ("Veles", "Kavadarci", "Negotino")),
    ("49", ("Shtip", "Kochani", "Berovo", "Delchevo", "Vinica", "Probishtip")),
)

# City name -> two-digit region code (as a string).
region_codes = {
    city_name: code
    for code, city_names in _cities_by_region
    for city_name in city_names
}

# All known city names, in mapping order.
cities = list(region_codes)

# Every EMBG handed out so far; used to guarantee uniqueness.
used_embg = set()
24
25
def random_date(start_year=1950, end_year=2025):
    """Return a random ``datetime`` (at midnight) within a span of years.

    The day is picked uniformly between January 1 of ``start_year`` and
    December 31 of ``end_year``, both ends included.
    """
    first_day = datetime(start_year, 1, 1)
    last_day = datetime(end_year, 12, 31)
    span_in_days = (last_day - first_day).days
    day_offset = random.randint(0, span_in_days)
    return first_day + timedelta(days=day_offset)
30
31
def random_phone():
    """Return a random phone number string shaped like ``07D-DDD-DDD``.

    The digit after ``07`` is drawn from 0-9 and each three-digit group
    from 100-999, so a group never starts with zero.
    """
    prefix_digit = random.randint(0, 9)
    middle_group = random.randint(100, 999)
    last_group = random.randint(100, 999)
    return f"07{prefix_digit}-{middle_group}-{last_group}"
34
35
def random_address():
    """Return a random street address string.

    The street name is picked from the imported ``ulici`` list and the
    house number is drawn from 1-200.
    """
    street = random.choice(ulici)
    house_number = random.randint(1, 200)
    return f"ул. {street} бр. {house_number}"
38
39
def calculate_control_digit(first_12_digits):
    """Compute the mod-11 check digit for the first 12 digits of an EMBG.

    Each digit is multiplied by its weight (7, 6, 5, 4, 3, 2 repeated
    twice), the products are summed, and the check value is 11 minus the
    sum modulo 11.

    Args:
        first_12_digits: Sequence of 12 digit characters.

    Returns:
        The check digit as an int in 0-9, or ``None`` when the computed
        value is 10, which cannot be expressed as a single digit.
    """
    weights = (7, 6, 5, 4, 3, 2) * 2

    weighted_sum = 0
    for digit, weight in zip(first_12_digits, weights):
        weighted_sum += int(digit) * weight

    control = 11 - weighted_sum % 11

    # A sum divisible by 11 yields 11, which maps to check digit 0.
    if control == 11:
        return 0
    # 10 has no single-digit representation; signal "no valid digit".
    if control == 10:
        return None
    return control
52
53
def _embg_for_serial(prefix, serial):
    """Return the unused 13-digit EMBG for ``prefix`` + ``serial``, else None.

    ``None`` is returned when the serial has no valid control digit or when
    the resulting EMBG is already present in ``used_embg``.
    """
    first_12 = f"{prefix}{serial:03d}"
    control = calculate_control_digit(first_12)
    if control is None:
        return None
    embg = first_12 + str(control)
    return None if embg in used_embg else embg


def generate_unique_embg(date_of_birth, city, gender):
    """Generate an EMBG that has not been returned before.

    The 13-digit result is built from the day and month of birth, the last
    three digits of the birth year, the two-digit region code of ``city``,
    a three-digit serial (000-499 when ``gender`` is ``"M"``, 500-999
    otherwise) and a control digit. The result is recorded in
    ``used_embg``.

    Args:
        date_of_birth: Object with ``strftime`` (date or datetime).
        city: Key of ``region_codes``.
        gender: ``"M"`` for male; any other value uses the 500-999 range.

    Returns:
        The new EMBG as a 13-character string.

    Raises:
        KeyError: If ``city`` is not in ``region_codes``.
        RuntimeError: If every serial for this date, region and gender is
            either already used or has no valid control digit.
    """
    date_part = date_of_birth.strftime("%d%m") + date_of_birth.strftime("%Y")[1:]
    prefix = date_part + region_codes[city]

    if gender == "M":
        low, high = 0, 499
    else:
        low, high = 500, 999

    # Random probing, as before, but bounded: the previous `while True`
    # never terminated once all serials for this prefix were taken.
    for _ in range(2 * (high - low + 1)):
        embg = _embg_for_serial(prefix, random.randint(low, high))
        if embg is not None:
            used_embg.add(embg)
            return embg

    # Random probing kept missing, so the range is nearly or fully used up.
    # Sweep it in order to either find the last free serial or prove there
    # is none.
    for serial in range(low, high + 1):
        embg = _embg_for_serial(prefix, serial)
        if embg is not None:
            used_embg.add(embg)
            return embg

    raise RuntimeError(
        f"No unused EMBG left for prefix {prefix} and gender {gender!r}"
    )
75
76
# Generate N synthetic citizen rows and stream them to OUTPUT_FILE as CSV.
# newline="" leaves line-ending handling to the csv module.
with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)

    # Header row.
    writer.writerow(
        [
            "embg",
            "ime",
            "prezime",
            "adresa",
            "grad",
            "telefonski_broj",
            "datum_ragjanje",
            "pol",
        ]
    )

    for row_number in range(1, N + 1):
        birth_date = random_date()
        city = random.choice(cities)
        gender = random.choice(["M", "F"])

        # The first name comes from the list matching the drawn gender.
        name_pool = maski_iminja if gender == "M" else zenski_iminja
        ime = random.choice(name_pool)
        embg = generate_unique_embg(birth_date, city, gender)

        # Remaining random fields, drawn in the same order as the columns.
        prezime = random.choice(preziminja)
        adresa = random_address()
        telefon = random_phone()

        writer.writerow(
            [
                embg,
                ime,
                prezime,
                adresa,
                city,
                telefon,
                birth_date.strftime("%Y-%m-%d"),
                gender,
            ]
        )

        # Progress report every 100,000 rows.
        if row_number % 100_000 == 0:
            print(f"{row_number} rows generated...")

print("CSV file generated:", OUTPUT_FILE)
print("Total unique EMBG:", len(used_embg))