AdvancedTopics: OLAP.sql

File OLAP.sql, 21.3 KB (added by 231066, 22 hours ago)

Implementation of data cubes

Line 
1
2DROP MATERIALIZED VIEW IF EXISTS mv_election_results_cube CASCADE;
3
4CREATE MATERIALIZED VIEW mv_election_results_cube AS
5
6WITH base AS (
7 SELECT
8 e.election_id,
9 e.name AS election_name,
10 e.election_date,
11 EXTRACT(YEAR FROM e.election_date)::INT AS election_year,
12 et.type_name AS election_type,
13 wm.method_name AS winner_method,
14 cr.region_id AS country_id,
15 cr.name AS country_name,
16 pr.region_id AS planning_region_id,
17 pr.name AS planning_region_name,
18 r.region_id AS municipality_id,
19 r.name AS municipality_name,
20 pe.entity_id,
21 pe.name AS entity_name,
22 pe.type AS entity_type,
23 vr.station_id,
24 SUM(vr.votes) AS votes
25 FROM vote_result vr
26 JOIN election e ON e.election_id = vr.election_id
27 JOIN election_type et ON et.election_type_id = e.election_type_id
28 JOIN winner_method wm ON wm.method_id = e.winner_method_id
29 JOIN polling_station ps ON ps.station_id = vr.station_id
30 JOIN region r ON r.region_id = ps.municipality_id
31 LEFT JOIN region pr ON pr.region_id = r.parent_region_id
32 LEFT JOIN region cr ON cr.region_id = pr.parent_region_id
33 JOIN political_entity pe ON pe.entity_id = vr.entity_id
34 WHERE vr.candidate_id IS NULL
35 GROUP BY
36 e.election_id, e.name, e.election_date,
37 EXTRACT(YEAR FROM e.election_date),
38 et.type_name, wm.method_name,
39 cr.region_id, cr.name,
40 pr.region_id, pr.name,
41 r.region_id, r.name,
42 pe.entity_id, pe.name, pe.type,
43 vr.station_id
44)
45SELECT
46 election_id,
47 election_name,
48 election_date,
49 election_year,
50 election_type,
51 winner_method,
52 country_id,
53 country_name,
54 planning_region_id,
55 planning_region_name,
56 municipality_id,
57 municipality_name,
58 entity_id,
59 entity_name,
60 entity_type,
61 SUM(votes) AS total_votes,
62 COUNT(DISTINCT station_id) AS stations_reporting,
63 ROUND(
64 SUM(votes) * 100.0 / NULLIF(
65 SUM(SUM(votes)) OVER (
66 PARTITION BY election_id, country_id,
67 planning_region_id, municipality_id
68 ), 0
69 ), 2
70 ) AS vote_share_pct,
71 GROUPING(
72 election_id, country_id,
73 planning_region_id, municipality_id, entity_id
74 ) AS grouping_level
75FROM base
76GROUP BY ROLLUP (
77 (election_id, election_name, election_date,
78 election_year, election_type, winner_method),
79 (country_id, country_name),
80 (planning_region_id, planning_region_name),
81 (municipality_id, municipality_name),
82 (entity_id, entity_name, entity_type)
83)
84WITH DATA;
85
86CREATE INDEX idx_cube1_election ON mv_election_results_cube(election_id);
87CREATE INDEX idx_cube1_entity ON mv_election_results_cube(entity_id);
88CREATE INDEX idx_cube1_country ON mv_election_results_cube(country_id);
89CREATE INDEX idx_cube1_region ON mv_election_results_cube(planning_region_id);
90CREATE INDEX idx_cube1_grouping ON mv_election_results_cube(grouping_level);
91CREATE INDEX idx_cube1_year ON mv_election_results_cube(election_year);
92CREATE INDEX idx_cube1_type ON mv_election_results_cube(election_type);
93
94CREATE UNIQUE INDEX idx_cube1_unique ON mv_election_results_cube(
95 COALESCE(election_id, -1),
96 COALESCE(country_id, -1),
97 COALESCE(planning_region_id, -1),
98 COALESCE(municipality_id, -1),
99 COALESCE(entity_id, -1)
100);
101
102
103
104-- CUBE 2: mv_turnout_cube
105
106
107DROP MATERIALIZED VIEW IF EXISTS mv_turnout_cube CASCADE;
108
109CREATE MATERIALIZED VIEW mv_turnout_cube AS
110
111WITH
112
113registered AS (
114 SELECT
115 ps.municipality_id AS region_id,
116 COUNT(v.voter_id) AS registered_voters
117 FROM voter v
118 JOIN polling_station ps ON ps.station_id = v.station_id
119 GROUP BY ps.municipality_id
120),
121
122checkins AS (
123 SELECT
124 ps.municipality_id AS region_id,
125 ve.election_id,
126 COUNT(ve.voter_election_id) AS voters_checked_in
127 FROM voter_election ve
128 JOIN polling_station ps ON ps.station_id = ve.station_id
129 GROUP BY ps.municipality_id, ve.election_id
130),
131
132invalids AS (
133 SELECT
134 ps.municipality_id AS region_id,
135 b.election_id,
136 COUNT(*) AS invalid_ballots
137 FROM ballot b
138 JOIN polling_station ps ON ps.station_id = b.station_id
139 WHERE b.is_valid = FALSE
140 GROUP BY ps.municipality_id, b.election_id
141),
142
143base AS (
144 SELECT
145 e.election_id,
146 e.name AS election_name,
147 e.election_date,
148 EXTRACT(YEAR FROM e.election_date)::INT AS election_year,
149 et.type_name AS election_type,
150 cr.region_id AS country_id,
151 cr.name AS country_name,
152 pr.region_id AS planning_region_id,
153 pr.name AS planning_region_name,
154 r.region_id AS municipality_id,
155 r.name AS municipality_name,
156 COALESCE(reg.registered_voters, 0) AS registered_voters,
157 COALESCE(c.voters_checked_in, 0) AS voters_checked_in,
158 COALESCE(inv.invalid_ballots, 0) AS invalid_ballots
159 FROM election e
160 JOIN election_type et ON et.election_type_id = e.election_type_id
161 JOIN region r ON r.region_id IN (
162 SELECT DISTINCT municipality_id
163 FROM polling_station
164 )
165 LEFT JOIN region pr ON pr.region_id = r.parent_region_id
166 LEFT JOIN region cr ON cr.region_id = pr.parent_region_id
167 LEFT JOIN registered reg ON reg.region_id = r.region_id
168 LEFT JOIN checkins c ON c.region_id = r.region_id
169 AND c.election_id = e.election_id
170 LEFT JOIN invalids inv ON inv.region_id = r.region_id
171 AND inv.election_id = e.election_id
172 WHERE COALESCE(reg.registered_voters, 0) > 0
173)
174
175SELECT
176 election_id,
177 election_name,
178 election_date,
179 election_year,
180 election_type,
181 country_id,
182 country_name,
183 planning_region_id,
184 planning_region_name,
185 municipality_id,
186 municipality_name,
187 SUM(registered_voters) AS registered_voters,
188 SUM(voters_checked_in) AS voters_checked_in,
189 SUM(invalid_ballots) AS invalid_ballots,
190 ROUND(
191 SUM(voters_checked_in) * 100.0
192 / NULLIF(SUM(registered_voters), 0), 2
193 ) AS turnout_pct,
194 ROUND(
195 SUM(invalid_ballots) * 100.0
196 / NULLIF(SUM(voters_checked_in), 0), 2
197 ) AS invalid_pct,
198 GROUPING(
199 election_id, country_id,
200 planning_region_id, municipality_id
201 ) AS grouping_level
202FROM base
203GROUP BY ROLLUP (
204 (election_id, election_name, election_date,
205 election_year, election_type),
206 (country_id, country_name),
207 (planning_region_id, planning_region_name),
208 (municipality_id, municipality_name)
209)
210WITH DATA;
211
212CREATE INDEX idx_cube2_election ON mv_turnout_cube(election_id);
213CREATE INDEX idx_cube2_country ON mv_turnout_cube(country_id);
214CREATE INDEX idx_cube2_region ON mv_turnout_cube(planning_region_id);
215CREATE INDEX idx_cube2_grouping ON mv_turnout_cube(grouping_level);
216CREATE INDEX idx_cube2_year ON mv_turnout_cube(election_year);
217CREATE INDEX idx_cube2_type ON mv_turnout_cube(election_type);
218
219CREATE UNIQUE INDEX idx_cube2_unique ON mv_turnout_cube(
220 COALESCE(election_id, -1),
221 COALESCE(country_id, -1),
222 COALESCE(planning_region_id, -1),
223 COALESCE(municipality_id, -1)
224);
225
226
227-- CUBE 3: mv_candidate_cube
228
229DROP MATERIALIZED VIEW IF EXISTS mv_candidate_cube CASCADE;
230
231CREATE MATERIALIZED VIEW mv_candidate_cube AS
232
233WITH base AS (
234 SELECT
235 c.candidate_id,
236 p.person_id,
237 p.name AS first_name,
238 p.surname,
239 p.gender,
240 EXTRACT(YEAR FROM p.date_of_birth)::INT AS birth_year,
241 pp.party_id,
242 pp.name AS party_name,
243 pp.abbreviation AS party_abbreviation,
244 e.election_id,
245 e.name AS election_name,
246 e.election_date,
247 EXTRACT(YEAR FROM e.election_date)::INT AS election_year,
248 et.type_name AS election_type,
249 cli.position AS list_position
250 FROM candidate c
251 JOIN person p ON p.person_id = c.person_id
252 JOIN candidate_party cp ON cp.candidate_id = c.candidate_id
253 JOIN political_party pp ON pp.party_id = cp.party_id
254 JOIN election e ON e.election_id = cp.election_id
255 JOIN election_type et ON et.election_type_id = e.election_type_id
256 LEFT JOIN candidate_list_item cli ON cli.candidate_id = c.candidate_id
257 LEFT JOIN candidate_list cl ON cl.list_id = cli.list_id
258 AND cl.election_id = e.election_id
259)
260
261SELECT
262 candidate_id,
263 first_name,
264 surname,
265 gender,
266 birth_year,
267 party_id,
268 party_name,
269 party_abbreviation,
270 election_id,
271 election_name,
272 election_date,
273 election_year,
274 election_type,
275 COUNT(DISTINCT election_id) AS elections_contested,
276 COUNT(DISTINCT party_id) AS parties_represented,
277 MIN(list_position) AS best_list_position,
278 MAX(list_position) AS worst_list_position,
279 AVG(list_position) AS avg_list_position,
280 GROUPING(
281 candidate_id, party_id, election_id
282 ) AS grouping_level
283FROM base
284GROUP BY ROLLUP (
285 (candidate_id, first_name, surname, gender, birth_year),
286 (party_id, party_name, party_abbreviation),
287 (election_id, election_name, election_date,
288 election_year, election_type)
289)
290WITH DATA;
291
292CREATE INDEX idx_cube3_candidate ON mv_candidate_cube(candidate_id);
293CREATE INDEX idx_cube3_party ON mv_candidate_cube(party_id);
294CREATE INDEX idx_cube3_election ON mv_candidate_cube(election_id);
295CREATE INDEX idx_cube3_grouping ON mv_candidate_cube(grouping_level);
296CREATE INDEX idx_cube3_year ON mv_candidate_cube(election_year);
297
298CREATE UNIQUE INDEX idx_cube3_unique ON mv_candidate_cube(
299 COALESCE(candidate_id, -1),
300 COALESCE(party_id, -1),
301 COALESCE(election_id, -1)
302);
303
304
305-- CUBE 4: mv_party_dominance_cube
306
307
308DROP MATERIALIZED VIEW IF EXISTS mv_party_dominance_cube CASCADE;
309
310CREATE MATERIALIZED VIEW mv_party_dominance_cube AS
311
312WITH
313party_votes AS (
314 SELECT
315 pp.party_id,
316 pp.name AS party_name,
317 pp.abbreviation,
318 pp.founded_year,
319 pp.ideology,
320 e.election_id,
321 e.name AS election_name,
322 e.election_date,
323 EXTRACT(YEAR FROM e.election_date)::INT AS election_year,
324 et.type_name AS election_type,
325 cr.region_id AS country_id,
326 cr.name AS country_name,
327 SUM(vr.votes) AS total_votes
328 FROM vote_result vr
329 JOIN political_entity pe ON pe.entity_id = vr.entity_id
330 JOIN political_party pp ON pp.party_id = pe.party_id
331 JOIN election e ON e.election_id = vr.election_id
332 JOIN election_type et ON et.election_type_id = e.election_type_id
333 JOIN polling_station ps ON ps.station_id = vr.station_id
334 JOIN region r ON r.region_id = ps.municipality_id
335 LEFT JOIN region pr ON pr.region_id = r.parent_region_id
336 LEFT JOIN region cr ON cr.region_id = pr.parent_region_id
337 WHERE vr.candidate_id IS NULL
338 AND pe.party_id IS NOT NULL
339 GROUP BY
340 pp.party_id, pp.name, pp.abbreviation,
341 pp.founded_year, pp.ideology,
342 e.election_id, e.name, e.election_date,
343 EXTRACT(YEAR FROM e.election_date),
344 et.type_name,
345 cr.region_id, cr.name
346),
347
348-- Election totals for vote share calculation
349election_totals AS (
350 SELECT
351 election_id,
352 country_id,
353 SUM(total_votes) AS election_total
354 FROM party_votes
355 GROUP BY election_id, country_id
356)
357
358SELECT
359 pv.party_id,
360 pv.party_name,
361 pv.abbreviation,
362 pv.founded_year,
363 pv.ideology,
364 pv.election_id,
365 pv.election_name,
366 pv.election_date,
367 pv.election_year,
368 pv.election_type,
369 pv.country_id,
370 pv.country_name,
371 SUM(pv.total_votes) AS total_votes,
372 ROUND(
373 SUM(pv.total_votes) * 100.0
374 / NULLIF(MAX(et.election_total), 0), 2
375 ) AS vote_share_pct,
376 RANK() OVER (
377 PARTITION BY pv.election_id, pv.country_id
378 ORDER BY SUM(pv.total_votes) DESC
379 ) AS rank_in_election,
380 GROUPING(
381 pv.party_id, pv.election_id, pv.country_id
382 ) AS grouping_level
383FROM party_votes pv
384JOIN election_totals et ON et.election_id = pv.election_id
385 AND et.country_id = pv.country_id
386GROUP BY ROLLUP (
387 (pv.party_id, pv.party_name, pv.abbreviation,
388 pv.founded_year, pv.ideology),
389 (pv.election_id, pv.election_name, pv.election_date,
390 pv.election_year, pv.election_type),
391 (pv.country_id, pv.country_name)
392)
393WITH DATA;
394
395CREATE INDEX idx_cube4_party ON mv_party_dominance_cube(party_id);
396CREATE INDEX idx_cube4_election ON mv_party_dominance_cube(election_id);
397CREATE INDEX idx_cube4_country ON mv_party_dominance_cube(country_id);
398CREATE INDEX idx_cube4_grouping ON mv_party_dominance_cube(grouping_level);
399CREATE INDEX idx_cube4_year ON mv_party_dominance_cube(election_year);
400CREATE INDEX idx_cube4_rank ON mv_party_dominance_cube(rank_in_election);
401
402CREATE UNIQUE INDEX idx_cube4_unique ON mv_party_dominance_cube(
403 COALESCE(party_id, -1),
404 COALESCE(election_id, -1),
405 COALESCE(country_id, -1)
406);
407
408
409
410-- CUBE 5: mv_electoral_district_cube
411
412DROP MATERIALIZED VIEW IF EXISTS mv_electoral_district_cube CASCADE;
413
414CREATE MATERIALIZED VIEW mv_electoral_district_cube AS
415
416WITH
417
418district_votes AS (
419 SELECT
420 ed.district_id,
421 ed.election_id,
422 ed.region_id,
423 ed.seats_available,
424 ep.entity_id,
425 SUM(vr.votes) AS entity_votes
426 FROM electoral_district ed
427 JOIN election_participant ep ON ep.district_id = ed.district_id
428 JOIN vote_result vr ON vr.election_id = ed.election_id
429 AND vr.entity_id = ep.entity_id
430 JOIN polling_station ps ON ps.station_id = vr.station_id
431 JOIN region r ON r.region_id = ps.municipality_id
432 WHERE vr.candidate_id IS NULL
433 AND (
434 r.region_id = ed.region_id
435 OR r.parent_region_id = ed.region_id
436 )
437 GROUP BY
438 ed.district_id, ed.election_id,
439 ed.region_id, ed.seats_available,
440 ep.entity_id
441),
442
443district_totals AS (
444 SELECT
445 district_id,
446 SUM(entity_votes) AS district_total,
447 COUNT(DISTINCT entity_id) AS entities_competing
448 FROM district_votes
449 GROUP BY district_id
450),
451
452eligible AS (
453 SELECT
454 dv.*,
455 dt.district_total,
456 dt.entities_competing,
457 ROUND(dv.entity_votes * 100.0
458 / NULLIF(dt.district_total, 0), 2) AS vote_share_pct
459 FROM district_votes dv
460 JOIN district_totals dt ON dt.district_id = dv.district_id
461 WHERE dv.entity_votes * 100.0
462 / NULLIF(dt.district_total, 0) >= 5.0
463),
464
465quotients AS (
466 SELECT
467 e.*,
468 gs.divisor,
469 e.entity_votes::NUMERIC / gs.divisor AS quotient
470 FROM eligible e
471 CROSS JOIN generate_series(1, COALESCE(e.seats_available, 1)) AS gs(divisor)
472),
473
474ranked AS (
475 SELECT
476 *,
477 RANK() OVER (
478 PARTITION BY district_id
479 ORDER BY quotient DESC
480 ) AS quotient_rank
481 FROM quotients
482),
483
484seats_calc AS (
485 SELECT
486 district_id,
487 election_id,
488 region_id,
489 seats_available,
490 entity_id,
491 entity_votes,
492 district_total,
493 entities_competing,
494 vote_share_pct,
495 COUNT(*) FILTER (
496 WHERE quotient_rank <= seats_available
497 ) AS seats_won
498 FROM ranked
499 GROUP BY
500 district_id, election_id, region_id,
501 seats_available, entity_id,
502 entity_votes, district_total,
503 entities_competing, vote_share_pct
504)
505
506SELECT
507 s.election_id,
508 e.name AS election_name,
509 e.election_date,
510 EXTRACT(YEAR FROM e.election_date)::INT AS election_year,
511 et.type_name AS election_type,
512 s.district_id,
513 r.name AS district_name,
514 pr.name AS parent_region_name,
515 MAX(s.seats_available) AS seats_available,
516 s.entity_id,
517 pe.name AS entity_name,
518 pe.type AS entity_type,
519 SUM(s.entity_votes) AS total_votes,
520 SUM(s.district_total) AS total_district_votes,
521 ROUND(SUM(s.entity_votes) * 100.0
522 / NULLIF(SUM(s.district_total), 0), 2) AS vote_share_pct,
523 SUM(s.seats_won) AS seats_won,
524 MAX(s.entities_competing) AS entities_competing,
525 GROUPING(s.election_id, s.district_id, s.entity_id) AS grouping_level
526FROM seats_calc s
527JOIN election e ON e.election_id = s.election_id
528JOIN election_type et ON et.election_type_id = e.election_type_id
529JOIN region r ON r.region_id = s.region_id
530LEFT JOIN region pr ON pr.region_id = r.parent_region_id
531JOIN political_entity pe ON pe.entity_id = s.entity_id
532GROUP BY ROLLUP (
533 (s.election_id, e.name, e.election_date,
534 EXTRACT(YEAR FROM e.election_date), et.type_name),
535 (s.district_id, r.name, pr.name),
536 (s.entity_id, pe.name, pe.type)
537)
538WITH DATA;
539
540CREATE INDEX idx_cube5_election ON mv_electoral_district_cube(election_id);
541CREATE INDEX idx_cube5_district ON mv_electoral_district_cube(district_id);
542CREATE INDEX idx_cube5_entity ON mv_electoral_district_cube(entity_id);
543CREATE INDEX idx_cube5_grouping ON mv_electoral_district_cube(grouping_level);
544CREATE INDEX idx_cube5_seats ON mv_electoral_district_cube(seats_won);
545
546CREATE UNIQUE INDEX idx_cube5_unique ON mv_electoral_district_cube(
547 COALESCE(election_id, -1),
548 COALESCE(district_id, -1),
549 COALESCE(entity_id, -1)
550);