Poland Presidential Elections 2020
Void votes per county
Jacek Kotowski
Loading libs
pacman::p_load(tidyverse, readxl, janitor, sf, scales, mgcv)
pacman::p_load(tidyverse, readxl, janitor, sf, scales, mgcv)
Loading data
from https://wybory.gov.pl/prezydent20200628/pl/dane_w_arkuszach
df_raw <-
read_xlsx(path = "dane/wyniki_gl_na_kand_po_gminach_proc_utf8.xlsx") %>%
clean_names() %>%
mutate(across(
.cols = c(
"frekwencja" ,
"percent_glosow_niewaznych" ,
"w_tym_z_powodu_postawienia_znaku_x_obok_nazwiska_dwoch_lub_wiekszej_liczby_kandydatow",
"w_tym_z_powodu_niepostawienia_znaku_x_obok_nazwiska_zadnego_kandydata" ,
"w_tym_z_powodu_postawienia_znaku_x_wylacznie_obok_skreslonego_nazwiska_kandydata" ,
"percent_glosow_waznych" ,
"andrzej_sebastian_duda" ,
"rafal_kazimierz_trzaskowski" ,
"liczba_obwodow"
),
~ str_replace(.,pattern = ",", replacement = ".") %>% as.numeric
))
from https://wybory.gov.pl/prezydent20200628/pl/dane_w_arkuszach
df_raw <-
read_xlsx(path = "dane/wyniki_gl_na_kand_po_gminach_proc_utf8.xlsx") %>%
clean_names() %>%
mutate(across(
.cols = c(
"frekwencja" ,
"percent_glosow_niewaznych" ,
"w_tym_z_powodu_postawienia_znaku_x_obok_nazwiska_dwoch_lub_wiekszej_liczby_kandydatow",
"w_tym_z_powodu_niepostawienia_znaku_x_obok_nazwiska_zadnego_kandydata" ,
"w_tym_z_powodu_postawienia_znaku_x_wylacznie_obok_skreslonego_nazwiska_kandydata" ,
"percent_glosow_waznych" ,
"andrzej_sebastian_duda" ,
"rafal_kazimierz_trzaskowski" ,
"liczba_obwodow"
),
~ str_replace(.,pattern = ",", replacement = ".") %>% as.numeric
))
Cleaning data
Counties codes are six digits precision in map but 7 digits precision in data. We need to aggregate them.
Also, Warsaw is several “counties” which need to be collapsed.
# df_raw <-
# df_raw %>%
# mutate(
# gmina =
# if_else(str_detect(rodzaj_jednostki, "Warszawa"),
# "Warszawa",
# gmina),
# kod_teryt =
# if_else(str_detect(rodzaj_jednostki, "Warszawa"),
# "146501",
# kod_teryt),
# )
# df_raw <-
# df_raw %>%
# group_by(kod_teryt) %>%
# summarise(gmina = first(gmina),
# across(where(is.numeric), sum))
Counties codes are six digits precision in map but 7 digits precision in data. We need to aggregate them.
Also, Warsaw is several “counties” which need to be collapsed.
# df_raw <-
# df_raw %>%
# mutate(
# gmina =
# if_else(str_detect(rodzaj_jednostki, "Warszawa"),
# "Warszawa",
# gmina),
# kod_teryt =
# if_else(str_detect(rodzaj_jednostki, "Warszawa"),
# "146501",
# kod_teryt),
# )
# df_raw <-
# df_raw %>%
# group_by(kod_teryt) %>%
# summarise(gmina = first(gmina),
# across(where(is.numeric), sum))
Loading shp files
from https://gis-support.pl/granice-administracyjne/
Read also about warning related to st_simplify here: https://stackoverflow.com/questions/60008135/st-simplify-dtolerence-with-decimal-degree
map_gminy <-
# st_read("dane/Gminy.shp") %>%
st_read("dane/Jednostki_ewidencyjne.shp") %>%
st_make_valid() %>%
select(kod_teryt = JPT_KOD_JE, gmina = JPT_NAZWA_) %>%
mutate(kod_teryt = str_sub(kod_teryt, end = 6)) %>%
mutate(
gmina =
case_when(
str_detect(kod_teryt, "10610") ~ "Łódz",
str_detect(kod_teryt, "12610") ~ "Kraków",
TRUE ~ as.character(gmina)
),
kod_teryt =
case_when(
# Lod
str_detect(kod_teryt, "10610") ~ "106101",
# Kra
str_detect(kod_teryt, "12610") ~ "126101",
# ZGr
str_detect(kod_teryt, "080910") ~ "086201",
TRUE ~ as.character(kod_teryt)
)
) %>%
group_by(kod_teryt) %>%
mutate(n = n()) %>%
summarize(gmina = first(gmina),
geometry = st_combine(geometry) ) %>%
ungroup()
## Reading layer `Jednostki_ewidencyjne' from data source `C:\Users\jkotows2\Desktop\_r_wybory\dane\Jednostki_ewidencyjne.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3138 features and 29 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 14.12288 ymin: 49.00205 xmax: 24.14578 ymax: 54.83642
## geographic CRS: ETRS89
## `summarise()` ungrouping output (override with `.groups` argument)
from https://gis-support.pl/granice-administracyjne/
Read also about warning related to st_simplify here: https://stackoverflow.com/questions/60008135/st-simplify-dtolerence-with-decimal-degree
map_gminy <-
# st_read("dane/Gminy.shp") %>%
st_read("dane/Jednostki_ewidencyjne.shp") %>%
st_make_valid() %>%
select(kod_teryt = JPT_KOD_JE, gmina = JPT_NAZWA_) %>%
mutate(kod_teryt = str_sub(kod_teryt, end = 6)) %>%
mutate(
gmina =
case_when(
str_detect(kod_teryt, "10610") ~ "Łódz",
str_detect(kod_teryt, "12610") ~ "Kraków",
TRUE ~ as.character(gmina)
),
kod_teryt =
case_when(
# Lod
str_detect(kod_teryt, "10610") ~ "106101",
# Kra
str_detect(kod_teryt, "12610") ~ "126101",
# ZGr
str_detect(kod_teryt, "080910") ~ "086201",
TRUE ~ as.character(kod_teryt)
)
) %>%
group_by(kod_teryt) %>%
mutate(n = n()) %>%
summarize(gmina = first(gmina),
geometry = st_combine(geometry) ) %>%
ungroup()
## Reading layer `Jednostki_ewidencyjne' from data source `C:\Users\jkotows2\Desktop\_r_wybory\dane\Jednostki_ewidencyjne.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3138 features and 29 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 14.12288 ymin: 49.00205 xmax: 24.14578 ymax: 54.83642
## geographic CRS: ETRS89
## `summarise()` ungrouping output (override with `.groups` argument)
Joining shp with data
df <- merge(map_gminy, df_raw, by="kod_teryt")
Which ones did not merge:
df_raw %>% anti_join(df)
## Joining, by = c("nr_okw", "kod_teryt", "rodzaj_jednostki", "powiat", "wojewodztwo", "frekwencja", "percent_glosow_niewaznych", "w_tym_z_powodu_postawienia_znaku_x_obok_nazwiska_dwoch_lub_wiekszej_liczby_kandydatow", "w_tym_z_powodu_niepostawienia_znaku_x_obok_nazwiska_zadnego_kandydata", "w_tym_z_powodu_postawienia_znaku_x_wylacznie_obok_skreslonego_nazwiska_kandydata", "percent_glosow_waznych", "andrzej_sebastian_duda", "rafal_kazimierz_trzaskowski", "liczba_obwodow")
<dbl> <chr> <chr> <chr> <chr> <chr> <dbl> 21 149801 statki statki statki mazowieckie 99.74 21 149901 zagranica zagranica zagranica mazowieckie 79.57
as.data.frame(map_gminy) %>% anti_join(df)
## Joining, by = c("kod_teryt", "geometry")
## Warning in `[<-.data.frame`(`*tmp*`, is_list, value = list(`3` = "<S3:
## sfc_GEOMETRY>")): replacement element 1 has 1 row to replace 0 rows
df %>% filter(str_detect(gmina.x, "Zielona"))
<chr> <chr> <dbl> <chr> <chr> <chr> <chr> 1 086201 m. Zielona Góra 12 gmina miejska m. Zielona Góra Zielona Góra lubuskie 2 240402 Dąbrowa Zielona 39 gmina wiejska gm. Dąbrowa Zielona częstochowski śląskie
Only statki (ships) and zagranica (foreign votes) left. Good.
df <- merge(map_gminy, df_raw, by="kod_teryt")
Which ones did not merge:
df_raw %>% anti_join(df)
## Joining, by = c("nr_okw", "kod_teryt", "rodzaj_jednostki", "powiat", "wojewodztwo", "frekwencja", "percent_glosow_niewaznych", "w_tym_z_powodu_postawienia_znaku_x_obok_nazwiska_dwoch_lub_wiekszej_liczby_kandydatow", "w_tym_z_powodu_niepostawienia_znaku_x_obok_nazwiska_zadnego_kandydata", "w_tym_z_powodu_postawienia_znaku_x_wylacznie_obok_skreslonego_nazwiska_kandydata", "percent_glosow_waznych", "andrzej_sebastian_duda", "rafal_kazimierz_trzaskowski", "liczba_obwodow")
<dbl> | <chr> | <chr> | <chr> | <chr> | <chr> | <dbl> | |
---|---|---|---|---|---|---|---|
21 | 149801 | statki | statki | statki | mazowieckie | 99.74 | |
21 | 149901 | zagranica | zagranica | zagranica | mazowieckie | 79.57 |
as.data.frame(map_gminy) %>% anti_join(df)
## Joining, by = c("kod_teryt", "geometry")
## Warning in `[<-.data.frame`(`*tmp*`, is_list, value = list(`3` = "<S3:
## sfc_GEOMETRY>")): replacement element 1 has 1 row to replace 0 rows
df %>% filter(str_detect(gmina.x, "Zielona"))
<chr> | <chr> | <dbl> | <chr> | <chr> | <chr> | <chr> | ||
---|---|---|---|---|---|---|---|---|
1 | 086201 | m. Zielona Góra | 12 | gmina miejska | m. Zielona Góra | Zielona Góra | lubuskie | |
2 | 240402 | Dąbrowa Zielona | 39 | gmina wiejska | gm. Dąbrowa Zielona | częstochowski | śląskie |
Only statki (ships) and zagranica (foreign votes) left. Good.
Neighbourhood of counties
Generate id’s for each row
df <-
df %>%
mutate(idx = row_number() %>% as.factor())
Generate matrix with neighboring counties
Output will look like:
Sparse geometry binary predicate list of length 2477, where the predicate was `touches’ first 10 elements: 1: 2 2: 1, 3, 4, 5, 6, 78, 591 3: 2, 6, 58, 96, 100, 591 4: 2, 5, 68, 76, 78, 156, 158
# counties_nb <- df %>%
# st_touches(sparse = T) %>%
# set_names(df$idx)
#
# saveRDS(counties_nb, "counties_nb.Rds")
counties_nb <- readRDS("counties_nb.Rds")
Generate id’s for each row
df <-
df %>%
mutate(idx = row_number() %>% as.factor())
Generate matrix with neighboring counties
Output will look like:
Sparse geometry binary predicate list of length 2477, where the predicate was `touches’ first 10 elements: 1: 2 2: 1, 3, 4, 5, 6, 78, 591 3: 2, 6, 58, 96, 100, 591 4: 2, 5, 68, 76, 78, 156, 158
# counties_nb <- df %>%
# st_touches(sparse = T) %>%
# set_names(df$idx)
#
# saveRDS(counties_nb, "counties_nb.Rds")
counties_nb <- readRDS("counties_nb.Rds")
Simplify geometries to make a dataframe lighter
I could not do that earlier because calculation of neighborhood matrix would not work.
df <-
df %>%
st_simplify(preserveTopology = T, dTolerance = 0.005)
## Warning in st_simplify.sfc(st_geometry(x), preserveTopology, dTolerance):
## st_simplify does not correctly simplify longitude/latitude data, dTolerance
## needs to be in decimal degrees
I could not do that earlier because calculation of neighborhood matrix would not work.
df <-
df %>%
st_simplify(preserveTopology = T, dTolerance = 0.005)
## Warning in st_simplify.sfc(st_geometry(x), preserveTopology, dTolerance):
## st_simplify does not correctly simplify longitude/latitude data, dTolerance
## needs to be in decimal degrees
Ranking void votes
df %>%
slice_max(percent_glosow_niewaznych,n=20) %>%
ggplot(
aes(
fct_reorder(gmina.x, percent_glosow_niewaznych),
percent_glosow_niewaznych)
) +
geom_col(fill = "blue",
color = "gray",
alpha = 0.1,
width = 0.7) +
geom_text(
aes(
label = paste0(
scales::number(
percent_glosow_niewaznych,
accuracy = 0.01)
)
)
) +
coord_flip() +
theme_light() +
labs(
title="Poland Presidential Elections 2020",
subtitle = "Void votes percent top 20",
caption = "Source: PKW, Author: Jacek Kotowski",
x = "",
y = ""
)

df %>%
slice_max(percent_glosow_niewaznych,n=20) %>%
ggplot(
aes(
fct_reorder(gmina.x, percent_glosow_niewaznych),
percent_glosow_niewaznych)
) +
geom_col(fill = "blue",
color = "gray",
alpha = 0.1,
width = 0.7) +
geom_text(
aes(
label = paste0(
scales::number(
percent_glosow_niewaznych,
accuracy = 0.01)
)
)
) +
coord_flip() +
theme_light() +
labs(
title="Poland Presidential Elections 2020",
subtitle = "Void votes percent top 20",
caption = "Source: PKW, Author: Jacek Kotowski",
x = "",
y = ""
)
Smooth with a gam regression
see: https://fromthebottomoftheheap.net/2017/10/19/first-steps-with-mrf-smooths/ For count data nb (negative binomial) distribution works well. For % data later on think using ? betar()
# Model
df_for_model <- df %>% select(-geometry) %>% as_tibble()
hist(df_for_model$percent_glosow_niewaznych)

hist(df_for_model$percent_glosow_niewaznych %>% scale(.))

What distribution to use? Poisson, negative-binomial, zero-inflated ? Trial and error elected negative-binomial.
m1 <-
gam(
percent_glosow_niewaznych ~
s(
idx,
bs = 'mrf',
k = 500,
xt = list(nb = counties_nb),
fx = TRUE
),
data = df_for_model,
method = 'REML',
control = gam.control(nthreads = 8),
family = Gamma() #tried Gamma.
)
summary(m1)
##
## Family: Gamma
## Link function: inverse
##
## Formula:
## percent_glosow_niewaznych ~ s(idx, bs = "mrf", k = 500, xt = list(nb = counties_nb),
## fx = TRUE)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.166806 0.006176 188.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(idx) 499 499 1.971 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.159 Deviance explained = 30.1%
## -REML = 1625.5 Scale est. = 0.068381 n = 2494
gam.check(m1)

##
## Method: REML Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-3.354595e-07,-3.354595e-07]
## (score 1625.498 & scale 0.06838101).
## Hessian positive definite, eigenvalue range [1025.09,1025.09].
## Model rank = 500 / 500
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(idx) 499 499 NA NA
see: https://fromthebottomoftheheap.net/2017/10/19/first-steps-with-mrf-smooths/ For count data nb (negative binomial) distribution works well. For % data later on think using ? betar()
# Model
df_for_model <- df %>% select(-geometry) %>% as_tibble()
hist(df_for_model$percent_glosow_niewaznych)
hist(df_for_model$percent_glosow_niewaznych %>% scale(.))
What distribution to use? Poisson, negative-binomial, zero-inflated ? Trial and error elected negative-binomial.
m1 <-
gam(
percent_glosow_niewaznych ~
s(
idx,
bs = 'mrf',
k = 500,
xt = list(nb = counties_nb),
fx = TRUE
),
data = df_for_model,
method = 'REML',
control = gam.control(nthreads = 8),
family = Gamma() #tried Gamma.
)
summary(m1)
##
## Family: Gamma
## Link function: inverse
##
## Formula:
## percent_glosow_niewaznych ~ s(idx, bs = "mrf", k = 500, xt = list(nb = counties_nb),
## fx = TRUE)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.166806 0.006176 188.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(idx) 499 499 1.971 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.159 Deviance explained = 30.1%
## -REML = 1625.5 Scale est. = 0.068381 n = 2494
gam.check(m1)
##
## Method: REML Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-3.354595e-07,-3.354595e-07]
## (score 1625.498 & scale 0.06838101).
## Hessian positive definite, eigenvalue range [1025.09,1025.09].
## Model rank = 500 / 500
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(idx) 499 499 NA NA
Add predictions to database.
df_m1<- data.frame(.predicted = predict(m1, type = "response"))
df <- df %>%
bind_cols(df_m1 )
df_m1<- data.frame(.predicted = predict(m1, type = "response"))
df <- df %>%
bind_cols(df_m1 )
Plot original data.
p <- ggplot(df) +
geom_sf(aes(fill=percent_glosow_niewaznych)) +
theme_void()+
labs(title = "Presidential elections Poland 2020",
subtitle = "Void votes",
caption = "Source: PKW, Author: Jacek Kotowski",
fill = "Void votes prc"
)+
scale_fill_viridis_c(
guide = guide_colourbar(barwidth = 0.5, barheight = 10),
labels = scales::number ,
option = "viridis",
breaks = c(0.01, 0.1, 0.2, 0.5, 1, 2, 3.8),
trans = "log10",
direction = -1)
p

p <- ggplot(df) +
geom_sf(aes(fill=percent_glosow_niewaznych)) +
theme_void()+
labs(title = "Presidential elections Poland 2020",
subtitle = "Void votes",
caption = "Source: PKW, Author: Jacek Kotowski",
fill = "Void votes prc"
)+
scale_fill_viridis_c(
guide = guide_colourbar(barwidth = 0.5, barheight = 10),
labels = scales::number ,
option = "viridis",
breaks = c(0.01, 0.1, 0.2, 0.5, 1, 2, 3.8),
trans = "log10",
direction = -1)
p
Plot smoothed data.
p2 <- ggplot(df) +
geom_sf(aes( fill=.predicted)) +
theme_void()+
labs(title = "Presidential elections Poland 2020",
subtitle = "Void votes smoothed with GAM",
caption = "Source: PKW, Author: Jacek Kotowski",
fill = "Void votes"
)+
scale_fill_viridis_c(
guide = guide_colourbar(barwidth = 0.5, barheight = 10),
labels = scales::number ,
option = "viridis",
breaks = c(0.01, 0.1, 0.2, 0.5, 1, 2, 3),
trans = "log10",
direction = -1)
p2

p2 <- ggplot(df) +
geom_sf(aes( fill=.predicted)) +
theme_void()+
labs(title = "Presidential elections Poland 2020",
subtitle = "Void votes smoothed with GAM",
caption = "Source: PKW, Author: Jacek Kotowski",
fill = "Void votes"
)+
scale_fill_viridis_c(
guide = guide_colourbar(barwidth = 0.5, barheight = 10),
labels = scales::number ,
option = "viridis",
breaks = c(0.01, 0.1, 0.2, 0.5, 1, 2, 3),
trans = "log10",
direction = -1)
p2
Plot smoothed election map
Inspection of histograms of real, fitted and residual values.
library(patchwork)
hist1 <- ggplot(df, aes(x=percent_glosow_niewaznych)) +
geom_freqpoly() +
geom_histogram(alpha=0.5)+
scale_x_log10()+
labs(title = "Real")
hist2 <- ggplot(df, aes(x=.predicted)) +
geom_freqpoly() +
geom_histogram(alpha=0.5)+
scale_x_log10()+
labs(title = "Fitted")
hist1|hist2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

How to transform skewed values? f(x) log10(x + 1 - min(x))?
p3 <- ggplot(df) +
geom_sf(aes( fill=percent_glosow_niewaznych-.predicted ), color="lightgrey") +
theme_void()+
labs(title = "Presidential elections Poland 2020",
subtitle = "Residuals",
caption = "Source: PKW, Author: Jacek Kotowski",
fill = "Void votes residuals"
)+
scale_fill_gradientn(
# low = "lightblue",
# mid = "white",
# high = "red",
space = "Lab",
breaks = c( 0, 0.5, 1, 1.5, 1.8,2),
colours = c("white", "lightyellow", "orange", "red"),
# midpoint = 0,
na.value = "gray",
guide = "colourbar",
aesthetics = "fill"
# trans= "pseudo_log"
) +
guides(fill =
guide_legend(
reverse = TRUE,
title.theme = element_text(
size = 10,
face = "italic",
colour = "black",
angle = 0
)))
p3
Which counties seem anomalous to me?
df %>% st_drop_geometry() %>%
mutate(reszty = number(percent_glosow_niewaznych - .predicted, accuracy =0.01) ) %>%
arrange(desc(reszty)) %>%
head(10) %>%
select(gmina = gmina.x, wojewodztwo, prc_niewaznych = percent_glosow_niewaznych, reszty)
<chr> <chr> <dbl> <chr> 1 Platerówka dolnośląskie 3.02 1.82 2 SPYTKOWICE małopolskie 2.62 1.63 3 Jordanów Śląski dolnośląskie 2.90 1.60 4 Ślemień śląskie 2.57 1.26 5 SŁUPIA łódzkie 2.97 1.24 6 BONIEWO kujawsko-pomorskie 2.36 1.16 7 ŻUKOWICE dolnośląskie 2.08 1.07 8 Janowiec Kościelny warmińsko-mazurskie 1.98 1.04 9 Pakosław wielkopolskie 2.17 1.01 10 KLUKOWO podlaskie 2.13 0.93
Inspection of histograms of real, fitted and residual values.
library(patchwork)
hist1 <- ggplot(df, aes(x=percent_glosow_niewaznych)) +
geom_freqpoly() +
geom_histogram(alpha=0.5)+
scale_x_log10()+
labs(title = "Real")
hist2 <- ggplot(df, aes(x=.predicted)) +
geom_freqpoly() +
geom_histogram(alpha=0.5)+
scale_x_log10()+
labs(title = "Fitted")
hist1|hist2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
How to transform skewed values? f(x) log10(x + 1 - min(x))?
p3 <- ggplot(df) +
geom_sf(aes( fill=percent_glosow_niewaznych-.predicted ), color="lightgrey") +
theme_void()+
labs(title = "Presidential elections Poland 2020",
subtitle = "Residuals",
caption = "Source: PKW, Author: Jacek Kotowski",
fill = "Void votes residuals"
)+
scale_fill_gradientn(
# low = "lightblue",
# mid = "white",
# high = "red",
space = "Lab",
breaks = c( 0, 0.5, 1, 1.5, 1.8,2),
colours = c("white", "lightyellow", "orange", "red"),
# midpoint = 0,
na.value = "gray",
guide = "colourbar",
aesthetics = "fill"
# trans= "pseudo_log"
) +
guides(fill =
guide_legend(
reverse = TRUE,
title.theme = element_text(
size = 10,
face = "italic",
colour = "black",
angle = 0
)))
p3
Which counties seem anomalous to me?
df %>% st_drop_geometry() %>%
mutate(reszty = number(percent_glosow_niewaznych - .predicted, accuracy =0.01) ) %>%
arrange(desc(reszty)) %>%
head(10) %>%
select(gmina = gmina.x, wojewodztwo, prc_niewaznych = percent_glosow_niewaznych, reszty)
<chr> | <chr> | <dbl> | <chr> | |
---|---|---|---|---|
1 | Platerówka | dolnośląskie | 3.02 | 1.82 |
2 | SPYTKOWICE | małopolskie | 2.62 | 1.63 |
3 | Jordanów Śląski | dolnośląskie | 2.90 | 1.60 |
4 | Ślemień | śląskie | 2.57 | 1.26 |
5 | SŁUPIA | łódzkie | 2.97 | 1.24 |
6 | BONIEWO | kujawsko-pomorskie | 2.36 | 1.16 |
7 | ŻUKOWICE | dolnośląskie | 2.08 | 1.07 |
8 | Janowiec Kościelny | warmińsko-mazurskie | 1.98 | 1.04 |
9 | Pakosław | wielkopolskie | 2.17 | 1.01 |
10 | KLUKOWO | podlaskie | 2.13 | 0.93 |