Monday, November 16, 2020

Excess mortality: exercises with ggplot2 (faceting and plots superimposed on each other).

Data from (Eurostat bookmark): https://appsso.eurostat.ec.europa.eu/nui/show.do?query=BOOKMARK_DS-1177601_QID_-7BD49E8C_UID_-3F171EB0&layout=TIME,C,X,0;GEO,L,Y,0;SEX,L,Z,0;UNIT,L,Z,1;INDICATORS,C,Z,2;&zSelection=DS-1177601SEX,T;DS-1177601INDICATORS,OBS_FLAG;DS-1177601UNIT,NR;&rankName1=UNIT_1_2_-1_2&rankName2=INDICATORS_1_2_-1_2&rankName3=SEX_1_2_-1_2&rankName4=TIME_1_0_0_0&rankName5=GEO_1_2_0_1&sortC=ASC_-1_FIRST&rStp=&cStp=&rDCh=&cDCh=&rDM=true&cDM=true&footnes=false&empty=false&wai=false&time_mode=NONE&time_most_recent=false&lang=EN&cfo=%23%23%23%2C%23%23%23.%23%23%23

pacman::p_load(covdata, tidyverse, ggrepel, slider, lubridate, gghighlight)

## "Not in" operator: the element-wise negation of %in%.
## Returns a logical vector, TRUE for each element of `x` with no match in `y`.
`%nin%` <- function(x, y) {
  # match() yields 0L (via nomatch) exactly where %in% would be FALSE
  match(x, y, nomatch = 0L) == 0L
}

Load mortality data

# Read the Eurostat CSV export. The SEX, UNIT and "Flag and Footnotes"
# columns are skipped on import and Value is read as character.
demo_r_mweek3_1_Data <- read_csv(
  "data/demo_r_mwk_ts_1_Data_2020-11-16.csv",
  locale = locale(grouping_mark = " ", encoding = "WINDOWS-1252"),
  col_types = cols(
    SEX = col_skip(),
    UNIT = col_skip(),
    Value = col_character(),
    `Flag and Footnotes` = col_skip()
  )
) %>%
  # Shorten the long Eurostat label for Germany; all other GEO values are
  # passed through unchanged.
  mutate(
    GEO = if_else(
      GEO == "Germany (until 1990 former territory of the FRG)",
      "Germany",
      GEO
    )
  )

# Lookup table of the countries to plot: ISO 3166 alpha-3 and alpha-2 codes,
# the country name used as the join key (GEO), and the population used as the
# denominator for per-million rates.
# NOTE(review): the reference date of the population figures is not recorded
# here -- confirm the source before reusing them.
pop <- tibble::tribble(
  ~iso3, ~iso2,    ~GEO,      ~pop,
  "HRV", "HR",   "Croatia",  4076246L,
  "CZE", "CZ",   "Czechia", 10649800L,
  "EST", "EE",   "Estonia",  1324820L,
  "FRA", "FR",    "France", 67012883L,
  "DEU", "DE",   "Germany", 83019213L,
  "LTU", "LT", "Lithuania",  2794184L,
  "ROU", "RO",   "Romania", 19414458L,
  # Latvia is "LV"; it was previously entered as "LT", which collided with
  # Lithuania wherever rows are grouped or labelled by iso2.
  "LVA", "LV",    "Latvia",  1919968L,
  "POL", "PL",    "Poland", 37972812L,
  "HUN", "HU",  "Hungary",  9772756L,
  "SRB", "RS",    "Serbia",  6963764L,
  "SVK", "SK",  "Slovakia",  5450421L,
  "ESP", "ES",     "Spain", 46758917L,
  "ITA", "IT",     "Italy", 60317116L
)
# Build the plotting table: one row per country and week, with the weekly
# death count (VALUE), the parsed year and week number, the population lookup
# columns, an end-of-series label and the rate per million inhabitants.
df <- demo_r_mweek3_1_Data %>%
  # Disabled steps (kept for reference):
  # janitor::clean_names() %>%
  # separate(freq_sex_unit_geo_time_period,into = c("freq","sex","unit","iso2"), sep = ";") %>%
  # pivot_longer(cols= starts_with("x"), names_to = "TIME", values_to = "VALUE" ) %>%
  # Value was read as text: strip everything except digits and dots, then
  # convert; entries with nothing numeric left become NA.
  mutate(
    VALUE = gsub(x = Value, pattern = '[^0-9\\.]', '', perl = TRUE) %>%
      as.numeric()
  ) %>%
  select(-Value) %>%
  # TIME is sliced by position: characters 1-4 are the year, 6-7 the week.
  mutate(
    TIME_week = str_sub(TIME, 6, 7) %>% as.numeric(),
    TIME_year = str_sub(TIME, 1, 4) %>% as.numeric()
  ) %>%
  # Drop missing counts and the week-99 rows *before* labelling, so that the
  # end label always lands on a row that survives into the plots.
  # NOTE(review): week 99 is presumably Eurostat's "unknown week" bucket --
  # confirm against the dataset metadata.
  filter(!is.na(VALUE), TIME_week != 99) %>%
  # group_by(iso2, TIME, TIME_year, TIME_week) %>%
  # summarise(VALUE = sum(VALUE)) %>%
  # Explicit key: keeps only countries listed in `pop` and avoids the
  # implicit "Joining, by" guess.
  inner_join(pop, by = "GEO") %>%
  group_by(iso2) %>%
  # Label only the last row of each country (relies on the input being in
  # ascending TIME order); every other row gets a blank label.
  mutate(end_label = if_else(TIME == dplyr::last(TIME), iso2, " ")) %>%
  ungroup() %>%
  mutate(mort_per_mil = VALUE / pop * 1000000)
  # Small multiples: one panel per country, one line per year, with 2020 drawn
  # in red and all other years in light gray.
  df %>%
    ggplot(aes(
      x = TIME_week,
      y = mort_per_mil,
      color = if_else(TIME_year == 2020, "red", "gray"),
      group = TIME_year
    )) +
    geom_line() +
    facet_wrap(~ GEO) +
    theme_minimal() +
    theme(legend.position = "none") +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(
      x = "Week",
      y = "Mortality per 1 million",
      title = "Weekly mortality per 1 million until week 41 (October 17th), 2020",
      subtitle = "Data imposed on previous years (gray) 2010-2019",
      caption = "Data: https://ec.europa.eu/eurostat/databrowser/view/DEMO_R_MWK_TS__custom_96757/default/table?lang=en"
    ) +
    # TIME_week is numeric, but a discrete position scale is used on purpose:
    # the limits "W1".."W53" sit at integer positions 1..53, so the numeric
    # weeks line up with the "W<n>" tick labels (every 4th week is labelled).
    scale_x_discrete(
      limits = paste0("W", seq(1, 53, by = 1)),
      breaks = paste0("W", seq(1, 53, by = 4))
    ) +
    # Named values tie each colour to its mapped level instead of relying on
    # the alphabetical order of "gray" and "red".
    scale_color_manual(values = c(gray = "lightgray", red = "red"))

  # 2020 only: one line per country, Poland highlighted in red and all other
  # countries in light grey, with the country code printed at the end of
  # each line.
  df %>%
    filter(TIME_year == 2020) %>%
    mutate(flag = if_else(iso2 == "PL", 1, 0)) %>%
    ggplot(mapping = aes(
      x = TIME,
      y = mort_per_mil,
      color = as.factor(flag),
      group = GEO
    )) +
    geom_line(size = 0.5) +
    # Plain geom_text(): the ggrepel-only arguments point.padding and
    # segment.colour were silently ignored here (with a warning), so they
    # are removed. end_label is blank except on each country's last row.
    geom_text(aes(label = end_label), size = 2.8) +
    # "none" replaces the deprecated FALSE; colour is the only mapped
    # aesthetic that would otherwise produce a legend.
    guides(color = "none") +
    #  scale_y_log10() +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(
      x = "Week",
      y = "Mortality per 1 million",
      title = "Mortality per 1 million",
      subtitle = "Eurostat weekly data until week 41, 2020 (October 17)",
      caption = "Data: https://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=demo_r_mweek3&lang=en"
    ) +
    # Named by factor level: "0" = other countries, "1" = Poland.
    scale_color_manual(values = c("0" = "lightgrey", "1" = "red"))

No comments:

Post a Comment

An example of a bat file that shows dialogues

@echo off setlocal :: Prompt user for input file names set /p jpgfile="Enter the name of the JPG file: " set /p archive="Ent...