# Load the district-level data on pupils' vernacular (German vs. non-German;
# source given in the text), give the columns descriptive names, and derive a
# numeric district code (1-23) from each district's name.
df_language <- readr::read_csv(
  here(
    "posts",
    "2020-03-02-primary-schools-in-vienna",
    "umgangssprache_deutsch_bezirk_wien.csv"
  )
) %>%
  clean_names() %>%
  rename(
    district = bezirk,
    pupils_total = volksschuler_innen_gesamt,
    german_abs = deutsch,
    non_german_abs = nicht_deutsch,
    non_german_perc = x16_17,
    non_german_perc_06 = x06_jul,
    change_16_06 = unterschied_in_percent
  ) %>%
  mutate(
    # The names below are listed in ascending order of their district code, so
    # a name's position in the vector IS its code -- keep the order intact.
    # Names that are not listed (or are missing) yield NA; as.numeric() keeps
    # the column a double.
    district_code = as.numeric(match(
      district,
      c(
        "Innere Stadt", # 1
        "Leopoldstadt", # 2
        "Landstraße", # 3
        "Wieden", # 4
        "Margareten", # 5
        "Mariahilf", # 6
        "Neubau", # 7
        "Josefstadt", # 8
        "Alsergrund", # 9
        "Favoriten", # 10
        "Simmering", # 11
        "Meidling", # 12
        "Hietzing", # 13
        "Penzing", # 14
        "Rudolfsheim-Fünfhaus", # 15
        "Ottakring", # 16
        "Hernals", # 17
        "Währing", # 18
        "Döbling", # 19
        "Brigittenau", # 20
        "Floridsdorf", # 21
        "Donaustadt", # 22
        "Liesing" # 23
      )
    ))
  )
# Attach each district's name and its percentage of non-German vernacular to
# the school-level data, matching rows on the district code.
df_share <- left_join(
  df_share,
  select(df_language, district_code, district, non_german_perc),
  by = "district_code"
)
# Scatter plot of the school-level AHS ratio (n_rel) against the district-level
# percentage of non-German vernacular, restricted to public "matura" schools,
# with a linear trend line and a hover tooltip per school.
plot_langauge <- df_share %>%
  mutate(
    # Hover text on three lines: the ratio; bezeichnung, str and district
    # code; the erhalter. Built before the rows are filtered --
    # NOTE(review): scales::percent() appears to choose its rounding from the
    # whole vector it receives, so moving this after filter() could change the
    # labels; confirm before reordering.
    tooltip = paste0(
      "ratio: ", scales::percent(n_rel),
      "\n",
      paste(
        properties_bezeichnung,
        properties_str,
        paste0(district_code, ". ", "district"),
        sep = ", "
      ),
      "\n",
      properties_erhalter
    )
  ) %>%
  filter(
    school_type_secondary == "matura",
    properties_erhalter == "öffentl."
  ) %>%
  # x and y are shared by the point layer and the trend line
  ggplot(aes(x = non_german_perc, y = n_rel)) +
  geom_jitter_interactive(aes(tooltip = tooltip)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  scale_x_continuous(
    # values are already on a 0-100 scale, hence scale = 1
    labels = scales::label_percent(scale = 1),
    limits = c(0, 100),
    minor_breaks = NULL
  ) +
  scale_y_continuous(
    labels = scales::percent,
    minor_breaks = NULL
  ) +
  scale_color_paletteer_d("ggsci::default_jama") +
  labs(
    title = "Relation between AHS ratio and primary school pupils' vernacular",
    subtitle = str_wrap(
      "Note that AHS ratio are school level data;
info on vernacular is district level data.
Only public schools. Hover over dots to get details.",
      150
    ),
    x = "% of primary school pupils with non-German vernacular per district",
    y = "AHS ratio",
    caption = my_caption
  ) +
  # facet_wrap(vars(properties_erhalter))+
  hrbrthemes::theme_ipsum_rc() +
  # tweaks applied on top of the base theme
  theme(
    # grid
    panel.grid.major.x = element_line(linetype = "dotted"),
    panel.grid.major.y = element_line(linetype = "dotted"),
    # legend
    legend.position = "top",
    legend.justification = "left",
    legend.title = element_blank(),
    # overall canvas
    plot.margin = margin(l = 0, t = 0.5, b = 0.5, unit = "cm"),
    plot.background = element_rect(
      fill = plot_bg_color,
      color = "transparent"
    ),
    # title, subtitle and caption
    plot.title.position = "plot",
    plot.title = element_text(
      size = 14,
      face = "bold",
      margin = margin(b = 0, unit = "cm")
    ),
    plot.subtitle = element_text(size = 12, color = "grey30"),
    plot.caption.position = "plot",
    plot.caption = element_markdown(color = "grey30", hjust = c(0, 1))
  )
# Turn the static ggplot into an interactive htmlwidget via girafe() so the
# tooltips defined on the point layer are shown on hover; the result replaces
# the ggplot object under the same name.
plot_langauge <- girafe(
  ggobj = plot_langauge,
  width_svg = 10,
  height_svg = 7,
  pointsize = 6,
  options = list(
    # tooltip appearance and how long it lingers after the pointer leaves
    opts_tooltip(
      css = "background-color:#323E4F;
color: white;
font-family:Roboto Condensed;",
      delay_mouseout = 5000
    )
  )
)
# Pearson correlation between the school-level AHS ratio and the district-level
# percentage of non-German vernacular, across all "matura" schools.
#
# `%in%` is used instead of `==` so that rows with a missing school type are
# left out rather than injected as NA entries, and
# `use = "pairwise.complete.obs"` drops pairs in which either value is missing
# (e.g. a school whose district found no match in the join). Without it a
# single NA makes the whole coefficient NA. With no missing values the result
# is the same as a plain cor() call.
#
# NOTE(review): the plot above is additionally restricted to public schools
# (properties_erhalter == "öffentl."), whereas this coefficient covers every
# "matura" school -- confirm which population is meant to be reported.
cor(
  df_share$n_rel[df_share$school_type_secondary %in% "matura"],
  df_share$non_german_perc[df_share$school_type_secondary %in% "matura"],
  use = "pairwise.complete.obs"
)