Commit 22258776 authored by gossa's avatar gossa

Ajout de Barro-Lee

parent 40c1abfc
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
library(tidyverse)
library(ggplot2)
library(ggbeeswarm)
library(ggthemes)
library(maps)
library(gganimate)
# Read BL2013_MF2599_v2.2.csv, convert `year` to a factor (so it is treated
# as a discrete axis) and add `lh.pop`, the product of `lh` and `pop`.
bl <- mutate(
  read.csv("BL2013_MF2599_v2.2.csv"),
  year = as.factor(year),
  lh.pop = lh * pop
)
# Countries drawn as individual lines on top of the per-year boxplots.
# A wider selection (adding "Germany", "France", "Republic of Korea" and
# "Japan") used to be assigned to `bl.sel` first and was overwritten right
# away; that dead assignment is removed, the list is kept here for reference.
bl.sel <- filter(
  bl,
  country %in% c("USA", "United Kingdom", "Russian Federation", "China")
)

# Distribution of `lh.pop` per year over all countries (boxplots), with the
# selected countries overlaid as one coloured line each.
ggplot(bl, aes(x = year, y = lh.pop)) +
  geom_boxplot() +
  geom_line(data = bl.sel, aes(colour = country, group = country))
# Country ranking for 2010: rank 1 is the highest value of `lh` / `lhc`
# (ranks are computed on the negated values). Rows are sorted by the `lhc`
# rank and only the ranking columns are kept.
bl %>%
  filter(year == 2010) %>%
  mutate(rank.lh = rank(-lh), rank.lhc = rank(-lhc)) %>%
  arrange(rank.lhc) %>%
  select(country, rank.lh, lh, rank.lhc, lhc)
# Stack the six Barro-Lee extracts (M / F / MF files, 2599 and 1599 variants)
# into a single table. `sex` is made a character column in every file before
# binding so the columns are compatible; in the two F files it is overwritten
# with the literal "F".
# NOTE(review): presumably read.csv parses a column containing only "F" as
# logical, hence the overwrite - confirm.
bl <- list(
  read.csv("BL2013_M2599_v2.2.csv") %>% mutate(sex = as.character(sex)),
  read.csv("BL2013_F2599_v2.2.csv") %>% mutate(sex = "F"),
  read.csv("BL2013_MF2599_v2.2.csv") %>% mutate(sex = as.character(sex)),
  read.csv("BL2013_M1599_v2.2.csv") %>% mutate(sex = as.character(sex)),
  read.csv("BL2013_F1599_v2.2.csv") %>% mutate(sex = "F"),
  read.csv("BL2013_MF1599_v2.2.csv") %>% mutate(sex = as.character(sex))
) %>%
  bind_rows() %>%
  mutate(year = as.factor(year), sex = as.factor(sex))
# Population-weighted `lhc` per year and sex (rows with agefrom == 15, sexes
# M and F only), drawn as one line per sex.
# Order inside summarise() matters: `pop.lhc` is computed from the per-row
# `pop` before `pop` is replaced by its group total, and `lhc` then divides
# the two group totals.
bl %>%
  filter(agefrom == 15, sex %in% c("M", "F")) %>%
  group_by(year, sex) %>%
  summarise(
    pop.lhc = sum(lhc * pop),
    pop = sum(pop),
    lhc = pop.lhc / pop
  ) %>%
  ggplot(aes(x = year, y = lhc, fill = sex)) +
  geom_line(aes(colour = sex, group = sex), size = 2) +
  theme_hc()
# Smoothed trend of the per-country `lhc` values over the years, one curve
# per sex (rows with agefrom == 15, sexes M and F only).
bl %>%
  filter(agefrom == 15, sex %in% c("M", "F")) %>%
  ggplot(aes(x = year, y = lhc, fill = sex)) +
  geom_smooth(aes(colour = sex, group = sex)) +
  theme_hc()
# Sum of `lhc` per year, sex and region, shown as stacked columns (one bar
# per sex, stacked by region) with one facet per year.
bl %>%
  filter(agefrom == 15, sex %in% c("M", "F")) %>%
  group_by(year, sex, region_code) %>%
  summarise(lhc = sum(lhc)) %>%
  ggplot(aes(x = sex, y = lhc, fill = region_code)) +
  geom_col() +
  facet_wrap(~ year) +
  theme_hc()
# Per-year distribution of `lhc` by sex (rows with agefrom == 25, sexes M and
# F), with Germany overlaid as one line per sex.
# The overlay is restricted to the same agefrom / sex subset as the boxplots:
# `bl` stacks several age groups and an "MF" series, so an unfiltered overlay
# would put two age groups on the same line and add a third, unrelated series.
bl %>%
  filter(agefrom == 25, sex %in% c("M", "F")) %>%
  ggplot(aes(x = year, y = lhc, colour = sex)) +
  geom_boxplot() +
  geom_line(
    data = filter(
      bl,
      agefrom == 25,
      sex %in% c("M", "F"),
      country %in% c("Germany")
    ),
    aes(linetype = country, colour = sex, group = interaction(country, sex))
  ) +
  theme_hc()
# One row per country and year with the female and male `lhc` side by side
# (columns `F` and `M`, rows with agefrom == 25) and their difference
# `diff` = F - M.
bl.diff <- bl %>%
  filter(agefrom == 25, sex %in% c("M", "F")) %>%
  select(BLcode, region_code, country, year, sex, lhc) %>%
  pivot_wider(names_from = sex, values_from = lhc) %>%
  mutate(diff = F - M)

# Per-year distribution of the F - M difference across countries.
ggplot(bl.diff, aes(x = year, y = diff)) +
  geom_boxplot() +
  theme_hc()
# Countries of the latest year ranked by F - M difference. `rank` 1 is the
# largest difference; the table is sorted by descending rank, so the smallest
# differences come first.
# max() is applied to the factor levels, i.e. it compares the year labels as
# character strings.
bl.diff %>%
  filter(year == max(levels(year))) %>%
  mutate(rank = rank(-diff)) %>%
  arrange(desc(rank))
# Country names present in `bl.diff` that have no identically named region in
# the "world" map data, i.e. the names that need a manual mapping.
map_countries <- map_data("world")$region %>%
  as.character() %>%
  unique()
bl_countries <- bl.diff$country %>%
  as.character() %>%
  unique()
setdiff(bl_countries, map_countries)
# Animation: world map filled by `lhc` (rows with agefrom == 15 and
# sex == "MF"), with one animation state per year.
# NOTE(review): `bl_map_data` is not created anywhere in this script;
# presumably it is expected to already exist in the session - confirm.
# The join has no explicit `by`, so it uses the columns the two tables share.
p.global <- bl %>%
  filter(agefrom == 15, sex == "MF") %>%
  select(country, year, lhc) %>%
  left_join(bl_map_data) %>%
  ggplot(aes(x = long, y = lat, group = group, fill = lhc)) +
  geom_polygon(colour = "black", size = 0.1) +
  scale_fill_distiller(
    palette = "Purples",
    na.value = "white",
    direction = 1,
    labels = scales::percent
  ) +
  theme_void() +
  # The title is a glue template filled in by transition_states() below.
  labs(title = "Année = {closest_state}")

# Render the animation to "lhc.gif": each year is held for 1 unit and the
# transition to the next year takes 3 units, with fades in and out.
animate(
  plot = p.global +
    transition_states(year, transition_length = 3, state_length = 1) +
    enter_fade() +
    exit_fade(),
  width = 16,
  height = 8,
  units = "cm",
  res = 120,
  renderer = gifski_renderer(file = "lhc.gif")
)
# Animation: world map filled by `lhc.pop` = lhc * pop (rows with
# agefrom == 15 and sex == "MF"), with one animation state per year.
# NOTE(review): `bl_map_data` is not created anywhere in this script;
# presumably it is expected to already exist in the session - confirm.
p.global <- bl %>%
  filter(agefrom == 15, sex == "MF") %>%
  mutate(lhc.pop = lhc * pop) %>%
  select(country, year, lhc.pop) %>%
  left_join(bl_map_data) %>%
  ggplot(aes(x = long, y = lat, group = group, fill = lhc.pop)) +
  geom_polygon(colour = "black", size = 0.1) +
  scale_fill_distiller(
    palette = "Purples",
    na.value = "white",
    direction = 1,
    # `lhc.pop` is a rate multiplied by a population, not a share, so the
    # legend uses plain number formatting instead of scales::percent (which
    # would multiply the values by 100 and append "%").
    labels = scales::comma
  ) +
  theme_void() +
  # The title is a glue template filled in by transition_states() below.
  labs(title = "Année = {closest_state}")

# Render the animation to "lhc-pop.gif": each year is held for 1 unit and
# the transition to the next year takes 3 units, with fades in and out.
animate(
  plot = p.global +
    transition_states(year, transition_length = 3, state_length = 1) +
    enter_fade() +
    exit_fade(),
  width = 16,
  height = 8,
  units = "cm",
  res = 120,
  renderer = gifski_renderer(file = "lhc-pop.gif")
)
# Colour palettes for the education levels: a reordered "Paired" palette
# (not used below) and a reversed "Purples" palette.
levelsPaletteP <- RColorBrewer::brewer.pal(8, "Paired")[c(2, 1, 4, 3, 8, 7, 5)]
levelsPalette <- rev(RColorBrewer::brewer.pal(8, "Purples"))

# Stacked area chart per country of value * pop / 1000 for each education
# level column (lu ... lhc), for rows with agefrom == 25 and sex == "MF".
bl %>%
  filter(
    agefrom == 25,
    sex == "MF",
    country %in% c("USA", "China", "United Kingdom", "Germany", "France")
  ) %>%
  mutate(
    # Subtract each "completed" column from its total so that the stacked
    # areas do not count the completed part twice.
    lp = lp - lpc,
    ls = ls - lsc,
    lh = lh - lhc,
    country = fct_rev(country)
  ) %>%
  select(country, year, pop, lu:lhc) %>%
  pivot_longer(cols = lu:lhc, names_to = "level", values_to = "value") %>%
  mutate(
    # Fixed stacking order of the levels (reversed list).
    level = factor(
      level,
      levels = rev(c("lu", "lp", "lpc", "ls", "lsc", "lh", "lhc"))
    ),
    value.pop = value * pop / 1000
  ) %>%
  ggplot(
    aes(x = year, y = value.pop, colour = level, fill = level, group = level)
  ) +
  geom_area() +
  facet_grid(country ~ .) +
  scale_fill_manual(values = levelsPalette) +
  scale_colour_manual(values = levelsPalette) +
  theme_excel_new()
# Per-country change of `lhc` between 2005 and 2010 (rows with agefrom == 25
# and sex == "MF"), sorted from the smallest to the largest change.
# `year` is a factor here, so as.numeric(year) yields the position of the
# level, not the calendar year: the filter keeps every year from the sixth
# level onwards.
tes <- bl %>%
  filter(agefrom == 25, sex == "MF") %>%
  select(country, year, lhc) %>%
  filter(as.numeric(year) > 5) %>%
  pivot_wider(names_prefix = "y", names_from = year, values_from = lhc) %>%
  mutate(diff = y2010 - y2005) %>%
  arrange(diff)
# List the (country, year) pairs available for 1950.
bl %>%
  select(country, year) %>%
  filter(year == 1950)
# Population of France and Germany over the years, one line per country.
# `bl` stacks the M, F and MF extracts for two age groups, so summing `pop`
# over all rows of a (year, country) pair would count the same people several
# times. Only the combined-sex rows with agefrom == 15 are kept before
# aggregating.
bl %>%
  filter(
    country %in% c("France", "Germany"),
    agefrom == 15,
    sex == "MF"
  ) %>%
  group_by(year, country) %>%
  summarise(population = sum(pop)) %>%
  ggplot(aes(x = year, y = population, group = country, colour = country)) +
  geom_line() +
  scale_color_brewer(palette = "Paired")
---
title: "BarroLee"
author: "Julien Gossa"
date: "26/08/2020"
output: html_document
---
```{r setup, include=FALSE}
# Default chunk options: hide the source code, keep the chunk output.
knitr::opts_chunk$set(echo = FALSE, include = TRUE)
library(tidyverse)
library(ggplot2)
library(ggbeeswarm)
library(ggthemes)
library(maps)
library(gganimate)
```
Barro, Robert and Jong-Wha Lee, 2013, “A New Data Set of Educational Attainment in the World, 1950-2010.” Journal of Development Economics, vol 104, pp.184-198.
http://www.barrolee.com/
```{r load}
# Stack the six Barro-Lee extracts (M / F / MF files, 2599 and 1599 variants)
# into a single table. `sex` is made a character column in every file before
# binding; in the two F files it is overwritten with the literal "F".
# NOTE(review): presumably read.csv parses a column containing only "F" as
# logical, hence the overwrite - confirm.
bl <- list(
  read.csv("BL2013_M2599_v2.2.csv") %>% mutate(sex = as.character(sex)),
  read.csv("BL2013_F2599_v2.2.csv") %>% mutate(sex = "F"),
  read.csv("BL2013_MF2599_v2.2.csv") %>% mutate(sex = as.character(sex)),
  read.csv("BL2013_M1599_v2.2.csv") %>% mutate(sex = as.character(sex)),
  read.csv("BL2013_F1599_v2.2.csv") %>% mutate(sex = "F"),
  read.csv("BL2013_MF1599_v2.2.csv") %>% mutate(sex = as.character(sex))
) %>%
  bind_rows() %>%
  mutate(year = as.factor(year), sex = as.factor(sex)) %>%
  # Divide every column from `lu` to `lhc` by 100, so the plots below can
  # format them with scales::percent.
  mutate_at(vars(lu:lhc), function(x) x / 100)
# Lookup vector used to rename map regions to Barro-Lee country names:
# the names are the map names, the values are the Barro-Lee names.
bl2map <- read.csv("bl2map.csv")
bl2mapv <- setNames(as.character(bl2map$bl.name), bl2map$map.name)

# World map polygons with an added `country` column holding the Barro-Lee
# name. Macao and Hong Kong are promoted from subregion to region first, so
# they can be recoded like any other region.
bl_map_data <- map_data("world") %>%
  mutate(
    region = case_when(
      subregion == "Macao" ~ "Macao",
      subregion == "Hong Kong" ~ "Hong Kong",
      TRUE ~ region
    ),
    country = recode(region, !!!bl2mapv)
  )
```
## Massification et stagnation éducative
```{r global.data}
# Worldwide totals per year for rows with agefrom == 15 and sex == "MF":
# `pop.lhc` (sum of lhc * pop), `pop` (total population) and `lhc` (their
# ratio, i.e. the population-weighted rate).
# Order inside summarise() matters: `pop.lhc` uses the per-row `pop` before
# `pop` is replaced by its total.
bl.global <- bl %>%
  filter(agefrom == 15, sex %in% c("MF")) %>%
  group_by(year) %>%
  summarise(
    pop.lhc = sum(lhc * pop),
    pop = sum(pop),
    lhc = pop.lhc / pop
  )
```
```{r global.taux}
# Worldwide population-weighted `lhc` per year as a single line, on a
# percent axis fixed to the range 0 - 8 %.
ggplot(bl.global, aes(x = year, y = lhc)) +
  # `year` is a factor, so all points are put in one group to get a line.
  geom_line(aes(group = 1), size = 2) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.08)) +
  theme_hc()
```
```{r global.distribution}
# Per-year distribution of the country-level `lhc` values (rows with
# agefrom == 15 and sex == "MF"), on a percent axis.
# The chunk label was a duplicate of the previous chunk's label; knitr stops
# with a "duplicate label" error in that case, so this chunk has its own.
bl %>%
  filter(agefrom == 15, sex %in% c("MF")) %>%
  ggplot(aes(x = year, y = lhc)) +
  geom_boxplot(fill = "purple") +
  scale_y_continuous(labels = scales::percent) +
  theme_hc()
```
```{r global.map}
# World map of `lhc` in 2010.
# `bl` holds several rows per country and year (M, F and MF extracts for two
# age groups); without restricting to one of them every polygon would be
# joined and drawn several times with different values. The map uses the
# same subset as `bl.global`: agefrom == 15 and sex == "MF".
# right_join keeps every map polygon; countries without data are drawn with
# the `na.value` colour.
bl %>%
  filter(year == 2010, agefrom == 15, sex == "MF") %>%
  right_join(bl_map_data, by = "country") %>%
  ggplot(aes(x = long, y = lat, group = group, fill = lhc)) +
  geom_polygon(colour = "black", size = 0.1) +
  scale_fill_distiller(
    palette = "Purples",
    na.value = "white",
    direction = 1,
    labels = scales::percent
  ) +
  theme_void()
```
```{r global.map.anim}
# Animated world map of `lhc`, one animation state per year.
# - The chunk label was a duplicate of the previous chunk's label, which
#   makes knitr stop with a "duplicate label" error.
# - `bl` holds several rows per country and year (M, F and MF extracts for
#   two age groups); only agefrom == 15 / sex == "MF" is kept so each polygon
#   is drawn once per year.
# - left_join is used instead of right_join so that no rows with a missing
#   `year` (map regions without data) are fed to transition_states().
bl %>%
  filter(agefrom == 15, sex == "MF") %>%
  select(country, year, lhc) %>%
  left_join(bl_map_data, by = "country") %>%
  ggplot(aes(x = long, y = lat, group = group, fill = lhc)) +
  geom_polygon(colour = "black", size = 0.1) +
  scale_fill_distiller(
    palette = "Purples",
    na.value = "white",
    direction = 1,
    labels = scales::percent
  ) +
  theme_void() +
  transition_states(year)
```
```{r global.abs}
# Worldwide `pop.lhc` (sum of lhc * pop) per year as a single line; the axis
# labels show the value divided by 1000 followed by " M".
ggplot(bl.global, aes(x = year, y = pop.lhc)) +
  # `year` is a factor, so all points are put in one group to get a line.
  geom_line(aes(group = 1), size = 2) +
  scale_y_continuous(labels = function(x) paste0(x / 1000, " M")) +
  theme_hc()
```
## Taux d'éducation supérieure complète des femmes et des hommes, dans la population âgée de plus de 15 ans, tous pays confondus.
```{r FM.global}
# Population-weighted `lhc` per year and sex (rows with agefrom == 15, sexes
# M and F only), drawn as one line per sex.
# Order inside summarise() matters: `pop.lhc` uses the per-row `pop` before
# `pop` is replaced by its group total.
bl %>%
  filter(agefrom == 15, sex %in% c("M", "F")) %>%
  group_by(year, sex) %>%
  summarise(
    pop.lhc = sum(lhc * pop),
    pop = sum(pop),
    lhc = pop.lhc / pop
  ) %>%
  ggplot(aes(x = year, y = lhc, fill = sex)) +
  geom_line(aes(colour = sex, group = sex), size = 2) +
  theme_hc()
```
## Distribution des taux d'éducation supérieure complète des femmes et des hommes dans la population âgée de plus de 15 ans, par pays.
```{r FM.distribution}
# Per-year distribution of the country-level `lhc` values, one box per sex
# (rows with agefrom == 15, sexes M and F only).
bl %>%
  filter(agefrom == 15, sex %in% c("M", "F")) %>%
  ggplot(aes(x = year, y = lhc, fill = sex)) +
  geom_boxplot() +
  theme_hc()
```
## Distribution des différences des taux d'éducation supérieure complète des femmes et des hommes dans la population âgée de plus de 15 ans, par pays.
```{r diff.distribution}
# Per-year distribution across countries of the difference between the
# female and male `lhc` (rows with agefrom == 15): the table is widened to
# one `F` and one `M` column per country and year, then `diff` = F - M.
bl %>%
  filter(agefrom == 15, sex %in% c("M", "F")) %>%
  select(country, year, sex, lhc) %>%
  pivot_wider(names_from = sex, values_from = lhc) %>%
  mutate(diff = F - M) %>%
  ggplot(aes(x = year, y = diff)) +
  geom_boxplot() +
  theme_hc()
```
## Distribution des différences des taux d'éducation supérieure complète des femmes et des hommes dans la population âgée de plus de 15 ans, par pays.
```{r diff.region}
# Female vs male `lhc` per country for the latest year (rows with
# agefrom == 15): `diff.abs` is the difference F - M, `diff.rel` the same
# difference relative to the male value.
# The former `diff.dis = cuts()` column is removed: `cuts` is not a function
# defined in this document or provided by the loaded packages, so the call
# made the whole assignment fail, and the column was not used afterwards.
bl.diff <- bl %>%
  filter(
    agefrom == 15,
    sex %in% c("M", "F"),
    # max() is applied to the factor levels, i.e. to the year labels.
    year == max(levels(year))
  ) %>%
  select(country, region_code, year, sex, lhc) %>%
  pivot_wider(values_from = lhc, names_from = sex) %>%
  mutate(
    diff.abs = F - M,
    diff.rel = (F - M) / M
  )

# Distribution of the relative difference per region, regions ordered by
# their mean relative difference, drawn as horizontal boxplots.
bl.diff %>%
  ggplot(
    aes(x = reorder(region_code, diff.rel), y = diff.rel, fill = region_code)
  ) +
  geom_boxplot() +
  coord_flip() +
  theme_hc()
```
```{r diff.map}
# World map of the absolute female - male difference (`diff.abs`).
# The rates are divided by 100 when the data is loaded, so `diff.abs` is a
# fraction; the colour scale limits are expressed on that scale
# (+/- 12 percentage points) instead of the former c(-12, 12), which squeezed
# every country into the middle of the palette.
bl_map_data %>%
  left_join(bl.diff) %>%
  ggplot(aes(x = long, y = lat, group = group, fill = diff.abs)) +
  geom_polygon(colour = "gray90") +
  scale_fill_distiller(
    palette = "RdYlBu",
    limits = c(-0.12, 0.12),
    na.value = "white",
    labels = scales::percent
  ) +
  theme_void()
```
```{r diff.map.rel}
# World map of the relative female - male difference (`diff.rel`), capped at
# +1 with pmin() so that very large ratios fit the -1 .. 1 colour scale.
# The chunk label was a duplicate of the previous chunk's label; knitr stops
# with a "duplicate label" error in that case, so this chunk has its own.
bl_map_data %>%
  left_join(bl.diff) %>%
  ggplot(aes(x = long, y = lat, group = group, fill = pmin(diff.rel, 1))) +
  geom_polygon(colour = "gray90") +
  scale_fill_distiller(
    palette = "RdYlBu",
    limits = c(-1, 1),
    na.value = "white"
  ) +
  theme_void()
```
\ No newline at end of file
bl.name,map.name
"Congo","Republic of Congo"
"Cote dIvoire","Ivory Coast"
"United Republic of Tanzania","Tanzania"
"Dominican Rep.","Dominican Republic"
"Trinidad and Tobago","Trinidad"
"Iran (Islamic Republic of)","Iran"
"Republic of Korea","South Korea"
"Syrian Arab Republic","Syria"
"United Kingdom","UK"
"Libyan Arab Jamahiriya","Libya"
"Brunei Darussalam","Brunei"
"Russian Federation","Russia"
"Lao People's Democratic Republic","Laos"
"Viet Nam","Vietnam"
"Republic of Moldova","Moldova"
"China, Hong Kong Special Administrative Region","Hong Kong"
"China, Macao Special Administrative Region","Macao"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment