Analyzing data for the #tidytuesday week of 2019-03-05 (source)
Load libraries
library(tidyverse)
library(scales)
library(lubridate)
# Download the jobs-by-gender dataset from the TidyTuesday repository
# (2019-03-05 release) and keep it as `jobs_gender` for the steps below.
jobs_gender <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
Clean & plot data
# Scatter plot of 2016 rows: for every row, the relative gap between each
# gender's earnings and the row's overall earnings, weighted by that gender's
# share of the row's workers, plotted against overall earnings and faceted by
# major job category. Male points are blue, female points are red (see
# subtitle); no legend is drawn.
jobs_gender %>%
  # Compare against a number; quoting the year only works through implicit
  # numeric-to-character coercion.
  filter(year == 2016) %>%
  mutate(
    # (gender earnings / overall earnings - 1) is the relative gap;
    # workers_<gender> / total_workers weights it by workforce share.
    male_diff = (total_earnings_male / total_earnings - 1) *
      workers_male / total_workers,
    female_diff = (total_earnings_female / total_earnings - 1) *
      workers_female / total_workers
  ) %>%
  ggplot() +
  # One jittered layer per gender; female is drawn first, male on top.
  # Jitter is random, so exact point positions differ between runs.
  geom_jitter(
    aes(total_earnings, female_diff),
    color = "salmon",
    alpha = 0.5,
    size = 2.5
  ) +
  geom_jitter(
    aes(total_earnings, male_diff),
    color = "steelblue",
    alpha = 0.5,
    size = 2.5
  ) +
  # Reference line at "no difference from the overall figure".
  geom_hline(yintercept = 0, color = "grey54", linetype = "dashed") +
  facet_wrap(~major_category) +
  scale_x_continuous(
    labels = dollar_format(),
    limits = c(0, 200000)
  ) +
  scale_y_continuous(
    # accuracy = 1 labels the axis in whole percents.
    labels = percent_format(accuracy = 1),
    limits = c(-0.3, 0.3)
  ) +
  labs(
    x = "Average Median Earnings",
    y = "Difference from Average",
    caption = "Graphic: @eeysirhc\nSource: Bureau of Labor Statistics",
    title = "2016 Earnings Differences (Weighted) by Job Sector",
    subtitle = "Blue = Male; Red = Female"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(size = 12),
    legend.position = "none"
  )