TidyTuesday: Steam Games
# Data from #tidytuesday week of 2019-07-30 (source)

# Load R packages ----
library(tidyverse)
library(RColorBrewer)
library(scales)

# Download data ----
steam_raw <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-07-30/video_games.csv"
)

# Parse data ----
# Derive per-game age (in years) and the lower/upper bounds of the owner
# estimate, then drop rows that cannot be used in the analysis below.
steam_games <- steam_raw %>%
  # VARIABLE FOR AGE OF GAME
  mutate(
    # EXTRACT YEAR
    # Position-based: takes characters 8-12 of release_date and trims them.
    # NOTE(review): presumably dates look like "Nov 1, 2016" -- confirm.
    # Anything that does not leave a number here becomes NA in as.numeric().
    release_year = substring(release_date, 8, 12),
    release_year = as.numeric(str_trim(release_year)),
    release_year = case_when(
      release_year == 5 ~ 2015, # INCORRECT DATA POINT
      TRUE ~ release_year
    ),
    # 2019 matches the year of the TidyTuesday week named above.
    age = 2019 - release_year
  ) %>%
  # VARIABLE FOR MIN/MAX NUMBER OF OWNERS
  # `owners` is split on a literal ".." into its 1st and 2nd pieces. Both dots
  # are escaped so the separator cannot match "." followed by any character.
  mutate(
    max_owners = str_trim(word(owners, 2, sep = "\\.\\.")),
    max_owners = as.numeric(str_replace_all(max_owners, ",", "")),
    min_owners = str_trim(word(owners, 1, sep = "\\.\\.")),
    min_owners = as.numeric(str_replace_all(min_owners, ",", ""))
  ) %>%
  # REMOVE VALUES WITH INCONSISTENT RELEASE_DATE FORMAT (n=37)
  # filter() also drops rows where the condition is NA, so games whose year
  # could not be parsed are removed here together with ages of 15 or more.
  filter(age < 15) %>%
  # FILTER OUT STUDIO SOFTWARE
  # Rows with a missing price are dropped as well (NA < 150 is NA).
  filter(price < 150)

# Visualize data ----
# Question: how many people still play games that are X years old (on Steam)?
# ...