From the carData::Salaries
data set of 2008-2009
nine-month academic salary for Assistant Professors, Associate
Professors and Professors in a college in the U.S., create a graph that
presents:
library(tidyverse)
range(carData::Salaries$yrs.since.phd)
## [1] 1 56
range(carData::Salaries$salary)
## [1] 57800 231545
carData::Salaries |>
mutate(discipline=factor(discipline,
levels=c("A","B"),
labels=c("Theoretical","Applied"))) |>
group_by(yrs.since.phd,sex,discipline) |>
summarise(meanSal=mean(salary)) |>
ggplot(aes(x=yrs.since.phd,y=meanSal,color=sex))+
geom_line()+
scale_x_continuous("\nYears since PhD.",breaks=seq(0,56,5))+
scale_y_continuous("Salaries in USD\n",breaks=seq(0,250000,25000))+
scale_color_brewer("",palette = "Set1")+
facet_wrap(~discipline)+
theme_minimal()+
theme(legend.position = "top")
## Setting up
library(rio)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
CES_raw <- import("2021 Canadian Election Study v2.0.dta")
## A function to replace -99 by NA
remove99 <- function(x){
out <- ifelse(x==-99,NA,x)
return(out)
}
## Cleaning
CES <- factorize(CES_raw) |>
rename(UUID=cps21_ResponseId,
yob=cps21_yob,
gender=cps21_genderid,
prov=cps21_province,
vote=pes21_votechoice2021,
pplFeel_Americans=cps21_groups_therm_7,
pplFeel_Francophones=cps21_groups_therm_3,
pplFeel_LPC=cps21_party_rating_23,
pplFeel_CPC=cps21_party_rating_24,
pplFeel_NDP=cps21_party_rating_25,
pplFeel_BQ=cps21_party_rating_26,
pplFeel_GPC=cps21_party_rating_27,
pplFeel_PPC=cps21_party_rating_29) |>
select(UUID,yob,gender,prov,vote,
pplFeel_Americans,pplFeel_Francophones,
pplFeel_LPC,pplFeel_CPC,pplFeel_NDP,
pplFeel_BQ,pplFeel_GPC,pplFeel_PPC) |>
filter(!(prov %in% c("Northwest Territories","Nunavut","Yukon"))) |>
mutate(age=2021-as.numeric(as.character(yob)),
vt = case_when(
vote == "Liberal Party" ~ "LPC",
vote == "Conservative Party" ~ "CPC",
vote == "ndp" ~ "NDP",
vote == "Bloc Québécois" ~ "BQ",
vote == "Green Party" ~ "GPC",
vote == "People's Party" ~ "PPC",
vote == "Another party (specify)" ~ "Other",
vote %in% c("I spoiled my vote","Don't know / Prefer not to answer") ~ NA_character_
),
prov=factor(prov,levels=c("British Columbia","Alberta","Saskatchewan","Manitoba",
"Ontario","Quebec",
"New Brunswick","Prince Edward Island","Nova Scotia",
"Newfoundland and Labrador"))) |>
mutate_at(vars(starts_with("pplFeel")),remove99)
tidyr::pivot_longer()
tidyr::pivot_wider()
|
|
Using the CES
data, create a graph that superimposes the
histogram of feelings towards Francophones on top of the histogram of
feelings towards Americans.
Long <- CES |>
pivot_longer(c("pplFeel_Americans","pplFeel_Francophones"),
names_pattern = "pplFeel_(.*)",
names_to = "group",values_to = "score")
ggplot(Long,aes(x=score,fill=group))+
geom_histogram(position="identity",bins = 20,alpha=0.3)+
theme_minimal()+
theme(legend.position = "top")+
labs(x="\nFeeling Thermometer Score (0-100)",
y="Frequency\n",
fill="")
Let’s do the same exercise, but without the help of
pivot_longer
.
Data_A <- CES |>
select(-pplFeel_Francophones) |>
rename(score=pplFeel_Americans) |>
mutate(group="Americans")
Data_F <- CES |>
select(-pplFeel_Americans) |>
rename(score=pplFeel_Francophones) |>
mutate(group="Francophones")
Long2 <- rbind(Data_A,Data_F)
ggplot(Long2,aes(x=score,fill=group))+
geom_histogram(position="identity",bins = 20,alpha=0.3)+
theme_minimal()+
theme(legend.position = "top")+
labs(x="\nFeeling Thermometer Score (0-100)",
y="Frequency\n",
fill="")
From Long
, go back to the original data.
Wide <- Long |>
pivot_wider(names_from = "group", names_prefix = "pplFeel_",values_from = score)
From Long
to the original data without the use of
pivot_wider
.
Wide2 <- Long2 |>
filter(group=="Americans") |>
rename(pplFeel_Americans=score) |>
mutate(pplFeel_Francophones=Long$score[Long$group=="Francophones"]) |>
select(-group)
Using the data we just cleaned (CES
):