library(tidyverse)
library(rio)
library(uwo4419)
# Data
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
CES_raw <- import("2021 Canadian Election Study v2.0.dta")
## A function to replace -99 by NA
remove99 <- function(x){
out <- ifelse(x==-99,NA,x)
return(out)
}
## Cleaning
CES <- factorize(CES_raw) |>
# Renaming variables
rename(UUID=cps21_ResponseId,
yob=cps21_yob,
gender=cps21_genderid,
prov=cps21_province,
vote=pes21_votechoice2021,
pplFeel_Americans=cps21_groups_therm_7,
pplFeel_Francophones=cps21_groups_therm_3,
pplFeel_LPC=cps21_party_rating_23,
pplFeel_CPC=cps21_party_rating_24,
pplFeel_NDP=cps21_party_rating_25,
pplFeel_BQ=cps21_party_rating_26,
pplFeel_GPC=cps21_party_rating_27,
pplFeel_PPC=cps21_party_rating_29) |>
# Making my selection
select(UUID,yob,gender,prov,vote,
pplFeel_Americans,pplFeel_Francophones,
pplFeel_LPC,pplFeel_CPC,pplFeel_NDP,
pplFeel_BQ,pplFeel_GPC,pplFeel_PPC) |>
# Removing territories
filter(!(prov %in% c("Northwest Territories","Nunavut","Yukon"))) |>
mutate(age=2021-as.numeric(as.character(yob)),
vt = case_when(
vote == "Liberal Party" ~ "LPC",
vote == "Conservative Party" ~ "CPC",
vote == "ndp" ~ "NDP",
vote == "Bloc Québécois" ~ "BQ",
vote == "Green Party" ~ "GPC",
vote == "People's Party" ~ "PPC",
vote == "Another party (specify)" ~ "Other",
vote %in% c("I spoiled my vote","Don't know / Prefer not to answer") ~ NA_character_
),
prov=factor(prov,levels=c("British Columbia","Alberta","Saskatchewan","Manitoba",
"Ontario","Quebec",
"New Brunswick","Prince Edward Island","Nova Scotia",
"Newfoundland and Labrador"))) |>
mutate_at(vars(starts_with("pplFeel")),remove99) |>
filter(gender %in% c("A man", "A woman")) |>
mutate(gender=droplevels(gender))
CES):
CES |>
select(-pplFeel_Americans,-pplFeel_Francophones) |>
group_by(prov) |>
summarise(across(starts_with("pplFeel_"),~mean(.x,na.rm=T))) |>
ungroup() |>
pivot_longer(-prov,names_pattern = "pplFeel_(.*)",names_to = "Party", values_to = "score") |>
mutate(Party=factor(Party,levels=c("LPC","CPC","NDP","BQ","GPC","PPC"))) |>
ggplot(aes(x=score,y=prov,color=Party))+
geom_point() +
scale_color_manual(values=c("red","blue","orange","cyan","green","purple"))+
theme_minimal()+
coord_cartesian(xlim=c(0,100))+
labs(y="",x="\nFeeling Thermometer Average Score (0-100)")
DAMisc::sumStats(CES,"pplFeel_Francophones") |>
arrange(mean)
## # A tibble: 1 × 11
## variable mean sd iqr min q25 q50 q75 max n nNA
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
## 1 pplFeel_Francopho… 67.6 26.5 40 0 50 71 90 100 20797 2251
From the CES data:
CES |>
select(UUID,prov,starts_with("pplFeel_")) |>
select(-pplFeel_Americans,-pplFeel_Francophones) |>
pivot_longer(c(-UUID,-prov),names_pattern = "pplFeel_(.*)",names_to = "Party", values_to = "score") |>
group_by(prov,Party) |>
summarise(ci=mean_cl_normal(score)) |>
unnest_wider(ci) |>
mutate(Party=factor(Party,levels=c("LPC","CPC","NDP","BQ","GPC","PPC"))) |>
ggplot(aes(x=prov,y=y,ymin=ymin,ymax=ymax,color=Party))+
annotate('rect', xmin = seq(.5,8.5,2), xmax = seq(1.5,9.5,2),
ymin=-Inf, ymax=Inf, alpha=0.08, fill="black")+
geom_pointrange(size=0.1,position=position_dodge(0.5)) +
scale_color_manual(values=c("red","blue","orange","cyan","green","purple"))+
theme_minimal()+
theme(legend.position = "top")+
coord_flip(ylim=c(0,100))+
labs(x="",y="\nFeeling Thermometer Average Score (0-100)")+
guides(color = guide_legend(nrow = 1))
tTest("gender","pplFeel_CPC",data=CES)
## Summary:
## mean n se
## A man 46.20117 9380 31.91189
## A woman 38.7022 11172 30.65365
## Difference 7.498971 20552 0.438948
##
##
## Welch Two Sample t-test
##
## data: pplFeel_CPC by gender
## t = 17.084, df = 19642, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group A man and group A woman is not equal to 0
## 95 percent confidence interval:
## 6.638596 8.359346
## sample estimates:
## mean in group A man mean in group A woman
## 46.20117 38.70220
t.test(CES$pplFeel_LPC,CES$pplFeel_CPC,paired = T,alternative = "greater")
##
## Paired t-test
##
## data: CES$pplFeel_LPC and CES$pplFeel_CPC
## t = 14.114, df = 20459, p-value < 2.2e-16
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
## 4.562733 Inf
## sample estimates:
## mean difference
## 5.164663
| Test | Data type | Why do we use it? | Interpretation |
|---|---|---|---|
| \(\chi^2\) | Nominal | Are the frequencies of the contingency table of the two variables randomly distributed? | \(H_0\): Frequencies are randomly distributed. |
| \(\phi\) | Nominal (2x2 table) | How strong is the relationship (-1 to 1)? | 0=no >0.1=moderate >0.3=strong |
| Cramer's V | Nominal (>2x2 table) | How strong is the relationship (-1 to 1)? | 0=no >0.1=moderate >0.3=strong |
| \(\lambda\) | Nominal | What is the exact improvement in error in predicting DV by knowing IV? | Percent improvement |
| \(\gamma\) | Ordinal | How strong is the relationship (-1 to 1)? | 0=no >0.1=moderate >0.3=strong |
| Somer's d | Ordinal | How strong is the relationship (-1 to 1)? (A \(\gamma\) that accounts for ties on IV) | 0=no >0.1=moderate >0.3=strong |
| Kendall's \(\tau_b\) | Ordinal | How strong is the relationship (-1 to 1)? (A \(\gamma\) that accounts for ties on IV and DV) | 0=no >0.1=moderate >0.3=strong |
| Spearman's \(\rho_s\) | Ordinal | How strong is the relationship (-1 to 1)? (Talk of a correlation. Testing for monotonic relationship) | 0=no >0.1=moderate >0.3=strong |
| Pearson r | Continuous | How strong is the relationship (-1 to 1)? (Talk of a correlation. Testing for linear relationship) | 0=no >0.1=moderate >0.3=strong |
vote and
pplFeel_Francophones?makeStats(CES$vote,CES$pplFeel_Francophones,lambda=T,cramersV=T,chisq=T,gamma=T,d=T,taub=T,rho=T)
## statistic p-value
## Chi-squared 2202.8482 0
## Cramers V 0.1513 0
## Lambda 0.0551 0
## Kruskal-Goodman Gamma 0.0602 0
## Somers D 0.0514 0
## Tau-b 0.0517 0
## Spearmans Rho 0.0707 0
pplFeel_Francophones and feelings towards the parties?DAMisc::pwCorrMat(pplFeel_Francophones~pplFeel_LPC+pplFeel_CPC+
pplFeel_NDP+pplFeel_BQ+pplFeel_GPC+pplFeel_PPC,data=CES)
## Pairwise Correlations
## pplFeel_Francophones pplFeel_LPC pplFeel_CPC pplFeel_NDP
## pplFeel_Francophones
## pplFeel_LPC 0.162*
## pplFeel_CPC -0.112* -0.356*
## pplFeel_NDP 0.108* 0.419* -0.302*
## pplFeel_BQ 0.398* 0.020* 0.049* 0.068*
## pplFeel_GPC 0.092* 0.278* -0.165* 0.528*
## pplFeel_PPC -0.140* -0.217* 0.302* -0.134*
## pplFeel_BQ pplFeel_GPC pplFeel_PPC
## pplFeel_Francophones
## pplFeel_LPC
## pplFeel_CPC
## pplFeel_NDP
## pplFeel_BQ
## pplFeel_GPC 0.134*
## pplFeel_PPC 0.044* 0.109*