4. Exercise

Using the data we just cleaned (CES):

  1. Compute the average score given to each party per province.
  2. Present these averages in a graph where the Y axis are the provinces and the X axis the averages.
library(tidyverse)
## Setting up
library(rio)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
CES_raw <- import("2021 Canadian Election Study v2.0.dta") 

## A function to replace -99 by NA
remove99 <- function(x){
  out <- ifelse(x==-99,NA,x)
  return(out)
}

## Cleaning
CES <- factorize(CES_raw) |> 
  rename(UUID=cps21_ResponseId,
         yob=cps21_yob,
         gender=cps21_genderid,
         prov=cps21_province,
         vote=pes21_votechoice2021,
         pplFeel_Americans=cps21_groups_therm_7,
         pplFeel_Francophones=cps21_groups_therm_3,
         pplFeel_LPC=cps21_party_rating_23,
         pplFeel_CPC=cps21_party_rating_24,
         pplFeel_NDP=cps21_party_rating_25,
         pplFeel_BQ=cps21_party_rating_26,
         pplFeel_GPC=cps21_party_rating_27,
         pplFeel_PPC=cps21_party_rating_29) |> 
  select(UUID,yob,gender,prov,vote,
         pplFeel_Americans,pplFeel_Francophones,
         pplFeel_LPC,pplFeel_CPC,pplFeel_NDP,
         pplFeel_BQ,pplFeel_GPC,pplFeel_PPC) |> 
  filter(!(prov %in% c("Northwest Territories","Nunavut","Yukon"))) |> 
  mutate(age=2021-as.numeric(as.character(yob)),
         vt = case_when(
              vote == "Liberal Party" ~ "LPC",
              vote == "Conservative Party" ~ "CPC",
              vote == "ndp" ~ "NDP",
              vote == "Bloc Québécois" ~ "BQ",
              vote == "Green Party" ~ "GPC",
              vote == "People's Party" ~ "PPC",
              vote == "Another party (specify)" ~ "Other",
              vote %in% c("I spoiled my vote","Don't know / Prefer not to answer") ~ NA_character_
            ),
         prov=factor(prov,levels=c("British Columbia","Alberta","Saskatchewan","Manitoba",
                                   "Ontario","Quebec",
                                   "New Brunswick","Prince Edward Island","Nova Scotia",
                                   "Newfoundland and Labrador"))) |> 
  mutate_at(vars(starts_with("pplFeel")),remove99)

# Option 1
CES |> 
  select(-pplFeel_Americans,-pplFeel_Francophones) |> 
  group_by(prov) |> 
  summarise(across(starts_with("pplFeel_"),~mean(.x,na.rm=T))) |> 
  ungroup() |> 
  pivot_longer(-prov,names_pattern = "pplFeel_(.*)",names_to = "Party", values_to = "score") |> 
  mutate(Party=factor(Party,levels=c("LPC","CPC","NDP","BQ","GPC","PPC"))) |> 
  ggplot(aes(x=score,y=prov,color=Party))+
  geom_point() +
  scale_color_manual(values=c("red","blue","orange","cyan","green","purple"))+
  theme_minimal()+
  coord_cartesian(xlim=c(0,100))+
  labs(y="",x="\nFeeling Thermometer Average Score (0-100)")

# Option 2
CES |> 
  group_by(prov) |> 
  summarise(mean_LPC=mean(pplFeel_LPC,na.rm=T),
            mean_CPC=mean(pplFeel_CPC,na.rm=T),
            mean_NDP=mean(pplFeel_NDP,na.rm=T),
            mean_BQ =mean(pplFeel_BQ,na.rm=T),
            mean_GPC=mean(pplFeel_GPC,na.rm=T),
            mean_PPC=mean(pplFeel_PPC,na.rm=T)) |> 
  ungroup() |> 
  pivot_longer(-prov,names_pattern = "mean_(.*)",names_to = "Party", values_to = "score") |> 
  mutate(Party=factor(Party,levels=c("LPC","CPC","NDP","BQ","GPC","PPC"))) |> 
  ggplot(aes(x=score,y=prov,color=Party))+
  geom_point() +
  scale_color_manual(values=c("red","blue","orange","cyan","green","purple"))+
  theme_minimal()+
  coord_cartesian(xlim=c(0,100))+
  labs(y="",x="\nFeeling Thermometer Average Score (0-100)")

# Option 3
LPC <- CES |>
  select(prov,pplFeel_LPC) |> 
  group_by(prov) |> 
  summarise(score=mean(pplFeel_LPC,na.rm=T)) |> 
  mutate(Party="LPC")

CPC <- CES |>
  select(prov,pplFeel_CPC) |> 
  group_by(prov) |> 
  summarise(score=mean(pplFeel_CPC,na.rm=T)) |> 
  mutate(Party="CPC")

NDP <- CES |>
  select(prov,pplFeel_NDP) |> 
  group_by(prov) |> 
  summarise(score=mean(pplFeel_NDP,na.rm=T)) |> 
  mutate(Party="NDP")

BQ <- CES |>
  select(prov,pplFeel_BQ) |> 
  group_by(prov) |> 
  summarise(score=mean(pplFeel_BQ,na.rm=T)) |> 
  mutate(Party="BQ")

GPC <- CES |>
  select(prov,pplFeel_GPC) |> 
  group_by(prov) |> 
  summarise(score=mean(pplFeel_GPC,na.rm=T)) |> 
  mutate(Party="GPC")
  
PPC <- CES |>
  select(prov,pplFeel_PPC) |> 
  group_by(prov) |> 
  summarise(score=mean(pplFeel_PPC,na.rm=T)) |> 
  mutate(Party="PPC")

GraphData <- rbind(LPC,CPC,NDP,BQ,GPC,PPC) |> 
  mutate(Party=factor(Party,levels=c("LPC","CPC","NDP","BQ","GPC","PPC")))

ggplot(GraphData,aes(x=score,y=prov,color=Party))+
  geom_point() +
  scale_color_manual(values=c("red","blue","orange","cyan","green","purple"))+
  theme_minimal()+
  coord_cartesian(xlim=c(0,100))+
  labs(y="",x="\nFeeling Thermometer Average Score (0-100)")

# Option 4 - Pivot-first, summarise second
CES |> 
  select(UUID,prov,starts_with("pplFeel_")) |> 
  select(-pplFeel_Americans,-pplFeel_Francophones) |> 
  pivot_longer(c(-UUID,-prov),names_pattern = "pplFeel_(.*)",names_to = "Party", values_to = "score") |> 
  group_by(prov,Party) |> 
  summarise(score=mean(score,na.rm=T)) |> 
  mutate(Party=factor(Party,levels=c("LPC","CPC","NDP","BQ","GPC","PPC"))) |> 
  ggplot(aes(x=score,y=prov,color=Party))+
  geom_point() +
  scale_color_manual(values=c("red","blue","orange","cyan","green","purple"))+
  theme_minimal()+
  coord_cartesian(xlim=c(0,100))+
  labs(y="",x="\nFeeling Thermometer Average Score (0-100)")