Survey Data Analysis (Quantitative)

Joe Ripberger

Introduction

Overview

This walkthrough covers the most common forms of survey data analysis:

  • Descriptive Analysis - Summarizing key characteristics of variables
  • Cross-Tabulation - Examining relationships between two variables
  • Statistical Tests - T-tests for group comparisons
  • Regression Analysis - Linear and logistic regression models

Throughout, we place special focus on the proper handling of survey weights.

Getting Started

Install and Load Packages

Key packages for survey data analysis:

  • tidyverse - Data wrangling and visualization
  • survey - Design-based survey analysis
  • srvyr - Tidy interface for survey package
# Install packages (only need to do this once)
# install.packages("tidyverse")
# install.packages("survey")
# install.packages("srvyr")

# Load packages for this session (re-run each time R restarts)
library(tidyverse)  # Data wrangling and visualization
library(survey)     # Design-based survey analysis
library(srvyr)      # Tidy interface for 'survey' package

Import Survey Data

# Import the survey data
# read_csv() comes from readr, which is loaded as part of the tidyverse
survey_raw <- read_csv("WX24_data_wtd.csv")

# Explore the data structure
glimpse(survey_raw)  # one line per column: name, type, first few values
head(survey_raw)     # first rows of the data

Import Survey Data

Rows: 1,354
Columns: 255
$ p_id                   <chr> "R_1myLufBiSmMh7hR", "R_3HH0M2zNhBG5Yid", "R_3m…
$ start_date             <dttm> 2024-10-20 21:41:01, 2024-10-10 19:21:09, 2024…
$ end_date               <dttm> 2024-10-20 21:52:14, 2024-10-10 20:15:26, 2024…
$ weight                 <dbl> 0.6575200, 2.7232804, 1.2694891, 0.9056132, 0.8…
$ age                    <dbl> 37, 19, 34, 58, 51, 69, 67, 33, 44, 30, 61, 31,…
$ gend                   <dbl> 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0,…
$ hisp                   <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
$ race                   <dbl> 6, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 2, 4, 1, 1,…
$ state_num              <dbl> 33, 44, 42, 41, 14, 18, 3, 3, 5, 1, 1, 47, 31, …
$ zip                    <chr> "11779", "75204", "57052", "29414", "60137", "4…
$ long_years             <dbl> 10, 2, 34, 21, 3, 40, 1, 6, 4, 9, 6, 30, 11, 4,…
$ last_state             <dbl> NA, 44, NA, NA, 52, NA, 3, NA, 5, NA, NA, NA, N…
$ last_state_othr_spec   <chr> NA, NA, NA, NA, "Ukraine", NA, NA, NA, NA, NA, …
$ last_zip               <chr> NA, "75204", NA, NA, "39600", NA, "85085", NA, …
$ now                    <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
$ rural                  <dbl> 3, 1, 3, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 2, 2, 2,…
$ home                   <dbl> 1, 3, 1, 1, 3, 1, 1, 1, 4, 3, 1, 2, 3, 1, 1, 1,…
$ home_spec              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ rent                   <dbl> 2, 1, 3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 1, 3, 3,…
$ adults                 <dbl> 4, 19, 2, 2, 4, 2, 2, 2, 1, 3, 2, 2, 10, 4, 2, …
$ children               <dbl> 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 1, 3, 1, 0, 0,…
$ follow                 <dbl> 5, 3, 4, 4, 5, 4, 4, 2, 2, 5, 5, 4, 5, 4, 5, 4,…
$ plan_around            <dbl> 5, 3, 2, 2, 4, 3, 3, 3, 3, 5, 5, 4, 5, 4, 3, 2,…
$ und_weather            <dbl> 5, 3, 3, 2, 1, 2, 2, 1, 3, 4, 5, 5, 5, 3, 2, 4,…
$ wthr_info_paper        <dbl> 2, 1, 2, 1, 1, 2, 1, 1, 2, 3, 2, 3, 2, 1, 2, 1,…
$ wthr_info_web          <dbl> 2, 4, 2, 1, 1, 5, 2, 5, 5, 4, 6, 3, 5, 4, 3, 1,…
$ wthr_info_govweb       <dbl> 2, 4, 2, 4, 1, 2, 4, 1, 6, 3, 6, 3, 4, 4, 2, 1,…
$ wthr_info_loctv        <dbl> 2, 5, 2, 2, 1, 4, 2, 1, 4, 6, 6, 4, 4, 3, 6, 5,…
$ wthr_info_cabtv        <dbl> 2, 5, 1, 2, 1, 2, 2, 1, 4, 4, 6, 3, 4, 2, 4, 3,…
$ wthr_info_radio        <dbl> 2, 3, 1, 1, 1, 3, 2, 1, 3, 6, 2, 1, 4, 1, 3, 1,…
$ wthr_info_fam          <dbl> 2, 2, 5, 1, 5, 3, 3, 1, 3, 6, 6, 3, 4, 3, 3, 2,…
$ wthr_info_soc          <dbl> 3, 3, 3, 1, 5, 2, 1, 1, 4, 4, 6, 5, 5, 1, 2, 1,…
$ wthr_info_phone        <dbl> 2, 3, 4, 5, 6, 5, 1, 5, 2, 4, 6, 3, 4, 2, 3, 5,…
$ risk_hail              <dbl> 4, 3, 3, 2, 3, 3, 2, 2, NA, 2, 4, 3, 4, 3, 4, 3…
$ risk_wind              <dbl> 4, 2, 3, 4, 3, 3, 2, 3, 3, 2, 5, 4, 4, 3, 4, 3,…
$ risk_lignt             <dbl> 4, 3, 3, 5, 3, 3, 2, 3, 5, 3, 5, 4, 4, 3, 4, 3,…
$ risk_heat              <dbl> 4, 2, 3, 5, 2, 3, 3, 5, 4, 5, 5, 3, 5, 3, 4, 3,…
$ risk_rain              <dbl> 4, 2, 3, 3, 2, 2, 1, 2, NA, 4, 5, 5, 4, 3, 2, 2…
$ risk_drought           <dbl> 4, 3, 3, 3, 2, 3, 3, 5, NA, 4, 1, 4, 4, 3, 4, 2…
$ risk_cold              <dbl> 4, 2, 2, 1, 3, 2, 1, 1, NA, 5, 4, 4, 5, 3, 3, 4…
$ risk_snow              <dbl> 4, 1, 3, 2, 4, 2, 1, 1, NA, 2, 2, 4, 5, 1, 4, 4…
$ risk_ice               <dbl> 4, 3, 3, 2, 3, 3, 1, 1, NA, 3, 2, 2, 5, 3, 4, 4…
$ risk_tor               <dbl> 4, 4, 3, 4, 2, 4, 2, 2, NA, 3, 5, 3, 4, 2, 3, 2…
$ risk_flood             <dbl> 4, 4, 3, 3, 2, 2, 2, 4, 3, 1, 5, 3, 4, 2, 2, 1,…
$ risk_hur               <dbl> 4, 3, 1, 4, 2, 2, 1, 1, 2, 2, 5, 3, 4, 1, 2, 1,…
$ risk_fire              <dbl> 4, 3, 2, 2, 1, 3, 2, 5, NA, 2, 1, 4, 4, 3, 4, 1…
$ risk_surge             <dbl> 4, 3, 1, 3, 3, 1, 1, 2, 1, 3, 3, 2, 5, 3, 2, 3,…
$ risk_tie               <dbl> 1, 8, 11, 4, 2, 10, 4, 6, 6, 6, 10, 11, 7, 2, 8…
$ alert_und              <dbl> 5, 3, 4, 5, 5, 4, 4, 2, 3, 5, 5, 4, 5, 3, 5, 4,…
$ torwatch               <dbl> NA, 2, NA, 2, 2, NA, NA, 1, 2, NA, NA, NA, NA, …
$ torwarn                <dbl> NA, NA, 2, NA, NA, 2, 2, NA, NA, 1, 2, 1, 2, NA…
$ warn_time              <dbl> 2, 2, 3, 1, 1, 1, 1, 3, 3, 1, 1, 2, 4, 3, 1, 1,…
$ warn_size              <dbl> 2, 2, 3, 2, 2, 3, NA, 2, 3, 5, 2, 2, 2, 2, 2, 2…
$ warn_time_minutes      <dbl> NA, NA, NA, 4, 12, 55, 60, NA, NA, 3, 2, NA, NA…
$ warn_time_hours        <dbl> 7, 8, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6, NA…
$ watch_time             <dbl> 2, 3, 2, 1, 1, 2, 2, 4, 4, 1, 1, 2, 4, 4, 2, 2,…
$ watch_size             <dbl> 2, 2, 3, 3, 2, 4, NA, 3, 3, 5, 2, 2, 4, 2, 3, 3…
$ watch_time_minutes     <dbl> NA, NA, NA, 61, 13, NA, NA, NA, NA, 5, 11, NA, …
$ watch_time_hours       <dbl> 5, NA, 6, NA, NA, 12, 8, NA, NA, NA, NA, 24, NA…
$ tor_watchwarn_und      <dbl> 5, 1, 3, 4, 5, 3, 2, 1, 5, 4, 5, 3, 5, 2, 2, 3,…
$ tor_map_und            <dbl> 5, 3, 2, 4, 4, 3, 2, 1, 5, 3, 5, 3, 4, 3, 2, 4,…
$ tor_radar_und          <dbl> 5, 2, 2, 4, 3, 3, 2, 1, 5, 5, 5, 3, 5, 2, 2, 4,…
$ svr_hail               <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1,…
$ svr_wind               <dbl> 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,…
$ svr_lightning          <dbl> 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,…
$ svr_flood              <dbl> 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,…
$ svr_rain               <dbl> 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,…
$ svr_watchwarn_und      <dbl> 5, 3, 3, 3, 5, 2, 2, 2, 3, 4, 5, 3, 5, 2, 2, 3,…
$ ffd_und                <dbl> 5, 3, 3, 4, 4, 2, 2, 4, 5, 5, 5, 3, 5, 2, 2, 2,…
$ ffd_watchwarn_und      <dbl> 5, 4, 3, 5, 5, 4, 4, 5, 5, 5, 5, 4, 5, 3, 4, 2,…
$ rec_all                <dbl> 5, 2, 3, 5, 5, 3, 3, 4, 4, 5, 5, 4, 5, 3, 4, 3,…
$ rec_most               <dbl> 5, 2, 3, 5, 5, 4, 3, 4, 2, 5, 5, 5, 5, 4, 4, 3,…
$ rec_soon               <dbl> 5, 3, 3, 5, 5, 4, 3, 4, 2, 5, 5, 4, 5, 2, 3, 3,…
$ rec_miss               <dbl> 5, 2, 3, 2, 1, 3, 4, 2, 2, 1, 1, 5, 5, 3, 3, 4,…
$ rec_area               <dbl> 5, 3, 3, 2, 1, 3, 2, 2, 5, 2, 4, 4, 5, 3, 3, 4,…
$ rec_time               <dbl> 5, 3, 3, 4, 5, 3, 4, 4, 5, 2, 1, 4, 5, 4, 4, 4,…
$ rec_sleep              <dbl> 5, 3, 3, 5, 5, 3, 3, 4, 5, 2, 5, 4, 5, 3, 2, 1,…
$ rec_driving            <dbl> 5, 4, 3, 1, 5, 3, 4, 4, 5, 5, 5, 4, 5, 3, 2, 1,…
$ rec_work               <dbl> 5, 3, 3, 4, 5, 4, 4, 4, 5, 5, 5, 5, 5, 3, 2, 5,…
$ rec_store              <dbl> 5, 3, 4, 2, 5, 4, 3, 4, 5, 4, 5, 4, 5, 3, 2, 3,…
$ rec_small_group        <dbl> 5, 3, 4, 2, 5, 3, 3, 4, 5, 5, 5, 5, 5, 3, 2, 3,…
$ rec_large_group        <dbl> 5, 3, 3, 2, 5, 3, 3, 4, 5, 5, 5, 5, 5, 3, 2, 4,…
$ rec_stream             <dbl> 5, 3, 3, 1, 5, 3, 3, 4, 5, 2, 5, 4, 5, 3, 2, 1,…
$ rec_phone              <dbl> 5, 3, 3, 2, 1, 2, 3, 1, 5, 4, 1, 4, 5, 3, 1, 1,…
$ rec_diff_sit           <chr> "No", NA, NA, NA, "Only when my phone is off", …
$ warn_hist              <dbl> 1, 0, 0, 1, 1, 1, 0, 1, NA, 1, 1, 0, 0, 0, 0, 1…
$ warn_when              <dbl> 2, NA, NA, 1, 2, 3, NA, 4, NA, 2, 1, NA, NA, NA…
$ warn_how_br_rad        <dbl> 1, NA, NA, NA, 0, 0, NA, 0, NA, 1, 0, NA, NA, N…
$ warn_how_wx_rad        <dbl> 0, NA, NA, NA, 0, 1, NA, 0, NA, 1, 0, NA, NA, N…
$ warn_how_tv            <dbl> 0, NA, NA, NA, 0, 1, NA, 0, NA, 1, 0, NA, NA, N…
$ warn_how_siren         <dbl> 0, NA, NA, NA, 1, 0, NA, 0, NA, 1, 0, NA, NA, N…
$ warn_how_int           <dbl> 0, NA, NA, NA, 0, 0, NA, 0, NA, 1, 0, NA, NA, N…
$ warn_how_soc           <dbl> 0, NA, NA, NA, 0, 0, NA, 0, NA, 0, 0, NA, NA, N…
$ warn_how_word          <dbl> 0, NA, NA, NA, 0, 1, NA, 0, NA, 1, 0, NA, NA, N…
$ warn_how_phone         <dbl> 0, NA, NA, NA, 1, 1, NA, 1, NA, 1, 1, NA, NA, N…
$ warn_how_oth           <dbl> 0, NA, NA, NA, 0, 0, NA, 0, NA, 0, 0, NA, NA, N…
$ warn_how_dk            <dbl> 0, NA, NA, NA, 0, 0, NA, 0, NA, 0, 0, NA, NA, N…
$ warn_how_spec          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ warn_timercv           <dbl> 2, NA, NA, 3, 3, 2, NA, 4, NA, 3, 4, NA, NA, NA…
$ warn_where             <dbl> 1, NA, NA, 1, 1, 6, NA, 1, NA, 1, 1, NA, NA, NA…
$ warn_where_spec        <chr> NA, NA, NA, NA, NA, "Vacation", NA, NA, NA, NA,…
$ warn_iss               <dbl> 1, NA, NA, 1, 1, 1, NA, 1, NA, 1, 1, NA, NA, NA…
$ warn_sure              <dbl> 1, NA, NA, 1, 1, 1, NA, 1, NA, 1, 1, NA, NA, NA…
$ warn_tor               <dbl> 1, NA, NA, 0, 1, 1, NA, 0, NA, 1, 1, NA, NA, NA…
$ last_act               <dbl> 0, NA, NA, 2, 1, NA, NA, 0, NA, 5, 2, NA, NA, N…
$ last_act_spec          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ last_act_satis         <dbl> 5, NA, NA, 4, 4, NA, NA, 1, NA, 4, 5, NA, NA, N…
$ last_act_again         <dbl> 5, NA, NA, 2, 5, NA, NA, 1, NA, 5, 5, NA, NA, N…
$ next_act_day           <dbl> 1, 4, 3, 2, 1, 2, 2, 1, 4, 1, 2, 4, 4, 2, 2, 1,…
$ next_act_day_spec      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ next_act_night         <dbl> 0, 3, 3, 2, 1, 3, 2, 2, 4, 5, 2, 2, 1, 2, 2, 1,…
$ next_act_night_spec    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ resp_ignore            <dbl> 5, 3, 3, 1, 2, 4, 2, 3, 2, 1, 1, 4, 5, 2, 2, 2,…
$ resp_prot              <dbl> 5, 3, 3, 4, 3, 3, 4, 2, 3, 5, 5, 4, 5, 3, 4, 3,…
$ resp_busy              <dbl> 5, 3, 3, 1, 4, 3, 2, 3, 4, 1, 1, 5, 5, 3, 2, 4,…
$ resp_unsure            <dbl> 5, 3, 3, 1, 4, 2, 2, 4, 2, 2, 1, 4, 5, 3, 2, 1,…
$ resp_sleep             <dbl> 5, 3, 3, 4, 3, 2, 3, 3, 2, 5, 5, 5, 5, 3, 4, 2,…
$ resp_driving           <dbl> 5, 3, 3, 3, 2, 2, 4, 4, 1, 4, 5, 5, NA, 3, 3, 2…
$ resp_work              <dbl> 5, 3, 3, 4, 3, 4, 4, 4, 3, 5, 5, 5, 5, 2, 4, 5,…
$ resp_store             <dbl> 5, 3, 3, 4, 3, 4, 4, 4, 4, 4, 5, 4, 5, 2, 3, 5,…
$ resp_small_group       <dbl> 5, 4, 3, 3, 3, 3, 4, 4, 4, 5, 5, 4, 5, 3, 3, 3,…
$ resp_large_group       <dbl> 5, 4, 3, 3, 3, 3, 4, 4, 3, 5, 5, 5, 5, 3, 4, 2,…
$ resp_stream            <dbl> 5, 4, 3, 4, 3, 2, 4, 4, 3, 2, 5, 4, 5, 3, 4, 2,…
$ resp_phone             <dbl> 5, 2, 3, 2, 1, 2, 3, 1, 2, 2, 5, 4, 5, 2, 3, 3,…
$ resp_dif_sit           <chr> "No", NA, NA, NA, NA, NA, NA, NA, "No", "Phone …
$ rec_morn               <dbl> 5, NA, 4, 5, 4, 3, 3, 4, 3, 5, 5, 3, 5, 3, 3, 4…
$ und_morn               <dbl> 5, 3, 4, 5, 4, 3, 3, 4, 2, 4, 5, 3, 5, 3, 4, 4,…
$ resp_morn              <dbl> 5, 4, 4, 5, 3, 3, 4, 3, 2, 5, 5, 4, 5, 3, 4, 3,…
$ rec_aft                <dbl> 5, 5, 4, 5, 4, 3, 3, 4, 5, 5, 5, 3, 5, 3, 3, 5,…
$ und_aft                <dbl> 5, 3, 4, 5, 4, 3, 3, 4, 5, 4, 5, 3, 5, 3, 3, 3,…
$ resp_aft               <dbl> 5, 3, 4, 5, 3, 3, 4, 4, 5, 4, 5, 4, 5, 3, 4, 3,…
$ rec_eve                <dbl> 5, 3, 4, 5, 4, 3, 3, 4, 5, 4, 5, 4, 5, 3, 3, 1,…
$ und_eve                <dbl> 5, 3, 4, 5, 4, 3, 3, 4, 3, 4, 5, 5, 5, 3, 3, 3,…
$ resp_eve               <dbl> 5, 3, 4, 5, 3, 3, 4, 4, 2, 5, 5, 4, 5, 3, 4, 2,…
$ tor_eff1               <dbl> 5, 3, 3, 1, 1, 4, 2, 2, 3, 5, 5, 5, 5, 2, 4, 4,…
$ tor_eff2               <dbl> 5, 3, 3, 5, 2, 4, 3, 4, 2, 5, 5, 3, 5, 3, 4, 4,…
$ tor_eff3               <dbl> 5, 2, 3, 3, 2, 3, 3, 4, 2, 5, 5, 4, 5, 3, 4, 4,…
$ tor_eff4               <dbl> 5, 4, 4, 1, 4, 4, 3, 2, 3, 4, 5, 4, 5, 4, 4, 4,…
$ tor_eff5               <dbl> 5, 3, 3, 1, 1, 4, 2, 2, 3, 5, 5, 4, 5, 3, 4, 4,…
$ tor_eff6               <dbl> 5, 3, 3, 5, 2, 4, 4, 4, 3, 5, 5, 5, 5, 2, 3, 4,…
$ tor_eff7               <dbl> 5, 4, 3, 3, 2, 4, 4, 4, 4, 4, 5, 4, 5, 3, 4, 4,…
$ tor_eff8               <dbl> 5, 2, 4, 1, 4, 4, 4, 2, 4, 5, 5, 4, 5, 4, 4, 4,…
$ nws_trust              <dbl> 5, 3, 3, 4, 5, 4, 2, 4, 4, 5, 5, 4, 5, 3, 3, 4,…
$ lotv_trust             <dbl> 5, 3, 3, 5, 4, 4, 2, 3, 3, 3, 5, 5, 4, 3, 3, 4,…
$ natv_trust             <dbl> 5, 4, 3, 4, 4, 4, 1, 4, 4, 5, 5, 4, 4, 3, 3, 4,…
$ em_trust               <dbl> 5, 5, 3, 4, 4, 4, 2, 4, 3, 5, 5, 3, 5, 3, 3, 4,…
$ fam_trust              <dbl> 5, 2, 3, 2, 4, 3, 3, 2, 5, 2, 5, 4, 4, 2, 3, 2,…
$ wx_info1               <dbl> 5, 3, 3, 4, 4, 3, 2, 1, 4, 5, 4, 3, 5, 1, 3, 1,…
$ wx_info2               <dbl> 5, 3, 3, 4, 4, 4, 3, 1, 3, 5, 5, 3, 5, 2, 3, 3,…
$ wx_info3               <dbl> 5, 3, 3, 4, 4, 3, 2, 3, 3, 5, 5, 3, 5, 2, 5, 5,…
$ wx_info4               <dbl> 5, 4, 3, 3, 5, 4, 2, 3, 2, 4, 3, 4, 5, 3, 3, 3,…
$ wx_info5               <dbl> 5, 2, 3, 5, 3, 3, 2, 3, 3, 2, 2, 3, 5, 3, 1, 1,…
$ wx_info6               <dbl> 5, 3, 3, 3, 3, 3, 3, 2, 2, 1, 3, 2, 5, 3, 3, 2,…
$ wx_info7               <dbl> 5, 3, 3, 4, 5, 4, 2, 5, 3, 5, 5, 3, 5, 3, 4, 1,…
$ wx_info8               <dbl> 5, 4, 3, 1, 5, 3, 3, 1, 4, 5, 5, 4, 5, 2, 1, 2,…
$ wx_info_tie            <dbl> 8, 3, 7, 4, 8, 3, 2, 7, 4, 2, 7, 3, 3, 4, 3, 3,…
$ rand_svr_con           <dbl> 5, 3, 3, 2, 4, 3, 3, 1, 5, 5, 5, 3, 5, 4, 4, 1,…
$ rand_svr_resp          <dbl> 5, 2, 3, 2, 3, 3, 3, 1, 5, 5, 5, 3, 5, 3, 4, 1,…
$ rand_svr_und           <dbl> 5, 3, 3, 5, 5, 3, 4, 3, 2, 5, 5, 4, 5, 4, 2, 1,…
$ rand_svr_conf          <dbl> 5, 3, 3, 4, 2, 3, 3, 4, 1, 4, 5, 3, 5, 3, 3, 1,…
$ rand_otlk_cern         <dbl> 5, 3, 3, 4, 5, 3, 4, 3, 2, 5, 1, 3, 5, 5, 3, 3,…
$ rand_otlk_point_risk   <dbl> 7, NA, NA, 7, NA, NA, 7, 6, NA, 4, 7, 5, NA, NA…
$ rand_otlk_point_plan   <dbl> 5, NA, NA, 3, NA, NA, 5, 4, NA, 4, 5, 4, NA, NA…
$ rand_otlk_drive_risk   <dbl> NA, 5, 4, NA, 6, 7, NA, NA, 3, NA, NA, NA, 5, 7…
$ rand_otlk_drive_plan   <dbl> NA, 2, 3, NA, 5, 2, NA, NA, 4, NA, NA, NA, 5, 5…
$ rand_otlk_und          <dbl> 5, 3, 3, 5, 4, 3, 4, 4, 2, 2, 5, 4, 5, 5, 2, 3,…
$ rand_otlk_help         <dbl> 5, 4, 3, 5, 5, 4, 4, 4, 3, 5, 5, 3, 5, 5, 2, 3,…
$ rand_otlk_use          <dbl> 5, 2, 3, 5, 5, 3, 5, 4, 3, 5, 5, 2, 5, 5, 2, 3,…
$ exf_aware              <dbl> 1, 23, 23, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, …
$ exf_access             <dbl> 5, NA, NA, 4, 1, 3, 3, 4, 1, 5, 4, NA, 3, 2, 3,…
$ exf_info_loctv         <dbl> 0, NA, NA, 0, NA, 1, 0, 0, NA, 1, 1, NA, 0, NA,…
$ exf_info_web           <dbl> 0, NA, NA, 0, NA, 1, 1, 0, NA, 0, 1, NA, 0, NA,…
$ exf_info_phone         <dbl> 0, NA, NA, 1, NA, 1, 0, 1, NA, 1, 1, NA, 0, NA,…
$ exf_info_govweb        <dbl> 1, NA, NA, 0, NA, 0, 1, 0, NA, 0, 1, NA, 0, NA,…
$ exf_info_cabtv         <dbl> 0, NA, NA, 0, NA, 0, 0, 0, NA, 1, 1, NA, 1, NA,…
$ exf_info_soc           <dbl> 0, NA, NA, 1, NA, 0, 0, 0, NA, 0, 0, NA, 0, NA,…
$ exf_info_fam           <dbl> 0, NA, NA, 0, NA, 0, 0, 0, NA, 0, 0, NA, 0, NA,…
$ exf_info_radio         <dbl> 0, NA, NA, 0, NA, 1, 0, 0, NA, 1, 0, NA, 0, NA,…
$ exf_info_paper         <dbl> 0, NA, NA, 0, NA, 0, 0, 0, NA, 0, 0, NA, 0, NA,…
$ exf_use                <chr> "Yes", NA, NA, "Planning outdoor activities", N…
$ exf_trust              <dbl> 5, NA, NA, 3, NA, 3, 3, 3, NA, 5, 5, NA, 5, NA,…
$ exf_reliable           <dbl> 5, NA, NA, 3, NA, 4, 3, 2, NA, 5, 5, NA, 5, NA,…
$ exf_detail             <dbl> 5, NA, NA, 2, NA, 4, 2, 3, NA, 5, 5, NA, 5, NA,…
$ exf_access_no_inacc    <dbl> NA, NA, NA, NA, 1, NA, NA, NA, 0, NA, NA, NA, N…
$ exf_access_no_rel      <dbl> NA, NA, NA, NA, 0, NA, NA, NA, 0, NA, NA, NA, N…
$ exf_access_no_fnd      <dbl> NA, NA, NA, NA, 0, NA, NA, NA, 1, NA, NA, NA, N…
$ exf_access_no_int      <dbl> NA, NA, NA, NA, 0, NA, NA, NA, 0, NA, NA, NA, N…
$ exf_access_no_oth      <dbl> NA, NA, NA, NA, 0, NA, NA, NA, 0, NA, NA, NA, N…
$ exf_access_no_oth_spec <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ exf_ex_see             <dbl> 0, 0, 2, 2, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,…
$ exf_ex_risk            <dbl> 5, 3, 2, 2, 4, 1, 3, 2, 2, 3, 3, 3, 5, 2, 2, 1,…
$ exf_resp               <dbl> 4, 2, 1, 1, 3, 0, 1, 1, 1, 1, 2, 2, 4, 1, 1, 0,…
$ exf_int_obj            <dbl> 5, 3, 2, 2, 3, 1, 3, 3, 2, 2, 2, 3, 5, 2, 3, 3,…
$ exf_ex_monitor3        <dbl> NA, 3, 3, 4, NA, NA, NA, NA, 3, NA, NA, NA, 5, …
$ exf_ex_monitor5        <dbl> NA, NA, NA, NA, 5, 3, NA, NA, NA, 5, NA, 3, NA,…
$ exf_ex_monitor7        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ exf_ex_conf            <dbl> 5, 3, 4, 5, 4, 3, 3, 3, 3, 5, 5, 5, 5, 5, 2, 3,…
$ exf_ex_use             <dbl> 5, 3, 3, 4, 4, 4, 3, 4, 2, 5, 4, 4, 2, 5, 3, 2,…
$ exf_int_no_conf        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ exf_use_no_rel         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ exf_use_no_inacc       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ exf_use_no_fnd         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, …
$ exf_use_no_int         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ exf_use_no_oth         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ exf_use_no_oth_spec    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ timing_aware           <dbl> 1, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,…
$ timing_see             <dbl> 1, 1, 1, 0, 0, 2, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0,…
$ timing_int_sub         <dbl> 5, 3, 3, 5, 4, 3, 2, 1, 4, 3, 4, 3, 5, 4, 2, 3,…
$ timing_start_obj       <dbl> 6, NA, 11, 32, 30, 32, 32, 31, 12, 27, 27, 9, 1…
$ timing_like_obj        <dbl> 11, NA, 12, 37, 36, 35, 37, NA, 14, 7, 31, 13, …
$ timing_end_obj         <dbl> 12, NA, 13, 3, 44, 37, 44, 36, 17, 9, 3, 23, 11…
$ timing_use             <dbl> 5, 4, 3, 4, 4, 4, 4, 4, 1, 5, 4, 3, 4, 4, 3, 1,…
$ timing_into_no_why     <chr> NA, NA, NA, NA, NA, NA, "No", "Top right corner…
$ timing_use_no_fnd      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ timing_use_no_info     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ timing_use_no_inacc    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, …
$ timing_use_no_rel      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, …
$ timing_use_no_int      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ timing_use_no_oth      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, …
$ timing_use_no_oth_spec <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ your_ability           <dbl> 5, 2, 3, 3, 4, 3, 2, 4, 3, 5, 5, 3, 5, 5, 2, 2,…
$ rq_1                   <dbl> 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 0, 1, 2, 2, 2,…
$ rq_2                   <dbl> 1, NA, 2, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0…
$ rq_3                   <dbl> 1, 1, 2, 1, 1, 0, 0, 0, 1, 1, 2, 0, 1, 0, 1, 1,…
$ rq_4                   <dbl> 1, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0,…
$ rq_5                   <dbl> 1, 0, 2, 0, 1, 1, 1, 0, 2, 1, 1, 1, 1, 0, 0, 0,…
$ rq_6                   <dbl> 1, 1, 2, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0,…
$ rq_7                   <dbl> 1, 1, 2, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0,…
$ rq_8                   <dbl> 1, 0, 2, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,…
$ rq_9                   <dbl> 1, 1, 2, 0, 0, 1, 0, 0, 1, 1, 2, 1, 1, 0, 0, 0,…
$ rq_10                  <dbl> 1, 2, 2, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,…
$ income                 <dbl> 4, 2, 2, 3, 3, 2, 4, 2, 3, 1, 1, 2, 3, 3, 3, 2,…
$ inc_50                 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, NA, N…
$ inc_100                <dbl> NA, 6, 8, NA, NA, 10, NA, 7, NA, NA, NA, 7, NA,…
$ inc_150                <dbl> NA, NA, NA, 11, 11, NA, NA, NA, 12, NA, NA, NA,…
$ inc_200                <dbl> 17, NA, NA, NA, NA, NA, 17, NA, NA, NA, NA, NA,…
$ edu                    <dbl> 6, 2, 2, 7, 6, 4, 3, 2, 2, 5, 4, 6, 2, 4, 4, 5,…
$ comments               <chr> NA, "Nothing else", "Nothing comes to mind", NA…
$ watch_warn             <dbl> 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1,…
$ rand_morn              <time> 08:00:00, 03:00:00, 03:00:00, 08:00:00, 09:00:…
$ rand_aft               <time> 16:00:00, 15:00:00, 13:00:00, 15:00:00, 16:00:…
$ rand_eve               <time> 22:00:00, 21:00:00, 23:00:00, 21:00:00, 22:00:…
$ exf_ex_loc             <chr> "Location B", "Location A", "Location B", "Loca…
$ exf_ex_day             <chr> "7 days (next Saturday)", "3 days (Tuesday)", "…
$ rand_otlk_loc          <chr> "memphis", "indy_to_okc", "indy_to_okc", "st_lo…
$ rand_otlk_format       <chr> "static", "static", "interactive", "interactive…
$ rand_svr               <chr> "wind", "wind", "wind_hail", "wind", "severe", …
$ state                  <chr> "New York", "Texas", "South Dakota", "South Car…
$ time_taken             <dbl> 11.216667, 54.283333, 17.650000, 41.883333, 32.…
$ age_group              <dbl> 3, 1, 2, 5, 4, 6, 6, 2, 3, 2, 5, 2, 1, 1, 6, 6,…
$ nws_region             <chr> "Eastern Region", "Southern Region", "Central R…
$ exf_ex_monitor         <dbl> NA, 3, 3, 4, 5, 3, NA, NA, 3, 5, NA, 3, 5, NA, …
$ censusproportion       <dbl> 0.000229, 0.000168, 0.029448, 0.028490, 0.02879…
$ surveyproportion       <dbl> 0.004566210, 0.003154574, 0.025723473, 0.034246…
$ weightfactor           <dbl> 0.050151, 0.053256, 1.144791, 0.831908, 0.99516…

Define Survey Design Object

Two approaches to create survey design objects:

# Option 1: Using srvyr package (tidyverse-friendly)
survey_tidy <- survey_raw |>
  as_survey_design(
    ids = 1,               # No clustering in the sample design
    weights = weightfactor # Weight variable
  )

# Option 2: Using survey package (traditional approach)
# Note: svydesign() references columns with one-sided formulas (~),
# unlike srvyr, which uses bare column names
survey_base <- svydesign(
  ids = ~1,                # No clustering
  weights = ~weightfactor, # Weight variable
  data = survey_raw
)
  • survey_tidy works with dplyr-style functions
  • survey_base required for statistical tests

Descriptive Analysis

Frequencies and Percentages

Summarize categorical variables with and without weights:

# Without weights (unweighted results): raw counts and their shares
survey_raw |>
  filter(!is.na(follow)) |>
  count(follow) |>
  mutate(percentage = 100 * n / sum(n))

Frequencies and Percentages

# A tibble: 5 × 3
  follow     n percentage
   <dbl> <int>      <dbl>
1      1    29       2.15
2      2   119       8.81
3      3   253      18.7 
4      4   653      48.4 
5      5   296      21.9 

Frequencies and Percentages

With weights (using srvyr):

# With weights (weighted results) - using srvyr
# survey_total() returns the weighted count per group; survey_prop()
# returns each group's weighted share, and vartype = "ci" adds
# lower/upper 95% confidence-limit columns alongside the estimate
survey_tidy |>
  drop_na(follow) |>
  group_by(follow) |>
  summarise(
    n = survey_total(),
    percentage = survey_prop(vartype = "ci") * 100
    )

Frequencies and Percentages

# A tibble: 5 × 6
  follow     n  n_se percentage percentage_low percentage_upp
   <dbl> <dbl> <dbl>      <dbl>          <dbl>          <dbl>
1      1  23.0  5.34       2.05           1.30           3.23
2      2 114.  13.3       10.2            8.15          12.7 
3      3 213.  15.4       19.0           16.5           21.8 
4      4 515.  21.8       46.0           42.7           49.5 
5      5 254.  18.4       22.7           19.9           25.8 

Frequencies and Percentages

Using the survey package:

# With weights - using survey package
# svytable() builds a weighted frequency table; prop.table() converts
# the weighted counts to proportions, scaled here to percentages
prop.table(svytable(~follow, design = survey_base)) * 100

Frequencies and Percentages

follow
        1         2         3         4         5 
 2.052472 10.187442 19.003746 46.043782 22.712558 

Visualizing Frequencies

Bar chart with confidence intervals:

# Estimate weighted percentages (with 95% CIs), then plot them as a
# bar chart with error bars
survey_tidy |>
  filter(!is.na(follow)) |>
  group_by(follow) |>
  summarise(percentage = survey_prop(vartype = "ci") * 100) |>
  ggplot(aes(follow, percentage)) +
  geom_col(fill = "steelblue") +
  geom_errorbar(
    aes(ymin = percentage_low, ymax = percentage_upp),
    width = 0.2
  ) +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly disagree",
               "Disagree",
               "Neither disagree\nnor agree",
               "Agree",
               "Strongly agree")
  ) +
  labs(
    title = "How much do you agree or disagree with the following statements?",
    subtitle = "I follow the weather very closely.",
    x = "Response Category",
    y = "Estimated Percent of Respondents (95% CI)"
  ) +
  theme_classic()

Visualizing Frequencies

Means, Medians, and Standard Deviations

Summarize continuous variables:

# Without weights: simple summary statistics for age
survey_raw |>
  filter(!is.na(age)) |>
  summarise(
    mean_value   = mean(age),
    median_value = median(age),
    sd_value     = sd(age)
  )

Means, Medians, and Standard Deviations

# A tibble: 1 × 3
  mean_value median_value sd_value
       <dbl>        <dbl>    <dbl>
1       51.4           52     16.4

Means, Medians, and Standard Deviations

With weights (using srvyr):

# With weights - using srvyr
# survey_mean()/survey_median() return design-based point estimates;
# vartype = "ci" appends lower/upper 95% confidence-limit columns
survey_tidy |>
  drop_na(age) |>
  summarise(
    mean_value = survey_mean(age, vartype = "ci"),
    median_value = survey_median(age, vartype = "ci"),
    sd_value = survey_sd(age)
    )

Means, Medians, and Standard Deviations

# A tibble: 1 × 7
  mean_value mean_value_low mean_value_upp median_value median_value_low
       <dbl>          <dbl>          <dbl>        <dbl>            <dbl>
1       48.7           47.3           50.0           48               46
# ℹ 2 more variables: median_value_upp <dbl>, sd_value <dbl>

Means, Medians, and Standard Deviations

Using the survey package:

# With weights - using survey package
svymean(~age, design = survey_base, na.rm = TRUE)                       # weighted mean + SE
svyquantile(~age, design = survey_base, quantiles = 0.5, na.rm = TRUE)  # weighted median (0.5 quantile)

Means, Medians, and Standard Deviations

     mean     SE
age 48.66 0.6911
$age
    quantile ci.2.5 ci.97.5       se
0.5       48     46      51 1.274393

attr(,"hasci")
[1] TRUE
attr(,"class")
[1] "newsvyquantile"

Visualizing Continuous Variables

Weighted histogram:

# Weighted histogram: the `weight` aesthetic lets each respondent
# contribute weightfactor (rather than 1) to their age bin
survey_raw |>
  ggplot(aes(x = age, weight = weightfactor)) +
  geom_histogram(binwidth = 5, fill = "steelblue", colour = "black") +
  labs(
    title = "How old are you?",
    x = "Age",
    y = "Estimated Frequency"
  ) +
  theme_classic()

Visualizing Continuous Variables

Cross-Tabulation

Two Categorical Variables

Examine relationships between categorical variables:

# Without weights: counts and within-gender percentages
survey_raw |>
  filter(!is.na(gend), !is.na(follow)) |>
  count(gend, follow) |>
  group_by(gend) |>
  mutate(percentage = 100 * n / sum(n))

Two Categorical Variables

# A tibble: 10 × 4
# Groups:   gend [2]
    gend follow     n percentage
   <dbl>  <dbl> <int>      <dbl>
 1     0      1     9       1.38
 2     0      2    58       8.92
 3     0      3   129      19.8 
 4     0      4   319      49.1 
 5     0      5   135      20.8 
 6     1      1    20       2.86
 7     1      2    61       8.71
 8     1      3   124      17.7 
 9     1      4   334      47.7 
10     1      5   161      23   

Two Categorical Variables

With weights (using srvyr):

# With weights - using srvyr
# Grouping by gend before follow means survey_prop() returns the
# distribution of `follow` WITHIN each gender (each gender's
# percentages sum to 100)
survey_tidy |>
  drop_na(gend, follow) |>
  group_by(gend, follow) |>
  summarise(
    n = survey_total(),
    percentage = survey_prop(vartype = "ci") * 100
    )

Two Categorical Variables

# A tibble: 10 × 7
# Groups:   gend [2]
    gend follow      n  n_se percentage percentage_low percentage_upp
   <dbl>  <dbl>  <dbl> <dbl>      <dbl>          <dbl>          <dbl>
 1     0      1   8.23  3.74       1.43          0.590           3.45
 2     0      2  57.7   9.55      10.1           7.36           13.6 
 3     0      3 112.   11.2       19.6          16.2            23.5 
 4     0      4 282.   18.3       49.2          44.5            53.9 
 5     0      5 113.   10.4       19.7          16.5            23.4 
 6     1      1  14.7   3.84       2.70          1.62            4.48
 7     1      2  56.3   9.54      10.3           7.48           14.1 
 8     1      3 100.   11.4       18.4          14.9            22.5 
 9     1      4 233.   15.3       42.7          37.9            47.7 
10     1      5 141.   15.9       25.9          21.3            31.0 

Two Categorical Variables

Using the survey package:

# With weights - using survey package
# margin = 1 requests row proportions (within each gend), scaled to percent
prop.table(svytable(~gend + follow, design = survey_base), margin = 1) * 100

Two Categorical Variables

    follow
gend         1         2         3         4         5
   0  1.434811 10.066314 19.595173 49.195379 19.708324
   1  2.701539 10.314729 18.382247 42.731941 25.869544

Visualizing Cross-Tabulations

Grouped bar chart with confidence intervals:

# Weighted within-gender percentages, plotted as side-by-side bars
survey_tidy |>
  drop_na(gend, follow) |>
  group_by(gend, follow) |>
  summarise(percentage = survey_prop(vartype = "ci") * 100) |>
  ggplot(aes(x = follow, y = percentage, fill = factor(gend))) +
  geom_col(position = "dodge") +
  # dodge the error bars by the same width so they sit over their bars
  geom_errorbar(aes(ymin = percentage_low, ymax = percentage_upp),
                width = 0.2,
                position = position_dodge(width = 0.9)) +
  scale_x_continuous(breaks = 1:5,
                     labels = c("Strongly disagree",
                                "Disagree",
                                "Neither disagree\nnor agree",
                                "Agree",
                                "Strongly agree")) +
  # map the 0/1 gend codes to readable legend labels
  scale_fill_discrete(breaks = c("0", "1"),
                      labels = c("Female", "Male")) +
  labs(title = "How much do you agree or disagree with the following statements?",
       subtitle = "I follow the weather very closely.",
       x = "Response Category",
       y = "Estimated Percent of Respondents (95% CI)",
       fill = "Gender") +
  theme_classic()

Visualizing Cross-Tabulations

Statistical Tests

T-Test

Compare means of a continuous variable across two groups:

# Without weights
# Welch two-sample t-test: compares mean `follow` across the two gend groups
t.test(follow ~ gend, data = survey_raw)

T-Test


    Welch Two Sample t-test

data:  follow by gend
t = -0.069959, df = 1348, p-value = 0.9442
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 -0.1053141  0.0980614
sample estimates:
mean in group 0 mean in group 1 
       3.789231        3.792857 

T-Test

With weights (using survey package):

# With weights - using survey package
# Design-based t-test: incorporates the survey weights when estimating
# the difference in means and its standard error
svyttest(follow ~ gend, design = survey_base)

T-Test


    Design-based t-test

data:  follow ~ gend
t = 0.43074, df = 1348, p-value = 0.6667
alternative hypothesis: true difference in mean is not equal to 0
95 percent confidence interval:
 -0.1093706  0.1709132
sample estimates:
difference in mean 
        0.03077132 

Regression Analysis

Linear Regression

For continuous outcome variables:

# Without weights
# OLS regression; rows with NA on any model variable are dropped
lm_model <- lm(follow ~ gend + risk_tor, data = survey_raw)
summary(lm_model)

Linear Regression


Call:
lm(formula = follow ~ gend + risk_tor, data = survey_raw)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.1486 -0.6941  0.1554  0.4563  1.4564 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 3.393163   0.068437  49.581  < 2e-16 ***
gend        0.003062   0.051103   0.060    0.952    
risk_tor    0.150484   0.021783   6.908 7.55e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9374 on 1345 degrees of freedom
  (6 observations deleted due to missingness)
Multiple R-squared:  0.03427,   Adjusted R-squared:  0.03283 
F-statistic: 23.86 on 2 and 1345 DF,  p-value: 6.553e-11

Linear Regression

With weights (using survey package):

# With weights - using survey package
# family = gaussian() makes svyglm() a weighted linear regression with
# design-based standard errors
svyglm_model <- svyglm(follow ~ gend + risk_tor,
                       design = survey_base,
                       family = gaussian())
summary(svyglm_model)

Linear Regression


Call:
svyglm(formula = follow ~ gend + risk_tor, design = survey_base, 
    family = gaussian())

Survey design:
svydesign(ids = ~1, weights = ~weightfactor, data = survey_raw)

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.29590    0.09739  33.841  < 2e-16 ***
gend         0.03413    0.06915   0.494    0.622    
risk_tor     0.17648    0.03134   5.632 2.17e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.9154299)

Number of Fisher Scoring iterations: 2

Linear Regression

Get confidence intervals:

# Get confidence intervals
# 95% (default) design-based confidence intervals for the weighted
# regression coefficients fit above.
confint(svyglm_model)

Linear Regression

                 2.5 %    97.5 %
(Intercept)  3.1048377 3.4869555
gend        -0.1015246 0.1697821
risk_tor     0.1150052 0.2379484

Logistic Regression

For binary outcome variables:

# Without weights
# Logistic regression on a binary outcome. `exf_aware == 1` converts the
# coded variable to TRUE/FALSE on the fly, so the model predicts
# Pr(exf_aware == 1).
logit_model <- glm(exf_aware == 1 ~ gend + risk_tor,
                   data = survey_raw,
                   family = binomial(link = "logit"))
summary(logit_model)  # log-odds coefficients with z-tests

Logistic Regression


Call:
glm(formula = exf_aware == 1 ~ gend + risk_tor, family = binomial(link = "logit"), 
    data = survey_raw)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -0.2753     0.1558  -1.767  0.07726 .  
gend          0.3720     0.1169   3.182  0.00146 ** 
risk_tor      0.2919     0.0521   5.601 2.13e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1727.8  on 1347  degrees of freedom
Residual deviance: 1685.1  on 1345  degrees of freedom
  (6 observations deleted due to missingness)
AIC: 1691.1

Number of Fisher Scoring iterations: 4

Logistic Regression

With weights (using survey package):

# With weights - using survey package
# Design-based logistic regression: same formula as the unweighted model,
# but estimated with the survey weights in `survey_base`; svyglm() reports
# t-statistics based on design degrees of freedom rather than z-values.
svylogit_model <- svyglm(exf_aware == 1 ~ gend + risk_tor,
                         design = survey_base,
                         family = binomial(link = "logit"))
summary(svylogit_model)

Logistic Regression


Call:
svyglm(formula = exf_aware == 1 ~ gend + risk_tor, design = survey_base, 
    family = binomial(link = "logit"))

Survey design:
svydesign(ids = ~1, weights = ~weightfactor, data = survey_raw)

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.46974    0.20429  -2.299   0.0216 *  
gend         0.37227    0.15017   2.479   0.0133 *  
risk_tor     0.37056    0.07034   5.268  1.6e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1.004359)

Number of Fisher Scoring iterations: 4

Rake Weighting

Calculate Rake Weights

Rake weighting adjusts survey data to match known population distributions:

  • Purpose: Post-stratification adjustment to match population margins
  • Targets: Known population proportions (from ACS, CPS, etc.)
  • Method: Iterative proportional fitting (raking)

Define Population Targets

# Population targets (from ACS, CPS or similar)
# Known population proportions for each raking margin. The proportions
# within each tibble should sum to 1; category labels must match the
# grouping variables created later (age_group, gend_group).
pop_age <- tibble(
  age_group = c("18-34", "35-54", "55+"),
  prop = c(0.31, 0.36, 0.33))

pop_gender <- tibble(
  gend_group = c("Female", "Male"),
  prop = c(0.51, 0.49))

Convert to Frequency Targets

# Convert proportions to counts based on your sample size
# rake() expects population margins as frequencies, so scale each
# proportion by the number of respondents. Rounding can make the margin
# totals differ from the sample size by a respondent or two; here both
# margins happen to sum exactly to 1,354.
sample_size <- nrow(survey_raw)

age_targets <- pop_age |>
  mutate(Freq = round(prop * sample_size)) |>
  select(age_group, Freq)
age_targets

gender_targets <- pop_gender |>
  mutate(Freq = round(prop * sample_size)) |>
  select(gend_group, Freq)
gender_targets

Convert to Frequency Targets

# A tibble: 3 × 2
  age_group  Freq
  <chr>     <dbl>
1 18-34       420
2 35-54       487
3 55+         447
# A tibble: 2 × 2
  gend_group  Freq
  <chr>      <dbl>
1 Female       691
2 Male         663

Add Demographic Grouping Variables

# Add demographic grouping variables
# Recode continuous age and 0/1 gender into the categorical margins used
# for raking. Labels must match the target tibbles exactly.
# NOTE(review): case_when() has no fallback branch, so age < 18 (or NA age)
# and any gend value other than 0/1 become NA — such rows would make
# rake() fail on missing margin values; confirm none exist in the data.
survey_raw <- survey_raw |>
  mutate(
    age_group = case_when(
      age >= 18 & age <= 34 ~ "18-34",
      age >= 35 & age <= 54 ~ "35-54",
      age >= 55             ~ "55+"),
    gend_group = case_when(
      gend == 0 ~ "Female",
      gend == 1 ~ "Male")
  )

Create Unweighted Design and Rake

# Create unweighted survey design
# ids = ~1 means no clustering; weights = ~1 starts everyone at weight 1.
survey_raw_unwtd <- svydesign(ids = ~1, weights = ~1, data = survey_raw)

# Rake sample to match population margins
# Iterative proportional fitting: sample.margins and population.margins
# are parallel lists, so the i-th formula must pair with the i-th target
# tibble (age with age_targets, gender with gender_targets).
survey_raw_raked <- rake(
  design = survey_raw_unwtd,
  sample.margins = list(~age_group, ~gend_group),
  population.margins = list(age_targets, gender_targets),
  # stop after 50 iterations or when weights change by less than 1e-7
  control = list(maxit = 50, epsilon = 1e-7, verbose = FALSE)
)

Check Weight Distribution

# Quick checks on weight distribution
# Sanity checks on the raked weights: extreme min/max values would signal
# an influential-respondent problem; because the targets were scaled to the
# sample size, the weights should average 1 and sum to the number of rows.
min(weights(survey_raw_raked))
max(weights(survey_raw_raked))
mean(weights(survey_raw_raked))
sum(weights(survey_raw_raked))

Check Weight Distribution

[1] 0.7088422
[1] 1.62208
[1] 1
[1] 1354

Add Weights Back to Dataset

# Add final weights back into the dataset
# Wrap the raked design in srvyr's tidy interface and expose the final
# weights as an ordinary column.
# NOTE(review): the result is a srvyr design object (tbl_svy), not a plain
# data frame — the raked weights already drive any analysis on it; the
# rake_weight column just makes them visible/exportable.
survey_raw_wtd <- survey_raw_raked |>
  as_survey_design() |>
  mutate(rake_weight = weights(survey_raw_raked))