Consumer Expenditure Survey (CES)

Build Status Build status

The Consumer Expenditure Survey (CES) is the authoritative data source to understand how Americans spend money. Participating households keep a running diary about every purchase over fifteen months. Those diaries are then summed up into precise expenditure categories.

  • One table of survey responses per quarter with one row per sampled household (consumer unit), plus additional tables containing one record per expenditure.

  • A complex sample survey designed to generalize to the civilian non-institutional population of the United States.

  • Released annually since 1996.

  • Administered by the Bureau of Labor Statistics.

Simplified Download and Importation

The R lodown package easily downloads and imports all available CES microdata by simply specifying "ces" with an output_dir = parameter in the lodown() function. Depending on your internet connection and computer processing speed, you might prefer to run this step overnight.

# Download and import every available CES microdata file into ~/CES.
library(lodown)
lodown("ces", output_dir = file.path(path.expand("~"), "CES"))

lodown also provides a catalog of available microdata extracts with the get_catalog() function. After requesting the CES catalog, you could pass a subsetted catalog through the lodown() function in order to download and import specific extracts (rather than all available extracts).

library(lodown)

# Fetch the catalog of all available CES microdata files.
ces_cat <- get_catalog("ces", output_dir = file.path(path.expand("~"), "CES"))

# Keep only the catalog rows for 2016; which() discards rows where the
# comparison is NA, matching what subset() does.
ces_cat <- ces_cat[which(ces_cat$year == 2016), , drop = FALSE]

# Download the selected microdata to the local computer.
ces_cat <- lodown("ces", ces_cat)

Analysis Examples with the survey library  

Construct a multiply-imputed, complex sample survey design:

library(survey)
library(mitools)

# Read the five quarterly family data files (fmli) stored under the 2016
# folder, tag each with its position in the sequence, and stack them into
# a single table called `fmly`.
fmli_names <- c("fmli161x", "fmli162", "fmli163", "fmli164", "fmli171")

fmli_tables <- lapply(seq_along(fmli_names), function(q) {
  quarter_df <- readRDS(
    file.path(path.expand("~"), "CES", paste0("2016/", fmli_names[q], ".rds"))
  )

  # `qtr` records the file order: 1 for fmli161x through 5 for fmli171
  quarter_df$qtr <- as.numeric(q)

  quarter_df
})

# Only the last file is re-aligned: keep exactly the columns of the first
# file, in the same order, before stacking.
fmli_tables[[5]] <- fmli_tables[[5]][, names(fmli_tables[[1]])]

# The list is deliberately unnamed so rbind() produces plain row names.
fmly <- do.call(rbind, fmli_tables)

# The per-quarter copies are no longer needed once `fmly` exists.
rm(fmli_names, fmli_tables)

# Names of the 44 replicate weight columns (wtrep01 ... wtrep44) followed by
# the full-sample weight column finlwt21.  Base sprintf() zero-pads the
# replicate number, so this step needs no package beyond base R.
wtrep <- c(sprintf("wtrep%02d", 1:44), "finlwt21")

# Replace missing values with zero in every weight column named in `wtrep`.
for (weight_col in wtrep) {
  fmly[is.na(fmly[, weight_col]), weight_col] <- 0
}

# totalexp: sum of the total expenditure from the current and previous
# quarters (columns totexppq and totexpcq); with na.rm = TRUE a missing
# component simply does not contribute to the sum
fmly$totalexp <- rowSums(fmly[c("totexppq", "totexpcq")], na.rm = TRUE)

# safeguard: any total that is still missing (NA) becomes zero
fmly$totalexp[is.na(fmly$totalexp)] <- 0

# annexp: annualized total expenditure, i.e. the total multiplied by four
fmly <- transform(fmly, annexp = totalexp * 4)

# one: a constant column of ones
fmly$one <- 1

# Stems of the multiply-imputed variables: every column name that ends in a
# lowercase letter followed by "5", with that trailing "5" stripped off
mi_vars <- sub("5$", "", grep("[a-z]5$", names(fmly), value = TRUE))

# build one data frame per imputation, named imp1 through imp5
for (imp_num in 1:5) {

  # start each imputation from a fresh copy of `fmly`
  imp_df <- fmly

  for (stem in mi_vars) {

    # copy this imputation's column (for example 'welfare1') into a new
    # column ending in 'mi' (for example 'welfaremi')
    imp_df[[paste0(stem, "mi")]] <- imp_df[, paste0(stem, imp_num)]

    # then drop all five numbered columns belonging to this stem
    imp_df <- imp_df[, !(names(imp_df) %in% paste0(stem, 1:5))]
  }

  # store the finished copy in the workspace as 'imp1', 'imp2', etc.
  assign(paste0("imp", imp_num), imp_df)

  # discard the temporary copy before the next pass
  rm(imp_df)
}

    
# Construct the replicate-weighted survey design on top of an imputationList
# containing the five multiply-imputed data tables - imp1 through imp5.
# finlwt21 is the main weight; every column matching "wtrep[0-9]+" is used
# as a replicate weight.
ces_design <- svrepdesign(
  weights = ~finlwt21,
  repweights = "wtrep[0-9]+",
  data = imputationList(list(imp1, imp2, imp3, imp4, imp5)),
  type = "BRR",
  combined.weights = TRUE,
  mse = TRUE
)

# drop the standalone copies of the imputed tables
rm(imp1, imp2, imp3, imp4, imp5)

Variable Recoding

Add new columns to the data set:

# any_food_stamp: 1 when jfs_amtmi is greater than zero, 0 when it is not
ces_design <- update(ces_design, any_food_stamp = as.numeric(jfs_amtmi > 0))

Unweighted Counts

Count the unweighted number of records in the survey sample, overall and by groups:

# unweighted record count, overall
MIcombine(with(ces_design, svyby(~one, ~one, unwtd.count)))

# unweighted record count within each bls_urbn category
MIcombine(with(ces_design, svyby(~one, ~bls_urbn, unwtd.count)))

Weighted Counts

Count the weighted size of the generalizable population, overall and by groups:

# weighted total of the column of ones, overall
MIcombine(with(ces_design, svytotal(~one)))

# weighted total of the column of ones within each bls_urbn category
MIcombine(with(ces_design, svyby(~one, ~bls_urbn, svytotal)))

Descriptive Statistics

Calculate the mean (average) of a linear variable, overall and by groups:

# mean of annexp, overall
MIcombine(with(ces_design, svymean(~annexp)))

# mean of annexp within each bls_urbn category
MIcombine(with(ces_design, svyby(~annexp, ~bls_urbn, svymean)))

Calculate the distribution of a categorical variable, overall and by groups:

# distribution of sex_ref, overall
MIcombine(with(ces_design, svymean(~sex_ref)))

# distribution of sex_ref within each bls_urbn category
MIcombine(with(ces_design, svyby(~sex_ref, ~bls_urbn, svymean)))

Calculate the sum of a linear variable, overall and by groups:

# weighted sum of annexp, overall
MIcombine(with(ces_design, svytotal(~annexp)))

# weighted sum of annexp within each bls_urbn category
MIcombine(with(ces_design, svyby(~annexp, ~bls_urbn, svytotal)))

Calculate the weighted sum of a categorical variable, overall and by groups:

# weighted sum of sex_ref, overall
MIcombine(with(ces_design, svytotal(~sex_ref)))

# weighted sum of sex_ref within each bls_urbn category
MIcombine(with(ces_design, svyby(~sex_ref, ~bls_urbn, svytotal)))

Calculate the median (50th percentile) of a linear variable, overall and by groups:

# median (0.5 quantile) of annexp, overall
MIcombine(with(ces_design, svyquantile(~annexp, 0.5, se = TRUE)))

# median (0.5 quantile) of annexp within each bls_urbn category; the
# arguments after svyquantile are handed to svyby() alongside it
MIcombine(with(
  ces_design,
  svyby(
    ~annexp, ~bls_urbn, svyquantile,
    0.5,
    se = TRUE, keep.var = TRUE, ci = TRUE
  )
))

Estimate a ratio:

# ratio with annexp as the numerator and fincbtxmi as the denominator
MIcombine(with(
  ces_design,
  svyratio(numerator = ~annexp, denominator = ~fincbtxmi)
))

Subsetting

Restrict the survey design to California residents:

# keep only the records whose state code equals "06"
sub_ces_design <- subset(ces_design, state == "06")

Calculate the mean (average) of this subset:

# mean of annexp within the subsetted design
MIcombine(with(sub_ces_design, svymean(~annexp)))

Measures of Uncertainty

Extract the coefficient, standard error, confidence interval, and coefficient of variation from any descriptive statistics function result, overall and by groups:

# overall mean of annexp, kept so its components can be extracted below
this_result <- MIcombine(with(ces_design, svymean(~annexp)))

coef(this_result)     # point estimate
SE(this_result)       # standard error
confint(this_result)  # confidence interval
cv(this_result)       # coefficient of variation

# the same extractions applied to the mean of annexp by bls_urbn category
grouped_result <- MIcombine(with(
  ces_design,
  svyby(~annexp, ~bls_urbn, svymean)
))

coef(grouped_result)
SE(grouped_result)
confint(grouped_result)
cv(grouped_result)

Calculate the degrees of freedom of any survey design object:

# degrees of freedom, taken from the first design stored in ces_design
degf(ces_design$designs[[1]])

Calculate the complex sample survey-adjusted variance of any statistic:

# survey-adjusted variance of annexp
MIcombine(with(ces_design, svyvar(~annexp)))

Include the complex sample design effect in the result for a specific statistic:

# design effect relative to SRS without replacement
MIcombine(with(ces_design, svymean(~annexp, deff = TRUE)))

# design effect relative to SRS with replacement
MIcombine(with(ces_design, svymean(~annexp, deff = "replace")))

Compute confidence intervals for proportions using methods that may be more accurate near 0 and 1. See ?svyciprop for alternatives:

# confidence interval for the proportion any_food_stamp, likelihood method
# NOTE(review): MIsvyciprop is not a survey or mitools function; presumably
# it comes from lodown - confirm it is exported, otherwise the call needs
# a namespace prefix.
MIsvyciprop(~any_food_stamp, ces_design, method = "likelihood")

Regression Models and Tests of Association

Perform a design-based t-test:

# design-based t-test of annexp across the two any_food_stamp groups
# NOTE(review): MIsvyttest is presumably supplied by lodown - confirm.
MIsvyttest(annexp ~ any_food_stamp, ces_design)

Perform a chi-squared test of association for survey data:

# chi-squared test of association between any_food_stamp and sex_ref
# NOTE(review): MIsvychisq is presumably supplied by lodown - confirm.
MIsvychisq(~any_food_stamp + sex_ref, ces_design)

Perform a survey-weighted generalized linear model:

# survey-weighted generalized linear model of annexp on any_food_stamp and
# sex_ref, fitted on each imputed design and then combined
glm_result <- MIcombine(with(
  ces_design,
  svyglm(annexp ~ any_food_stamp + sex_ref)
))

summary(glm_result)

Replication Example