National Beneficiary Survey (NBS)

License: GPL v3 Github Actions Badge

The principal microdata for U.S. disability researchers interested in Social Security program performance.

  • One table with one row per respondent.

  • A complex sample designed to generalize to Americans between age 18 and full retirement age, covered by either Social Security Disability Insurance (SSDI) or Supplemental Security Income (SSI).

  • Released at irregular intervals, with 2004, 2005, 2006, 2010, 2015, 2017, and 2019 available.

  • Administered by the Social Security Administration.


Download, Import, Preparation

Download and import the round 7 file:

library(haven)

zip_tf <- tempfile()

zip_url <- "https://www.ssa.gov/disabilityresearch/documents/R7NBSPUF_STATA.zip"
    
download.file( zip_url , zip_tf , mode = 'wb' )

nbs_tbl <- read_stata( zip_tf )

nbs_df <- data.frame( nbs_tbl )

names( nbs_df ) <- tolower( names( nbs_df ) )

nbs_df[ , 'one' ] <- 1

Save Locally  

Save the object at any point:

# nbs_fn <- file.path( path.expand( "~" ) , "NBS" , "this_file.rds" )
# saveRDS( nbs_df , file = nbs_fn , compress = FALSE )

Load the same object:

# nbs_df <- readRDS( nbs_fn )

Survey Design Definition

Construct a complex sample survey design:

library(survey)

options( survey.lonely.psu = "adjust" )

# representative beneficiary sample
nbs_design <-
    svydesign(
        id = ~ r7_a_psu_pub , 
        strata = ~ r7_a_strata , 
        weights = ~ r7_wtr7_ben , 
        data = subset( nbs_df , r7_wtr7_ben > 0 ) 
    )
    
# cross-sectional successful worker sample
nbs_design <- 
    svydesign(
        id = ~ r7_a_psu_pub , 
        strata = ~ r7_a_strata , 
        weights = ~ r7_wtr7_cssws , 
        data = subset( nbs_df , r7_wtr7_cssws > 0 ) 
    )
    
# longitudinal successful worker sample
lngsws_design <-
    svydesign(
        id = ~ r7_a_psu_pub , 
        strata = ~ r7_a_strata , 
        weights = ~ r7_wtr7_lngsws , 
        data = subset( nbs_df , r7_wtr7_lngsws > 0 ) 
    )

Variable Recoding

Add new columns to the data set:

nbs_design <- 
    update( 
        nbs_design , 
        
        male = as.numeric( r7_orgsampinfo_sex == 1 ) ,
        
        age_categories = 
            factor( 
                r7_c_intage_pub ,
                labels = 
                    c( "18-25" , "26-40" , "41-55" , "56 and older" )
            )
        
    )

Analysis Examples with the survey library  

Unweighted Counts

Count the unweighted number of records in the survey sample, overall and by groups:

sum( weights( nbs_design , "sampling" ) != 0 )

svyby( ~ one , ~ age_categories , nbs_design , unwtd.count )

Weighted Counts

Count the weighted size of the generalizable population, overall and by groups:

svytotal( ~ one , nbs_design )

svyby( ~ one , ~ age_categories , nbs_design , svytotal )

Descriptive Statistics

Calculate the mean (average) of a linear variable, overall and by groups:

svymean( ~ r7_n_totssbenlastmnth_pub , nbs_design , na.rm = TRUE )

svyby( ~ r7_n_totssbenlastmnth_pub , ~ age_categories , nbs_design , svymean , na.rm = TRUE )

Calculate the distribution of a categorical variable, overall and by groups:

svymean( ~ r7_c_hhsize_pub , nbs_design , na.rm = TRUE )

svyby( ~ r7_c_hhsize_pub , ~ age_categories , nbs_design , svymean , na.rm = TRUE )

Calculate the sum of a linear variable, overall and by groups:

svytotal( ~ r7_n_totssbenlastmnth_pub , nbs_design , na.rm = TRUE )

svyby( ~ r7_n_totssbenlastmnth_pub , ~ age_categories , nbs_design , svytotal , na.rm = TRUE )

Calculate the weighted sum of a categorical variable, overall and by groups:

svytotal( ~ r7_c_hhsize_pub , nbs_design , na.rm = TRUE )

svyby( ~ r7_c_hhsize_pub , ~ age_categories , nbs_design , svytotal , na.rm = TRUE )

Calculate the median (50th percentile) of a linear variable, overall and by groups:

svyquantile( ~ r7_n_totssbenlastmnth_pub , nbs_design , 0.5 , na.rm = TRUE )

svyby( 
    ~ r7_n_totssbenlastmnth_pub , 
    ~ age_categories , 
    nbs_design , 
    svyquantile , 
    0.5 ,
    ci = TRUE , na.rm = TRUE
)

Estimate a ratio:

svyratio( 
    numerator = ~ r7_n_ssilastmnth_pub , 
    denominator = ~ r7_n_totssbenlastmnth_pub , 
    nbs_design ,
    na.rm = TRUE
)

Subsetting

Restrict the survey design to currently covered by Medicare:

sub_nbs_design <- subset( nbs_design , r7_c_curmedicare == 1 )

Calculate the mean (average) of this subset:

svymean( ~ r7_n_totssbenlastmnth_pub , sub_nbs_design , na.rm = TRUE )

Measures of Uncertainty

Extract the coefficient, standard error, confidence interval, and coefficient of variation from any descriptive statistics function result, overall and by groups:

this_result <- svymean( ~ r7_n_totssbenlastmnth_pub , nbs_design , na.rm = TRUE )

coef( this_result )
SE( this_result )
confint( this_result )
cv( this_result )

grouped_result <-
    svyby( 
        ~ r7_n_totssbenlastmnth_pub , 
        ~ age_categories , 
        nbs_design , 
        svymean ,
        na.rm = TRUE 
    )
    
coef( grouped_result )
SE( grouped_result )
confint( grouped_result )
cv( grouped_result )

Calculate the degrees of freedom of any survey design object:

degf( nbs_design )

Calculate the complex sample survey-adjusted variance of any statistic:

svyvar( ~ r7_n_totssbenlastmnth_pub , nbs_design , na.rm = TRUE )

Include the complex sample design effect in the result for a specific statistic:

# SRS without replacement
svymean( ~ r7_n_totssbenlastmnth_pub , nbs_design , na.rm = TRUE , deff = TRUE )

# SRS with replacement
svymean( ~ r7_n_totssbenlastmnth_pub , nbs_design , na.rm = TRUE , deff = "replace" )

Compute confidence intervals for proportions using methods that may be more accurate near 0 and 1. See ?svyciprop for alternatives:

svyciprop( ~ male , nbs_design ,
    method = "likelihood" )

Regression Models and Tests of Association

Perform a design-based t-test:

svyttest( r7_n_totssbenlastmnth_pub ~ male , nbs_design )

Perform a chi-squared test of association for survey data:

svychisq( 
    ~ male + r7_c_hhsize_pub , 
    nbs_design 
)

Perform a survey-weighted generalized linear model:

glm_result <- 
    svyglm( 
        r7_n_totssbenlastmnth_pub ~ male + r7_c_hhsize_pub , 
        nbs_design 
    )

summary( glm_result )

Replication Example

This example matches the percentages and t-tests from the final ten rows of Exhibit 4:

ex_4 <-
    data.frame(
        variable_label =
            c( 'coping with stress' , 'concentrating' , 
            'getting around outside of the home' , 
            'shopping for personal items' , 'preparing meals' , 
            'getting into or out of bed' , 'bathing or dressing' , 
            'getting along with others' , 
            'getting around inside the house' , 'eating' ) ,
        variable_name =
            c( "r3_i60_i" , "r3_i59_i" , "r3_i47_i" , "r3_i53_i" , 
            "r3_i55_i" , "r3_i49_i" , "r3_i51_i" , "r3_i61_i" , 
            "r3_i45_i" , "r3_i57_i" ) ,
        overall =
            c( 61 , 58 , 47 , 39 , 37 , 34 , 30 , 27 , 23 , 14 ) ,
        di_only =
            c( 60 , 54 , 47 , 36 , 35 , 36 , 30 , 23 , 24 , 13 ) ,
        concurrent =
            c( 63 , 63 , 47 , 43 , 41 , 34 , 33 , 31 , 23 , 15 ) ,
        concurrent_vs_di =
            c( F , T , F , F , F , F , F , T , F , F ) ,
        ssi =
            c( 61 , 62 , 47 , 40 , 39 , 33 , 29 , 31 , 22 , 15 ) ,
        ssi_vs_di =
            c( F , T , F , F , F , F , F , T , F , F )
    )

Download, import, and recode the round 3 file:

r3_tf <- tempfile()

r3_url <- "https://www.ssa.gov/disabilityresearch/documents/nbsr3pufstata.zip"
    
download.file( r3_url , r3_tf , mode = 'wb' )

r3_tbl <- read_stata( r3_tf )

r3_df <- data.frame( r3_tbl )

names( r3_df ) <- tolower( names( r3_df ) )

r3_design <- 
    svydesign(
        id = ~ r3_a_psu_pub , 
        strata = ~ r3_a_strata , 
        weights = ~ r3_wtr3_ben , 
        data = subset( r3_df , r3_wtr3_ben > 0 ) 
    )
    
r3_design <-
    update(
        r3_design ,
        
        benefit_type =
            factor(
                r3_orgsampinfo_bstatus ,
                levels = c( 2 , 3 , 1 ) ,
                labels = c( 'di_only' , 'concurrent' , 'ssi' )
            )

    )

Calculate the final ten rows of exhibit 4 and confirm each statistics and t-test matches:

for( i in seq( nrow( ex_4 ) ) ){

    this_formula <- as.formula( paste( "~" , ex_4[ i , 'variable_name' ] ) )

    overall_percent <- svymean( this_formula , r3_design )
    
    stopifnot( 100 * round( coef( overall_percent ) , 2 ) == ex_4[ i , 'overall_percent' ] )
    
    benefit_percent <- svyby( this_formula , ~ benefit_type , r3_design , svymean )
    
    stopifnot(
        all.equal( 
            100 * as.numeric( round( coef( benefit_percent ) , 2 ) ) , 
            as.numeric( ex_4[ i , c( 'di_only' , 'concurrent' , 'ssi' ) ] )
        )
    )
    
    ttest_formula <- as.formula( paste( ex_4[ i , 'variable_name' ] , "~ benefit_type" ) )
    
    di_only_con_design <-
        subset( r3_design , benefit_type %in% c( 'di_only' , 'concurrent' ) )
    
    con_ttest <- svyttest( ttest_formula , di_only_con_design )

    stopifnot(
        all.equal( 
            as.logical( con_ttest$p.value < 0.05 ) , 
            as.logical( ex_4[ i , 'concurrent_vs_di' ] )
        )
    )
    
    di_only_ssi_design <-
        subset( r3_design , benefit_type %in% c( 'di_only' , 'ssi' ) )
    
    ssi_ttest <- svyttest( ttest_formula , di_only_ssi_design )

    stopifnot(
        all.equal(
            as.logical( ssi_ttest$p.value < 0.05 ) , 
            as.logical( ex_4[ i , 'ssi_vs_di' ] ) 
        )
    )

}

Analysis Examples with srvyr  

The R srvyr library calculates summary statistics from survey data, such as the mean, total or quantile using dplyr-like syntax. srvyr allows for the use of many verbs, such as summarize, group_by, and mutate, the convenience of pipe-able functions, the tidyverse style of non-standard evaluation and more consistent return types than the survey package. This vignette details the available features. As a starting point for NBS users, this code replicates previously-presented examples:

library(srvyr)
nbs_srvyr_design <- as_survey( nbs_design )

Calculate the mean (average) of a linear variable, overall and by groups:

nbs_srvyr_design %>%
    summarize( mean = survey_mean( r7_n_totssbenlastmnth_pub , na.rm = TRUE ) )

nbs_srvyr_design %>%
    group_by( age_categories ) %>%
    summarize( mean = survey_mean( r7_n_totssbenlastmnth_pub , na.rm = TRUE ) )