Programme for the International Assessment of Adult Competencies (PIAAC)

A cross-national study designed to understand the skills of workers in advanced-nation labor markets.

One row per sampled adult.
A multiply-imputed, complex sample survey designed to generalize to the population aged 16 to 65 across thirty three OECD nations.
No expected release timeline.
Administered by the Organisation for Economic Co-operation and Development.

Please skim before you begin:

Technical Report of the Survey of Adult Skills
Wikipedia Entry
A haiku regarding this microdata:

# what color collar
# workforce poets, potters, or
# pythagoreans

Download, Import, Preparation

library(haven)
library(httr)

tf <- tempfile()

this_url <- "https://webfs.oecd.org/piaac/cy1-puf-data/SAS/SAS7BDAT/prgusap1_2012.sas7bdat"

GET( this_url , write_disk( tf ) , progress() )

piaac_tbl <- read_sas( tf )

piaac_df <- data.frame( piaac_tbl )

names( piaac_df ) <- tolower( names( piaac_df ) )

Save Locally

Save the object at any point:

# piaac_fn <- file.path( path.expand( "~" ) , "PIAAC" , "this_file.rds" )
# saveRDS( piaac_df , file = piaac_fn , compress = FALSE )

Load the same object:

# piaac_df <- readRDS( piaac_fn )

Survey Design Definition

Construct a multiply-imputed, complex sample survey design:

library(survey)
library(mitools)

pvals <- c( "pvlit" , "pvnum" , "pvpsl" )
pvars <- outer( pvals , 1:10 , paste0 )
non.pvals <- names(piaac_df)[ !( names(piaac_df) %in% pvars ) ]

for(k in 1:10){
    
    piaac_imp <- piaac_df[ , c( non.pvals , paste0( pvals , k ) ) ]
    
    for( j in pvals ){
        
        piaac_imp[ , j ] <- piaac_imp[ , paste0( j , k ) ]
        
        piaac_imp[ , paste0( j , k ) ] <- NULL

    }
    
    if( k == 1 ){
        piaac_mi <- list( piaac_imp )
    } else {
        piaac_mi <- c( piaac_mi , list( piaac_imp ) )
    }
}

jk.method <- unique( piaac_df[ , 'vemethod' ] )

stopifnot(length(jk.method) == 1)

stopifnot(jk.method %in% c("JK1", "JK2"))

if (jk.method == "JK2") jk.method <- "JKn"

piaac_design <-
    svrepdesign(
        weights = ~spfwt0 ,
        repweights = "spfwt[1-9]" ,
        rscales = rep( 1 , 80 ) ,
        scale = ifelse( jk.method == "JKn" , 1 , 79/80 ) ,
        type = jk.method ,
        data = imputationList( piaac_mi ) ,
        mse = TRUE
    )

Variable Recoding

Add new columns to the data set:

piaac_design <-
    update(
        piaac_design ,
        
        one = 1 ,
        
        sex = factor( gender_r , labels = c( "male" , "female" ) ) ,

        age_categories = 
            factor( 
                ageg10lfs , 
                levels = 1:5 , 
                labels = c( "24 or less" , "25-34" , "35-44" , "45-54" , "55 plus" ) 
            ) ,
        
        working_at_paid_job_last_week = as.numeric( c_q01a == 1 )
        
    )

Analysis Examples with the `survey` library

Unweighted Counts

Count the unweighted number of records in the survey sample, overall and by groups:

MIcombine( with( piaac_design , svyby( ~ one , ~ one , unwtd.count ) ) )

MIcombine( with( piaac_design , svyby( ~ one , ~ age_categories , unwtd.count ) ) )

Weighted Counts

Count the weighted size of the generalizable population, overall and by groups:

MIcombine( with( piaac_design , svytotal( ~ one ) ) )

MIcombine( with( piaac_design ,
    svyby( ~ one , ~ age_categories , svytotal )
) )

Descriptive Statistics

Calculate the mean (average) of a linear variable, overall and by groups:

MIcombine( with( piaac_design , svymean( ~ pvnum , na.rm = TRUE ) ) )

MIcombine( with( piaac_design ,
    svyby( ~ pvnum , ~ age_categories , svymean , na.rm = TRUE )
) )

Calculate the distribution of a categorical variable, overall and by groups:

MIcombine( with( piaac_design , svymean( ~ sex ) ) )

MIcombine( with( piaac_design ,
    svyby( ~ sex , ~ age_categories , svymean )
) )

Calculate the sum of a linear variable, overall and by groups:

MIcombine( with( piaac_design , svytotal( ~ pvnum , na.rm = TRUE ) ) )

MIcombine( with( piaac_design ,
    svyby( ~ pvnum , ~ age_categories , svytotal , na.rm = TRUE )
) )

Calculate the weighted sum of a categorical variable, overall and by groups:

MIcombine( with( piaac_design , svytotal( ~ sex ) ) )

MIcombine( with( piaac_design ,
    svyby( ~ sex , ~ age_categories , svytotal )
) )

Calculate the median (50th percentile) of a linear variable, overall and by groups:

MIcombine( with( piaac_design ,
    svyquantile(
        ~ pvnum ,
        0.5 , se = TRUE , na.rm = TRUE 
) ) )

MIcombine( with( piaac_design ,
    svyby(
        ~ pvnum , ~ age_categories , svyquantile ,
        0.5 , se = TRUE ,
        ci = TRUE , na.rm = TRUE
) ) )

Estimate a ratio:

MIcombine( with( piaac_design ,
    svyratio( numerator = ~ pvnum , denominator = ~ pvlit , na.rm = TRUE )
) )

Subsetting

Restrict the survey design to self-reported fair or poor health:

sub_piaac_design <- subset( piaac_design , i_q08 %in% 4:5 )

Calculate the mean (average) of this subset:

MIcombine( with( sub_piaac_design , svymean( ~ pvnum , na.rm = TRUE ) ) )

Measures of Uncertainty

Extract the coefficient, standard error, confidence interval, and coefficient of variation from any descriptive statistics function result, overall and by groups:

this_result <-
    MIcombine( with( piaac_design ,
        svymean( ~ pvnum , na.rm = TRUE )
    ) )

coef( this_result )
SE( this_result )
confint( this_result )
cv( this_result )

grouped_result <-
    MIcombine( with( piaac_design ,
        svyby( ~ pvnum , ~ age_categories , svymean , na.rm = TRUE )
    ) )

coef( grouped_result )
SE( grouped_result )
confint( grouped_result )
cv( grouped_result )

Calculate the degrees of freedom of any survey design object:

degf( piaac_design$designs[[1]] )

Calculate the complex sample survey-adjusted variance of any statistic:

MIcombine( with( piaac_design , svyvar( ~ pvnum , na.rm = TRUE ) ) )

Include the complex sample design effect in the result for a specific statistic:

# SRS without replacement
MIcombine( with( piaac_design ,
    svymean( ~ pvnum , na.rm = TRUE , deff = TRUE )
) )

# SRS with replacement
MIcombine( with( piaac_design ,
    svymean( ~ pvnum , na.rm = TRUE , deff = "replace" )
) )

Compute confidence intervals for proportions using methods that may be more accurate near 0 and 1. See ?svyciprop for alternatives:

# MIsvyciprop( ~ working_at_paid_job_last_week , piaac_design ,
#   method = "likelihood" )

Regression Models and Tests of Association

Perform a design-based t-test:

# MIsvyttest( pvnum ~ working_at_paid_job_last_week , piaac_design )

Perform a chi-squared test of association for survey data:

# MIsvychisq( ~ working_at_paid_job_last_week + sex , piaac_design )

Perform a survey-weighted generalized linear model:

glm_result <- 
    MIcombine( with( piaac_design ,
        svyglm( pvnum ~ working_at_paid_job_last_week + sex )
    ) )
    
summary( glm_result )

Replication Example

This example matches the statistics and standard errors from OECD’s Technical Report Table 18.9:

usa_pvlit <-
    MIcombine( with( piaac_design , svymean( ~ pvlit , na.rm = TRUE ) ) )
    
usa_pvnum <-
    MIcombine( with( piaac_design , svymean( ~ pvnum , na.rm = TRUE ) ) )

usa_pvpsl <-
    MIcombine( with( piaac_design , svymean( ~ pvpsl , na.rm = TRUE ) ) )

stopifnot( round( coef( usa_pvlit ) ) == 270 )
stopifnot( round( SE( usa_pvlit ) , 1 ) == 1.0 )
stopifnot( round( coef( usa_pvnum ) ) == 253 )
stopifnot( round( SE( usa_pvnum ) , 1 ) == 1.2 )
stopifnot( round( coef( usa_pvpsl ) ) == 277 )
stopifnot( round( SE( usa_pvpsl ) , 1 ) == 1.1 )