Programme for the International Assessment of Adult Competencies (PIAAC)
A cross-national study designed to understand the skills of workers in advanced-nation labor markets.
One row per sampled adult.
A multiply-imputed, complex sample survey designed to generalize to the population aged 16 to 65 across thirty three OECD nations.
No expected release timeline.
Administered by the Organisation for Economic Co-operation and Development.
Please skim before you begin:
A haiku regarding this microdata:
Download, Import, Preparation
library(haven)
library(httr)
tf <- tempfile()
this_url <- "https://webfs.oecd.org/piaac/cy1-puf-data/SAS/SAS7BDAT/prgusap1_2012.sas7bdat"
GET( this_url , write_disk( tf ) , progress() )
piaac_tbl <- read_sas( tf )
piaac_df <- data.frame( piaac_tbl )
names( piaac_df ) <- tolower( names( piaac_df ) )Save Locally
Save the object at any point:
# piaac_fn <- file.path( path.expand( "~" ) , "PIAAC" , "this_file.rds" )
# saveRDS( piaac_df , file = piaac_fn , compress = FALSE )Load the same object:
Survey Design Definition
Construct a multiply-imputed, complex sample survey design:
library(survey)
library(mitools)
pvals <- c( "pvlit" , "pvnum" , "pvpsl" )
pvars <- outer( pvals , 1:10 , paste0 )
non.pvals <- names(piaac_df)[ !( names(piaac_df) %in% pvars ) ]
for(k in 1:10){
    
    piaac_imp <- piaac_df[ , c( non.pvals , paste0( pvals , k ) ) ]
    
    for( j in pvals ){
        
        piaac_imp[ , j ] <- piaac_imp[ , paste0( j , k ) ]
        
        piaac_imp[ , paste0( j , k ) ] <- NULL
    }
    
    if( k == 1 ){
        piaac_mi <- list( piaac_imp )
    } else {
        piaac_mi <- c( piaac_mi , list( piaac_imp ) )
    }
}
jk.method <- unique( piaac_df[ , 'vemethod' ] )
stopifnot(length(jk.method) == 1)
stopifnot(jk.method %in% c("JK1", "JK2"))
if (jk.method == "JK2") jk.method <- "JKn"
piaac_design <-
    svrepdesign(
        weights = ~spfwt0 ,
        repweights = "spfwt[1-9]" ,
        rscales = rep( 1 , 80 ) ,
        scale = ifelse( jk.method == "JKn" , 1 , 79/80 ) ,
        type = jk.method ,
        data = imputationList( piaac_mi ) ,
        mse = TRUE
    )Variable Recoding
Add new columns to the data set:
piaac_design <-
    update(
        piaac_design ,
        
        one = 1 ,
        
        sex = factor( gender_r , labels = c( "male" , "female" ) ) ,
        age_categories = 
            factor( 
                ageg10lfs , 
                levels = 1:5 , 
                labels = c( "24 or less" , "25-34" , "35-44" , "45-54" , "55 plus" ) 
            ) ,
        
        working_at_paid_job_last_week = as.numeric( c_q01a == 1 )
        
    )Analysis Examples with the survey library  
Unweighted Counts
Count the unweighted number of records in the survey sample, overall and by groups:
Descriptive Statistics
Calculate the mean (average) of a linear variable, overall and by groups:
MIcombine( with( piaac_design , svymean( ~ pvnum , na.rm = TRUE ) ) )
MIcombine( with( piaac_design ,
    svyby( ~ pvnum , ~ age_categories , svymean , na.rm = TRUE )
) )Calculate the distribution of a categorical variable, overall and by groups:
MIcombine( with( piaac_design , svymean( ~ sex ) ) )
MIcombine( with( piaac_design ,
    svyby( ~ sex , ~ age_categories , svymean )
) )Calculate the sum of a linear variable, overall and by groups:
MIcombine( with( piaac_design , svytotal( ~ pvnum , na.rm = TRUE ) ) )
MIcombine( with( piaac_design ,
    svyby( ~ pvnum , ~ age_categories , svytotal , na.rm = TRUE )
) )Calculate the weighted sum of a categorical variable, overall and by groups:
MIcombine( with( piaac_design , svytotal( ~ sex ) ) )
MIcombine( with( piaac_design ,
    svyby( ~ sex , ~ age_categories , svytotal )
) )Calculate the median (50th percentile) of a linear variable, overall and by groups:
MIcombine( with( piaac_design ,
    svyquantile(
        ~ pvnum ,
        0.5 , se = TRUE , na.rm = TRUE 
) ) )
MIcombine( with( piaac_design ,
    svyby(
        ~ pvnum , ~ age_categories , svyquantile ,
        0.5 , se = TRUE ,
        ci = TRUE , na.rm = TRUE
) ) )Estimate a ratio:
Subsetting
Restrict the survey design to self-reported fair or poor health:
Calculate the mean (average) of this subset:
Measures of Uncertainty
Extract the coefficient, standard error, confidence interval, and coefficient of variation from any descriptive statistics function result, overall and by groups:
this_result <-
    MIcombine( with( piaac_design ,
        svymean( ~ pvnum , na.rm = TRUE )
    ) )
coef( this_result )
SE( this_result )
confint( this_result )
cv( this_result )
grouped_result <-
    MIcombine( with( piaac_design ,
        svyby( ~ pvnum , ~ age_categories , svymean , na.rm = TRUE )
    ) )
coef( grouped_result )
SE( grouped_result )
confint( grouped_result )
cv( grouped_result )Calculate the degrees of freedom of any survey design object:
Calculate the complex sample survey-adjusted variance of any statistic:
Include the complex sample design effect in the result for a specific statistic:
# SRS without replacement
MIcombine( with( piaac_design ,
    svymean( ~ pvnum , na.rm = TRUE , deff = TRUE )
) )
# SRS with replacement
MIcombine( with( piaac_design ,
    svymean( ~ pvnum , na.rm = TRUE , deff = "replace" )
) )Compute confidence intervals for proportions using methods that may be more accurate near 0 and 1. See ?svyciprop for alternatives:
Regression Models and Tests of Association
Perform a design-based t-test:
Perform a chi-squared test of association for survey data:
Perform a survey-weighted generalized linear model:
glm_result <- 
    MIcombine( with( piaac_design ,
        svyglm( pvnum ~ working_at_paid_job_last_week + sex )
    ) )
    
summary( glm_result )Replication Example
This example matches the statistics and standard errors from OECD’s Technical Report Table 18.9:
usa_pvlit <-
    MIcombine( with( piaac_design , svymean( ~ pvlit , na.rm = TRUE ) ) )
    
usa_pvnum <-
    MIcombine( with( piaac_design , svymean( ~ pvnum , na.rm = TRUE ) ) )
usa_pvpsl <-
    MIcombine( with( piaac_design , svymean( ~ pvpsl , na.rm = TRUE ) ) )
stopifnot( round( coef( usa_pvlit ) ) == 270 )
stopifnot( round( SE( usa_pvlit ) , 1 ) == 1.0 )
stopifnot( round( coef( usa_pvnum ) ) == 253 )
stopifnot( round( SE( usa_pvnum ) , 1 ) == 1.2 )
stopifnot( round( coef( usa_pvpsl ) ) == 277 )
stopifnot( round( SE( usa_pvpsl ) , 1 ) == 1.1 )