Programme for the International Assessment of Adult Competencies (PIAAC)
A cross-national study designed to understand the skills of workers in advanced-nation labor markets.
One row per sampled adult.
A multiply-imputed, complex sample survey designed to generalize to the population aged 16 to 65 across thirty-three OECD nations.
No expected release timeline.
Administered by the Organisation for Economic Co-operation and Development.
Please skim before you begin:
This human-composed haiku or a bouquet of artificial intelligence-generated limericks
# what color collar
# workforce poets, potters, or
# pythagoreans
Download, Import, Preparation
library(haven)
library(httr)
# Download the public-use SAS file at `this_url` into a temporary file.
tf <- tempfile()

this_url <- "https://webfs.oecd.org/piaac/puf-data/SAS/SAS7BDAT/prgusap1_2012.sas7bdat"

GET( this_url , write_disk( tf ) , progress() )

# Import the downloaded file, convert the result to a plain data.frame,
# and lowercase every column name so later code can use lowercase names.
piaac_tbl <- read_sas( tf )

piaac_df <- data.frame( piaac_tbl )

names( piaac_df ) <- tolower( names( piaac_df ) )
Save locally
Save the object at any point:
# piaac_fn <- file.path( path.expand( "~" ) , "PIAAC" , "this_file.rds" )
# saveRDS( piaac_df , file = piaac_fn , compress = FALSE )
Load the same object:
# piaac_df <- readRDS( piaac_fn )
Survey Design Definition
Construct a multiply-imputed, complex sample survey design:
library(survey)
library(mitools)
# Stems of the three plausible-value column families; each stem is expanded
# with the suffixes 1 through 10 to get the full set of column names.
pvals <- c( "pvlit" , "pvnum" , "pvpsl" )
pvars <- outer( pvals , 1:10 , paste0 )

# Every column of piaac_df that is not one of the numbered plausible values.
non.pvals <- names(piaac_df)[ !( names(piaac_df) %in% pvars ) ]

# Build a list of ten data.frames, one per plausible-value index `k`.
# Each keeps all non-plausible-value columns plus the k-th column of every
# stem, renamed to the bare stem (e.g. pvlit3 becomes pvlit when k is 3).
for(k in 1:10){

  piaac_imp <- piaac_df[ , c( non.pvals , paste0( pvals , k ) ) ]

  for( j in pvals ){

    # copy the numbered column to the unnumbered name, then drop the original
    piaac_imp[ , j ] <- piaac_imp[ , paste0( j , k ) ]

    piaac_imp[ , paste0( j , k ) ] <- NULL

  }

  # start the list on the first pass, append on every later pass
  if( k == 1 ){
    piaac_mi <- list( piaac_imp )
  } else {
    piaac_mi <- c( piaac_mi , list( piaac_imp ) )
  }
}
# Read the variance-estimation method recorded in the `vemethod` column.
jk.method <- unique( piaac_df[ , 'vemethod' ] )

# The design below needs exactly one method, and only JK1 / JK2 are handled.
stopifnot(length(jk.method) == 1)
stopifnot(jk.method %in% c("JK1", "JK2"))

# "JK2" is passed to svrepdesign() as type "JKn".
if (jk.method == "JK2") jk.method <- "JKn"
# Replicate-weight survey design over the list of imputed data sets.
# `spfwt0` is the main weight; replicate weight columns are matched by the
# pattern "spfwt[1-9]". The scale is 1 for type "JKn" and 79/80 otherwise.
piaac_design <-
  svrepdesign(
    weights = ~spfwt0 ,
    repweights = "spfwt[1-9]" ,
    rscales = rep( 1 , 80 ) ,
    scale = ifelse( jk.method == "JKn" , 1 , 79/80 ) ,
    type = jk.method ,
    data = imputationList( piaac_mi ) ,
    mse = TRUE
  )
Variable Recoding
Add new columns to the data set:
# Add derived columns to the design:
#   one                           - constant 1, used for counts and totals
#   sex                           - gender_r as a factor labelled male / female
#   age_categories                - ageg10lfs codes 1-5 as labelled age bands
#   working_at_paid_job_last_week - 1 when c_q01a equals 1, 0 otherwise
#                                   (NA where c_q01a is missing)
piaac_design <-
  update(
    piaac_design ,

    one = 1 ,

    sex = factor( gender_r , labels = c( "male" , "female" ) ) ,

    age_categories =
      factor(
        ageg10lfs ,
        levels = 1:5 ,
        labels = c( "24 or less" , "25-34" , "35-44" , "45-54" , "55 plus" )
      ) ,

    working_at_paid_job_last_week = as.numeric( c_q01a == 1 )
  )
Analysis Examples with the survey library
Unweighted Counts
Count the unweighted number of records in the survey sample, overall and by groups:
# Unweighted record count, grouped by the constant column `one`
MIcombine(
  with(
    piaac_design,
    svyby(~one, ~one, unwtd.count)
  )
)

# Unweighted record count within each age category
MIcombine(
  with(
    piaac_design,
    svyby(~one, ~age_categories, unwtd.count)
  )
)
Weighted Counts
Count the weighted size of the generalizable population, overall and by groups:
# Weighted total of the constant column `one`
MIcombine(
  with(
    piaac_design,
    svytotal(~one)
  )
)

# Weighted total of `one` within each age category
MIcombine(
  with(
    piaac_design,
    svyby(~one, ~age_categories, svytotal)
  )
)
Descriptive Statistics
Calculate the mean (average) of a linear variable, overall and by groups:
# Mean of pvnum over the whole design, dropping missing values
MIcombine(
  with(
    piaac_design,
    svymean(~pvnum, na.rm = TRUE)
  )
)

# Mean of pvnum within each age category
MIcombine(
  with(
    piaac_design,
    svyby(~pvnum, ~age_categories, svymean, na.rm = TRUE)
  )
)
Calculate the distribution of a categorical variable, overall and by groups:
# Share of each level of the factor `sex`
MIcombine(
  with(
    piaac_design,
    svymean(~sex)
  )
)

# Share of each level of `sex` within each age category
MIcombine(
  with(
    piaac_design,
    svyby(~sex, ~age_categories, svymean)
  )
)
Calculate the sum of a linear variable, overall and by groups:
# Weighted sum of pvnum over the whole design, dropping missing values
MIcombine(
  with(
    piaac_design,
    svytotal(~pvnum, na.rm = TRUE)
  )
)

# Weighted sum of pvnum within each age category
MIcombine(
  with(
    piaac_design,
    svyby(~pvnum, ~age_categories, svytotal, na.rm = TRUE)
  )
)
Calculate the weighted sum of a categorical variable, overall and by groups:
# Weighted total for each level of the factor `sex`
MIcombine(
  with(
    piaac_design,
    svytotal(~sex)
  )
)

# Weighted total for each level of `sex` within each age category
MIcombine(
  with(
    piaac_design,
    svyby(~sex, ~age_categories, svytotal)
  )
)
Calculate the median (50th percentile) of a linear variable, overall and by groups:
# 0.5 quantile of pvnum over the whole design
MIcombine(
  with(
    piaac_design,
    svyquantile(~pvnum, 0.5, se = TRUE, na.rm = TRUE)
  )
)

# 0.5 quantile of pvnum within each age category
MIcombine(
  with(
    piaac_design,
    svyby(
      ~pvnum, ~age_categories, svyquantile,
      0.5,
      se = TRUE, ci = TRUE, na.rm = TRUE
    )
  )
)
Estimate a ratio:
# Ratio of pvnum (numerator) to pvlit (denominator)
MIcombine(
  with(
    piaac_design,
    svyratio(
      numerator = ~pvnum,
      denominator = ~pvlit,
      na.rm = TRUE
    )
  )
)
Subsetting
Restrict the survey design to self-reported fair or poor health:
# Keep only records whose i_q08 value is 4 or 5.
sub_piaac_design <- subset( piaac_design , i_q08 %in% 4:5 )
Calculate the mean (average) of this subset:
# Mean of pvnum within the subsetted design, dropping missing values
MIcombine(
  with(
    sub_piaac_design,
    svymean(~pvnum, na.rm = TRUE)
  )
)
Measures of Uncertainty
Extract the coefficient, standard error, confidence interval, and coefficient of variation from any descriptive statistics function result, overall and by groups:
# Store the combined overall mean of pvnum, then pull out its point
# estimate, standard error, confidence interval and coefficient of variation.
this_result <-
  MIcombine( with( piaac_design ,
    svymean( ~ pvnum , na.rm = TRUE )
  ) )

coef( this_result )
SE( this_result )
confint( this_result )
cv( this_result )
# Store the combined mean of pvnum by age category, then pull out the same
# four measures of uncertainty for every group.
grouped_result <-
  MIcombine( with( piaac_design ,
    svyby( ~ pvnum , ~ age_categories , svymean , na.rm = TRUE )
  ) )

coef( grouped_result )
SE( grouped_result )
confint( grouped_result )
cv( grouped_result )
Calculate the degrees of freedom of any survey design object:
# Degrees of freedom, read from the first design in the `designs` list
degf(
  piaac_design$designs[[1]]
)
Calculate the complex sample survey-adjusted variance of any statistic:
# Design-based variance of pvnum, dropping missing values
MIcombine(
  with(
    piaac_design,
    svyvar(~pvnum, na.rm = TRUE)
  )
)
Include the complex sample design effect in the result for a specific statistic:
# Design effect relative to simple random sampling without replacement
MIcombine(
  with(
    piaac_design,
    svymean(~pvnum, na.rm = TRUE, deff = TRUE)
  )
)

# Design effect relative to simple random sampling with replacement
MIcombine(
  with(
    piaac_design,
    svymean(~pvnum, na.rm = TRUE, deff = "replace")
  )
)
Compute confidence intervals for proportions using methods that may be more accurate near 0 and 1. See ?svyciprop for alternatives:
# MIsvyciprop( ~ working_at_paid_job_last_week , piaac_design ,
# method = "likelihood" )
Regression Models and Tests of Association
Perform a design-based t-test:
# MIsvyttest( pvnum ~ working_at_paid_job_last_week , piaac_design )
Perform a chi-squared test of association for survey data:
# MIsvychisq( ~ working_at_paid_job_last_week + sex , piaac_design )
Perform a survey-weighted generalized linear model:
# Fit pvnum on paid-job status and sex in each imputed design, combine the
# fits across imputations, and print the combined summary.
glm_result <-
  MIcombine( with( piaac_design ,
    svyglm( pvnum ~ working_at_paid_job_last_week + sex )
  ) )

summary( glm_result )
Replication Example
This example matches the statistics and standard errors from OECD’s Technical Report Table 18.9:
# Combined means of the three plausible-value scores.
usa_pvlit <-
  MIcombine( with( piaac_design , svymean( ~ pvlit , na.rm = TRUE ) ) )

usa_pvnum <-
  MIcombine( with( piaac_design , svymean( ~ pvnum , na.rm = TRUE ) ) )

usa_pvpsl <-
  MIcombine( with( piaac_design , svymean( ~ pvpsl , na.rm = TRUE ) ) )

# Halt the script unless each rounded estimate and rounded standard error
# equals its hard-coded expected value.
stopifnot( round( coef( usa_pvlit ) ) == 270 )
stopifnot( round( SE( usa_pvlit ) , 1 ) == 1.0 )
stopifnot( round( coef( usa_pvnum ) ) == 253 )
stopifnot( round( SE( usa_pvnum ) , 1 ) == 1.2 )
stopifnot( round( coef( usa_pvpsl ) ) == 277 )
stopifnot( round( SE( usa_pvpsl ) , 1 ) == 1.1 )