• Please Circulate: Half Day, In-Person, &
    *Free* Survey Data Analysis Workshops!

      Minneapolis MN: May 30th, 2025
      Washington DC: June 11th, 2025
      New York NY: June 18th, 2025
  • Forty-Nine Public Microdatasets, One Easy To Type Website
  • American Community Survey (ACS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Poverty and Inequality Estimation with convey  
    • Analysis Examples with srvyr  
  • Area Health Resources Files (AHRF)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • American Housing Survey (AHS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • American National Election Studies (ANES)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • American Time Use Survey (ATUS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Behavioral Risk Factor Surveillance System (BRFSS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Consumer Expenditure Survey (CES)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
  • California Health Interview Survey (CHIS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Census of Governments (COG)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • Current Population Survey (CPS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Poverty and Inequality Estimation with convey  
    • Analysis Examples with srvyr  
  • Exame Nacional de Desempenho de Estudantes (ENADE)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • Exame Nacional do Ensino Medio (ENEM)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • European Social Survey (ESS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • FDA Adverse Event Reporting System (FAERS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • General Social Survey (GSS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Health and Retirement Study (HRS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Medicare Current Beneficiary Survey (MCBS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Medical Expenditure Panel Survey (MEPS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Medical Large Claims Experience Study (MLCES)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • National Agricultural Workers Survey (NAWS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Beneficiary Survey (NBS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Crime Victimization Survey (NCVS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Financial Capability Study (NFCS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Health and Nutrition Examination Survey (NHANES)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Direct Method of Age-Adjustment Replication Example
    • Analysis Examples with srvyr  
  • National Health Interview Survey (NHIS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
  • National Household Travel Survey (NHTS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Immunization Survey (NIS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Plan and Provider Enumeration System (NPPES)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • National Survey of Children’s Health (NSCH)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
  • National Survey on Drug Use and Health (NSDUH)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Survey of Family Growth (NSFG)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • National Sample Survey of Registered Nurses (NSSRN)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • New York City Housing and Vacancy Survey (NYCHVS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Pew Research Center (PEW)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Programme for the International Assessment of Adult Competencies (PIAAC)
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
  • Progress in International Reading Literacy Study (PIRLS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
  • Public Libraries Survey (PLS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • Pesquisa Nacional por Amostra de Domicilios (PNAD)
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Poverty and Inequality Estimation with convey  
    • Analysis Examples with srvyr  
  • Pesquisa Nacional de Saude (PNS)
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Pesquisa de Orcamentos Familiares (POF)
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Poverty and Inequality Estimation with convey  
    • Analysis Examples with srvyr  
  • Residential Energy Consumption Survey (RECS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Rapid Surveys System (RSS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Survey of Business Owners (SBO)
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
  • Survey of Consumer Finances (SCF)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Poverty and Inequality Estimation with convey  
  • Survey of Income and Program Participation (SIPP)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Poverty and Inequality Estimation with convey  
    • Analysis Examples with srvyr  
  • Social Security Public-Use Data Files (SSA)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Variable Recoding
    • Analysis Examples with base R  
      • Unweighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with dplyr  
    • Analysis Examples with data.table  
    • Analysis Examples with duckdb  
  • Trends in International Mathematics and Science Study (TIMSS)
    • Recommended Reading
    • Function Definitions
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
  • Violence Against Children And Youth Surveys (VACS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Youth Risk Behavior Surveillance System (YRBSS)
    • Recommended Reading
    • Download, Import, Preparation
      • Save Locally  
      • Survey Design Definition
      • Variable Recoding
    • Analysis Examples with the survey library  
      • Unweighted Counts
      • Weighted Counts
      • Descriptive Statistics
      • Subsetting
      • Measures of Uncertainty
      • Regression Models and Tests of Association
    • Replication Example
    • Analysis Examples with srvyr  
  • Trend Analysis of Complex Survey Data
    • Download, Import, Preparation
    • Append Polynomials to Each Year
    • Unadjusted Analysis Examples
    • Calculate Joinpoints Needed
    • Calculate Predicted Marginals
    • Identify Joinpoint(s) or Breakpoint(s)
    • Interpret and Conclude
  • Published with bookdown

Analyze Survey Data for Free

Trends in International Mathematics and Science Study (TIMSS)

License: GPL v3 Github Actions Badge

A comparative study of student achievement in math and science across more than 50 nations.

  • Grade-specific tables with one record per school, student, teacher, plus files containing student achievement, home background, student-teacher linkage, and within-country scoring reliability.

  • A complex survey generalizing to fourth- and eighth-grade populations of participating countries.

  • Released quadrennially since 1995.

  • Funded by the International Association for the Evaluation of Educational Achievement, run at BC.


Recommended Reading

Four Example Strengths & Limitations:

✔️ Sixty-five education systems, six benchmarking systems participated in 4th or 8th grade assessment

✔️ Collects rich array of data related to teacher characteristics

❌ Differences in sample selectivity across countries potentially undermines validity of rankings

❌ Low stakes examination potentially biased by non-serious test-takers


Three Example Findings:

  1. New Zealand Year 9 student chemistry performance improved between 2019 and 2023.

  2. Students who were food-insecure had lower math achievement than their peers in 2019.

  3. In mathematics, U.S. 4th- and 8th-graders scored lower, on average, in 2023 than they did in 2019.


Two Methodology Documents:

TIMSS 2019 User Guide for the International Database, 2nd Edition

Methods and Procedures: TIMSS 2019 Technical Report


One Haiku:

# brando for stella,
# gump's jenny, rock's adrian,
# students toward math test

Function Definitions

This survey uses a multiply-imputed variance estimation technique described in Methods Chapter 14. Most users do not need to study this function carefully. Define a function specific to only this dataset:

timss_MIcombine <-
    function (results, variances, call = sys.call(), df.complete = Inf, ...) {
        m <- length(results)
        oldcall <- attr(results, "call")
        if (missing(variances)) {
            variances <- suppressWarnings(lapply(results, vcov))
            results <- lapply(results, coef)
        }
        vbar <- variances[[1]]
        cbar <- results[[1]]
        for (i in 2:m) {
            cbar <- cbar + results[[i]]
            vbar <- vbar + variances[[i]]
        }
        cbar <- cbar/m
        vbar <- vbar/m

        # MODIFICATION
        # evar <- var(do.call("rbind", results))
        evar <- sum( ( unlist( results ) - cbar )^2 / 4 )

        
        r <- (1 + 1/m) * evar/vbar
        df <- (m - 1) * (1 + 1/r)^2
        if (is.matrix(df)) df <- diag(df)
        if (is.finite(df.complete)) {
            dfobs <- ((df.complete + 1)/(df.complete + 3)) * df.complete *
            vbar/(vbar + evar)
            if (is.matrix(dfobs)) dfobs <- diag(dfobs)
            df <- 1/(1/dfobs + 1/df)
        }
        if (is.matrix(r)) r <- diag(r)
        rval <- list(coefficients = cbar, variance = vbar + evar *
        (m + 1)/m, call = c(oldcall, call), nimp = m, df = df,
        missinfo = (r + 2/(df + 3))/(r + 1))
        class(rval) <- "MIresult"
        rval
    }

Download, Import, Preparation

Download and unzip the 2019 fourth grade international database:

library(httr)

tf <- tempfile()

this_url <- "https://timss2019.org/international-database/downloads/T19_G4_SPSS%20Data.zip"

GET( this_url , write_disk( tf ) , progress() )

unzipped_files <- unzip( tf , exdir = tempdir() )

Import and stack each of the student context data files for Albania through Canada:

library(haven)

# limit unzipped files to those starting with `asg` followed by three letters followed by `m7`
asg_fns <- unzipped_files[ grepl( '^asg[a-z][a-z][a-z]m7' , basename( unzipped_files ) ) ]

# further limit asg files to the first ten countries
countries_thru_canada <- c("alb", "arm", "aus", "aut", "aze", "bhr", "bfl", "bih", "bgr", "can")

fns_thru_canada <- paste0( paste0( '^asg' , countries_thru_canada , 'm7' ) , collapse = "|" )

asg_alb_can_fns <- asg_fns[ grepl( fns_thru_canada , basename( asg_fns ) ) ]

timss_df <- NULL

for( spss_fn in asg_alb_can_fns ){

    this_tbl <- read_spss( spss_fn )
    
    this_tbl <- zap_labels( this_tbl )
    
    this_df <- data.frame( this_tbl )
    
    names( this_df ) <- tolower( names( this_df ) )
    
    timss_df <- rbind( timss_df , this_df )
    
}

# order the data.frame by unique student id
timss_df <- timss_df[ with( timss_df , order( idcntry , idstud ) ) , ]

Save Locally  

Save the object at any point:

# timss_fn <- file.path( path.expand( "~" ) , "TIMSS" , "this_file.rds" )
# saveRDS( timss_df , file = timss_fn , compress = FALSE )

Load the same object:

# timss_df <- readRDS( timss_fn )

Survey Design Definition

Construct a multiply-imputed, complex sample survey design:

From among possibly plausible values, determine all columns that are multiply-imputed plausible values:

# identify all columns ending with `01` thru `05`
ppv <- grep( "(.*)0[1-5]$" , names( timss_df ) , value = TRUE )

# remove those ending digits
ppv_prefix <- gsub( "0[1-5]$" , "" , ppv )

# identify each of the possibilities with exactly five matches (five implicates)
pv <- names( table( ppv_prefix )[ table( ppv_prefix ) == 5 ] )

# identify each of the `01` thru `05` plausible value columns
pv_columns <-
    grep( 
        paste0( "^" , pv , "0[1-5]$" , collapse = "|" ) , 
        names( timss_df ) , 
        value = TRUE 
    )

Extract those multiply-imputed columns into a separate data.frame, then remove them from the source:

pv_wide_df <- timss_df[ c( 'idcntry' , 'idstud' , pv_columns ) ]

timss_df[ pv_columns ] <- NULL

Reshape these columns from one record per student to one record per student per implicate:

pv_long_df <- 
    reshape( 
        pv_wide_df , 
        varying = lapply( paste0( pv , '0' ) , paste0 , 1:5 ) , 
        direction = 'long' , 
        timevar = 'implicate' , 
        idvar = c( 'idcntry' , 'idstud' ) 
    )

names( pv_long_df ) <- gsub( "01$" , "" , names( pv_long_df ) )

Merge the columns from the source data.frame onto the one record per student per implicate data.frame:

timss_long_df <- merge( timss_df , pv_long_df )

timss_long_df <- timss_long_df[ with( timss_long_df , order( idcntry , idstud ) ) , ]

stopifnot( nrow( timss_long_df ) == nrow( pv_long_df ) )

stopifnot( nrow( timss_long_df ) / 5 == nrow( timss_df ) )

Divide the five plausible value implicates into a list with five data.frames based on the implicate number:

timss_list <- split( timss_long_df , timss_long_df[ , 'implicate' ] )

Construct a replicate weights table following the estimation technique described in Methods Chapter 14:

weights_df <- timss_df[ c( 'jkrep' , 'jkzone' ) ]

for( j in 1:75 ){
    for( i in 0:1 ){
        weights_df[ weights_df[ , 'jkzone' ] != j , paste0( 'rw' , i , j ) ] <- 1
        
        weights_df[ weights_df[ , 'jkzone' ] == j , paste0( 'rw' , i , j ) ] <- 
            2 * ( weights_df[ weights_df[ , 'jkzone' ] == j , 'jkrep' ] == i )
    }
}

weights_df[ c( 'jkrep' , 'jkzone' ) ] <- NULL

Define the design:

library(survey)
library(mitools)

timss_design <- 
    svrepdesign(
        weights = ~totwgt ,
        repweights = weights_df , 
        data = imputationList( timss_list ) ,
        type = "other" ,
        scale = 0.5 ,
        rscales = rep( 1 , 150 ) ,
        combined.weights = FALSE ,
        mse = TRUE
    )

Variable Recoding

Add new columns to the data set:

timss_design <- 
    update( 
        timss_design , 
        
        one = 1 ,
        
        countries_thru_canada = 
        
            factor( 
            
                as.numeric( idcntry ) ,
                
                levels = c(8L, 51L, 36L, 40L, 31L, 48L, 956L, 70L, 100L, 124L) ,

                labels =
                    c("Albania", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahrain",
                    "Belgium (Flemish)", "Bosnia and Herzegovina", "Bulgaria", "Canada")
                
            ) ,
        
        sex = factor( asbg01 , levels = 1:2 , labels = c( "female" , "male" ) ) ,
        
        born_in_country = ifelse( asbg07 %in% 1:2 , as.numeric( asbg07 == 1 ) , NA )

    )

Analysis Examples with the survey library  

Unweighted Counts

Count the unweighted number of records in the survey sample, overall and by groups:

timss_MIcombine( with( timss_design , svyby( ~ one , ~ one , unwtd.count ) ) )

timss_MIcombine( with( timss_design , svyby( ~ one , ~ sex , unwtd.count ) ) )

Weighted Counts

Count the weighted size of the generalizable population, overall and by groups:

timss_MIcombine( with( timss_design , svytotal( ~ one ) ) )

timss_MIcombine( with( timss_design ,
    svyby( ~ one , ~ sex , svytotal )
) )

Descriptive Statistics

Calculate the mean (average) of a linear variable, overall and by groups:

timss_MIcombine( with( timss_design , svymean( ~ asmmat , na.rm = TRUE ) ) )

timss_MIcombine( with( timss_design ,
    svyby( ~ asmmat , ~ sex , svymean , na.rm = TRUE )
) )

Calculate the distribution of a categorical variable, overall and by groups:

timss_MIcombine( with( timss_design , svymean( ~ countries_thru_canada ) ) )

timss_MIcombine( with( timss_design ,
    svyby( ~ countries_thru_canada , ~ sex , svymean )
) )

Calculate the sum of a linear variable, overall and by groups:

timss_MIcombine( with( timss_design , svytotal( ~ asmmat , na.rm = TRUE ) ) )

timss_MIcombine( with( timss_design ,
    svyby( ~ asmmat , ~ sex , svytotal , na.rm = TRUE )
) )

Calculate the weighted sum of a categorical variable, overall and by groups:

timss_MIcombine( with( timss_design , svytotal( ~ countries_thru_canada ) ) )

timss_MIcombine( with( timss_design ,
    svyby( ~ countries_thru_canada , ~ sex , svytotal )
) )

Calculate the median (50th percentile) of a linear variable, overall and by groups:

timss_MIcombine( with( timss_design ,
    svyquantile(
        ~ asmmat ,
        0.5 , se = TRUE , na.rm = TRUE 
) ) )

timss_MIcombine( with( timss_design ,
    svyby(
        ~ asmmat , ~ sex , svyquantile ,
        0.5 , se = TRUE ,
        ci = TRUE , na.rm = TRUE
) ) )

Estimate a ratio:

timss_MIcombine( with( timss_design ,
    svyratio( numerator = ~ asssci , denominator = ~ asmmat )
) )

Subsetting

Restrict the survey design to Australia, Austria, Azerbaijan, Belgium (French):

sub_timss_design <- subset( timss_design , idcntry %in% c( 36 , 40 , 31 , 956 ) )

Calculate the mean (average) of this subset:

timss_MIcombine( with( sub_timss_design , svymean( ~ asmmat , na.rm = TRUE ) ) )

Measures of Uncertainty

Extract the coefficient, standard error, confidence interval, and coefficient of variation from any descriptive statistics function result, overall and by groups:

this_result <-
    timss_MIcombine( with( timss_design ,
        svymean( ~ asmmat , na.rm = TRUE )
    ) )

coef( this_result )
SE( this_result )
confint( this_result )
cv( this_result )

grouped_result <-
    timss_MIcombine( with( timss_design ,
        svyby( ~ asmmat , ~ sex , svymean , na.rm = TRUE )
    ) )

coef( grouped_result )
SE( grouped_result )
confint( grouped_result )
cv( grouped_result )

Calculate the degrees of freedom of any survey design object:

degf( timss_design$designs[[1]] )

Calculate the complex sample survey-adjusted variance of any statistic:

timss_MIcombine( with( timss_design , svyvar( ~ asmmat , na.rm = TRUE ) ) )

Include the complex sample design effect in the result for a specific statistic:

# SRS without replacement
timss_MIcombine( with( timss_design ,
    svymean( ~ asmmat , na.rm = TRUE , deff = TRUE )
) )

# SRS with replacement
timss_MIcombine( with( timss_design ,
    svymean( ~ asmmat , na.rm = TRUE , deff = "replace" )
) )

Compute confidence intervals for proportions using methods that may be more accurate near 0 and 1. See ?svyciprop for alternatives:

# MIsvyciprop( ~ born_in_country , timss_design ,
#   method = "likelihood" , na.rm = TRUE )

Regression Models and Tests of Association

Perform a design-based t-test:

# MIsvyttest( asmmat ~ born_in_country , timss_design )

Perform a chi-squared test of association for survey data:

# MIsvychisq( ~ born_in_country + countries_thru_canada , timss_design )

Perform a survey-weighted generalized linear model:

glm_result <- 
    timss_MIcombine( with( timss_design ,
        svyglm( asmmat ~ born_in_country + countries_thru_canada )
    ) )
    
summary( glm_result )

Replication Example

This example matches the mean proficiency and standard error of the Australia row of the Summary Statistics and Standard Errors for Proficiency in Overall Mathematics-Grade 4 table from the Appendix 14A: Summary Statistics and Standard Errors for Proficiency in Grade 4 Mathematics:

australia_design <- subset( timss_design , countries_thru_canada %in% "Australia" )

stopifnot( nrow( australia_design ) == 5890 )

result <- timss_MIcombine( with( australia_design , svymean( ~ asmmat ) ) )

stopifnot( round( coef( result ) , 3 ) == 515.880 )

stopifnot( round( SE( result ) , 3 ) == 2.776 )

This example matches the jackknife sampling, imputation, and total variances of the same row:

australia_fn <- unzipped_files[ grepl( 'asgaus' , basename( unzipped_files ) ) ]
australia_tbl <- read_spss( australia_fn )
australia_tbl <- zap_labels( australia_tbl )
australia_df <- data.frame( australia_tbl )
names( australia_df ) <- tolower( names( australia_df ) )

estimate <-
    mean( c(
        with( australia_df , weighted.mean( asmmat01 , totwgt ) ) ,
        with( australia_df , weighted.mean( asmmat02 , totwgt ) ) ,
        with( australia_df , weighted.mean( asmmat03 , totwgt ) ) ,
        with( australia_df , weighted.mean( asmmat04 , totwgt ) ) ,
        with( australia_df , weighted.mean( asmmat05 , totwgt ) )
    ) )

stopifnot( round( estimate , 3 ) == 515.880 )

for( k in 1:5 ){

    this_variance <- 0
    
    for( j in 1:75 ){
        for( i in 0:1 ){
            this_variance <- 
                this_variance + 
                ( 
                    weighted.mean( 
                        australia_df[ , paste0( 'asmmat0' , k ) ] , 
                        ifelse( 
                            j == australia_df[ , 'jkzone' ] , 
                            australia_df[ , 'totwgt' ] * 2 * ( australia_df[ , 'jkrep' ] == i ) , 
                            australia_df[ , 'totwgt' ] 
                        )
                    ) -
                    weighted.mean( 
                        australia_df[ , paste0( 'asmmat0' , k ) ] , 
                        australia_df[ , 'totwgt' ]
                    )
                )^2
        }
    }
    
    assign( paste0( 'v' , k ) , this_variance * 0.5 )

}

sampling_variance <- mean( c( v1 , v2 , v3 , v4 , v5 ) )
stopifnot( round( sampling_variance , 3 ) == 7.397 )

imputation_variance <-
    ( 6 / 5 ) * 
    ( 
        ( ( with( australia_df , weighted.mean( asmmat01 , totwgt ) ) - estimate )^2 / 4 ) +
        ( ( with( australia_df , weighted.mean( asmmat02 , totwgt ) ) - estimate )^2 / 4 ) +
        ( ( with( australia_df , weighted.mean( asmmat03 , totwgt ) ) - estimate )^2 / 4 ) +
        ( ( with( australia_df , weighted.mean( asmmat04 , totwgt ) ) - estimate )^2 / 4 ) +
        ( ( with( australia_df , weighted.mean( asmmat05 , totwgt ) ) - estimate )^2 / 4 ) 
    )

stopifnot( round( imputation_variance , 3 ) == 0.309 )

stopifnot( round( sampling_variance + imputation_variance , 3 ) == 7.706 )