Trends in International Mathematics and Science Study (TIMSS)
A comparative study of student achievement in math and science across more than 50 nations.
Grade-specific tables with one record per school, student, teacher, plus files containing student achievement, home background, student-teacher linkage, and within-country scoring reliability.
A complex survey generalizing to fourth- and eighth-grade populations of participating countries.
Released quadrennially since 1995.
Funded by the International Association for the Evaluation of Educational Achievement, run at BC.
Recommended Reading
Four Example Strengths & Limitations:
✔️ Sixty-five education systems, six benchmarking systems participated in 4th or 8th grade assessment
✔️ Collects rich array of data related to teacher characteristics
❌ Differences in sample selectivity across countries potentially undermines validity of rankings
❌ Low stakes examination potentially biased by non-serious test-takers
Three Example Findings:
New Zealand Year 9 student chemistry performance improved between 2019 and 2023.
Students who were food-insecure had lower math achievement than their peers in 2019.
In mathematics, U.S. 4th- and 8th-graders scored lower, on average, in 2023 than they did in 2019.
Two Methodology Documents:
TIMSS 2019 User Guide for the International Database, 2nd Edition
One Haiku:
Function Definitions
This survey uses a multiply-imputed variance estimation technique described in Methods Chapter 14. Most users do not need to study this function carefully. Define a function specific to only this dataset:
timss_MIcombine <-
function (results, variances, call = sys.call(), df.complete = Inf, ...) {
m <- length(results)
oldcall <- attr(results, "call")
if (missing(variances)) {
variances <- suppressWarnings(lapply(results, vcov))
results <- lapply(results, coef)
}
vbar <- variances[[1]]
cbar <- results[[1]]
for (i in 2:m) {
cbar <- cbar + results[[i]]
vbar <- vbar + variances[[i]]
}
cbar <- cbar/m
vbar <- vbar/m
# MODIFICATION
# evar <- var(do.call("rbind", results))
evar <- sum( ( unlist( results ) - cbar )^2 / 4 )
r <- (1 + 1/m) * evar/vbar
df <- (m - 1) * (1 + 1/r)^2
if (is.matrix(df)) df <- diag(df)
if (is.finite(df.complete)) {
dfobs <- ((df.complete + 1)/(df.complete + 3)) * df.complete *
vbar/(vbar + evar)
if (is.matrix(dfobs)) dfobs <- diag(dfobs)
df <- 1/(1/dfobs + 1/df)
}
if (is.matrix(r)) r <- diag(r)
rval <- list(coefficients = cbar, variance = vbar + evar *
(m + 1)/m, call = c(oldcall, call), nimp = m, df = df,
missinfo = (r + 2/(df + 3))/(r + 1))
class(rval) <- "MIresult"
rval
}
Download, Import, Preparation
Download and unzip the 2019 fourth grade international database:
library(httr)
tf <- tempfile()
this_url <- "https://timss2019.org/international-database/downloads/T19_G4_SPSS%20Data.zip"
GET( this_url , write_disk( tf ) , progress() )
unzipped_files <- unzip( tf , exdir = tempdir() )
Import and stack each of the student context data files for Albania through Canada:
library(haven)
# limit unzipped files to those starting with `asg` followed by three letters followed by `m7`
asg_fns <- unzipped_files[ grepl( '^asg[a-z][a-z][a-z]m7' , basename( unzipped_files ) ) ]
# further limit asg files to the first ten countries
countries_thru_canada <- c("alb", "arm", "aus", "aut", "aze", "bhr", "bfl", "bih", "bgr", "can")
fns_thru_canada <- paste0( paste0( '^asg' , countries_thru_canada , 'm7' ) , collapse = "|" )
asg_alb_can_fns <- asg_fns[ grepl( fns_thru_canada , basename( asg_fns ) ) ]
timss_df <- NULL
for( spss_fn in asg_alb_can_fns ){
this_tbl <- read_spss( spss_fn )
this_tbl <- zap_labels( this_tbl )
this_df <- data.frame( this_tbl )
names( this_df ) <- tolower( names( this_df ) )
timss_df <- rbind( timss_df , this_df )
}
# order the data.frame by unique student id
timss_df <- timss_df[ with( timss_df , order( idcntry , idstud ) ) , ]
Save Locally
Save the object at any point:
# timss_fn <- file.path( path.expand( "~" ) , "TIMSS" , "this_file.rds" )
# saveRDS( timss_df , file = timss_fn , compress = FALSE )
Load the same object:
Survey Design Definition
Construct a multiply-imputed, complex sample survey design:
From among possibly plausible values, determine all columns that are multiply-imputed plausible values:
# identify all columns ending with `01` thru `05`
ppv <- grep( "(.*)0[1-5]$" , names( timss_df ) , value = TRUE )
# remove those ending digits
ppv_prefix <- gsub( "0[1-5]$" , "" , ppv )
# identify each of the possibilities with exactly five matches (five implicates)
pv <- names( table( ppv_prefix )[ table( ppv_prefix ) == 5 ] )
# identify each of the `01` thru `05` plausible value columns
pv_columns <-
grep(
paste0( "^" , pv , "0[1-5]$" , collapse = "|" ) ,
names( timss_df ) ,
value = TRUE
)
Extract those multiply-imputed columns into a separate data.frame, then remove them from the source:
Reshape these columns from one record per student to one record per student per implicate:
pv_long_df <-
reshape(
pv_wide_df ,
varying = lapply( paste0( pv , '0' ) , paste0 , 1:5 ) ,
direction = 'long' ,
timevar = 'implicate' ,
idvar = c( 'idcntry' , 'idstud' )
)
names( pv_long_df ) <- gsub( "01$" , "" , names( pv_long_df ) )
Merge the columns from the source data.frame onto the one record per student per implicate data.frame:
timss_long_df <- merge( timss_df , pv_long_df )
timss_long_df <- timss_long_df[ with( timss_long_df , order( idcntry , idstud ) ) , ]
stopifnot( nrow( timss_long_df ) == nrow( pv_long_df ) )
stopifnot( nrow( timss_long_df ) / 5 == nrow( timss_df ) )
Divide the five plausible value implicates into a list with five data.frames based on the implicate number:
Construct a replicate weights table following the estimation technique described in Methods Chapter 14:
weights_df <- timss_df[ c( 'jkrep' , 'jkzone' ) ]
for( j in 1:75 ){
for( i in 0:1 ){
weights_df[ weights_df[ , 'jkzone' ] != j , paste0( 'rw' , i , j ) ] <- 1
weights_df[ weights_df[ , 'jkzone' ] == j , paste0( 'rw' , i , j ) ] <-
2 * ( weights_df[ weights_df[ , 'jkzone' ] == j , 'jkrep' ] == i )
}
}
weights_df[ c( 'jkrep' , 'jkzone' ) ] <- NULL
Define the design:
Variable Recoding
Add new columns to the data set:
timss_design <-
update(
timss_design ,
one = 1 ,
countries_thru_canada =
factor(
as.numeric( idcntry ) ,
levels = c(8L, 51L, 36L, 40L, 31L, 48L, 956L, 70L, 100L, 124L) ,
labels =
c("Albania", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahrain",
"Belgium (Flemish)", "Bosnia and Herzegovina", "Bulgaria", "Canada")
) ,
sex = factor( asbg01 , levels = 1:2 , labels = c( "female" , "male" ) ) ,
born_in_country = ifelse( asbg07 %in% 1:2 , as.numeric( asbg07 == 1 ) , NA )
)
Analysis Examples with the survey
library
Unweighted Counts
Count the unweighted number of records in the survey sample, overall and by groups:
Descriptive Statistics
Calculate the mean (average) of a linear variable, overall and by groups:
timss_MIcombine( with( timss_design , svymean( ~ asmmat , na.rm = TRUE ) ) )
timss_MIcombine( with( timss_design ,
svyby( ~ asmmat , ~ sex , svymean , na.rm = TRUE )
) )
Calculate the distribution of a categorical variable, overall and by groups:
timss_MIcombine( with( timss_design , svymean( ~ countries_thru_canada ) ) )
timss_MIcombine( with( timss_design ,
svyby( ~ countries_thru_canada , ~ sex , svymean )
) )
Calculate the sum of a linear variable, overall and by groups:
timss_MIcombine( with( timss_design , svytotal( ~ asmmat , na.rm = TRUE ) ) )
timss_MIcombine( with( timss_design ,
svyby( ~ asmmat , ~ sex , svytotal , na.rm = TRUE )
) )
Calculate the weighted sum of a categorical variable, overall and by groups:
timss_MIcombine( with( timss_design , svytotal( ~ countries_thru_canada ) ) )
timss_MIcombine( with( timss_design ,
svyby( ~ countries_thru_canada , ~ sex , svytotal )
) )
Calculate the median (50th percentile) of a linear variable, overall and by groups:
timss_MIcombine( with( timss_design ,
svyquantile(
~ asmmat ,
0.5 , se = TRUE , na.rm = TRUE
) ) )
timss_MIcombine( with( timss_design ,
svyby(
~ asmmat , ~ sex , svyquantile ,
0.5 , se = TRUE ,
ci = TRUE , na.rm = TRUE
) ) )
Estimate a ratio:
Subsetting
Restrict the survey design to Australia, Austria, Azerbaijan, Belgium (French):
Calculate the mean (average) of this subset:
Measures of Uncertainty
Extract the coefficient, standard error, confidence interval, and coefficient of variation from any descriptive statistics function result, overall and by groups:
this_result <-
timss_MIcombine( with( timss_design ,
svymean( ~ asmmat , na.rm = TRUE )
) )
coef( this_result )
SE( this_result )
confint( this_result )
cv( this_result )
grouped_result <-
timss_MIcombine( with( timss_design ,
svyby( ~ asmmat , ~ sex , svymean , na.rm = TRUE )
) )
coef( grouped_result )
SE( grouped_result )
confint( grouped_result )
cv( grouped_result )
Calculate the degrees of freedom of any survey design object:
Calculate the complex sample survey-adjusted variance of any statistic:
Include the complex sample design effect in the result for a specific statistic:
# SRS without replacement
timss_MIcombine( with( timss_design ,
svymean( ~ asmmat , na.rm = TRUE , deff = TRUE )
) )
# SRS with replacement
timss_MIcombine( with( timss_design ,
svymean( ~ asmmat , na.rm = TRUE , deff = "replace" )
) )
Compute confidence intervals for proportions using methods that may be more accurate near 0 and 1. See ?svyciprop
for alternatives:
Regression Models and Tests of Association
Perform a design-based t-test:
Perform a chi-squared test of association for survey data:
Perform a survey-weighted generalized linear model:
glm_result <-
timss_MIcombine( with( timss_design ,
svyglm( asmmat ~ born_in_country + countries_thru_canada )
) )
summary( glm_result )
Replication Example
This example matches the mean proficiency and standard error of the Australia
row of the Summary Statistics and Standard Errors for Proficiency in Overall Mathematics-Grade 4
table from the Appendix 14A: Summary Statistics and Standard Errors for Proficiency in Grade 4 Mathematics:
australia_design <- subset( timss_design , countries_thru_canada %in% "Australia" )
stopifnot( nrow( australia_design ) == 5890 )
result <- timss_MIcombine( with( australia_design , svymean( ~ asmmat ) ) )
stopifnot( round( coef( result ) , 3 ) == 515.880 )
stopifnot( round( SE( result ) , 3 ) == 2.776 )
This example matches the jackknife sampling, imputation, and total variances of the same row:
australia_fn <- unzipped_files[ grepl( 'asgaus' , basename( unzipped_files ) ) ]
australia_tbl <- read_spss( australia_fn )
australia_tbl <- zap_labels( australia_tbl )
australia_df <- data.frame( australia_tbl )
names( australia_df ) <- tolower( names( australia_df ) )
estimate <-
mean( c(
with( australia_df , weighted.mean( asmmat01 , totwgt ) ) ,
with( australia_df , weighted.mean( asmmat02 , totwgt ) ) ,
with( australia_df , weighted.mean( asmmat03 , totwgt ) ) ,
with( australia_df , weighted.mean( asmmat04 , totwgt ) ) ,
with( australia_df , weighted.mean( asmmat05 , totwgt ) )
) )
stopifnot( round( estimate , 3 ) == 515.880 )
for( k in 1:5 ){
this_variance <- 0
for( j in 1:75 ){
for( i in 0:1 ){
this_variance <-
this_variance +
(
weighted.mean(
australia_df[ , paste0( 'asmmat0' , k ) ] ,
ifelse(
j == australia_df[ , 'jkzone' ] ,
australia_df[ , 'totwgt' ] * 2 * ( australia_df[ , 'jkrep' ] == i ) ,
australia_df[ , 'totwgt' ]
)
) -
weighted.mean(
australia_df[ , paste0( 'asmmat0' , k ) ] ,
australia_df[ , 'totwgt' ]
)
)^2
}
}
assign( paste0( 'v' , k ) , this_variance * 0.5 )
}
sampling_variance <- mean( c( v1 , v2 , v3 , v4 , v5 ) )
stopifnot( round( sampling_variance , 3 ) == 7.397 )
imputation_variance <-
( 6 / 5 ) *
(
( ( with( australia_df , weighted.mean( asmmat01 , totwgt ) ) - estimate )^2 / 4 ) +
( ( with( australia_df , weighted.mean( asmmat02 , totwgt ) ) - estimate )^2 / 4 ) +
( ( with( australia_df , weighted.mean( asmmat03 , totwgt ) ) - estimate )^2 / 4 ) +
( ( with( australia_df , weighted.mean( asmmat04 , totwgt ) ) - estimate )^2 / 4 ) +
( ( with( australia_df , weighted.mean( asmmat05 , totwgt ) ) - estimate )^2 / 4 )
)
stopifnot( round( imputation_variance , 3 ) == 0.309 )
stopifnot( round( sampling_variance + imputation_variance , 3 ) == 7.706 )