Skip to contents

This data set contains fictional achievement scores of 13524 students in two domains (reading, listening) in the long format.

Usage

data(trends)

Format

'data.frame': 404281 obs. of 17 variables:

year

Year of evaluation

idstud

Student identifier

idclass

Class identifier

wgt

individual student weight

jkzone

jackknife zone (primary sampling unit)

jkrep

jackknife replicate

country

The country the examinee comes from

language

language spoken at home

ses

student's socioeconomic status

sex

student's sex

domain

The domain the variable belongs to

booklet

booklet identifier. Identical booklet identifiers indicate identical booklets across years (assessment cycles)

block

block identifier

task

task identifier

item

item identifier

format

item format

pos

position of the block within the booklet

value

The response of the student to the item (0=incorrect; 1=correct)

Source

Simulated data

Examples

data(trends)
# number of students per year, country and domain
by(data=trends, INDICES = trends[,"year"], FUN = function(x) { tapply(x[,"idstud"], x[,c("country", "domain")], FUN = function(y){length(unique(y))})})
#> trends[, "year"]: 2010
#>           domain
#> country    listening reading
#>   countryA      1598    1598
#>   countryB      1309    1309
#>   countryC      1569    1569
#> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
#> trends[, "year"]: 2015
#>           domain
#> country    listening reading
#>   countryA      1482    1502
#>   countryB      1220    1237
#>   countryC      1709    1777
#> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
#> trends[, "year"]: 2020
#>           domain
#> country    listening reading
#>   countryA      1283    1363
#>   countryB      1207    1245
#>   countryC      1830    1865
# number of items per year, country and domain
by(data=trends, INDICES = trends[,"year"], FUN = function(x) { tapply(x[,"item"], x[,c("country", "domain")], FUN = function(y){length(unique(y))})})
#> trends[, "year"]: 2010
#>           domain
#> country    listening reading
#>   countryA        51      80
#>   countryB        51      80
#>   countryC        51      80
#> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
#> trends[, "year"]: 2015
#>           domain
#> country    listening reading
#>   countryA        96     119
#>   countryB        96     119
#>   countryC        96     119
#> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
#> trends[, "year"]: 2020
#>           domain
#> country    listening reading
#>   countryA       119     137
#>   countryB       119     137
#>   countryC       119     137

# no overlapping student IDs between assessment cycles
ids <- by(data=trends, INDICES = trends[,"year"], FUN = function (x) {unique(x[,"idstud"])})
length(intersect(ids[[1]], ids[[2]]))
#> [1] 0
length(intersect(ids[[1]], ids[[3]]))
#> [1] 0
length(intersect(ids[[2]], ids[[3]]))
#> [1] 0

# sampling weights substantially differ between countries due to stratified sampling
eatTools::roundDF(do.call("rbind",  by(data=trends, INDICES = trends[,c("year", "country")], FUN = function (x) {data.frame ( x[1,c("year", "country")], eatTools::descr(x[!duplicated(x[,"idstud"]),"wgt"])[,c("Minimum", "Maximum", "Mean", "Median", "SD")], stringsAsFactors = FALSE)})), digits = 3)
#>       year  country Minimum Maximum    Mean Median     SD
#> 577   2010 countryA   1.152   7.489   3.106  3.457  1.077
#> 12660 2015 countryA   1.062  12.738   3.280  3.185  1.526
#> 16615 2020 countryA   1.089  15.680   3.804  3.578  1.670
#> 1889  2010 countryB   8.312  18.480  12.039 11.375  2.716
#> 13428 2015 countryB   2.895  24.221  11.993 12.212  4.134
#> 17111 2020 countryB   5.542  83.014  12.157 11.694  5.402
#> 1     2010 countryC  76.281 337.244 102.684 99.498 24.658
#> 11716 2015 countryC   2.000 397.493  82.224 88.332 47.131
#> 16105 2020 countryC  48.145 249.383  74.794 68.996 22.566

# which booklets occur in which assessment cycles?
# see, for example: Bo01 occurs only in 2010; Bo02 occurs in 2010, 2015, and 2020; Bo83 occurs in 2015 and 2020
reshape2::dcast(do.call("rbind", by(data=trends, INDICES = trends[,"year"], FUN = function (x) {data.frame ( x[1,"year", drop=FALSE], table(x[!duplicated(x[,"idstud"]),"booklet"]), stringsAsFactors = FALSE)})), year~Var1, value.var = "Freq")
#> Warning: row names were found from a short variable and have been discarded
#> Warning: row names were found from a short variable and have been discarded
#> Warning: row names were found from a short variable and have been discarded
#>   year Bo01 Bo02 Bo03 Bo04 Bo05 Bo06 Bo07 Bo09 Bo13 Bo17 Bo18 Bo19 Bo20 Bo21 Bo22 Bo23 Bo24 Bo25 Bo26 Bo27 Bo28 Bo29 Bo30 Bo31 Bo32 Bo33 Bo34 Bo35 Bo36 Bo37 Bo38 Bo39 Bo40 Bo41 Bo42 Bo43 Bo44 Bo45 Bo46 Bo91 Bo08 Bo10 Bo11 Bo12 Bo14 Bo15 Bo16 Bo47 Bo48 Bo49 Bo50 Bo51 Bo52 Bo53 Bo54 Bo55 Bo56 Bo57 Bo58 Bo59 Bo60 Bo61 Bo62 Bo63 Bo64 Bo83 Bo84 Bo85 Bo86 Bo87 Bo88 Bo89 Bo90 Bo65 Bo66 Bo67 Bo68 Bo69 Bo70 Bo71 Bo72 Bo73 Bo74 Bo75 Bo76 Bo77 Bo78 Bo79 Bo80 Bo81 Bo82
#> 1 2010  183  172  188  168  185  167  184  168  177  162  166  154  169  193  179  180  171  190  177  176   44   43   45   41   47   47   66   58   55   56   55   52   48   46   49   44   42   38   48   43   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
#> 2 2015   NA  133   NA  136   NA   NA  119  145  126  136   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   94  114  127  153  116  100  123  111  119   79  124  109  106   89  115  104  147  112  142  134  116   77   91   83  119   80  129  106  108  105  153  105  131   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
#> 3 2020   NA  105   NA  128   NA   NA  128  122  121  126   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA  112  115   97   94  120  137  122   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA  105  130  136  122  129  123   95  129  112   80   93  112  135   87  118  102   98  127  134  106  105  142  130  144  111  100

# which reading tasks occur in which assessment cycles?
# see, for example: T01 occurs in 2010, 2015, and 2020; T27 occurs only in 2020
reshape2::dcast(do.call("rbind", by(data=subset(trends,domain=="reading"), INDICES = subset(trends,domain=="reading")[,"year"], FUN = function (x) {data.frame ( x[1,"year", drop=FALSE], table(x[!duplicated(x[,"idstud"]),"task"]), stringsAsFactors = FALSE)})), year~Var1, value.var = "Freq")
#> Warning: row names were found from a short variable and have been discarded
#> Warning: row names were found from a short variable and have been discarded
#> Warning: row names were found from a short variable and have been discarded
#>   year T01 T02 T03 T04 T05 T06 T07 T08 T09 T10 T11 T22 T23 T24 T25 T26 T27
#> 1 2010 478 600 522 271 237 197 456 444 532 388 351  NA  NA  NA  NA  NA  NA
#> 2 2015 201 321 362 173 316 139 481 277 344 346 190 361 335 376 294  NA  NA
#> 3 2020 206 410 286 132 255 181 391 203 237 325 136 289 303 347 408 183 181

# students nested in classes?
reformulas::isNested(trends[,"idstud"], trends[,"idclass"])
#> [1] TRUE
# items nested in tasks?
reformulas::isNested(trends[,"item"], trends[,"task"])
#> [1] TRUE
# tasks nested in blocks? no, a few tasks occur in more than one block
reformulas::isNested(trends[,"task"], trends[,"block"])
#> [1] FALSE
# tasks nested in blocks for specific years?
by(data=trends, INDICES = trends[,"year"], FUN = function (y) {lme4::isNested(y[,"task"], y[,"block"]) })
#> Warning: the ‘isNested’ function has moved to the reformulas package. Please update your imports, or ask an upstream package maintainer to do so.
#> This warning is displayed once per session.
#> trends[, "year"]: 2010
#> [1] TRUE
#> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
#> trends[, "year"]: 2015
#> [1] FALSE
#> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
#> trends[, "year"]: 2020
#> [1] FALSE
# blocks nested in booklets?
lme4::isNested(trends[,"block"], trends[,"booklet"])
#> [1] FALSE