# R-CODE FOR THE JARRETT, FAREWELL & HERZBERG PAPER 13 JULY 2020 #---------------------------------------------------------------- # READ IN THE REDUCED DATA # The reduced data includes a random sample of 6 Raters from each Training group # These are Raters numbered 2, 11, 14, 15, 17, 33, 44, 48, 49, 54, 61, 66 plaidr <- read.csv("plaidr.csv") plaidr$Rater <- factor(plaidr$Rater) plaidr$Patient <- factor(plaidr$Patient) # plaidr2 averages this data over the two levels of M # This is the data reported in the "Simplified Example" plaidr2 <- plaidr[plaidr$M==-1,] plaidrp <- plaidr[plaidr$M==1,] plaidr2$Y <- (plaidr2$Y+plaidrp$Y)/2 # This ANOVA captures all the terms in Table 2 lmTwor1 <- aov(Y ~ (T/Rater)*(E/Patient)+Error(Rater*Patient), data=plaidr2) summary(lmTwor1) # More precisely lmTwor2 <- aov(Y ~ (T/Rater)*(E/Patient),data=plaidr2) anova(lmTwor2)[[2]] # PERMUTATION TESTS # First rename the values of "Rater" to numbers 1-12 plaidr2$Rater <- as.numeric(factor(plaidr2$Rater)) table(plaidr2$Rater) # Create matrices whose columns are the selected combinations coT <- combn(12,6) coP <- combn(8,4) dim(coT) dim(coP) # Set up matrix to hold all the SS from all the ANOVAs results <- matrix(0,9,64680) dimnames(results)[[1]] <- dimnames(anova(lmTwor2))[[1]] # Now loop over 924 permutations of Raters and 70 permutations of Patients options(scipen=999) # or scipen=0 start.time <- Sys.time() ll <- 1 for (j in 1:924) {# First produce factors Tr and Rr plaidr2$Tr <- factor(1*(plaidr2$Rater%in%coT[,j])) tr <- table(plaidr2$Tr,plaidr2$Rater) whr <- c(as.numeric(dimnames(tr)[[2]])[tr[1,]>0], as.numeric(dimnames(tr)[[2]])[tr[2,]>0]) plaidr2$Rr <- rep(0,96) for (i in 1:6) {plaidr2$Rr[plaidr2$Rater==whr[i]] <- i plaidr2$Rr[plaidr2$Rater==whr[i+6]] <- i} plaidr2$Rr <- factor(plaidr2$Rr) for (k in 1:70) {# Now factors Er and Pr plaidr2$Er <- factor(1*(plaidr2$Patient%in%coP[,k])) er <- table(plaidr2$Er,plaidr2$Patient) whp <- c(as.numeric(dimnames(er)[[2]])[er[1,]>0], as.numeric(dimnames(er)[[2]])[er[2,]>0]) plaidr2$Pr <- rep(0,96) for (i in 1:4) {plaidr2$Pr[plaidr2$Patient==whp[i]] <- i plaidr2$Pr[plaidr2$Patient==whp[i+4]] <- i} plaidr2$Pr <- factor(plaidr2$Pr) lmTwor3 <- aov(Y ~ (Tr/Rr)*(Er/Pr),data=plaidr2) results[,ll] <- anova(lmTwor3)[[2]] ll <- ll+1}} end.time <- Sys.time() end.time-start.time results[,1:6] # The first "combination" is the observed set of data # Permutation test for T length(results[1,results[1,]>=33.1836])/70 # So, the prob of getting this or a more extreme result is 38/924 = 0.0411. # Figure 1: Graph of permutation distribution against Normal theory tf <- sort(results[1,]*10/results[3,]) # Sorted F-values plot(c(0.0000001,tf),c(0:64680)/64680,log="x",xlab="T",ylab="cdf", cex=0.5,type="l") lines(qf((0:64680)/64680,1,10),(0:64680)/64680,col=2,lty=2) abline(v= results[1,1]*10/results[3,1]) # Actual value of F-ratio legend(locator(1),legend=c("Permutation","Normal theory"),lty=1:2,col=1:2) # An alternative permutation distribution for T # This time, we want to look at the ratio of T/(d+b-c) denom <- apply(results[c(3,6,8),],2,function(x) {x[1]/10+x[2]/6-x[3]/60}) rT <- results[1,]/denom # Just the 924, which ones are they? (1:64680)[abs(results[2,]-1263.3)<0.05][1:20] fT <- rT[1+70*(0:923)] dT <- denom[1+70*(0:923)] plot(10*results[1,1+70*(0:923)]/results[3,1+70*(0:923)],fT[1:924], xlab="T/R",ylab="T/(R+TP-RP)") abline(v=4.778,h=3.4817) # Plot the "actual" one! # Which is the actual one? I think its fT[1] points(10*results[1,1]/results[3,1],fT[1],col=2,pch=16) length(fT[fT>=3.4817]) # So the P-value here is 28/924=0.0303 ttf <- sort(fT) plot(c(0.0000001,ttf),c(0:924)/924,log="x",xlab="T",ylab="cdf",type="l") lines(qf((0:924)/924,1,20),(0:924)/924,col=2,lty=2) abline(v=3.48) legend(locator(1),legend=c("Permutation","Normal theory"),lty=1:2,col=1:2) # EXPECTED MEAN SQUARES FOR THE SIMPLIFIED EXAMPLE # Matrix "ems1" will have coefficients for variance components in Table 3 # Note: this is set up for "nr" raters, so can be used here or for full data set nr <- 6 ems1 <- matrix(0,9,5) tm <- c("T","R","E","P","TE","RE","TP","RP") vm <- c("vrp","vr","vp","vtp","vre") dimnames(ems1) <- list(c("G",tm),vm) # Set up matrix functions we need I <- function(n){diag(1,n)} J <- function(n){matrix(1/n,n,n)} K <- function(n){diag(1,n)-matrix(1/n,n,n)} # First we have 5 incidence matrices imat1 <- list(res = I(16*nr), r = rep(1,8)%x%I(2*nr), p = I(8)%x%rep(1,2*nr), tp = I(8)%x%I(2)%x%rep(1,nr), re = I(2)%x%rep(1,4)%x%I(2*nr)) # Then we have the 9 projector matrices (see Table 3) Proj1 <- list(G = J(2)%x%J(4)%x%J(2)%x%J(nr), T = J(2)%x%J(4)%x%K(2)%x%J(nr), R = J(2)%x%J(4)%x%I(2)%x%K(nr), E = K(2)%x%J(4)%x%J(2)%x%J(nr), P = I(2)%x%K(4)%x%J(2)%x%J(nr), TE = K(2)%x%J(4)%x%K(2)%x%J(nr), RE = K(2)%x%J(4)%x%I(2)%x%K(nr), TP = I(2)%x%K(4)%x%K(2)%x%J(nr), RP = I(2)%x%K(4)%x%I(2)%x%K(nr)) # Now form the matrix "ems1" with the coefficients for Table 3 # The first step is to get Expected SS for (i in 1:9) {for (j in 1:5) {ems1[i,j] <- sum(diag(t(imat1[[j]])%*%Proj1[[i]]%*%imat1[[j]]))}} # Check that this adds up to 96 for each variance component apply(ems1,2,sum) # Divide by df to get ems as in Table 3 df1 <- c(1,1,2*(nr-1),1,6,1,2*(nr-1),6,12*(nr-1)) for (j in 1:5) {ems1[,j] <- ems1[,j]/df1} round(ems1,2) #------------------------------------------------------------ # READ IN THE FULL SET OF DATA plaid <- read.csv("plaid.csv") plaid$Rater <- factor(plaid$Rater) plaid$Patient <- factor(plaid$Patient) # ANOVA IN TABLE 4 lmTwo1 <- aov(Y ~ (T/Rater)*(E/Patient)*M+Error(Rater*(Patient/M)), data=plaid) summary(lmTwo1) # FIND THE "EFFECTS" AND THE TESTS FOR THEM AS IN TABLE 6 X <- as.matrix(plaid[,c(9,10,12,11,13:15)]) X <- cbind(rep(1,1184)/2,X) te <- t(X)%*%plaid$Y/592 round(t(te),4) # Sums of squares for effects t(te^2*296) # Create the matrix to turn "means" into "effects" xmat <- matrix(1,8,8) xmat[2,c(1,3,5,7)] <- -1 xmat[3,c(1,2,5,6)] <- -1 xmat[5,c(1,2,3,4)] <- -1 xmat[4,] <- xmat[2,]*xmat[3,] xmat[6,] <- xmat[2,]*xmat[5,] xmat[7,] <- xmat[3,]*xmat[5,] xmat[8,] <- xmat[2,]*xmat[7,] t(xmat)%*%xmat xmat[1,] <- xmat[1,]/8 xmat[2:8,] <- xmat[2:8,]/4 dimnames(xmat) <- list(c("G","T","E","TE","M","TM","EM","TEM"), c("(1)","t","e","te","m","tm","em","tem")) xmat # MEANS FOR TABLE 7 # The inverse of xmat takes "effects" to "means" # This creates the means for the 8 treatment combinations tm <- solve(xmat)%*%te round(t(tm),4) # # EXPECTED MEAN SQUARES FOR THE FULL EXAMPLE # Matrix "ems" will have coefficients for variance components in Table 5 # Note: this is set up for "nr" raters, # We have done this with nr=7, as nr=37 makes the matrices very big nr <- 7 ems <- matrix(0,18,10) dimnames(ems) <- list(c("G",tm,"M",paste(tm,"M",sep="")), c(vm,paste(vm,rep("m",5),sep=""))) # We now have 10 incidence matrices imat <- list(rp = I(8)%x%rep(1,2)%x%I(2*nr), r = rep(1,8)%x%rep(1,2)%x%I(2*nr), p = I(8)%x%rep(1,2)%x%rep(1,2*nr), tp = I(8)%x%rep(1,2)%x%I(2)%x%rep(1,nr), re = I(2)%x%rep(1,4)%x%rep(1,2)%x%I(2*nr), rpm = I(8)%x%I(2)%x%I(2*nr), rm = rep(1,8)%x%I(2)%x%I(2*nr), pm = I(8)%x%I(2)%x%rep(1,2*nr), tpm = I(8)%x%I(2)%x%I(2)%x%rep(1,nr), rem = I(2)%x%rep(1,4)%x%I(2)%x%I(2*nr)) # Then we have the 18 projector matrices Proj <- list(G = J(2)%x%J(4)%x%J(2)%x%J(2)%x%J(nr), T = J(2)%x%J(4)%x%J(2)%x%K(2)%x%J(nr), R = J(2)%x%J(4)%x%J(2)%x%I(2)%x%K(nr), E = K(2)%x%J(4)%x%J(2)%x%J(2)%x%J(nr), P = I(2)%x%K(4)%x%J(2)%x%J(2)%x%J(nr), TE = K(2)%x%J(4)%x%J(2)%x%K(2)%x%J(nr), RE = K(2)%x%J(4)%x%J(2)%x%I(2)%x%K(nr), TP = I(2)%x%K(4)%x%J(2)%x%K(2)%x%J(nr), RP = I(2)%x%K(4)%x%J(2)%x%I(2)%x%K(nr), M = J(2)%x%J(4)%x%K(2)%x%J(2)%x%J(nr), TM = J(2)%x%J(4)%x%K(2)%x%K(2)%x%J(nr), RM = J(2)%x%J(4)%x%K(2)%x%I(2)%x%K(nr), EM = K(2)%x%J(4)%x%K(2)%x%J(2)%x%J(nr), PM = I(2)%x%K(4)%x%K(2)%x%J(2)%x%J(nr), TEM = K(2)%x%J(4)%x%K(2)%x%K(2)%x%J(nr), REM = K(2)%x%J(4)%x%K(2)%x%I(2)%x%K(nr), TPM = I(2)%x%K(4)%x%K(2)%x%K(2)%x%J(nr), RPM = I(2)%x%K(4)%x%K(2)%x%I(2)%x%K(nr)) # Now form the matrix "ems" with the coefficients for Table 5 # The first step is to get Expected SS for (i in 1:18) {for (j in 1:10) {ems[i,j] <- sum(diag(t(imat[[j]])%*%Proj[[i]]%*%imat[[j]]))}} # Check that this adds up to 224 for each variance component apply(ems,2,sum) # Divide by df to get ems df1 <- c(1,1,2*(nr-1),1,6,1,2*(nr-1),6,12*(nr-1)) df <- rep(df1,times=2) for (j in 1:10) {ems[,j] <- ems[,j]/df} round(ems,2) # Convert df and coefficients to the case with nr=37 df[c(3,7,9,12,16,18)] <- df[c(3,7,9,12,16,18)]*36/6 df for (j in c(3,4,8,9)) {ems[,j] <- ems[,j]*37/7} round(ems,2) # Choose the rows that are used to estimate the variances round(ems[c(3,5,7,8,9,12,14,16,17,18),],4) # Provide the actual mean squares for these val <- c(8.72,2258,8.229,9.934,3.183,8.26,114.5, 7.48,9.89,3.355) names(val) <- dimnames(ems[c(3,5,7,8,9,12,14,16,17,18),])[[1]] # Now invert the matrix to get the solution for the 10 variance components # Note that these are not restricted to be non-negative. vest <- solve(ems[c(3,5,7,8,9,12,14,16,17,18),])%*%val round(t(vest),3) # These agree with ASReml # PRODUCE TABLE 6 # Now create the variances for all effects # This takes the rows for the effects and calculates the variances as in Table 5 veff <- ems[-c(3,5,7,8,9,12,14,16,17,18),]%*%vest round(t(veff),3) # These agree very closely with Table 6 in the paper # Now eff^2/var=F, so var=eff^2/F and hence sd=eff/sqrt(F). But F=296eff^2/veff # Hence sdeff <- sqrt(veff/296) round(t(sdeff),4) # CODE TO FIND THE DF OF EACH TEST # Get variance estimate for each effect tem <- ems[c(3,5,7,8,9,12,14,16,17,18),] # First give the matrix which combines the stratum variances vtem <- matrix(0,7,10,dimnames=list(dimnames(tu)[[1]],dimnames(tem)[[1]])) vtem[1,c(1,4,5)] <- c(1,1,-1) vtem[2,c(2,3,5)] <- c(1,1,-1) vtem[3,c(3,4,5)] <- c(1,1,-1) vtem[4,c(6,7,10)] <- c(1,1,-1) vtem[5,c(6,9,10)] <- c(1,1,-1) vtem[6,c(8,7,10)] <- c(1,1,-1) vtem[7,c(8,9,10)] <- c(1,1,-1) vtem # Create new estimate of variances of effects vtem%*%val # And calculate the df for each effect test dftem <- df[c(3,5,7,8,9,12,14,16,17,18)] # Find df of each test dftest <- (vtem%*%val)^2/((vtem^2)%*%diag(1/dftem)%*%(val^2)) tu <- cbind(te[-1],296*te[-1]^2,veff[-1], 296*te[-1]^2/veff[-1],dftest) dimnames(tu)[[2]] <- c("Effect","Mean Sq","Variance","F-value","df") round(tu,3) # MEANS FOR THE 8 COMBINATIONS AND THEIR SED's (TABLE 7) # The inverse of xmat takes "effects" to "means" tm <- t(solve(xmat))%*%c(mean(plaid$Y),te) round(t(tm),4) # We have shown elsewhere that the effects are uncorrelated so Var(effects) # is a diagonal matrix. So the variance matrix of the means (excluding the # grand mean) is the following. vm <- solve(xmat)%*%diag(c(0,as.vector(sdeff)[-1]^2))%*%t(solve(xmat)) round(vm,4) # And then SEDs between means are: one <- rep(1,8) sed <- round(sqrt(diag(vm)%*%t(one)+one%*%t(diag(vm))-2*vm),2) sed table(sed) # WEIGHTED LEAST SQUARES ESTIMATES OF EFFECTS # Now repeat this exercise using the estimated variance matrix for the whole experiment E <- function(n) {matrix(1,n,n)} I <- function(n){diag(1,n)} V <- 3.355*I(1184) + 0.1768*I(2)%x%E(37)%x%I(16)+ 1.0325*I(74)%x%I(2)%x%E(4)%x%I(2)+ 1.4147*E(74)%x%I(16)+ 0.0975*I(74)%x%E(8)%x%I(2)- 0.086*I(74)%x%I(8)%x%E(2)+ 0.003* I(2)%x%E(37)%x%I(8)%x%E(2)+ 0.115* I(74)%x%I(2)%x%E(4)%x%E(2)+ 14.482* E(74)%x%I(8)%x%E(2)- 0.018* I(74)%x%E(8)%x%E(2) U <- solve(V) # Now construct the "T" matrix. Use Tt for the treatments and Tu for the contrasts one <- rep(1,1184) Tu <- as.matrix(cbind(one,X/2)) # Necessary to /2 to get "effects" Vu <- solve(t(Tu)%*%U%*%Tu) tu <- Vu%*%t(Tu)%*%U%*%plaid$Y tu <- cbind(tu,sqrt(diag(Vu))) tu <- cbind(tu,tu[,1]/tu[,2]) tu <- cbind(tu,tu[,3]^2) dimnames(tu)[[2]] <- c("Effect","SE","t-value","F-value") tu # ASREML CODE library(asreml) # Note: this requires a licence from VSNI # Set up the 10 variance component factors plaid2 <- plaid plaid2$TP <- factor(as.numeric(plaid2$Train)*8-8+as.numeric(plaid2$Patient)) plaid2$RE <- factor(as.numeric(plaid2$Rater)*2-2+(as.numeric(plaid2$E)+1)/2) plaid2$RP <- factor(as.numeric(plaid2$Rater)*8-8+as.numeric(plaid2$Patient)) plaid2$RM <- factor(as.numeric(plaid2$Rater)*2-2+(as.numeric(plaid2$M) +1)/2) plaid2$PM <- factor(as.numeric(plaid2$Patient)*2-2+(as.numeric(plaid2$M) +1)/2) plaid2$TPM <- factor(as.numeric(plaid2$TP)*2-2+(as.numeric(plaid2$M) +1)/2) plaid2$REM <- factor(as.numeric(plaid2$RE)*2-2+(as.numeric(plaid2$M) +1)/2) # Set up to allow negative variance components vgam <- rep(-0.001,9) names(vgam) <- rep("U",9) plaid.asr <- asreml(Y ~ Train*Expr*Move, random = ~ idv(Rater,init=vgam[1])+idv(Patient,init=vgam[2])+ idv(TP,init=vgam[4])+idv(RE,init=vgam[5])+ idv(RP,init=vgam[3])+idv(RM,init=vgam[6])+idv(PM,init=vgam[7])+ idv(TPM,init=vgam[8])+ idv(REM,init=vgam[9]), residual=~id(units), data = plaid2) # Show the tests, as in Table 6 wald(plaid.asr,denDF="default")$Wald # Show the formulae for stratum variances as in Table 5 round(wald(plaid.asr,denDF="default")$stratumVariances,4) # Show the estimates of variance components summary(plaid.asr)$varcomp # Determine the predicted means and their SEDs as in Table 7 trm <- predict(plaid.asr, classify = "Train*Expr*Move", maxiter = 1, sed=TRUE) trm$pvals round(trm$sed,4) table(round(as.matrix(trm$sed),2))