library(boot) #u is 1920 city population data u<-matrix(c(138, 93,61,179,48,37,29,23,30,2,38,46,71,25,298,74,50,76,381, 387,78,60,507,50,77,64,40,136,243,256,94,36,45,67,120,172,66,46,121,44,64, 56,40,116,87,43,43,161,36), nrow=49, ncol=1) #x is 1930 city population data x<-matrix(c(143,104,69,260,75,63,50,48,111,50,52,53,79,57,317,93,58,80,464,459, 106,57,634,64,89,77,60,139,291,288,85,46,53,67,115,183,86,65,113,58,63,142, 64,130,105,61,50,232,54), nrow=49, ncol=1) u.bar.N<-mean(u) true.theta<-u.bar.N*(sum(x))/(sum(u)) N<- nrow(u) n<- 10 f<- n/N #Sampling Fraction for this example R<-999 M <- 100 i.s <- seq(1, N, 1) j <- seq(1,n, 1) x.1000 <- matrix(NA, nrow = n, ncol = M) # Each sample is a row u.1000 <- matrix(NA, nrow = n, ncol = M) # Each sample is a row norm.ci.bc<-matrix(1, nrow=M, ncol=2) mod.2 <-matrix(1, nrow=M, ncol=2) mod.11 <-matrix(1, nrow=M, ncol=2) mirror <-matrix(1, nrow=M, ncol=2) pop <-matrix(1, nrow=M, ncol=2) pop.2 <-matrix(1, nrow=M, ncol=2) super <-matrix(1, nrow=M, ncol=2) # Find 1000 size 10 w/o replacement samples set.seed(2105) for (j in 1:M){ sel <- sample(i.s, n, replace=FALSE) x.1000[,j] <- x[sel] u.1000[,j] <- u[sel] } # Functions to be used stats<-function(x.data, u.data, i){ d<-x.data[i] c<-u.data[i] t.rat<-u.bar.N*(sum(d))/(sum(c)) v.rat<-((1-f)/(n*(n-1)))*sum((d-((c*t.rat)/u.bar.N))^2) c(t.rat, sqrt(v.rat)) } calc.t <- function(data, i){ d <-data[i,] t.rat<-u.bar.N*(sum(d[,1]))/(sum(d[,2])) v.rat<-((1-f)/(n*(n-1)))*sum((d[,1]-((d[,2]*t.rat)/u.bar.N))^2) c(t.rat, sqrt(v.rat)) } tees <- matrix(NA, nrow = M, ncol = 4) for (p in 1:M){ x.samp<- x.1000[,p] u.samp<- u.1000[,p] samp.3<-data.frame(x.samp, u.samp) t.0<-stats(x.samp, u.samp, 1:10) tees[p,1]<-t.0[1] # t.rat tees[p,2]<-t.0[2] # v.rat } # Normal (Bias corrected) Interval set.seed(9105) for (p in 1:M){ x.samp<- x.1000[,p] u.samp<- u.1000[,p] samp.3<-data.frame(x.samp, u.samp) boot.res<-boot(data=samp.3, statistic=calc.t, sim='ordinary', R=999) t.rat.bias <- sum(boot.res$t[,1]-tees[p,1])/999 alpha<-0.10 ub.trat.bc<-tees[p,1]-t.rat.bias-qnorm(alpha/2)*(tees[p,2]) #my normal approximation is larger than what the book got lb.trat.bc<-tees[p,1]-t.rat.bias-qnorm(1-alpha/2)*(tees[p,2]) norm.ci.bc[p,1] <- lb.trat.bc norm.ci.bc[p,2] <- ub.trat.bc } L.norm <- sum(true.theta<=norm.ci.bc[,1]) U.norm <- sum(true.theta<=norm.ci.bc[,2]) norm.sum <- c(L.norm, U.norm) ############################################################################## #Mod size n.prime=2 n<-2 j <- seq(1,10,1) t.mod.2 <- matrix(NA, nrow=R, ncol=M) v.mod.2 <- matrix(NA, nrow=R, ncol=M) z.star.mod.2 <- matrix(NA, nrow=R, ncol=M) set.seed(9105) for (b in 1:M){ x.samp<-x.1000[,b] u.samp<-u.1000[,b] for (i in 1:R){ j.star<-sample(j, size=2, replace=FALSE) y.star<-x.samp[j.star] u.star<-u.samp[j.star] sol<- stats(y.star, u.star, 1:2) t.mod.2[i,b] <- sol[1] #t.rat v.mod.2[i,b] <- sol[2] z.star.mod.2[i,b] <- (t.mod.2[i,b] - tees[b,1])/v.mod.2[i,b] } } # Mod 2 Studentized CI for (b in 1:M) { z <- sort(z.star.mod.2[,b]) mod.2[b,1] <- tees[b,1]-z[950]*(tees[b,2]) mod.2[b,2] <- tees[b,1]-z[50]*(tees[b,2]) } L.2<-sum(true.theta<=mod.2[,1]) U.2<-sum(true.theta<=mod.2[,2]) mod.2.sum <- c(L.2,U.2) ############################################################################### #Mod size n.prime=11 n<-11 j <- seq(1,10,1) t.mod.11 <- matrix(NA, nrow=R, ncol=M) v.mod.11 <- matrix(NA, nrow=R, ncol=M) z.star.mod.11 <- matrix(NA, nrow=R, ncol=M) set.seed(9105) for (b in 1:M) { x.samp<-x.1000[,b] u.samp<-u.1000[,b] for (i in 1:R){ j.star<-sample(j, size=11, replace=TRUE) y.star<-x.samp[j.star] u.star<-u.samp[j.star] sol <- stats(y.star, u.star, 1:11) t.mod.11[i,b] <- sol[1] #t.rat v.mod.11[i,b] <- sol[2] z.star.mod.11[i,b] <- (t.mod.11[i,b] - tees[b,1])/v.mod.11[i,b] } } # Mod 11 Studentized CI for (b in 1:M) { z <- sort(z.star.mod.11[,b]) mod.11[b,1] <- tees[b,1]-z[950]*(tees[b,2]) mod.11[b,2] <- tees[b,1]-z[50]*(tees[b,2]) } head(mod.11) L.11<-sum(true.theta<=mod.11[,1]) U.11<-sum(true.theta<=mod.11[,2]) mod.11.sum<-c(L.11,U.11) ############################################################################### #Mirror match m=2, k = 5 n<-10 m<-2 k<-5 t.mir <- matrix(NA, nrow=R, ncol=M) v.mir <- matrix(NA, nrow=R, ncol=M) z.star.mir <- matrix(NA, nrow=R, ncol=M) set.seed(9105) for (b in 1:M) { x.samp<-x.1000[,b] u.samp<-u.1000[,b] for (i in 1:R){ j.star.1<-sample(j, k, replace=FALSE)#, prob=c(n/N, n/N, n/N, n/N, n/N)) j.star.2<-sample(j, k, replace=FALSE)#, prob=c(n/N, n/N, n/N, n/N, n/N)) y.star<-matrix(c(x.samp[j.star.1], x.samp[j.star.2]), nrow=n, ncol=1) u.star<-matrix(c(u.samp[j.star.1], u.samp[j.star.2]), nrow=n, ncol=1) sol <- stats(y.star, u.star, 1:n) t.mir[i,b] <- sol[1] #t.rat v.mir[i,b] <- sol[2] #v.rat z.star.mir[i,b] <- (t.mir[i,b] - tees[b,1])/v.mir[i,b] } } # Mirror, m=2 & k=5 Studentized CI for (b in 1:M) { z <- sort(z.star.mir[,b]) mirror[b,1] <- tees[b,1]-z[950]*(tees[b,2]) mirror[b,2] <- tees[b,1]-z[50]*(tees[b,2]) } head(mirror) L.mir<-sum(true.theta<=mirror[,1]) U.mir<-sum(true.theta<=mirror[,2]) mir.sum<-c(L.mir,U.mir) ################################################################################ #population n<-10 j <- seq(1,10,1) t.pop.2<- matrix(NA, nrow=R, ncol=M) v.pop.2<-matrix(NA, nrow=R, ncol=M) z.star.pop.2<- matrix(NA, nrow=R, ncol=M) for (b in 1:M){ x.samp<-x.1000[,b] u.samp<-u.1000[,b] for (i in 1:R){ kn.x<- c(x.samp, x.samp, x.samp, x.samp) kn.u<- c(u.samp, u.samp, u.samp, u.samp) j.star<- sample(j, size=9, replace=FALSE) l.x<- x.samp[j.star] l.u<- u.samp[j.star] Y.star <- matrix(c(kn.x, l.x), nrow=N, ncol=1) U.star <- matrix(c(kn.u, l.u), nrow=N, ncol=1) G<-sample(seq(1, N, 1), n, replace=FALSE) sol <- stats(Y.star[G], U.star[G], 1:n) t.pop.2[i,b] <- sol[1] #t.rat v.pop.2[i,b] <- sol[2] z.star.pop.2[i,b] <- (t.pop.2[i,b]-tees[b,1])/(v.pop.2[i,b]) } } # Population Studentized CI for (b in 1:M) { z.2 <- sort(z.star.pop.2[,b]) pop.2[b,1] <- tees[b,1]-z.2[950]*(tees[b,2]) pop.2[b,2] <- tees[b,1]-z.2[50]*(tees[b,2]) } head(pop.2) L.pop<-sum(true.theta<=pop.2[,1]) U.pop<-sum(true.theta<=pop.2[,2]) pop.sum<-c(L.pop,U.pop) #superpopulation n<-10 j<-seq(1,10,1) t.super<-matrix(NA, nrow=R, ncol=M) v.super<-matrix(NA, nrow=R, ncol=M) z.star<- matrix(NA, nrow=R, ncol=M) for (p in 1:M){ x.samp<-x.1000[,p] u.samp<-u.1000[,p] for (i in 1:R){ j.star.N<-sample(j, N, replace=TRUE) j.star<-sample(j.star.N, n, replace=FALSE) y.star<-x.samp[j.star] u.star<-u.samp[j.star] sol<-stats(y.star, u.star, 1:n) t.super[i,p]<-sol[1] #t.rat v.super[i,p]<-sol[2] #sqrt(v.rat) z.star[i,p] <- (t.super[i,p]-tees[p,1])/(v.super[i,p]) } } # Super-population Studentized CI for (b in 1:M) { z <- sort(z.star[,b]) super[b,1] <- tees[b,1]-z[950]*(tees[b,2]) super[b,2] <- tees[b,1]-z[50]*(tees[b,2]) } head(super) L.norm <- sum(true.theta<=norm.ci.bc[,1])*100/M U.norm <- sum(true.theta<=norm.ci.bc[,2])*100/M O.norm <- (U.norm-L.norm) norm.sum <- c(L.norm, U.norm, O.norm) L.2 <- sum(true.theta<=mod.2[,1])*100/M U.2 <- sum(true.theta<=mod.2[,2])*100/M O.2 <- (U.2-L.2) mod.2.sum <- c(L.2, U.2, O.2) L.11<-sum(true.theta<=mod.11[,1])*100/M U.11<-sum(true.theta<=mod.11[,2])*100/M O.11 <- (U.11-L.11) mod.11.sum<-c(L.11,U.11, O.11) L.mir<-sum(true.theta<=mirror[,1])*100/M U.mir<-sum(true.theta<=mirror[,2])*100/M O.mir <- (U.mir-L.mir) mir.sum<-c(L.mir, U.mir, O.mir) L.pop<-sum(true.theta<=pop.2[,1])*100/M U.pop<-sum(true.theta<=pop.2[,2])*100/M O.pop<- (U.pop-L.pop) pop.sum<-c(L.pop, U.pop, O.pop) L.sup<-sum(true.theta<=super[,1])*100/M U.sup<-sum(true.theta<=super[,2])*100/M O.sup <- (U.sup-L.sup) super.sum<-c(L.sup,U.sup, O.sup) data.frame(c('lower', 'upper', 'overall'), norm.sum, mod.2.sum, mod.11.sum, mir.sum, pop.sum, super.sum) ######################## # Find average lengths of CIs len.norm <- mean(norm.ci.bc[,2]-norm.ci.bc[,1]) len.mod.2 <- mean(mod.2[,2]-mod.2[,1]) len.mod.11 <- mean(mod.11[,2]-mod.11[,1]) len.mirror <- mean(mirror[,2]-mirror[,1]) len.pop <- mean(pop[,2]-pop[,1]) len.pop.2 <- mean(pop.2[,2]-pop.2[,1]) len.super <- mean(super[,2]-super[,1]) sd.norm <- sqrt(var(norm.ci.bc[,2]-norm.ci.bc[,1])) sd.mod.2 <- sqrt(var(mod.2[,2]-mod.2[,1])) sd.mod.11 <- sqrt(var(mod.11[,2]-mod.11[,1])) sd.mirror <- sqrt(var(mirror[,2]-mirror[,1])) sd.pop <- sqrt(var(pop[,2]-pop[,1])) sd.pop.2 <- sqrt(var(pop.2[,2]-pop.2[,1])) sd.super <- sqrt(var(super[,2]-super[,1])) lens <- c(len.norm, len.mod.2, len.mod.11, len.mirror, len.pop, len.pop.2, len.super) sd.len <-c(sd.norm, sd.mod.2, sd.mod.11, sd.mirror, sd.pop, sd.pop.2, sd.super) data.frame(c('norm', 'mod.2', 'mod.11', 'mirror', 'pop', 'pop.2', 'super'), lens, sd.len)