Simulation
set.seed(0314,sample.kind="Rounding")
Sim <-100 # repetition of simulation
G <- 500
Block <- c(100,100,100) # 3 dense block (each size=100)
Rho <- 0.8
Nrho <- -0.4
cohd1 <- 0.3
cohd2 <- 0.3
Area1.CoReg <- Area1.OLS <- Area1.SVA <- rep(0,Sim)
Area2.CoReg <- Area2.OLS <- Area2.SVA <- rep(0,Sim)
AreaC.CoReg <- AreaC.OLS <- AreaC.SVA <- rep(0,Sim)
# Signal Strength in bocks
rho1 <- 0.8
rho2 <- 0.6
rho3 <- 0.4
rho <- 0.8 # for noise
block <- Block
block1 <- block[1]
block2 <- block[2]
block3 <- block[3]
# Generating Covariance matrix
Sigma=eye(G);
Sigma_1=squareform(rho1*rep(1,block[1]*(block[1]-1)/2)); # Use direct rho (actually r)
Sigma_2=squareform(rho2*rep(1,block[2]*(block[2]-1)/2)); # Use direct rho (actually r)
Sigma_3=squareform(rho3*rep(1,block[3]*(block[3]-1)/2)); # Use direct rho (actually r)
Sigma=eye(G);
Sigma[1:block1,1:block1]=Sigma[1:block1,1:block1]+Sigma_1;
Sigma[(block1+1):(block1+block2),(block1+1):(block1+block2)]=Sigma[(block1+1):(block1+block2),(block1+1):(block1+block2)]+Sigma_2;
Sigma[(block1+block2+1):(block1+block2+block3),(block1+block2+1):(block1+block2+block3)]=Sigma[(block1+block2+1):(block1+block2+block3),(block1+block2+1):(block1+block2+block3)]+Sigma_3;
Sigma[1:block1,(block1+1):(block1+block2)]= Nrho
Sigma[(block1+1):(block1+block2),1:block1]= Nrho
noise <- rep(0,G*(G-1)/2)
length.noise <- length(noise)
rate <- 0.01
N.noise <- length(noise)*rate
noise[sample(length.noise,N.noise,replace=FALSE)] <- rep(rho,N.noise)
Sigma_noise <- squareform(noise)
Sigma <- Sigma+Sigma_noise
Sigma <- Sigma*(Sigma<=1)
Sigma <- as.matrix(nearPD(Sigma,corr = TRUE, keepDiag = TRUE)$mat)
R = chol(Sigma);
R[abs(R)<0.01] <- 0
# Results save
Auc.mean <- matrix(0,nrow=length(Rho),ncol=3)
Auc.sd <- matrix(0,nrow=length(Rho),ncol=3)
Measures <- array(0,c(3,5,length(Rho)))
Measures.sd <- array(0,c(3,5,length(Rho)))
for(j in 1:length(Rho)){
block <- Block
block1 <- block[1]
block2 <- block[2]
block3 <- block[3]
R = chol(Sigma);
n=100
p1=0.9
p0=0.1;
ROC.LM.plot.list <- array(0,c(G+1,Sim,2))
ROC.SCFA.plot.list <- array(0,c(G+1,Sim,2))
ROC.SVA.plot.list <- array(0,c(G+1,Sim,2))
SVA.AUC <- c(0,Sim)
LM.AUC <- c(0,Sim)
SCFA.AUC <- c(0,Sim)
Measure.SCFA <- matrix(0,nrow=Sim,ncol=5)
Measure.LM <- matrix(0,nrow=Sim,ncol=5)
Measure.SVA <- matrix(0,nrow=Sim,ncol=5)
# Repetition
for(s in 1:Sim){
print(s)
# First Data
X <- rep(c(0,1),each=n)
B <- matrix(rep(c(1*(seq(block1)<=block1*0.9), 1*(seq(block2)<=block2*0.9),1*(seq(block3)<=block3*0.9),1*(seq(G-sum(Block))<=0.1*(G-sum(Block)))),each=2*n),ncol=G)
mu <- matrix(rep(X,G),ncol=G)*B*cohd1
data <- mu+matrix(rnorm(2*n*G,0,sqrt(0.5)),ncol=G)%*%R #0.25
Y <- data
# Second Data
X2 <- rep(c(0,1),each=n)
B2 <- matrix(rep(c(1*(seq(block1)<=block1*0.9), 1*(seq(block2)<=block2*0.9),1*(seq(block3)<=block3*0.9),1*(seq(G-sum(Block))<=0.1*(G-sum(Block)))),each=2*n),ncol=G)
mu2 <- matrix(rep(X,G),ncol=G)*B*cohd2
data2 <- mu+matrix(rnorm(2*n*G,0,sqrt(1)),ncol=G)%*%R #0.25
Y2 <- data2
# Fit LM
lm.fit <- lm(as.matrix(Y)~X)
LM.Result <- coef(summary(lm.fit))
Residual.LM <- residuals(lm.fit)
q <- ncol(Y)
Pval.LM <- sapply(1:q, function(i) LM.Result[[i]][2,4])
length(which(Pval.LM<0.05))
Qval.LM <- p.adjust(Pval.LM,method="BH")
length(which(Qval.LM<0.05))
T.LM <- sapply(1:q, function(i) LM.Result[[i]][2,3]) # t-values for GLM
# Fit 2nd LM
lm.fit2 <- lm(as.matrix(Y2)~X2)
LM.Result2 <- coef(summary(lm.fit2))
Residual.LM2 <- residuals(lm.fit2)
q <- ncol(Y2)
Pval.LM2 <- sapply(1:q, function(i) LM.Result2[[i]][2,4])
length(which(Pval.LM2<0.05))
Qval.LM2 <- p.adjust(Pval.LM2,method="BH")
length(which(Qval.LM2<0.05))
T.LM2 <- sapply(1:q, function(i) LM.Result2[[i]][2,3]) # t-values for GLM
# Fit CoReg
# Tuning 1st
Lambda <- seq(0.3,0.8,length.out = 20)
Norm <- rep(0,length(Lambda))
for(l in 1:length(Lambda)){
dense.result.temp <- dense(cor(Residual.LM),threshold = c(rho2-0.4), lambda = Lambda[l])
ord.index.temp <- dense.result.temp$Clist
scfa.fit.temp <- scfa(Residual.LM, dense.result.temp$CID, dense.result.temp$Clist)
FC.temp <- t(scfa.fit.temp$factorscore)
Norm[l] <- norm(cov(Residual.LM)- scfa.fit.temp$loading%*%cov(FC.temp)%*%t(scfa.fit.temp$loading),type="F")+sum(svd(scfa.fit.temp$loading)$d)
}
best.Lambda <- Lambda[which.min(Norm)]
# Fit CoReg with optimal
dense.result <- dense(cor(Residual.LM),threshold = c(rho2-0.4), lambda = best.Lambda)
ord.index <- dense.result$Clist
scfa.fit<- scfa(Residual.LM, dense.result$CID, dense.result$Clist)
FC <- t(scfa.fit$factorscore)
SCFA <- lm(as.matrix(Y)~X+FC)
SCFA.Result <- coef(summary(SCFA))
cormat.cfa <- cor(SCFA$residuals)
colnames(cormat.cfa) <- NULL
rownames(cormat.cfa) <- NULL
melt.cfa <- melt(cormat.cfa)
Pval.SCFA <- sapply(1:q, function(i) SCFA.Result[[i]][2,4])
length(which(Pval.SCFA<0.05))
Qval.SCFA <- p.adjust(Pval.SCFA,method="BH")
length(which(Qval.SCFA<0.05))
T.SCFA <- sapply(1:q, function(i) SCFA.Result[[i]][2,3]) # t-values for CoReg
# Tuning 2nd
Lambda <- seq(0.3,0.8,length.out = 20)
Norm2 <- rep(0,length(Lambda))
for(l in 1:length(Lambda)){
dense.result.temp2 <- dense(cor(Residual.LM2),threshold = c(rho2-0.4), lambda = Lambda[l])
ord.index.temp2 <- dense.result.temp2$Clist
scfa.fit.temp2 <- scfa(Residual.LM2, dense.result.temp2$CID, dense.result.temp2$Clist)
FC.temp2 <- t(scfa.fit.temp2$factorscore)
Norm2[l] <- norm(cov(Residual.LM2)- scfa.fit.temp2$loading%*%cov(FC.temp2)%*%t(scfa.fit.temp2$loading),type="F")+sum(svd(scfa.fit.temp2$loading)$d)
}
best.Lambda <- Lambda[which.min(Norm2)]
# Fit CoReg with optimal
dense.result2 <- dense(cor(Residual.LM2),threshold = c(rho2-0.4), lambda = best.Lambda)
ord.index2 <- dense.result2$Clist
scfa.fit2<- scfa(Residual.LM2, dense.result2$CID, dense.result2$Clist)
FC2 <- t(scfa.fit2$factorscore)
SCFA2 <- lm(as.matrix(Y2)~X2+FC2)
SCFA.Result2 <- coef(summary(SCFA2))
Pval.SCFA2 <- sapply(1:q, function(i) SCFA.Result2[[i]][2,4])
length(which(Pval.SCFA2<0.05))
Qval.SCFA2 <- p.adjust(Pval.SCFA2,method="BH")
length(which(Qval.SCFA2<0.05))
T.SCFA2 <- sapply(1:q, function(i) SCFA.Result2[[i]][2,3]) # t-values for SECM
### Fit SVA
### SVA for the first data
X <- as.matrix(X,ncol=1)
sva.fit <- sva(t(Y),mod=cbind(1,X),mod0=matrix(1,nrow=nrow(X)))
G.factor <- sva.fit$sv
sva.fit <- lm(as.matrix(Y)~X+G.factor)
SVA.Result <- coef(summary(sva.fit))
Residual.SVA <- residuals(sva.fit)
cormat.sva <- cor(Residual.SVA)
q <- ncol(Y)
colnames(cormat.sva) <- NULL
rownames(cormat.sva) <- NULL
melt.sva <- melt(cormat.sva)
Pval.SVA <- sapply(1:q, function(i) SVA.Result[[i]][2,4])
length(which(Pval.SVA<0.05))
Qval.SVA <- p.adjust(Pval.SVA,method="BH")
length(which(Qval.SVA<0.05))
### SVA for the second data
X2 <- as.matrix(X2,ncol=1)
sva.fit2 <- sva(t(Y2),mod=cbind(1,X2),mod0=matrix(1,nrow=nrow(X2)))
G.factor2 <- sva.fit2$sv
sva.fit2 <- lm(as.matrix(Y2)~X2+G.factor2)
SVA.Result2 <- coef(summary(sva.fit2))
Residual.SVA2 <- residuals(sva.fit2)
Pval.SVA2 <- sapply(1:q, function(i) SVA.Result2[[i]][2,4])
length(which(Pval.SVA2<0.05))
Qval.SVA2 <- p.adjust(Pval.SVA2,method="BH")
length(which(Qval.SVA2<0.05))
nonzero <- which(B[1,]>0)
TP1.SCFA <- c(1*(Qval.SCFA[nonzero]<0.05))
TP2.SCFA <- c(1*(Qval.SCFA2[nonzero]<0.05))
TP1.SVA <- c(1*(Qval.SVA[nonzero]<0.05))
TP2.SVA <- c(1*(Qval.SVA2[nonzero]<0.05))
TP1.LM <- c(1*(Qval.LM[nonzero]<0.05))
TP2.LM <- c(1*(Qval.LM2[nonzero]<0.05))
Area1.CoReg[s] <- length(which(Qval.SCFA<0.05))
Area2.CoReg[s] <- length(which(Qval.SCFA2<0.05))
AreaC.CoReg[s] <- length(intersect(which(Qval.SCFA<0.05),which(Qval.SCFA2<0.05)))
Area1.OLS[s] <- length(which(Qval.LM<0.05))
Area2.OLS[s] <- length(which(Qval.LM2<0.05))
AreaC.OLS[s] <- length(intersect(which(Qval.LM<0.05),which(Qval.LM2<0.05)))
Area1.SVA[s] <- length(which(Qval.SVA<0.05))
Area2.SVA[s] <- length(which(Qval.SVA2<0.05))
AreaC.SVA[s] <- length(intersect(which(Qval.SVA<0.05),which(Qval.SVA2<0.05)))
}
}
Results
Number of overlapped True Positive findings in each iteration
Repeat <- seq(1:Sim)
Common <- data.frame(Repeat,AreaC.OLS,AreaC.SVA,AreaC.CoReg)
colnames(Common) <- c("Repeat","OLS","SVA","CoReg")
kable(Common)
| 1 |
63 |
13 |
130 |
| 2 |
75 |
83 |
244 |
| 3 |
16 |
106 |
223 |
| 4 |
36 |
78 |
200 |
| 5 |
17 |
39 |
190 |
| 6 |
0 |
71 |
157 |
| 7 |
99 |
93 |
223 |
| 8 |
214 |
211 |
269 |
| 9 |
9 |
130 |
186 |
| 10 |
76 |
88 |
229 |
| 11 |
118 |
115 |
228 |
| 12 |
92 |
101 |
218 |
| 13 |
126 |
92 |
169 |
| 14 |
46 |
81 |
178 |
| 15 |
21 |
35 |
192 |
| 16 |
18 |
92 |
197 |
| 17 |
1 |
10 |
168 |
| 18 |
99 |
57 |
161 |
| 19 |
1 |
96 |
180 |
| 20 |
104 |
64 |
215 |
| 21 |
56 |
100 |
243 |
| 22 |
71 |
104 |
182 |
| 23 |
64 |
81 |
134 |
| 24 |
0 |
93 |
128 |
| 25 |
98 |
95 |
133 |
| 26 |
78 |
46 |
115 |
| 27 |
120 |
122 |
253 |
| 28 |
79 |
98 |
134 |
| 29 |
107 |
100 |
200 |
| 30 |
100 |
166 |
196 |
| 31 |
25 |
92 |
200 |
| 32 |
54 |
19 |
204 |
| 33 |
97 |
101 |
179 |
| 34 |
23 |
165 |
203 |
| 35 |
145 |
172 |
254 |
| 36 |
8 |
83 |
189 |
| 37 |
96 |
71 |
227 |
| 38 |
94 |
167 |
246 |
| 39 |
95 |
97 |
248 |
| 40 |
2 |
104 |
199 |
| 41 |
106 |
98 |
200 |
| 42 |
89 |
66 |
245 |
| 43 |
104 |
96 |
188 |
| 44 |
1 |
100 |
175 |
| 45 |
51 |
99 |
210 |
| 46 |
81 |
88 |
214 |
| 47 |
81 |
99 |
244 |
| 48 |
1 |
15 |
146 |
| 49 |
1 |
96 |
159 |
| 50 |
46 |
140 |
201 |
| 51 |
29 |
13 |
133 |
| 52 |
59 |
30 |
195 |
| 53 |
134 |
98 |
224 |
| 54 |
0 |
52 |
210 |
| 55 |
4 |
5 |
116 |
| 56 |
0 |
66 |
169 |
| 57 |
46 |
102 |
223 |
| 58 |
76 |
88 |
192 |
| 59 |
49 |
160 |
189 |
| 60 |
134 |
165 |
209 |
| 61 |
48 |
129 |
210 |
| 62 |
20 |
69 |
206 |
| 63 |
109 |
119 |
194 |
| 64 |
65 |
112 |
207 |
| 65 |
96 |
86 |
153 |
| 66 |
91 |
94 |
237 |
| 67 |
1 |
59 |
175 |
| 68 |
0 |
89 |
211 |
| 69 |
195 |
175 |
268 |
| 70 |
42 |
6 |
139 |
| 71 |
106 |
90 |
225 |
| 72 |
93 |
91 |
234 |
| 73 |
71 |
94 |
178 |
| 74 |
2 |
7 |
139 |
| 75 |
3 |
106 |
207 |
| 76 |
86 |
120 |
221 |
| 77 |
29 |
100 |
215 |
| 78 |
50 |
98 |
204 |
| 79 |
136 |
12 |
179 |
| 80 |
22 |
119 |
239 |
| 81 |
4 |
88 |
177 |
| 82 |
55 |
95 |
155 |
| 83 |
131 |
106 |
228 |
| 84 |
134 |
97 |
226 |
| 85 |
128 |
13 |
219 |
| 86 |
95 |
105 |
115 |
| 87 |
64 |
92 |
137 |
| 88 |
64 |
19 |
189 |
| 89 |
126 |
98 |
179 |
| 90 |
16 |
2 |
209 |
| 91 |
49 |
98 |
197 |
| 92 |
49 |
123 |
237 |
| 93 |
26 |
58 |
118 |
| 94 |
2 |
99 |
131 |
| 95 |
0 |
90 |
183 |
| 96 |
17 |
52 |
200 |
| 97 |
22 |
25 |
233 |
| 98 |
108 |
94 |
241 |
| 99 |
3 |
95 |
194 |
| 100 |
177 |
118 |
256 |
Average number of overlapped True Positive findings (Venn Diagram)
# Defind colors for each method
myCol <- brewer.pal(3, "Pastel2")
myCol.red <- c("#FF6347","#FFA07A")
myCol.blue <- c("#4169E1","#6495ED")
myCol.green <- c("#98FB98","#C1E1C1")
# Venn Diagram for CoReg
venn.plot.CoReg <- draw.pairwise.venn(
area1 = round(mean(Area1.CoReg)),
area2 = round(mean(Area2.CoReg)),
cross.area = round(mean(AreaC.CoReg)),
euler.d=TRUE,
scaled=TRUE,
category = c("Data 1", "Data 2"),
print.mode = c("raw","percent"),
fill = alpha(myCol.blue,0.5),
lty = "blank",
cex = 1.2,
cat.cex = 1.5,
cat.dist=0.01,
ext.pos = 0,
ext.dist = c(-0.05,-0.2),
ext.length = 0.1,
ext.line.lwd = 0,
ext.line.lty = "blank"
);
# Venn Diagram for OLS
venn.plot.OLS <- draw.pairwise.venn(
area1 = round(mean(Area1.OLS)),
area2 = round(mean(Area2.OLS)),
cross.area = round(mean(AreaC.OLS)),
euler.d=TRUE,
scaled=TRUE,
category = c("Data 1", "Data 2"),
print.mode = c("raw","percent"),
fill = alpha(myCol.red,0.5),
lty = "blank",
cex = 1.2,
cat.cex = 1.5,
cat.dist=0.01,
ext.pos = 0,
ext.dist = -0.2,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "blank"
);
# Venn Diagram for SVA
venn.plot.SVA <- draw.pairwise.venn(
area1 = round(mean(Area1.SVA)),
area2 = round(mean(Area2.SVA)),
cross.area = round(mean(AreaC.SVA)),
euler.d=TRUE,
scaled=TRUE,
category = c("Data 1", "Data 2"),
print.mode = c("raw","percent"),
fill = alpha(myCol.green,0.5),
lty = "blank",
cex = 1.2,
cat.cex = 1.5,
cat.dist=0.01,
ext.pos = 0,
ext.dist = -0.2,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "blank"
);
# Combined plot
p1 <- as.ggplot(~ grid.draw(venn.plot.OLS))
p1 <- as.ggplot(p1 + ggtitle("OLS")+
theme(title=element_text(size=20),
plot.title = element_text(hjust = 0.5)),
plot.margin = unit(c(1, 1, 1, 1), "cm"))
p2 <- as.ggplot(~ grid.draw(venn.plot.SVA))
p2 <- as.ggplot(p2 + ggtitle("SVA")+
theme(title=element_text(size=20),
plot.title = element_text(hjust = 0.5)),
plot.margin = unit(c(1, 1, 1, 1), "cm"))
p3 <- as.ggplot(~ grid.draw(venn.plot.CoReg))
p3 <- as.ggplot(p3 + ggtitle("CoReg")+
theme(title=element_text(size=20),
plot.title = element_text(hjust = 0.5)),
plot.margin = unit(c(1, 1, 1, 1), "cm"))
Plot.diff.noise <- p1+ p2 +p3+
plot_layout(widths = c(1, 1, 1))+
plot_annotation(title = "",
theme = theme(
plot.title = element_text(size = 22) # Set the overall title size here
))
Plot.diff.noise
