Simulation
set.seed(0314,sample.kind="Rounding")
Sim <-100 # repetition of simulation
G <- 500
Block <- c(100,100,100) # 3 dense block (each size=100)
Rho <- 0.8
Nrho <- -0.4
cohd1 <- 0.3
cohd2 <- 0.3
Area1.CoReg <- Area1.OLS <- Area1.SVA <- rep(0,Sim)
Area2.CoReg <- Area2.OLS <- Area2.SVA <- rep(0,Sim)
AreaC.CoReg <- AreaC.OLS <- AreaC.SVA <- rep(0,Sim)
# Signal Strength in bocks
rho1 <- 0.8
rho2 <- 0.6
rho3 <- 0.4
rho <- 0.8 # for noise
block <- Block
block1 <- block[1]
block2 <- block[2]
block3 <- block[3]
# Generating Covariance matrix
Sigma=eye(G);
Sigma_1=squareform(rho1*rep(1,block[1]*(block[1]-1)/2)); # Use direct rho (actually r)
Sigma_2=squareform(rho2*rep(1,block[2]*(block[2]-1)/2)); # Use direct rho (actually r)
Sigma_3=squareform(rho3*rep(1,block[3]*(block[3]-1)/2)); # Use direct rho (actually r)
Sigma=eye(G);
Sigma[1:block1,1:block1]=Sigma[1:block1,1:block1]+Sigma_1;
Sigma[(block1+1):(block1+block2),(block1+1):(block1+block2)]=Sigma[(block1+1):(block1+block2),(block1+1):(block1+block2)]+Sigma_2;
Sigma[(block1+block2+1):(block1+block2+block3),(block1+block2+1):(block1+block2+block3)]=Sigma[(block1+block2+1):(block1+block2+block3),(block1+block2+1):(block1+block2+block3)]+Sigma_3;
Sigma[1:block1,(block1+1):(block1+block2)]= Nrho
Sigma[(block1+1):(block1+block2),1:block1]= Nrho
noise <- rep(0,G*(G-1)/2)
length.noise <- length(noise)
rate <- 0.01
N.noise <- length(noise)*rate
noise[sample(length.noise,N.noise,replace=FALSE)] <- rep(rho,N.noise)
Sigma_noise <- squareform(noise)
Sigma <- Sigma+Sigma_noise
Sigma <- Sigma*(Sigma<=1)
Sigma <- as.matrix(nearPD(Sigma,corr = TRUE, keepDiag = TRUE)$mat)
R = chol(Sigma);
R[abs(R)<0.01] <- 0
# Results save
Auc.mean <- matrix(0,nrow=length(Rho),ncol=3)
Auc.sd <- matrix(0,nrow=length(Rho),ncol=3)
Measures <- array(0,c(3,5,length(Rho)))
Measures.sd <- array(0,c(3,5,length(Rho)))
for(j in 1:length(Rho)){
block <- Block
block1 <- block[1]
block2 <- block[2]
block3 <- block[3]
R = chol(Sigma);
n=100
p1=0.9
p0=0.1;
ROC.LM.plot.list <- array(0,c(G+1,Sim,2))
ROC.SCFA.plot.list <- array(0,c(G+1,Sim,2))
ROC.SVA.plot.list <- array(0,c(G+1,Sim,2))
SVA.AUC <- c(0,Sim)
LM.AUC <- c(0,Sim)
SCFA.AUC <- c(0,Sim)
Measure.SCFA <- matrix(0,nrow=Sim,ncol=5)
Measure.LM <- matrix(0,nrow=Sim,ncol=5)
Measure.SVA <- matrix(0,nrow=Sim,ncol=5)
# Repetition
for(s in 1:Sim){
print(s)
# First Data
X <- rep(c(0,1),each=n)
B <- matrix(rep(c(1*(seq(block1)<=block1*0.9), 1*(seq(block2)<=block2*0.9),1*(seq(block3)<=block3*0.9),1*(seq(G-sum(Block))<=0.1*(G-sum(Block)))),each=2*n),ncol=G)
mu <- matrix(rep(X,G),ncol=G)*B*cohd1
data <- mu+matrix(rnorm(2*n*G,0,sqrt(0.5)),ncol=G)%*%R #0.25
Y <- data
# Second Data
X2 <- rep(c(0,1),each=n)
B2 <- matrix(rep(c(1*(seq(block1)<=block1*0.9), 1*(seq(block2)<=block2*0.9),1*(seq(block3)<=block3*0.9),1*(seq(G-sum(Block))<=0.1*(G-sum(Block)))),each=2*n),ncol=G)
mu2 <- matrix(rep(X,G),ncol=G)*B*cohd2
data2 <- mu+matrix(rnorm(2*n*G,0,sqrt(1)),ncol=G)%*%R #0.25
Y2 <- data2
# Fit LM
lm.fit <- lm(as.matrix(Y)~X)
LM.Result <- coef(summary(lm.fit))
Residual.LM <- residuals(lm.fit)
q <- ncol(Y)
Pval.LM <- sapply(1:q, function(i) LM.Result[[i]][2,4])
length(which(Pval.LM<0.05))
Qval.LM <- p.adjust(Pval.LM,method="BH")
length(which(Qval.LM<0.05))
T.LM <- sapply(1:q, function(i) LM.Result[[i]][2,3]) # t-values for GLM
# Fit 2nd LM
lm.fit2 <- lm(as.matrix(Y2)~X2)
LM.Result2 <- coef(summary(lm.fit2))
Residual.LM2 <- residuals(lm.fit2)
q <- ncol(Y2)
Pval.LM2 <- sapply(1:q, function(i) LM.Result2[[i]][2,4])
length(which(Pval.LM2<0.05))
Qval.LM2 <- p.adjust(Pval.LM2,method="BH")
length(which(Qval.LM2<0.05))
T.LM2 <- sapply(1:q, function(i) LM.Result2[[i]][2,3]) # t-values for GLM
# Fit CoReg
# Tuning 1st
Lambda <- seq(0.3,0.8,length.out = 20)
Norm <- rep(0,length(Lambda))
for(l in 1:length(Lambda)){
dense.result.temp <- dense(cor(Residual.LM),threshold = c(rho2-0.4), lambda = Lambda[l])
ord.index.temp <- dense.result.temp$Clist
scfa.fit.temp <- scfa(Residual.LM, dense.result.temp$CID, dense.result.temp$Clist)
FC.temp <- t(scfa.fit.temp$factorscore)
Norm[l] <- norm(cov(Residual.LM)- scfa.fit.temp$loading%*%cov(FC.temp)%*%t(scfa.fit.temp$loading),type="F")+sum(svd(scfa.fit.temp$loading)$d)
}
best.Lambda <- Lambda[which.min(Norm)]
# Fit CoReg with optimal
dense.result <- dense(cor(Residual.LM),threshold = c(rho2-0.4), lambda = best.Lambda)
ord.index <- dense.result$Clist
scfa.fit<- scfa(Residual.LM, dense.result$CID, dense.result$Clist)
FC <- t(scfa.fit$factorscore)
SCFA <- lm(as.matrix(Y)~X+FC)
SCFA.Result <- coef(summary(SCFA))
cormat.cfa <- cor(SCFA$residuals)
colnames(cormat.cfa) <- NULL
rownames(cormat.cfa) <- NULL
melt.cfa <- melt(cormat.cfa)
Pval.SCFA <- sapply(1:q, function(i) SCFA.Result[[i]][2,4])
length(which(Pval.SCFA<0.05))
Qval.SCFA <- p.adjust(Pval.SCFA,method="BH")
length(which(Qval.SCFA<0.05))
T.SCFA <- sapply(1:q, function(i) SCFA.Result[[i]][2,3]) # t-values for CoReg
# Tuning 2nd
Lambda <- seq(0.3,0.8,length.out = 20)
Norm2 <- rep(0,length(Lambda))
for(l in 1:length(Lambda)){
dense.result.temp2 <- dense(cor(Residual.LM2),threshold = c(rho2-0.4), lambda = Lambda[l])
ord.index.temp2 <- dense.result.temp2$Clist
scfa.fit.temp2 <- scfa(Residual.LM2, dense.result.temp2$CID, dense.result.temp2$Clist)
FC.temp2 <- t(scfa.fit.temp2$factorscore)
Norm2[l] <- norm(cov(Residual.LM2)- scfa.fit.temp2$loading%*%cov(FC.temp2)%*%t(scfa.fit.temp2$loading),type="F")+sum(svd(scfa.fit.temp2$loading)$d)
}
best.Lambda <- Lambda[which.min(Norm2)]
# Fit CoReg with optimal
dense.result2 <- dense(cor(Residual.LM2),threshold = c(rho2-0.4), lambda = best.Lambda)
ord.index2 <- dense.result2$Clist
scfa.fit2<- scfa(Residual.LM2, dense.result2$CID, dense.result2$Clist)
FC2 <- t(scfa.fit2$factorscore)
SCFA2 <- lm(as.matrix(Y2)~X2+FC2)
SCFA.Result2 <- coef(summary(SCFA2))
Pval.SCFA2 <- sapply(1:q, function(i) SCFA.Result2[[i]][2,4])
length(which(Pval.SCFA2<0.05))
Qval.SCFA2 <- p.adjust(Pval.SCFA2,method="BH")
length(which(Qval.SCFA2<0.05))
T.SCFA2 <- sapply(1:q, function(i) SCFA.Result2[[i]][2,3]) # t-values for SECM
### Fit SVA
### SVA for the first data
X <- as.matrix(X,ncol=1)
sva.fit <- sva(t(Y),mod=cbind(1,X),mod0=matrix(1,nrow=nrow(X)))
G.factor <- sva.fit$sv
sva.fit <- lm(as.matrix(Y)~X+G.factor)
SVA.Result <- coef(summary(sva.fit))
Residual.SVA <- residuals(sva.fit)
cormat.sva <- cor(Residual.SVA)
q <- ncol(Y)
colnames(cormat.sva) <- NULL
rownames(cormat.sva) <- NULL
melt.sva <- melt(cormat.sva)
Pval.SVA <- sapply(1:q, function(i) SVA.Result[[i]][2,4])
length(which(Pval.SVA<0.05))
Qval.SVA <- p.adjust(Pval.SVA,method="BH")
length(which(Qval.SVA<0.05))
### SVA for the second data
X2 <- as.matrix(X2,ncol=1)
sva.fit2 <- sva(t(Y2),mod=cbind(1,X2),mod0=matrix(1,nrow=nrow(X2)))
G.factor2 <- sva.fit2$sv
sva.fit2 <- lm(as.matrix(Y2)~X2+G.factor2)
SVA.Result2 <- coef(summary(sva.fit2))
Residual.SVA2 <- residuals(sva.fit2)
Pval.SVA2 <- sapply(1:q, function(i) SVA.Result2[[i]][2,4])
length(which(Pval.SVA2<0.05))
Qval.SVA2 <- p.adjust(Pval.SVA2,method="BH")
length(which(Qval.SVA2<0.05))
nonzero <- which(B[1,]>0)
TP1.SCFA <- c(1*(Qval.SCFA[nonzero]<0.05))
TP2.SCFA <- c(1*(Qval.SCFA2[nonzero]<0.05))
TP1.SVA <- c(1*(Qval.SVA[nonzero]<0.05))
TP2.SVA <- c(1*(Qval.SVA2[nonzero]<0.05))
TP1.LM <- c(1*(Qval.LM[nonzero]<0.05))
TP2.LM <- c(1*(Qval.LM2[nonzero]<0.05))
Area1.CoReg[s] <- length(which(Qval.SCFA<0.05))
Area2.CoReg[s] <- length(which(Qval.SCFA2<0.05))
AreaC.CoReg[s] <- length(intersect(which(Qval.SCFA<0.05),which(Qval.SCFA2<0.05)))
Area1.OLS[s] <- length(which(Qval.LM<0.05))
Area2.OLS[s] <- length(which(Qval.LM2<0.05))
AreaC.OLS[s] <- length(intersect(which(Qval.LM<0.05),which(Qval.LM2<0.05)))
Area1.SVA[s] <- length(which(Qval.SVA<0.05))
Area2.SVA[s] <- length(which(Qval.SVA2<0.05))
AreaC.SVA[s] <- length(intersect(which(Qval.SVA<0.05),which(Qval.SVA2<0.05)))
}
}
Results
Number of overlapped True Positive findings in each iteration
Repeat <- seq(1:Sim)
Common <- data.frame(Repeat,AreaC.OLS,AreaC.SVA,AreaC.CoReg)
colnames(Common) <- c("Repeat","OLS","SVA","CoReg")
kable(Common)
1 |
63 |
13 |
130 |
2 |
75 |
83 |
244 |
3 |
16 |
106 |
223 |
4 |
36 |
78 |
200 |
5 |
17 |
39 |
190 |
6 |
0 |
71 |
157 |
7 |
99 |
93 |
223 |
8 |
214 |
211 |
269 |
9 |
9 |
130 |
186 |
10 |
76 |
88 |
229 |
11 |
118 |
115 |
228 |
12 |
92 |
101 |
218 |
13 |
126 |
92 |
169 |
14 |
46 |
81 |
178 |
15 |
21 |
35 |
192 |
16 |
18 |
92 |
197 |
17 |
1 |
10 |
168 |
18 |
99 |
57 |
161 |
19 |
1 |
96 |
180 |
20 |
104 |
64 |
215 |
21 |
56 |
100 |
243 |
22 |
71 |
104 |
182 |
23 |
64 |
81 |
134 |
24 |
0 |
93 |
128 |
25 |
98 |
95 |
133 |
26 |
78 |
46 |
115 |
27 |
120 |
122 |
253 |
28 |
79 |
98 |
134 |
29 |
107 |
100 |
200 |
30 |
100 |
166 |
196 |
31 |
25 |
92 |
200 |
32 |
54 |
19 |
204 |
33 |
97 |
101 |
179 |
34 |
23 |
165 |
203 |
35 |
145 |
172 |
254 |
36 |
8 |
83 |
189 |
37 |
96 |
71 |
227 |
38 |
94 |
167 |
246 |
39 |
95 |
97 |
248 |
40 |
2 |
104 |
199 |
41 |
106 |
98 |
200 |
42 |
89 |
66 |
245 |
43 |
104 |
96 |
188 |
44 |
1 |
100 |
175 |
45 |
51 |
99 |
210 |
46 |
81 |
88 |
214 |
47 |
81 |
99 |
244 |
48 |
1 |
15 |
146 |
49 |
1 |
96 |
159 |
50 |
46 |
140 |
201 |
51 |
29 |
13 |
133 |
52 |
59 |
30 |
195 |
53 |
134 |
98 |
224 |
54 |
0 |
52 |
210 |
55 |
4 |
5 |
116 |
56 |
0 |
66 |
169 |
57 |
46 |
102 |
223 |
58 |
76 |
88 |
192 |
59 |
49 |
160 |
189 |
60 |
134 |
165 |
209 |
61 |
48 |
129 |
210 |
62 |
20 |
69 |
206 |
63 |
109 |
119 |
194 |
64 |
65 |
112 |
207 |
65 |
96 |
86 |
153 |
66 |
91 |
94 |
237 |
67 |
1 |
59 |
175 |
68 |
0 |
89 |
211 |
69 |
195 |
175 |
268 |
70 |
42 |
6 |
139 |
71 |
106 |
90 |
225 |
72 |
93 |
91 |
234 |
73 |
71 |
94 |
178 |
74 |
2 |
7 |
139 |
75 |
3 |
106 |
207 |
76 |
86 |
120 |
221 |
77 |
29 |
100 |
215 |
78 |
50 |
98 |
204 |
79 |
136 |
12 |
179 |
80 |
22 |
119 |
239 |
81 |
4 |
88 |
177 |
82 |
55 |
95 |
155 |
83 |
131 |
106 |
228 |
84 |
134 |
97 |
226 |
85 |
128 |
13 |
219 |
86 |
95 |
105 |
115 |
87 |
64 |
92 |
137 |
88 |
64 |
19 |
189 |
89 |
126 |
98 |
179 |
90 |
16 |
2 |
209 |
91 |
49 |
98 |
197 |
92 |
49 |
123 |
237 |
93 |
26 |
58 |
118 |
94 |
2 |
99 |
131 |
95 |
0 |
90 |
183 |
96 |
17 |
52 |
200 |
97 |
22 |
25 |
233 |
98 |
108 |
94 |
241 |
99 |
3 |
95 |
194 |
100 |
177 |
118 |
256 |
Average number of overlapped True Positive findings (Venn Diagram)
# Defind colors for each method
myCol <- brewer.pal(3, "Pastel2")
myCol.red <- c("#FF6347","#FFA07A")
myCol.blue <- c("#4169E1","#6495ED")
myCol.green <- c("#98FB98","#C1E1C1")
# Venn Diagram for CoReg
venn.plot.CoReg <- draw.pairwise.venn(
area1 = round(mean(Area1.CoReg)),
area2 = round(mean(Area2.CoReg)),
cross.area = round(mean(AreaC.CoReg)),
euler.d=TRUE,
scaled=TRUE,
category = c("Data 1", "Data 2"),
print.mode = c("raw","percent"),
fill = alpha(myCol.blue,0.5),
lty = "blank",
cex = 1.2,
cat.cex = 1.5,
cat.dist=0.01,
ext.pos = 0,
ext.dist = c(-0.05,-0.2),
ext.length = 0.1,
ext.line.lwd = 0,
ext.line.lty = "blank"
);
# Venn Diagram for OLS
venn.plot.OLS <- draw.pairwise.venn(
area1 = round(mean(Area1.OLS)),
area2 = round(mean(Area2.OLS)),
cross.area = round(mean(AreaC.OLS)),
euler.d=TRUE,
scaled=TRUE,
category = c("Data 1", "Data 2"),
print.mode = c("raw","percent"),
fill = alpha(myCol.red,0.5),
lty = "blank",
cex = 1.2,
cat.cex = 1.5,
cat.dist=0.01,
ext.pos = 0,
ext.dist = -0.2,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "blank"
);
# Venn Diagram for SVA
venn.plot.SVA <- draw.pairwise.venn(
area1 = round(mean(Area1.SVA)),
area2 = round(mean(Area2.SVA)),
cross.area = round(mean(AreaC.SVA)),
euler.d=TRUE,
scaled=TRUE,
category = c("Data 1", "Data 2"),
print.mode = c("raw","percent"),
fill = alpha(myCol.green,0.5),
lty = "blank",
cex = 1.2,
cat.cex = 1.5,
cat.dist=0.01,
ext.pos = 0,
ext.dist = -0.2,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "blank"
);
# Combined plot
p1 <- as.ggplot(~ grid.draw(venn.plot.OLS))
p1 <- as.ggplot(p1 + ggtitle("OLS")+
theme(title=element_text(size=20),
plot.title = element_text(hjust = 0.5)),
plot.margin = unit(c(1, 1, 1, 1), "cm"))
p2 <- as.ggplot(~ grid.draw(venn.plot.SVA))
p2 <- as.ggplot(p2 + ggtitle("SVA")+
theme(title=element_text(size=20),
plot.title = element_text(hjust = 0.5)),
plot.margin = unit(c(1, 1, 1, 1), "cm"))
p3 <- as.ggplot(~ grid.draw(venn.plot.CoReg))
p3 <- as.ggplot(p3 + ggtitle("CoReg")+
theme(title=element_text(size=20),
plot.title = element_text(hjust = 0.5)),
plot.margin = unit(c(1, 1, 1, 1), "cm"))
Plot.diff.noise <- p1+ p2 +p3+
plot_layout(widths = c(1, 1, 1))+
plot_annotation(title = "",
theme = theme(
plot.title = element_text(size = 22) # Set the overall title size here
))
Plot.diff.noise
