1 Load Required Packages and Source files

library(reshape2)
library(ggplot2)
library(viridis)
library(viridisLite)
library(pracma)
library(pROC)
library(ggplotify) 
library(xtable)
library(sva)
library(matrixcalc)
library(Matrix)
library(ggVennDiagram)
library(VennDiagram)
library(patchwork)
library(locfdr)
library(RColorBrewer)
library(knitr)

setwd("C:\\Users\\hylee\\Documents\\Research\\CoReg\\Simulation")
source("Measure_new.R")
source("scfa_revision.R")

2 Simulation

set.seed(0314,sample.kind="Rounding")
Sim <-100 # repetition of simulation
G <- 500
Block <- c(100,100,100) # 3 dense block (each size=100)
Rho <- 0.8
Nrho <- -0.4
cohd1 <- 0.3
cohd2 <- 0.3

Area1.CoReg <- Area1.OLS <- Area1.SVA <- rep(0,Sim)
Area2.CoReg <- Area2.OLS <- Area2.SVA <- rep(0,Sim)
AreaC.CoReg <- AreaC.OLS <- AreaC.SVA <- rep(0,Sim)

# Signal Strength in bocks
rho1 <- 0.8
rho2 <- 0.6
rho3 <- 0.4
rho <- 0.8 # for noise

block <- Block
block1 <- block[1]
block2 <- block[2]
block3 <- block[3]

# Generating Covariance matrix
Sigma=eye(G);
Sigma_1=squareform(rho1*rep(1,block[1]*(block[1]-1)/2)); # Use direct rho (actually r)
Sigma_2=squareform(rho2*rep(1,block[2]*(block[2]-1)/2)); # Use direct rho (actually r)
Sigma_3=squareform(rho3*rep(1,block[3]*(block[3]-1)/2)); # Use direct rho (actually r)

Sigma=eye(G);
Sigma[1:block1,1:block1]=Sigma[1:block1,1:block1]+Sigma_1;
Sigma[(block1+1):(block1+block2),(block1+1):(block1+block2)]=Sigma[(block1+1):(block1+block2),(block1+1):(block1+block2)]+Sigma_2;  
Sigma[(block1+block2+1):(block1+block2+block3),(block1+block2+1):(block1+block2+block3)]=Sigma[(block1+block2+1):(block1+block2+block3),(block1+block2+1):(block1+block2+block3)]+Sigma_3;  
Sigma[1:block1,(block1+1):(block1+block2)]= Nrho
Sigma[(block1+1):(block1+block2),1:block1]= Nrho

noise <- rep(0,G*(G-1)/2)
length.noise <- length(noise)
rate <- 0.01
N.noise <- length(noise)*rate
noise[sample(length.noise,N.noise,replace=FALSE)] <- rep(rho,N.noise)
Sigma_noise <- squareform(noise)
Sigma <- Sigma+Sigma_noise
Sigma <- Sigma*(Sigma<=1)
Sigma <- as.matrix(nearPD(Sigma,corr = TRUE, keepDiag = TRUE)$mat)  
R = chol(Sigma);
R[abs(R)<0.01] <- 0


# Results save
Auc.mean <- matrix(0,nrow=length(Rho),ncol=3)
Auc.sd <- matrix(0,nrow=length(Rho),ncol=3)
Measures <- array(0,c(3,5,length(Rho)))
Measures.sd <- array(0,c(3,5,length(Rho)))


for(j in 1:length(Rho)){

  block <- Block
  block1 <- block[1]
  block2 <- block[2]
  block3 <- block[3]  
  
  R = chol(Sigma);
  n=100 
  p1=0.9
  p0=0.1;

  
  ROC.LM.plot.list <- array(0,c(G+1,Sim,2))
  ROC.SCFA.plot.list <- array(0,c(G+1,Sim,2)) 
  ROC.SVA.plot.list <- array(0,c(G+1,Sim,2)) 
  
  
  SVA.AUC <- c(0,Sim)
  LM.AUC <- c(0,Sim)
  SCFA.AUC <- c(0,Sim)
  
  
  Measure.SCFA <- matrix(0,nrow=Sim,ncol=5)
  Measure.LM <- matrix(0,nrow=Sim,ncol=5)
  Measure.SVA <- matrix(0,nrow=Sim,ncol=5)
  
  
# Repetition 
    for(s in 1:Sim){
    print(s)
      
    # First Data
    X <- rep(c(0,1),each=n)
    B <- matrix(rep(c(1*(seq(block1)<=block1*0.9), 1*(seq(block2)<=block2*0.9),1*(seq(block3)<=block3*0.9),1*(seq(G-sum(Block))<=0.1*(G-sum(Block)))),each=2*n),ncol=G)
    mu <- matrix(rep(X,G),ncol=G)*B*cohd1
    data <- mu+matrix(rnorm(2*n*G,0,sqrt(0.5)),ncol=G)%*%R #0.25
    Y <- data
    
    
    # Second Data
    X2 <- rep(c(0,1),each=n)
    B2 <- matrix(rep(c(1*(seq(block1)<=block1*0.9), 1*(seq(block2)<=block2*0.9),1*(seq(block3)<=block3*0.9),1*(seq(G-sum(Block))<=0.1*(G-sum(Block)))),each=2*n),ncol=G)
    mu2 <- matrix(rep(X,G),ncol=G)*B*cohd2
    data2 <- mu+matrix(rnorm(2*n*G,0,sqrt(1)),ncol=G)%*%R #0.25
    Y2 <- data2   
    
    
    # Fit LM
    lm.fit <- lm(as.matrix(Y)~X)
    LM.Result <- coef(summary(lm.fit))
    Residual.LM <- residuals(lm.fit)
    q <- ncol(Y)
    Pval.LM <- sapply(1:q, function(i) LM.Result[[i]][2,4])
    length(which(Pval.LM<0.05))
    Qval.LM <- p.adjust(Pval.LM,method="BH")
    length(which(Qval.LM<0.05))
    T.LM <- sapply(1:q, function(i) LM.Result[[i]][2,3]) # t-values for GLM  
    
    
    # Fit 2nd LM
    lm.fit2 <- lm(as.matrix(Y2)~X2)
    LM.Result2 <- coef(summary(lm.fit2))
    Residual.LM2 <- residuals(lm.fit2)
    q <- ncol(Y2)
    Pval.LM2 <- sapply(1:q, function(i) LM.Result2[[i]][2,4])
    length(which(Pval.LM2<0.05))
    Qval.LM2 <- p.adjust(Pval.LM2,method="BH")
    length(which(Qval.LM2<0.05))
    T.LM2 <- sapply(1:q, function(i) LM.Result2[[i]][2,3]) # t-values for GLM      
    
    
    
    
    # Fit CoReg    
    # Tuning 1st
    Lambda <- seq(0.3,0.8,length.out = 20)
    Norm <- rep(0,length(Lambda))
    for(l in 1:length(Lambda)){
      dense.result.temp <- dense(cor(Residual.LM),threshold = c(rho2-0.4), lambda = Lambda[l])
      ord.index.temp <- dense.result.temp$Clist
      scfa.fit.temp <- scfa(Residual.LM, dense.result.temp$CID, dense.result.temp$Clist)
      FC.temp <- t(scfa.fit.temp$factorscore)
      Norm[l] <- norm(cov(Residual.LM)- scfa.fit.temp$loading%*%cov(FC.temp)%*%t(scfa.fit.temp$loading),type="F")+sum(svd(scfa.fit.temp$loading)$d)
    }
    best.Lambda <- Lambda[which.min(Norm)]
    
    # Fit CoReg with optimal
    dense.result <- dense(cor(Residual.LM),threshold = c(rho2-0.4), lambda = best.Lambda)
    ord.index <- dense.result$Clist
    scfa.fit<- scfa(Residual.LM, dense.result$CID, dense.result$Clist)
    FC <- t(scfa.fit$factorscore)
    SCFA <- lm(as.matrix(Y)~X+FC)
    SCFA.Result <- coef(summary(SCFA))
    cormat.cfa <- cor(SCFA$residuals)
    colnames(cormat.cfa) <- NULL
    rownames(cormat.cfa) <- NULL
    melt.cfa <-  melt(cormat.cfa)
    Pval.SCFA <- sapply(1:q, function(i) SCFA.Result[[i]][2,4])
    length(which(Pval.SCFA<0.05))
    Qval.SCFA <- p.adjust(Pval.SCFA,method="BH")
    length(which(Qval.SCFA<0.05))
    T.SCFA <- sapply(1:q, function(i) SCFA.Result[[i]][2,3]) # t-values for CoReg
    
    
    # Tuning 2nd
    Lambda <- seq(0.3,0.8,length.out = 20)
    Norm2 <- rep(0,length(Lambda))
    for(l in 1:length(Lambda)){
      dense.result.temp2 <- dense(cor(Residual.LM2),threshold = c(rho2-0.4), lambda = Lambda[l])
      ord.index.temp2 <- dense.result.temp2$Clist
      scfa.fit.temp2 <- scfa(Residual.LM2, dense.result.temp2$CID, dense.result.temp2$Clist)
      FC.temp2 <- t(scfa.fit.temp2$factorscore)
      Norm2[l] <- norm(cov(Residual.LM2)- scfa.fit.temp2$loading%*%cov(FC.temp2)%*%t(scfa.fit.temp2$loading),type="F")+sum(svd(scfa.fit.temp2$loading)$d)
    }
    best.Lambda <- Lambda[which.min(Norm2)]
    # Fit CoReg with optimal
    dense.result2 <- dense(cor(Residual.LM2),threshold = c(rho2-0.4), lambda = best.Lambda)
    ord.index2 <- dense.result2$Clist
    scfa.fit2<- scfa(Residual.LM2, dense.result2$CID, dense.result2$Clist)
    FC2 <- t(scfa.fit2$factorscore)
    SCFA2 <- lm(as.matrix(Y2)~X2+FC2)
    SCFA.Result2 <- coef(summary(SCFA2))
    Pval.SCFA2 <- sapply(1:q, function(i) SCFA.Result2[[i]][2,4])
    length(which(Pval.SCFA2<0.05))
    Qval.SCFA2 <- p.adjust(Pval.SCFA2,method="BH")
    length(which(Qval.SCFA2<0.05))
    T.SCFA2 <- sapply(1:q, function(i) SCFA.Result2[[i]][2,3]) # t-values for SECM
    
    
    
    ### Fit SVA    
    ### SVA for the first data
    X <- as.matrix(X,ncol=1)
    sva.fit <- sva(t(Y),mod=cbind(1,X),mod0=matrix(1,nrow=nrow(X)))   
    G.factor <- sva.fit$sv
    sva.fit <- lm(as.matrix(Y)~X+G.factor)
    SVA.Result <- coef(summary(sva.fit))
    Residual.SVA <- residuals(sva.fit)
    cormat.sva <- cor(Residual.SVA)
    q <- ncol(Y)
    colnames(cormat.sva) <- NULL
    rownames(cormat.sva) <- NULL
    melt.sva <-  melt(cormat.sva)
    Pval.SVA <- sapply(1:q, function(i) SVA.Result[[i]][2,4])
    length(which(Pval.SVA<0.05))
    Qval.SVA <- p.adjust(Pval.SVA,method="BH")
    length(which(Qval.SVA<0.05))
    
    
    
    ### SVA for the second data
    X2 <- as.matrix(X2,ncol=1)
    sva.fit2 <- sva(t(Y2),mod=cbind(1,X2),mod0=matrix(1,nrow=nrow(X2)))  
    G.factor2 <- sva.fit2$sv
    sva.fit2 <- lm(as.matrix(Y2)~X2+G.factor2)
    SVA.Result2 <- coef(summary(sva.fit2))
    Residual.SVA2 <- residuals(sva.fit2)
    Pval.SVA2 <- sapply(1:q, function(i) SVA.Result2[[i]][2,4])
    length(which(Pval.SVA2<0.05))
    Qval.SVA2 <- p.adjust(Pval.SVA2,method="BH")
    length(which(Qval.SVA2<0.05))    
    
    
    nonzero <- which(B[1,]>0)
    TP1.SCFA <- c(1*(Qval.SCFA[nonzero]<0.05))
    TP2.SCFA <- c(1*(Qval.SCFA2[nonzero]<0.05))    
    
    TP1.SVA <- c(1*(Qval.SVA[nonzero]<0.05))
    TP2.SVA <- c(1*(Qval.SVA2[nonzero]<0.05)) 
    
    
    TP1.LM <- c(1*(Qval.LM[nonzero]<0.05))
    TP2.LM <- c(1*(Qval.LM2[nonzero]<0.05)) 
    
    
    
    Area1.CoReg[s] <- length(which(Qval.SCFA<0.05))
    Area2.CoReg[s] <- length(which(Qval.SCFA2<0.05))
    AreaC.CoReg[s] <- length(intersect(which(Qval.SCFA<0.05),which(Qval.SCFA2<0.05)))
    
    Area1.OLS[s] <- length(which(Qval.LM<0.05))
    Area2.OLS[s] <- length(which(Qval.LM2<0.05))
    AreaC.OLS[s] <- length(intersect(which(Qval.LM<0.05),which(Qval.LM2<0.05)))
    
    Area1.SVA[s] <- length(which(Qval.SVA<0.05))
    Area2.SVA[s] <- length(which(Qval.SVA2<0.05))
    AreaC.SVA[s] <- length(intersect(which(Qval.SVA<0.05),which(Qval.SVA2<0.05)))
    
  }
  
  
}

3 Results

3.1 Number of overlapped True Positive findings in each iteration

Repeat <- seq(1:Sim)
Common <- data.frame(Repeat,AreaC.OLS,AreaC.SVA,AreaC.CoReg)
colnames(Common) <- c("Repeat","OLS","SVA","CoReg")
kable(Common)
Repeat OLS SVA CoReg
1 63 13 130
2 75 83 244
3 16 106 223
4 36 78 200
5 17 39 190
6 0 71 157
7 99 93 223
8 214 211 269
9 9 130 186
10 76 88 229
11 118 115 228
12 92 101 218
13 126 92 169
14 46 81 178
15 21 35 192
16 18 92 197
17 1 10 168
18 99 57 161
19 1 96 180
20 104 64 215
21 56 100 243
22 71 104 182
23 64 81 134
24 0 93 128
25 98 95 133
26 78 46 115
27 120 122 253
28 79 98 134
29 107 100 200
30 100 166 196
31 25 92 200
32 54 19 204
33 97 101 179
34 23 165 203
35 145 172 254
36 8 83 189
37 96 71 227
38 94 167 246
39 95 97 248
40 2 104 199
41 106 98 200
42 89 66 245
43 104 96 188
44 1 100 175
45 51 99 210
46 81 88 214
47 81 99 244
48 1 15 146
49 1 96 159
50 46 140 201
51 29 13 133
52 59 30 195
53 134 98 224
54 0 52 210
55 4 5 116
56 0 66 169
57 46 102 223
58 76 88 192
59 49 160 189
60 134 165 209
61 48 129 210
62 20 69 206
63 109 119 194
64 65 112 207
65 96 86 153
66 91 94 237
67 1 59 175
68 0 89 211
69 195 175 268
70 42 6 139
71 106 90 225
72 93 91 234
73 71 94 178
74 2 7 139
75 3 106 207
76 86 120 221
77 29 100 215
78 50 98 204
79 136 12 179
80 22 119 239
81 4 88 177
82 55 95 155
83 131 106 228
84 134 97 226
85 128 13 219
86 95 105 115
87 64 92 137
88 64 19 189
89 126 98 179
90 16 2 209
91 49 98 197
92 49 123 237
93 26 58 118
94 2 99 131
95 0 90 183
96 17 52 200
97 22 25 233
98 108 94 241
99 3 95 194
100 177 118 256

3.2 Average number of overlapped True Positive findings (Venn Diagram)

# Defind colors for each method
myCol <- brewer.pal(3, "Pastel2")
myCol.red <- c("#FF6347","#FFA07A")
myCol.blue <- c("#4169E1","#6495ED")
myCol.green <- c("#98FB98","#C1E1C1")


# Venn Diagram for CoReg
venn.plot.CoReg <- draw.pairwise.venn(
  area1 = round(mean(Area1.CoReg)),
  area2 = round(mean(Area2.CoReg)),
  cross.area = round(mean(AreaC.CoReg)),
  euler.d=TRUE,
  scaled=TRUE,
  category = c("Data 1", "Data 2"),
  print.mode = c("raw","percent"),
  fill = alpha(myCol.blue,0.5),
  lty = "blank",
  cex = 1.2,
  cat.cex = 1.5,
  cat.dist=0.01,
  ext.pos = 0,
  ext.dist = c(-0.05,-0.2),
  ext.length = 0.1,
  ext.line.lwd = 0,
  ext.line.lty = "blank"
);


# Venn Diagram for OLS
venn.plot.OLS <- draw.pairwise.venn(
  area1 = round(mean(Area1.OLS)),
  area2 = round(mean(Area2.OLS)),
  cross.area = round(mean(AreaC.OLS)),
  euler.d=TRUE,
  scaled=TRUE,
  category = c("Data 1", "Data 2"),
  print.mode = c("raw","percent"),
  fill = alpha(myCol.red,0.5),
  lty = "blank",
  cex = 1.2,
  cat.cex = 1.5,
  cat.dist=0.01,
  ext.pos = 0,
  ext.dist = -0.2,
  ext.length = 0.85,
  ext.line.lwd = 2,
  ext.line.lty = "blank"
);


# Venn Diagram for SVA
venn.plot.SVA <- draw.pairwise.venn(
  area1 = round(mean(Area1.SVA)),
  area2 = round(mean(Area2.SVA)),
  cross.area = round(mean(AreaC.SVA)),
  euler.d=TRUE,
  scaled=TRUE,
  category = c("Data 1", "Data 2"),
  print.mode = c("raw","percent"),
  fill = alpha(myCol.green,0.5),
  lty = "blank",
  cex = 1.2,
  cat.cex = 1.5,
  cat.dist=0.01,
  ext.pos = 0,
  ext.dist = -0.2,
  ext.length = 0.85,
  ext.line.lwd = 2,
  ext.line.lty = "blank"
);
# Combined plot
p1 <- as.ggplot(~ grid.draw(venn.plot.OLS))
p1 <- as.ggplot(p1 + ggtitle("OLS")+
                  theme(title=element_text(size=20),
                        plot.title = element_text(hjust = 0.5)),
                plot.margin = unit(c(1, 1, 1, 1), "cm"))


p2 <- as.ggplot(~ grid.draw(venn.plot.SVA))
p2 <- as.ggplot(p2 + ggtitle("SVA")+
                  theme(title=element_text(size=20),
                        plot.title = element_text(hjust = 0.5)),
                plot.margin = unit(c(1, 1, 1, 1), "cm"))

p3 <- as.ggplot(~ grid.draw(venn.plot.CoReg))
p3 <- as.ggplot(p3 + ggtitle("CoReg")+
                  theme(title=element_text(size=20),
                        plot.title = element_text(hjust = 0.5)),
                plot.margin = unit(c(1, 1, 1, 1), "cm"))

Plot.diff.noise <- p1+ p2 +p3+
  plot_layout(widths = c(1, 1, 1))+
  plot_annotation(title = "",
                  theme = theme(
                    plot.title = element_text(size = 22)  # Set the overall title size here
                  ))

Plot.diff.noise