### methylation residuals#####

### data input:

data <- data frame including: 
[1] "Cohort_ID" 
[2] "Array_Number"
[3] "Position_on_Array" 
[4] "Gender"
[5] "Age"
[6] "nrgranulocytes"   
[7] "nrlymphocytes"    
[8] "nrmonocytes"
[9]-[2557]   "NORMALIZED DNA METHYLATION DATA in Beta-values for CpGs, adjust columns when needed" 

#optional : replace the NA with mean value 

for(i in 9:2557){
  data[is.na(data[,i]), i] <- mean(data[,i], na.rm = TRUE)
}

##only include complete cases
df <- data[complete.cases(data), ]
names(df[1:8])
   [1] "Cohort_ID" 
   [2] "Array_Number"
   [3] "Position_on_Array" 
   [4] "Gender"
   [5] "Age"
   [6] "nrgranulocytes"   
   [7] "nrlymphocytes"    
   [8] "nrmonocytes"
  
#calculate residuales
library(lme4)
## please adjust "(df[,9:2557])" to the columns that include the CpGs
mod<- apply(as.data.frame (df[,9:2557]),2,FUN=function(x){lmer(x~(1|df[,2])+(1|df[,3])+ df[,4]+ df[,5]+ df[,6]+ df[,7]+ df[,8], data=df)})

# extract residuals
res<- sapply(mod, function(x){resid(x)})
cpg<- data.frame (res)
matrix_cpg<- as.matrix (res)
colnames(matrix_cpg) <- paste(colnames(matrix_cpg), "res",  sep = "_")
allcpg<- cbind(df, matrix_cpg)
write.table(allcpg, "New_COV_CpGs_res.txt", quote=F, row.names=F)


### gene expression residuals#####

data <- data frame including: 
[1] "Cohort_ID" 
[2] "Plate ID"
[3] "RNA quality score" 
[4] "Gender"
[5] "Age"
[6] "nrgranulocytes"   
[7] "nrlymphocytes"    
[8] "nrmonocytes"
[9]-[451] "NORMALIZED Gene expression data for gene expression probes, adjust columns when needed" 

df <- data[complete.cases(data), ]

##please adjust "(df[,9:451])" to the columns that include the gene expression probes
mod<- apply(as.data.frame (df[,9:451]),2,FUN=function(x){lmer(x~(1|df[,2])+ df[,3]+ df[,4]+ df[,5]+ df[,6]+ df[,7]+ df[,8], data=df)})

# extract residuals
res<- sapply(mod, function(x){resid(x)})
cpg<- data.frame (res)
matrix_genes<- as.matrix (res)
colnames(matrix_genes) <- paste(colnames(matrix_genes), "res",  sep = "_")
allgenes<- cbind(df, matrix_genes)
write.table(allgenes, "New_COV_Genes_res.txt", quote=F, row.names=F)

## eQTM analysis ##

mer<- merge(allcpg, allgenes, by="Cohort_ID")
dataset <-mer
cols_dataset <- colnames(dataset)

setwd("/results_eQTM/") ## make a new directory for the csv files per CpG site

## please change the columns if they do not match with your dataset
for(i in 2558:5106){ ## columns that include the residuals for the CpGs
  
  eqtm <- apply(as.data.frame(dataset[,5557:5999]),2,FUN=function(x){lm(x~dataset[,i])})  ## columns that include residuals gene probes
  
  pvals<- sapply (eqtm, function(x){summary(x)$coefficients[,4]})
  coeff<- sapply (eqtm, function(x){summary(x)$coefficients[,1]})
  pvals<- data.frame (pvals)
  
  coeff<- coeff[2,]
  pvals1<- pvals[2,]
  df<- data.frame(pvals1)
  tpval<-t(df)
  df<- data.frame (tpval)
  coeff<- data.frame (coeff)
  df2 <- cbind(df,coeff)
  
  df2$probe_exp<- row.names(df2)
  names(df2)[1] <- paste(cols_dataset[i],"_pval", sep = "", collapse = NULL)
  names(df2)[2] <- paste("coeff")
  file_output <-paste(cols_dataset[i],"_eQTM.csv", sep = "", collapse = NULL)
  write.table (df2, file_output, row.names=F)
}

