suppressMessages(library(GenomicRanges))
suppressMessages(library(tidyverse))
suppressMessages(library(rtracklayer))
`%+%`<-paste0
seqinfSY <- readRDS("Data/seqinfSY.rds")
rDNA_SY <- GRanges("CP029160.1",IRanges(3879940,3934000),strand="*",seqinfo=seqinfSY)
maskedTy_SY <- readRDS("Data/maskedTy_SY.rds")
masked_ura3d0 <- GRanges("CP029160.1",IRanges(1051200,1051500),strand="*",seqinfo=seqinfSY,type="ura3d0")
masked_SY <- c(maskedTy_SY,masked_ura3d0)
source("Helper_function.r")
ars2keep <- import("Data/ars2keep_lim10k_cl1500_2_dtac1500.bed") %>% sort
ars2keep$score <- NULL
Init overlapping with Ty_chrIII, Ty_chrXVI, ura3d0 and rDNA are removed.
initerSY <- readRDS("Data/initer_SYSY.rds")
initerBY <- readRDS("Data/initer_BYBYSY.rds")
initSY.gr <- with(initerSY %>% filter(type=="Init"),GRanges(seqnames=chrom,ranges=IRanges(start=pmin(x0,x1),end=pmax(x0,x1)),strand="*",seqinfo=seqinfSY,exp=exp))
initSY.gr <- initSY.gr[!overlapsAny(initSY.gr,c(masked_SY,rDNA_SY))]
initBY.gr <- with(initerBY %>% filter(type=="Init"),GRanges(seqnames=chromSY,ranges=IRanges(start=pmin(x0nSY,x1nSY),end=pmax(x0nSY,x1nSY)),strand="*",seqinfo=seqinfSY,exp=exp))
initBY.gr <- initBY.gr[!overlapsAny(initBY.gr,c(masked_SY,rDNA_SY))]
Computing coverages
### normalise by the sum with respect to the genome length
ICgn_SY <- coverage(initSY.gr)/sum(coverage(initSY.gr))*seqlengths(seqinfSY)
ICgn_BY <- coverage(initBY.gr)/sum(coverage(initBY.gr))*seqlengths(seqinfSY)
export(ICgn_SY,con="BigWig/ICgn_SY.bw")
export(ICgn_BY,con="BigWig/ICgn_BY.bw")
initSY1.gr <- initSY.gr[initSY.gr$exp=="SY_rep1"]
initSY2.gr <- initSY.gr[initSY.gr$exp=="SY_rep2"]
initSY3.gr <- initSY.gr[initSY.gr$exp=="SY_rep3"]
initBY1.gr <- initBY.gr[initBY.gr$exp=="BY_rep1"]
initBY2.gr <- initBY.gr[initBY.gr$exp=="BY_rep2"]
initBY3.gr <- initBY.gr[initBY.gr$exp=="BY_rep3"]
ICgn_SY1 <- coverage(initSY1.gr)/sum(coverage(initSY1.gr))*seqlengths(seqinfSY)
ICgn_BY1 <- coverage(initBY1.gr)/sum(coverage(initBY1.gr))*seqlengths(seqinfSY)
export(ICgn_SY1,con="BigWig/ICgn_SY1.bw")
export(ICgn_BY1,con="BigWig/ICgn_BY1.bw")
ICgn_SY2 <- coverage(initSY2.gr)/sum(coverage(initSY2.gr))*seqlengths(seqinfSY)
ICgn_BY2 <- coverage(initBY2.gr)/sum(coverage(initBY2.gr))*seqlengths(seqinfSY)
export(ICgn_SY2,con="BigWig/ICgn_SY2.bw")
export(ICgn_BY2,con="BigWig/ICgn_BY2.bw")
ICgn_SY3 <- coverage(initSY3.gr)/sum(coverage(initSY3.gr))*seqlengths(seqinfSY)
ICgn_BY3 <- coverage(initBY3.gr)/sum(coverage(initBY3.gr))*seqlengths(seqinfSY)
export(ICgn_SY3,con="BigWig/ICgn_SY3.bw")
export(ICgn_BY3,con="BigWig/ICgn_BY3.bw")
To use the number of init overlapping Ori, we choose to keep only the initiation events overlapping only one of the active ori we selected
initSY1.gr1 <- initSY1.gr[countOverlaps(initSY1.gr,ars2keep)==1]
initSY2.gr1 <- initSY2.gr[countOverlaps(initSY2.gr,ars2keep)==1]
initSY3.gr1 <- initSY3.gr[countOverlaps(initSY3.gr,ars2keep)==1]
initBY1.gr1 <- initBY1.gr[countOverlaps(initBY1.gr,ars2keep)==1]
initBY2.gr1 <- initBY2.gr[countOverlaps(initBY2.gr,ars2keep)==1]
initBY3.gr1 <- initBY3.gr[countOverlaps(initBY3.gr,ars2keep)==1]
### generate the coverages
ICgn1_SY1 <- coverage(initSY1.gr1)/sum(coverage(initSY1.gr1))*seqlengths(seqinfSY)
ICgn1_BY1 <- coverage(initBY1.gr1)/sum(coverage(initBY1.gr1))*seqlengths(seqinfSY)
export(ICgn1_SY1,con="BigWig/ICgn1_SY1.bw")
export(ICgn1_BY1,con="BigWig/ICgn1_BY1.bw")
ICgn1_SY2 <- coverage(initSY2.gr1)/sum(coverage(initSY2.gr1))*seqlengths(seqinfSY)
ICgn1_BY2 <- coverage(initBY2.gr1)/sum(coverage(initBY2.gr1))*seqlengths(seqinfSY)
export(ICgn1_SY2,con="BigWig/ICgn1_SY2.bw")
export(ICgn1_BY2,con="BigWig/ICgn1_BY2.bw")
ICgn1_SY3 <- coverage(initSY3.gr1)/sum(coverage(initSY3.gr1))*seqlengths(seqinfSY)
ICgn1_BY3 <- coverage(initBY3.gr1)/sum(coverage(initBY3.gr1))*seqlengths(seqinfSY)
export(ICgn1_SY3,con="BigWig/ICgn1_SY3.bw")
export(ICgn1_BY3,con="BigWig/ICgn1_BY3.bw")
### use these init cov at Ori to do MAnorm2
ars2 <- ars2keep
ars2$initBY1 <- sapply(seq_along(ars2),function(x) countOverlaps(ars2[x],initBY1.gr1))
ars2$initBY2 <- sapply(seq_along(ars2),function(x) countOverlaps(ars2[x],initBY2.gr1))
ars2$initBY3 <- sapply(seq_along(ars2),function(x) countOverlaps(ars2[x],initBY3.gr1))
ars2$initSY1 <- sapply(seq_along(ars2),function(x) countOverlaps(ars2[x],initSY1.gr1))
ars2$initSY2 <- sapply(seq_along(ars2),function(x) countOverlaps(ars2[x],initSY2.gr1))
ars2$initSY3 <- sapply(seq_along(ars2),function(x) countOverlaps(ars2[x],initSY3.gr1))
We then decided to use these counts of initiations at Ori in the MAnorm2 framework
require(MAnorm2)
#packageVersion("MAnorm2")
# [1] ‘1.2.2’
count2test <- mcols(ars2)
x <- as.matrix(count2test[,2:7])
eff.mat <- x
occ.mat <- eff.mat>0
test.mat <- cbind(eff.mat,occ.mat)
norm <- normalize(test.mat,count=c(1:3),occupancy=c(7:9))
norm <- normalize(norm,count=c(4:6),occupancy=c(10:12))
conds <- list( BY= bioCond(norm[c(1:3)],norm[c(7:9)],name="BY"),
SY=bioCond(norm[c(4:6)],norm[c(10:12)],name="SY"))
conds <- normBioCond(conds)
conds <- fitMeanVarCurve(conds, method = "parametric", occupy.only = TRUE,init.coef = c(0.1, 10))
During the parametric fit procedure:
After iteration 1: coef = (0.012839, 1.2317); 0 (0.00%) outlier(s) detected
Converged.
res <- diffTest(conds[[1]], conds[[2]])
ars.res <- bind_cols(as_tibble(mcols(ars2)),res)
min(ars.res$padj)
[1] 2.83347e-08
# 2.83347e-08
ars.res %>% filter(padj<=0.01) %>% nrow
[1] 20
# 20
ars2export <- ars2[ars2$name %in% (ars.res %>% filter(padj<=1e-2) %>% pull(name))]
export(ars2export,con="Data/ARSinitMAnorm2_1e-2.bed")
saveRDS(ars.res,file="Data/ARSinitMAnorm2.rds")
Please note that the test is based on the raw counting of the number of init without . We choose to delegate any normalisation procedure to MAnorm2. As a results, the ICgn might sometimes not reflect what the MAnorm2 describe as significant.
We also generated Termination coverage the same way.
terSY.gr <- with(initerSY %>% filter(type=="Ter"),GRanges(seqnames=chrom,ranges=IRanges(start=pmin(x0,x1),end=pmax(x0,x1)),strand="*",seqinfo=seqinfSY,exp=exp))
terSY.gr <- terSY.gr[!overlapsAny(terSY.gr,c(maskedTy_SY,rDNA_SY))]
terBY.gr <- with(initerBY %>% filter(type=="Ter"),GRanges(seqnames=chromSY,ranges=IRanges(start=pmin(x0nSY,x1nSY),end=pmax(x0nSY,x1nSY)),strand="*",seqinfo=seqinfSY,exp=exp))
terBY.gr <- terBY.gr[!overlapsAny(terBY.gr,c(maskedTy_SY,rDNA_SY))]
TCgn_SY <- coverage(terSY.gr)/sum(coverage(terSY.gr))*seqlengths(seqinfSY)
TCgn_BY <- coverage(terBY.gr)/sum(coverage(terBY.gr))*seqlengths(seqinfSY)
export(TCgn_SY,con="BigWig/TCgn_SY.bw")
export(TCgn_BY,con="BigWig/TCgn_BY.bw")
### looks OK
### idem but separating the replicate
terSY1.gr <- terSY.gr[terSY.gr$exp=="SY_rep1"]
terSY2.gr <- terSY.gr[terSY.gr$exp=="SY_rep2"]
terSY3.gr <- terSY.gr[terSY.gr$exp=="SY_rep3"]
terBY1.gr <- terBY.gr[terBY.gr$exp=="BY_rep1"]
terBY2.gr <- terBY.gr[terBY.gr$exp=="BY_rep2"]
terBY3.gr <- terBY.gr[terBY.gr$exp=="BY_rep3"]
TCgn_SY1 <- coverage(terSY1.gr)/sum(coverage(terSY1.gr))*seqlengths(seqinfSY)
TCgn_BY1 <- coverage(terBY1.gr)/sum(coverage(terBY1.gr))*seqlengths(seqinfSY)
export(TCgn_SY1,con="BigWig/TCgn_SY1.bw")
export(TCgn_BY1,con="BigWig/TCgn_BY1.bw")
TCgn_SY2 <- coverage(terSY2.gr)/sum(coverage(terSY2.gr))*seqlengths(seqinfSY)
TCgn_BY2 <- coverage(terBY2.gr)/sum(coverage(terBY2.gr))*seqlengths(seqinfSY)
export(TCgn_SY2,con="BigWig/TCgn_SY2.bw")
export(TCgn_BY2,con="BigWig/TCgn_BY2.bw")
TCgn_SY3 <- coverage(terSY3.gr)/sum(coverage(terSY3.gr))*seqlengths(seqinfSY)
TCgn_BY3 <- coverage(terBY3.gr)/sum(coverage(terBY3.gr))*seqlengths(seqinfSY)
export(TCgn_SY3,con="BigWig/TCgn_SY3.bw")
export(TCgn_BY3,con="BigWig/TCgn_BY3.bw")