05_Adding bibliographic data to Ori


In this notebook, we add bibliographic data concerning RIF1 (Hafner et al., 2018), CTF19 (Natsume et al., 2013) and FKH1/FKH2 (Knott et al., 2012) to the known ARS coming from OriDB, and transpose this information to the SY_ARS (see notebook 02_Genome Annotation).

Importing Ori

suppressMessages(library(GenomicRanges))
suppressMessages(library(tidyverse))
suppressMessages(library(rtracklayer))
# Infix shorthand for paste0(): "a" %+% "b" gives "ab".
`%+%` <- paste0
# Directory prefix used for the bibliographic input tables.
bibliodata_path <- "Data_raw/"
# OriDB ARS table. Two chromosome labels are derived from the `chr` column:
# `chrRom` ("chr" + Roman numeral of `chr`) and `chr` itself prefixed with
# "chr". `chrRom` is computed first, so it still sees the original `chr`.
ARS_oriDB <- read_tsv(
    "Data_raw/ARSfromOriDB20231204.txt",
    show_col_types = FALSE
) %>%
    mutate(
        chrRom = "chr" %+% as.roman(chr),
        chr = "chr" %+% chr
    )
### Create a naming scheme for the OriDB ARS
# newname : "<status>_<chrRom>" for Likely/Dubious entries, the OriDB name
#           otherwise.
# newname2: entries that are neither Likely/Dubious nor ARS302 keep their
#           name; all others get "<newname>_<k>", k being the running index
#           of the row inside its `newname` group.
# Finally, a missing `name` is replaced by "putative_ARS".
ARS_oriDB2 <- ARS_oriDB %>%
    mutate(
        newname = if_else(
            status %in% c("Likely", "Dubious"),
            paste(status, chrRom, sep = "_"),
            name
        )
    ) %>%
    group_by(newname) %>%
    mutate(
        newname2 = ifelse(
            !status %in% c("Likely", "Dubious") & name != "ARS302",
            name,
            paste(newname, seq_len(n()), sep = "_")
        )
    ) %>%
    ungroup() %>%
    mutate(name = ifelse(is.na(name), "putative_ARS", name))

RIF1 data

# RIF1 annotations read from Hafner_RIF1.txt and recoded to the
# "rep" / "act" / "null" vocabulary used by the other data sets.
# case_when() is vectorised, so it is applied to the whole column instead of
# being called once per element, and the fallback condition is spelled TRUE
# (T is an ordinary variable that can be reassigned).
ARSrif1 <- read_tsv(
    bibliodata_path %+% "Hafner_RIF1.txt",
    show_col_types = FALSE
) %>%
    mutate(
        rif1_Hafner = case_when(
            rif1_ %in% c("pos") ~ "rep",
            rif1_ == "neg" ~ "act",
            rif1_ == "no difference" ~ "null",
            TRUE ~ NA_character_
        ),
        # NOTE(review): pos/neg are mapped to act/rep here, the reverse of
        # the rif1_Hafner mapping just above -- confirm this is intended.
        # Values matching none of the conditions become NA.
        rif1RBM = case_when(
            rif1_RBM %in% c("pos") ~ "act",
            rif1_RBM == "neg" ~ "rep",
            rif1_RBM == "no difference" ~ "null"
        )
    ) %>%
    # Columns 4 to 6 are dropped by position, so this depends on the column
    # layout of the input file.
    select(-c(4, 5, 6))
# Exact match on chromosome, start and end: OriDB rows without a matching
# RIF1 row get NA in the RIF1 columns.
ARS_merge1 <- left_join(
    ARS_oriDB2,
    ARSrif1,
    by = join_by(chrRom == chrom, start, end)
)

FKH1/FKH2 data

# FKH1/FKH2 table from Knott_FKH12.txt. The first line of the file is skipped
# and the column names are supplied explicitly.
ARSfkh <- read_tsv(
    bibliodata_path %+% "Knott_FKH12.txt",
    skip = 1,
    col_names = c(
        "name", "Other_name", "Chromosome", "start", "end",
        "fkh1", "fkh2", "fkh1fkh2"
    ),
    show_col_types = FALSE
) %>%
    # Positional drop of fkh1 (6), fkh2 (7) and a 9th column that is not
    # named in col_names (presumably a trailing column of the file -- verify).
    select(-c(6, 7, 9)) %>%
    mutate(
        # "chr" + Roman numeral, the same labelling as chrRom in the OriDB table.
        chrom = paste0("chr", as.roman(Chromosome)),
        # 1 -> "rep", -1 -> "act", 0 -> "null"; any other value gives NA.
        fkh1fkh2_Knott = case_when(
            fkh1fkh2 == 1 ~ "rep",
            fkh1fkh2 == -1 ~ "act",
            fkh1fkh2 == 0 ~ "null"
        )
    ) %>%
    select(-fkh1fkh2)
# Annotate every OriDB ARS with the FKH1/FKH2 class of the Knott regions it
# overlaps. Both tables are turned into unstranded GRanges; `ranges` is
# spelled out in full instead of relying on partial argument matching.
ARS_merge1.gr <- with(
    ARS_merge1,
    GRanges(
        seqnames = chrRom,
        ranges = IRanges(start, end),
        strand = "*",
        name = newname2
    )
)

ARSfkh.gr <- with(
    ARSfkh,
    GRanges(
        seqnames = chrom,
        ranges = IRanges(start, end),
        strand = "*",
        fkh1fkh2_Knott = fkh1fkh2_Knott
    )
)

# Query = Knott regions, subject = OriDB ARS.
ol1 <- findOverlaps(ARSfkh.gr, ARS_merge1.gr)
ARS_merge2 <- ARS_merge1
# vapply() guarantees a plain character column with one value per ARS:
#   - no overlapping region (or only NA classes) -> NA
#   - one class among the overlapping regions    -> that class
#   - several distinct classes                   -> classes joined by ";"
# (sapply() would silently return a list column as soon as one ARS overlaps
# more than one region.)
ARS_merge2$fkh1fkh2_Knott <- vapply(
    seq_along(ARS_merge1.gr),
    function(i) {
        region_idx <- queryHits(ol1)[subjectHits(ol1) == i]
        classes <- unique(ARSfkh.gr$fkh1fkh2_Knott[region_idx])
        classes <- classes[!is.na(classes)]
        if (length(classes) == 0) {
            NA_character_
        } else {
            paste(classes, collapse = ";")
        }
    },
    character(1)
)

CTF19 data

# CTF19 annotations read from MyCTF19fromWig20230403.txt.
# CTF19D == "p" is recoded to "act"; every other value, including NA, to
# "null". case_when() is applied to the whole column and the fallback is
# spelled TRUE rather than the reassignable T.
ARSctf19 <- read_tsv(
    bibliodata_path %+% "MyCTF19fromWig20230403.txt",
    show_col_types = FALSE
) %>%
    # NOTE(review): presumably shifts 0-based starts to the 1-based
    # coordinates used by the OriDB table -- confirm.
    mutate(start = start + 1) %>%
    mutate(ctf19_Natsume = case_when(CTF19D == "p" ~ "act", TRUE ~ "null")) %>%
    dplyr::rename(chrRom = chrom)

# Columns 1, 2, 3 and 8 are picked by position; they must contain the join
# keys (chrRom, start, end) and, presumably, ctf19_Natsume as column 8 --
# verify against the input file layout.
# full_join keeps the rows of both tables, matched or not.
ARS_merge3 <- full_join(
    ARS_merge2,
    ARSctf19 %>% select(c(1, 2, 3, 8)),
    by = join_by(start, end, chrRom)
)

write_tsv(ARS_merge3, file = "Data/ARS_withBiblioData.tsv")

Transposition to BY_SY Ori

# Export one class of ARS as a BED file.
#
# Keeps the rows of `ars` that have a non-missing `chromSY` and whose column
# `column` equals `value` (rows where that column is NA are dropped by
# filter()), builds a GRanges from chromSY / startnSY / endnSY with the ARS
# name as metadata, writes it to `path` with rtracklayer::export() and
# returns the GRanges.
export_sy_subset <- function(ars, column, value, path) {
    kept <- ars %>%
        filter(!is.na(chromSY), .data[[column]] == value)
    gr <- with(
        kept,
        GRanges(
            seqnames = chromSY,
            ranges = IRanges(startnSY, endnSY),
            name = name
        )
    )
    rtracklayer::export(gr, con = path)
    gr
}

ARS_BYSY <- readRDS("Data/ARS_BYonSY.rds")
ARS_merge4 <- ARS_merge3 %>% dplyr::rename(chromROM = chrRom)
# The bibliographic table is joined on (name, chromROM): its `newname2`
# becomes `name`, and its start/end are renamed startsc1/endsc1 before the
# join. The result is sorted on chromROM and `start`.
ARS_merge5 <- full_join(
    ARS_BYSY,
    ARS_merge4 %>%
        select(-name) %>%
        dplyr::rename(name = newname2, startsc1 = start, endsc1 = end),
    by = join_by(name, chromROM)
) %>%
    arrange(chromROM, start)
write_tsv(ARS_merge5, file = "Data/ARS_withBiblioData_namesBYSY.tsv")

# One BED file per annotation class.
ARSwCTF19 <- export_sy_subset(
    ARS_merge5, "ctf19_Natsume", "act", "Data/ARS_SYwCTF19.bed"
)
ARSwRIF1p <- export_sy_subset(
    ARS_merge5, "rif1_Hafner", "rep", "Data/ARS_SYwRIF1r.bed"
)
ARSwRIF1n <- export_sy_subset(
    ARS_merge5, "rif1_Hafner", "act", "Data/ARS_SYwRIF1a.bed"
)
ARSwFKHa <- export_sy_subset(
    ARS_merge5, "fkh1fkh2_Knott", "act", "Data/ARS_SYwFKHa.bed"
)
ARSwFKHr <- export_sy_subset(
    ARS_merge5, "fkh1fkh2_Knott", "rep", "Data/ARS_SYwFKHr.bed"
)

There are duplicated names, generated by transposition and split-mapping, for Dubious_chrIII_3, Dubious_chrIII_5, Dubious_chrX_2 and ARS1106.5.

LS0tCnRpdGxlOiAiQllTWSBwcm9qZWN0IE5vdGVib29rIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tICAKIyAwNV9BZGRpbmcgYmlibGlvZ3JhcGhpYyBkYXRhIHRvIE9yaSAgCgoqKiogIAoKSW4gdGhpcyBub3RlYm9vaywgd2UgYWRkIGJpYmxpb2dyYXBoaWMgZGF0YSBjb25jZXJuaW5nIFtSSUYxXShIYWZuZXIgZXQgYWwuLCAyMDE4KSwgW0NURjE5XShOYXRzdW1lIGV0IGFsLiwgMjAxMyksIFtGS0gxL0ZLSDJdKEtub3R0IGV0IGFsLiwgMjAxMikgdG8ga25vd24gQVJTIGNvbWluZyBmcm9tIE9yaURCIGFuZCB0cmFuc3Bvc2UgdGhlc2UgaW5mb3JtYXRpb24gdG8gdGhlIFNZX0FSUyAoc2VlIG5vdGVib29rIFswMl9HZW5vbWUgQW5ub3RhdGlvbl0oKSkuICAKCiMjIEltcG9ydGluZyBPcmkKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0Kc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KEdlbm9taWNSYW5nZXMpKQpzdXBwcmVzc01lc3NhZ2VzKGxpYnJhcnkodGlkeXZlcnNlKSkKc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KHJ0cmFja2xheWVyKSkKYCUrJWA8LSBwYXN0ZTAKYmlibGlvZGF0YV9wYXRoIDwtICJEYXRhX3Jhdy8iCkFSU19vcmlEQiA8LSByZWFkX3RzdigiRGF0YV9yYXcvQVJTZnJvbU9yaURCMjAyMzEyMDQudHh0IixzaG93X2NvbF90eXBlcyA9IEZBTFNFKSAlPiUKCW11dGF0ZShjaHJSb209ImNociIgJSslIGFzLnJvbWFuKGNocikpJT4lIAoJbXV0YXRlKGNocj0iY2hyIiAlKyUgY2hyKQojIyMgY3JlYXRlIGEgbmFtaW5nIGZvciBBUlMgb3JpREIKQVJTX29yaURCMiA8LSBBUlNfb3JpREIgJT4lCgltdXRhdGUobmV3bmFtZT1wbWFwX2NociguLCBmdW5jdGlvbihuYW1lLGNoclJvbSxzdGF0dXMsLi4uKSB7CgkJaWYgKHN0YXR1cyAlaW4lIGMoIkxpa2VseSIsIkR1YmlvdXMiKSkKCQkJe3Jlcz1wYXN0ZShzdGF0dXMsY2hyUm9tLHNlcD0iXyIpfQoJCQllbHNlCgkJCXtyZXM9bmFtZX0KCQkJcmV0dXJuKHJlcykKCQl9KSklPiUKCWdyb3VwX2J5KG5ld25hbWUpICU+JQoJbXV0YXRlKG5ld25hbWUyPWlmZWxzZSgoIXN0YXR1cyAlaW4lIGMoIkxpa2VseSIsIkR1YmlvdXMiKSAmIG5hbWUhPSJBUlMzMDIiKSxuYW1lLHBhc3RlKG5ld25hbWUsMTpuKCksc2VwPSJfIikpKSAlPiUgCgl1bmdyb3VwICU+JQoJbXV0YXRlKG5hbWU9aWZlbHNlKGlzLm5hKG5hbWUpLCJwdXRhdGl2ZV9BUlMiLG5hbWUpKQpgYGAKCgojIyBSSUYxIGRhdGEKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KQVJTcmlmMSA8LSByZWFkX3RzdihiaWJsaW9kYXRhX3BhdGggJSslICJIYWZuZXJfUklGMS50eHQiLHNob3dfY29sX3R5cGVzID0gRkFMU0UpICU+JQoJbXV0YXRlKHJpZjFfSGFmbmVyPW1hcF9jaHIocmlmMV8sIGZ1bmN0aW9uKHgpIGNhc2Vfd2hlbih4ICVpbiUgYygicG9zIil+InJlcCIseD09Im5lZyJ+ImFjdCIseD09Im5vIGRpZmZlcmVuY2UifiJudWxsIixUfk5BKSkpICU+JQoJbXV0YXRlKHJpZjFSQk09
bWFwX2NocihyaWYxX1JCTSwgZnVuY3Rpb24oeCkgY2FzZV93aGVuKHggJWluJSBjKCJwb3MiKX4iYWN0Iix4PT0ibmVnIn4icmVwIix4PT0ibm8gZGlmZmVyZW5jZSJ+Im51bGwiKSkpICU+JQoJc2VsZWN0KC1jKDQsNSw2KSkKQVJTX21lcmdlMSA8LSBsZWZ0X2pvaW4oQVJTX29yaURCMixBUlNyaWYxLCBieT1qb2luX2J5KGNoclJvbT09Y2hyb20sc3RhcnQsZW5kKSkKYGBgCgojIyBGS0gxL0ZLSDIgZGF0YQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpBUlNma2ggPC0gcmVhZF90c3YoYmlibGlvZGF0YV9wYXRoICUrJSAiS25vdHRfRktIMTIudHh0Iixza2lwPTEsY29sX25hbWVzPWMoIm5hbWUiLCJPdGhlcl9uYW1lIiwiQ2hyb21vc29tZSIsInN0YXJ0IiwiZW5kIiwiZmtoMSIsImZraDIiLCJma2gxZmtoMiIpLHNob3dfY29sX3R5cGVzID0gRkFMU0UpICU+JQoJc2VsZWN0KC1jKDYsNyw5KSkgJT4lIAoJbXV0YXRlKGNocm9tPXBhc3RlMCgiY2hyIixhcy5yb21hbihDaHJvbW9zb21lKSkpCSU+JQoJbXV0YXRlKGZraDFma2gyX0tub3R0PW1hcF9jaHIoZmtoMWZraDIsIGZ1bmN0aW9uKHgpIAoJCWNhc2Vfd2hlbih4PT0xfiJyZXAiLHg9PS0xfiJhY3QiLHg9PTB+Im51bGwiKSkpICU+JQoJc2VsZWN0KC1ma2gxZmtoMikKQVJTX21lcmdlMS5nciA8LSB3aXRoKEFSU19tZXJnZTEsR1JhbmdlcyhzZXFuYW1lcz1jaHJSb20scmFuZ2U9SVJhbmdlcyhzdGFydCxlbmQpLHN0cmFuZD0iKiIsbmFtZT1uZXduYW1lMikpCgpBUlNma2guZ3IgPC0gd2l0aChBUlNma2gsR1JhbmdlcyhzZXFuYW1lcz1jaHJvbSxyYW5nZT1JUmFuZ2VzKHN0YXJ0LGVuZCksc3RyYW5kPSIqIixma2gxZmtoMl9Lbm90dD1ma2gxZmtoMl9Lbm90dCkpCgpvbDEgPC0gZmluZE92ZXJsYXBzKEFSU2ZraC5ncixBUlNfbWVyZ2UxLmdyKQpBUlNfbWVyZ2UyIDwtIEFSU19tZXJnZTEKQVJTX21lcmdlMiRma2gxZmtoMl9Lbm90dCA8LSBzYXBwbHkoMTpsZW5ndGgoQVJTX21lcmdlMS5nciksIGZ1bmN0aW9uKHgpIHtyZXM9QVJTZmtoLmdyW3F1ZXJ5SGl0cyhvbDEpW3N1YmplY3RIaXRzKG9sMSk9PXhdXSRma2gxZmtoMl9Lbm90dDtpZiAobGVuZ3RoKHJlcyk9PTApIHtyZXM9TkF9O3JldHVybihyZXMpfSkKYGBgCgoKIyMgQ1RGMTkgZGF0YQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpBUlNjdGYxOSA8LSByZWFkX3RzdihiaWJsaW9kYXRhX3BhdGggJSslICJNeUNURjE5ZnJvbVdpZzIwMjMwNDAzLnR4dCIsc2hvd19jb2xfdHlwZXMgPSBGQUxTRSkgJT4lIG11dGF0ZShzdGFydD1zdGFydCsxKSAlPiUgbXV0YXRlKGN0ZjE5X05hdHN1bWU9bWFwX2NocihDVEYxOUQsIGZ1bmN0aW9uKHgpIGNhc2Vfd2hlbih4PT0icCJ+ImFjdCIsVH4ibnVsbCIpKSkgJT4lIGRwbHlyOjpyZW5hbWUoY2hyUm9tPWNocm9tKQoKQVJTX21lcmdlMyA8LSBmdWxsX2pvaW4oQVJTX21lcmdlMixBUlNjdGYxOSAlPiUgc2VsZWN0KGMoMSwyLDMsOCkpLGJ5ID0g
am9pbl9ieShzdGFydCwgZW5kLCBjaHJSb20pKQoKd3JpdGVfdHN2KEFSU19tZXJnZTMsZmlsZT0iRGF0YS9BUlNfd2l0aEJpYmxpb0RhdGEudHN2IikKYGBgCgoKIyMgVHJhbnNwb3N0aW9uIHRvIEJZX1NZIE9yaQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpBUlNfQllTWSA8LSByZWFkUkRTKCJEYXRhL0FSU19CWW9uU1kucmRzIikKQVJTX21lcmdlNCA8LSBBUlNfbWVyZ2UzICU+JSBkcGx5cjo6cmVuYW1lKGNocm9tUk9NPWNoclJvbSkKQVJTX21lcmdlNSA8LSBmdWxsX2pvaW4oQVJTX0JZU1ksQVJTX21lcmdlNCAlPiUgc2VsZWN0KC1uYW1lKSAlPiUgZHBseXI6OnJlbmFtZShuYW1lPW5ld25hbWUyLHN0YXJ0c2MxPXN0YXJ0LGVuZHNjMT1lbmQpLGJ5ID0gam9pbl9ieShuYW1lLCBjaHJvbVJPTSkpICU+JSBhcnJhbmdlKGNocm9tUk9NLHN0YXJ0KQp3cml0ZV90c3YoQVJTX21lcmdlNSxmaWxlPSJEYXRhL0FSU193aXRoQmlibGlvRGF0YV9uYW1lc0JZU1kudHN2IikKCkFSU3dDVEYxOSA8LSB3aXRoKEFSU19tZXJnZTUgJT4lIGZpbHRlcighaXMubmEoY2hyb21TWSksY3RmMTlfTmF0c3VtZT09ImFjdCIpLEdSYW5nZXMoc2VxbmFtZXM9Y2hyb21TWSxyYW5nZXM9SVJhbmdlcyhzdGFydG5TWSxlbmRuU1kpLG5hbWU9bmFtZSkpCnJ0cmFja2xheWVyOjpleHBvcnQoQVJTd0NURjE5LGNvbj0iRGF0YS9BUlNfU1l3Q1RGMTkuYmVkIikKQVJTd1JJRjFwIDwtIHdpdGgoQVJTX21lcmdlNSAlPiUgZmlsdGVyKCFpcy5uYShjaHJvbVNZKSxyaWYxX0hhZm5lcj09InJlcCIpLEdSYW5nZXMoc2VxbmFtZXM9Y2hyb21TWSxyYW5nZXM9SVJhbmdlcyhzdGFydG5TWSxlbmRuU1kpLG5hbWU9bmFtZSkpCnJ0cmFja2xheWVyOjpleHBvcnQoQVJTd1JJRjFwLGNvbj0iRGF0YS9BUlNfU1l3UklGMXIuYmVkIikKQVJTd1JJRjFuIDwtIHdpdGgoQVJTX21lcmdlNSAlPiUgZmlsdGVyKCFpcy5uYShjaHJvbVNZKSxyaWYxX0hhZm5lcj09ImFjdCIpLEdSYW5nZXMoc2VxbmFtZXM9Y2hyb21TWSxyYW5nZXM9SVJhbmdlcyhzdGFydG5TWSxlbmRuU1kpLG5hbWU9bmFtZSkpCnJ0cmFja2xheWVyOjpleHBvcnQoQVJTd1JJRjFuLGNvbj0iRGF0YS9BUlNfU1l3UklGMWEuYmVkIikKQVJTd0ZLSGEgPC0gd2l0aChBUlNfbWVyZ2U1ICU+JSBmaWx0ZXIoIWlzLm5hKGNocm9tU1kpLGZraDFma2gyX0tub3R0PT0iYWN0IiksR1JhbmdlcyhzZXFuYW1lcz1jaHJvbVNZLHJhbmdlcz1JUmFuZ2VzKHN0YXJ0blNZLGVuZG5TWSksbmFtZT1uYW1lKSkKcnRyYWNrbGF5ZXI6OmV4cG9ydChBUlN3RktIYSxjb249IkRhdGEvQVJTX1NZd0ZLSGEuYmVkIikKQVJTd0ZLSHIgPC0gd2l0aChBUlNfbWVyZ2U1ICU+JSBmaWx0ZXIoIWlzLm5hKGNocm9tU1kpLGZraDFma2gyX0tub3R0PT0icmVwIiksR1JhbmdlcyhzZXFuYW1lcz1jaHJvbVNZLHJhbmdlcz1JUmFuZ2VzKHN0YXJ0blNZLGVuZG5TWSksbmFtZT1uYW1lKSkKcnRyYWNrbGF5ZXI6OmV4cG9ydChBUlN3RktI
cixjb249IkRhdGEvQVJTX1NZd0ZLSHIuYmVkIikKYGBgCgoqVGhlcmUgYXJlIGR1cGxpY2F0aW9uIGluIG5hbWUgZ2VuZXJhdGVkIGJ5IHRyYW5zcG9zaXRpb24gYW5kIHNwbGl0LW1hcHBpbmcgZm9yIER1YmlvdXNfY2hySUlJXzMsRHViaW91c19jaHJJSUlfNSxEdWJpb3VzX2NoclhfMiBhbmQgQVJTMTEwNi41KgoK