## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  library('AnnotationHub')
#  ah <- AnnotationHub()
#  meta <- query(ah, "geneplast")
#  head(meta)

## ----eval=FALSE---------------------------------------------------------------
#  # Dataset derived from STRING database v11.0
#  load(meta[["AH83116"]])

## -----------------------------------------------------------------------------
species <- data.frame (
    label = c("sp01", "sp02", "sp03", "sp04", "sp05", "sp06", "sp07", "sp08", "sp09", "sp10", "sp11", "sp12", "sp13", "sp14", "sp15"),
                scientific_name = c("Homo sapiens", "Pan troglodytes", "Gorilla gorilla gorilla", "Macaca mulatta", "Papio anubis", "Rattus norvegicus", "Mus musculus", "Canis lupus familiaris", "Marmosa mexicana", "Monodelphis domestica", "Gallus gallus", "Meleagris gallopavo", "Xenopus tropicalis", "Latimeria chalumnae", "Danio rerio"),
                taxon_id = c("9606", "9598", "9595", "9544", "9555", "10116", "10090", "9615", "225402", "13616", "9031", "9103", "8364", "7897", "7955")
)
species

## -----------------------------------------------------------------------------
og1 <- expand.grid(cog_id = "OG1", 
                   protein_id = c(
    "sp01.prot1a", "sp01.prot1b", "sp01.prot1c", "sp01.prot1d", "sp01.prot1e", "sp01.prot1f", "sp01.prot1g", 
    "sp02.prot1a", "sp02.prot1b", "sp02.prot1c", "sp02.prot1d", "sp02.prot1e", "sp02.prot1f", 
    "sp03.prot1a", "sp03.prot1b", "sp03.prot1c", 
    "sp04.prot1a", "sp04.prot1b", "sp04.prot1c", "sp04.prot1d", 
    "sp05.prot1a", "sp05.prot1b", "sp05.prot1c", "sp05.prot1d", "sp05.prot1e",
    "sp06.prot1a", "sp06.prot1b",
    "sp07.prot1a", "sp07.prot1b", "sp07.prot1c",
    "sp08.prot1a", "sp08.prot1b",
    "sp11.prot1a", "sp11.prot1b", "sp11.prot1c",
    "sp12.prot1a", "sp12.prot1b", "sp12.prot1c",
    "sp13.prot1a", "sp13.prot1b",
    "sp14.prot1",
    "sp15.prot1a", "sp15.prot1b"
    )
)
og2 <- expand.grid(cog_id = "OG2", 
                   protein_id = c(
    "sp01.prot2a", "sp01.prot2b", "sp01.prot2c", "sp01.prot2d", "sp01.prot2e", "sp01.prot2f",
    "sp02.prot2a", "sp02.prot2b", "sp02.prot2c",
    "sp03.prot2a", "sp03.prot2b", "sp03.prot2c", "sp03.prot2d",
    "sp04.prot2a", "sp04.prot2b", "sp04.prot2c",
    "sp05.prot2a", "sp05.prot2b", "sp05.prot2c", "sp05.prot2d",
    "sp06.prot2",
    "sp08.prot2a", "sp08.prot2b"
    )
)
og3 <- expand.grid(cog_id = "OG3", 
                   protein_id = c(
    "sp01.prot3a", "sp01.prot3b", "sp01.prot3c", "sp01.prot3d", 
    "sp02.prot3a", "sp02.prot3b", "sp02.prot3c",
    "sp03.prot3",
    "sp10.prot3"
    )
)

## -----------------------------------------------------------------------------
library(tibble)
library(stringi)
library(dplyr)
# Bind OGs into a single object
cogdata <- rbind(og1, og2, og3)
# Create a dictionary to map species labels to taxon IDs
species_taxid_lookup <- species |> dplyr::select(label, taxon_id) |> tibble::deframe()
species_name_lookup <- species |> dplyr::select(taxon_id, scientific_name) |> tibble::deframe()
# Add the required ssp_id column
cogdata[["ssp_id"]] <- species_taxid_lookup[stri_split_fixed(cogdata[["protein_id"]], pattern = ".", n = 2, simplify = T)[,1]]
head(cogdata)

## ----eval=FALSE---------------------------------------------------------------
#  library(geneplast.data)
#  cogdata <- geneplast.data::make.cogdata(file = "path/to/orthogroups.tsv")

## -----------------------------------------------------------------------------
library(geneplast.data)
phyloTree <- geneplast.data::make.phyloTree(sspids = species$taxon_id)
phyloTree

## ----eval=FALSE---------------------------------------------------------------
#  # Create from a user's predefined newick file
#  phyloTree <- geneplast.data::make.phyloTree(newick = "path/to/newick_tree.nwk")

## -----------------------------------------------------------------------------
library(geneplast)
phyloTree <- geneplast:::rotatePhyloTree(phyloTree, "9606")
phyloTree$edge.length <- NULL
phyloTree$tip.label <- species_name_lookup[phyloTree$tip.label]
plot(phyloTree, type = "cladogram")

## ----eval=FALSE---------------------------------------------------------------
#  library(geneplast)
#  ogr <- groot.preprocess(cogdata=cogdata, phyloTree=phyloTree, spid="9606")
#  ogr <- groot(ogr, nPermutations=1, verbose=TRUE)
#  
#  library(RedeR)
#  library(igraph)
#  library(RColorBrewer)
#  data(ppi.gs)
#  g <- ogr2igraph(ogr, cogdata, ppi.gs, idkey = "ENTREZ")
#  pal <- brewer.pal(9, "RdYlBu")
#  color_col <- colorRampPalette(pal)(37) #set a color for each root!
#  g <- att.setv(g=g, from="Root", to="nodeColor", cols=color_col, na.col = "grey80", breaks = seq(1,37))
#  g <- att.setv(g = g, from = "SYMBOL", to = "nodeAlias")
#  E(g)$edgeColor <- "grey80"
#  V(g)$nodeLineColor <- "grey80"
#  rdp <- RedPort()
#  calld(rdp)
#  resetd(rdp)
#  addGraph(rdp, g)
#  addLegend.color(rdp, colvec=g$legNodeColor$scale, size=15, labvec=g$legNodeColor$legend, title="Roots inferred from geneplast")
#  g1  <- induced_subgraph(g=g, V(g)$name[V(g)$Apoptosis==1])
#  g2  <- induced_subgraph(g=g, V(g)$name[V(g)$GenomeStability==1])
#  myTheme <- list(nestFontSize=25, zoom=80, isNest=TRUE, gscale=65, theme=2)
#  addGraph(rdp, g1, gcoord=c(25, 50), theme = c(myTheme, nestAlias="Apoptosis"))
#  addGraph(rdp, g2, gcoord=c(75, 50), theme = c(myTheme, nestAlias="Genome Stability"))
#  relax(rdp, p1=50, p2=50, p3=50, p4=50, p5= 50, ps = TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  load(meta[["AH83117"]])
#  cogdata$cog_id <- paste0("OMA", cogdata$cog_id)
#  cogids$cog_id <- paste0("OMA", cogids$cog_id)
#  
#  human_entrez_2_oma_Aug2020 <- read_delim("processed_human.entrez_2_OMA.Aug2020.tsv",
#      delim = "\t", escape_double = FALSE,
#      col_names = FALSE, trim_ws = TRUE)
#  names(human_entrez_2_oma_Aug2020) <- c("protein_id", "gene_id")
#  cogdata <- cogdata %>% left_join(human_entrez_2_oma_Aug2020)
#  ogr <- groot.preprocess(cogdata=cogdata, phyloTree=phyloTree, spid="9606")
#  ogr <- groot(ogr, nPermutations=1, verbose=TRUE)
#  
#  g <- ogr2igraph(ogr, cogdata, ppi.gs, idkey = "ENTREZ")
#  pal <- brewer.pal(9, "RdYlBu")
#  color_col <- colorRampPalette(pal)(37) #set a color for each root!
#  g <- att.setv(g=g, from="Root", to="nodeColor", cols=color_col, na.col = "grey80", breaks = seq(1,37))
#  g <- att.setv(g = g, from = "SYMBOL", to = "nodeAlias")
#  E(g)$edgeColor <- "grey80"
#  V(g)$nodeLineColor <- "grey80"
#  # rdp <- RedPort()
#  # calld(rdp)
#  resetd(rdp)
#  addGraph(rdp, g)
#  addLegend.color(rdp, colvec=g$legNodeColor$scale, size=15, labvec=g$legNodeColor$legend, title="Roots inferred from geneplast")
#  g1  <- induced_subgraph(g=g, V(g)$name[V(g)$Apoptosis==1])
#  g2  <- induced_subgraph(g=g, V(g)$name[V(g)$GenomeStability==1])
#  myTheme <- list(nestFontSize=25, zoom=80, isNest=TRUE, gscale=65, theme=2)
#  addGraph(rdp, g1, gcoord=c(25, 50), theme = c(myTheme, nestAlias="Apoptosis"))
#  addGraph(rdp, g2, gcoord=c(75, 50), theme = c(myTheme, nestAlias="Genome Stability"))
#  relax(rdp, p1=50, p2=50, p3=50, p4=50, p5= 50, ps = TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  load(meta[["AH83118"]])
#  cogdata$cog_id <- paste0("ODB", cogdata$cog_id)
#  cogids$cog_id <- paste0("ODB", cogids$cog_id)
#  
#  human_entrez_2_odb <- read_delim("odb10v1_genes-human-entrez.tsv",
#      delim = "\t", escape_double = FALSE,
#      col_names = FALSE, trim_ws = TRUE)
#  names(human_entrez_2_odb) <- c("protein_id", "gene_id")
#  cogdata <- cogdata %>% left_join(human_entrez_2_odb)
#  ogr <- groot.preprocess(cogdata=cogdata, phyloTree=phyloTree, spid="9606")
#  ogr <- groot(ogr, nPermutations=1, verbose=TRUE)
#  
#  g <- ogr2igraph(ogr, cogdata, ppi.gs, idkey = "ENTREZ")
#  pal <- brewer.pal(9, "RdYlBu")
#  color_col <- colorRampPalette(pal)(37) #set a color for each root!
#  g <- att.setv(g=g, from="Root", to="nodeColor", cols=color_col, na.col = "grey80", breaks = seq(1,37))
#  g <- att.setv(g = g, from = "SYMBOL", to = "nodeAlias")
#  E(g)$edgeColor <- "grey80"
#  V(g)$nodeLineColor <- "grey80"
#  rdp <- RedPort()
#  calld(rdp)
#  resetd(rdp)
#  addGraph(rdp, g)
#  addLegend.color(rdp, colvec=g$legNodeColor$scale, size=15, labvec=g$legNodeColor$legend, title="Roots inferred from geneplast")
#  g1  <- induced_subgraph(g=g, V(g)$name[V(g)$Apoptosis==1])
#  g2  <- induced_subgraph(g=g, V(g)$name[V(g)$GenomeStability==1])
#  myTheme <- list(nestFontSize=25, zoom=80, isNest=TRUE, gscale=65, theme=2)
#  addGraph(rdp, g1, gcoord=c(25, 50), theme = c(myTheme, nestAlias="Apoptosis"))
#  addGraph(rdp, g2, gcoord=c(75, 50), theme = c(myTheme, nestAlias="Genome Stability"))
#  relax(rdp, p1=50, p2=50, p3=50, p4=50, p5= 50, ps = TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  library(AnnotationHub)
#  library(dplyr)
#  library(geneplast)
#  library(tictoc)
#  
#  ah <- AnnotationHub()
#  meta <- query(ah, "geneplast")
#  load(meta[["AH83116"]])
#  writeLines(paste("geneplast input data loaded from", meta["AH83116"]$title))
#  
#  tic("total time")
#  tic("preprocessing")
#  ogr <- groot.preprocess(cogdata=cogdata, phyloTree=phyloTree, spid="9606")
#  toc()
#  tic("rooting")
#  ogr <- groot(ogr)
#  toc()
#  toc()

## -----------------------------------------------------------------------------
sessionInfo()

