library(XML)
##library(RSQLite)

## 
##SQLiteDriver <- dbDriver("SQLite")
##intactSQLiteDbmsName <- "intact.sqlite"
##intactCon <- dbConnect(SQLiteDriver, dbname=intactSQLiteDbmsName)
##dbSendQuery(intactCon, "create table acInfo ();")

## Accumulator for the result tables; one element is added per table name
## while the XML files are processed.
tableList <- list()

## Directory that holds the XML files to import.
intactXmlDir <- "/home/tliu/proj/BioC/anno/tmp/intact/yeast/yeast_2006-01-27"

## Full paths of every file with an ".xml" extension in that directory.
## The dot is escaped so that only the extension matches, and the argument
## name is spelled out instead of relying on partial matching.
fileDotXml <- dir(intactXmlDir, pattern="\\.xml$", full.names=TRUE)

## Fail early with a clear message: with no input files no table is ever
## created, and assigning column names to the missing tables would fail
## later with a much less helpful error.
if(length(fileDotXml) == 0L){
    stop("no XML files found in ", intactXmlDir, call.=FALSE)
}

## ---------------------------------------------------------------------
## Parse every XML file in fileDotXml and append its content to tableList.
##
## Tables filled (all character matrices, one row per record):
##   acInfo                 - one row per experiment, interactor, interaction
##   experimentInfo         - one row per experiment
##   ac2xref                - one row per xref child of experiments (bibref)
##                            and of interactors
##   interactionInfo        - one row per interaction
##   experiment2interaction - one row per interaction
##   interaction2interactor - one row per participant of an interaction
##
## Row order is: files in the order of fileDotXml; within a file
## experiments, then interactors, then interactions.
## ---------------------------------------------------------------------

## Attributes copied from every xref child node into ac2xref.  They are
## selected by name so that experiment and interactor references always
## yield the same three columns, whatever attributes a node carries.
xrefAttrNames <- c("db", "id", "secondary")

## Names of all tables accumulated in tableList.
intactTableNames <- c("acInfo", "experimentInfo", "ac2xref",
                      "interactionInfo", "experiment2interaction",
                      "interaction2interactor")

## Return the attributes `keys` of `node` as an unnamed character vector.
## Attributes that are absent are returned as NA; so is every key when the
## node itself is NULL or carries no attributes.
pickAttrs <- function(node, keys){
    attrs <- if(is.null(node)) NULL else xmlAttrs(node)
    if(is.null(attrs)){
        return(rep(NA_character_, length(keys)))
    }
    unname(attrs[keys])
}

## Return the text content of `node`, or NA when the node is NULL or
## xmlValue() yields nothing, so that a row never silently loses a column.
textOrNA <- function(node){
    if(is.null(node)){
        return(NA_character_)
    }
    value <- xmlValue(node)
    if(length(value) == 0L) NA_character_ else value
}

## Turn a vector of field values into a one-row character matrix.
asRow <- function(values){
    matrix(as.character(values), nrow=1L)
}

## Stack a list of field vectors into a matrix with one row per vector
## (NULL for an empty list).
stackRows <- function(rows){
    do.call(rbind, unname(lapply(rows, asRow)))
}

## Apply FUN to every child of `node`; an absent node has no children.
parseChildren <- function(node, FUN){
    if(is.null(node)) list() else xmlApply(node, FUN)
}

## Row-bind table `tab` over all `records`; records that do not provide
## the table contribute nothing.
collectTable <- function(records, tab){
    do.call(rbind, unname(lapply(records, function(rec) rec[[tab]])))
}

## Extract the rows contributed by one child of experimentList.
parseExperiment <- function(x){
    id <- xmlAttrs(x)
    shortLabel <- textOrNA(x[["names"]][["shortLabel"]])
    fullName <- textOrNA(x[["names"]][["fullName"]])
    hostOrganism <- pickAttrs(x[["hostOrganism"]], "ncbiTaxId")
    interactionDetection <- pickAttrs(x[["interactionDetection"]][["xref"]][["primaryRef"]], "id")
    participantDetection <- pickAttrs(x[["participantDetection"]][["xref"]][["primaryRef"]], "id")
    xref <- parseChildren(x[["bibref"]][["xref"]],
                          function(y){
                              c(id, pickAttrs(y, xrefAttrNames))
                          })
    list(acInfo=asRow(c(id, "experiment", shortLabel, fullName)),
         experimentInfo=asRow(c(id, hostOrganism, interactionDetection, participantDetection)),
         ac2xref=stackRows(xref))
}

## Extract the rows contributed by one child of interactorList.
parseInteractor <- function(x){
    id <- xmlAttrs(x)
    shortLabel <- textOrNA(x[["names"]][["shortLabel"]])
    fullName <- textOrNA(x[["names"]][["fullName"]])
    xref <- parseChildren(x[["xref"]],
                          function(y){
                              c(id, pickAttrs(y, xrefAttrNames))
                          })
    list(acInfo=asRow(c(id, "interactor", shortLabel, fullName)),
         ac2xref=stackRows(xref))
}

## Extract the rows contributed by one child of interactionList.
parseInteraction <- function(x){
    id <- pickAttrs(x[["xref"]][["primaryRef"]], "id")
    shortLabel <- textOrNA(x[["names"]][["shortLabel"]])
    ## only the first experimentRef of the interaction is recorded
    experiment <- xmlAttrs(x[["experimentList"]][["experimentRef"]])
    type <- pickAttrs(x[["interactionType"]][["xref"]][["primaryRef"]], "id")
    ## An interaction without a confidence node gets NA for both fields.
    ## The attributes are picked by name so that they always land in the
    ## confidenceUnit / confidenceValue columns in that order.
    confidence <- pickAttrs(x[["confidence"]], c("unit", "value"))
    ppi <- parseChildren(x[["participantList"]],
                         function(y){
                             ref <- xmlAttrs(y[["proteinInteractorRef"]])
                             role <- textOrNA(y[["role"]])
                             c(id, ref, role)
                         })
    ## interactions have no fullName, hence the NA in acInfo
    list(acInfo=asRow(c(id, "interaction", shortLabel, NA)),
         interactionInfo=asRow(c(id, type, confidence)),
         experiment2interaction=asRow(c(experiment, id)),
         interaction2interactor=stackRows(ppi))
}

## Tables of each file are built separately and combined once at the end,
## instead of growing every table with one rbind() per record.
perFileTables <- vector("list", length(fileDotXml))

for(fileIndex in seq_along(fileDotXml)){
    xmlFile <- fileDotXml[[fileIndex]]

    ## parse the xml file into an XMLDocument object
    doc <- xmlTreeParse(xmlFile)

    ## root node of the document (an XMLNode, which is also a list)
    dom <- xmlRoot(doc)
    entry <- dom[["entry"]]

    ## attributes of the <source> element; not written to any table
    releaseDate <- xmlAttrs(entry[["source"]])

    experiment <- parseChildren(entry[["experimentList"]], parseExperiment)
    interactor <- parseChildren(entry[["interactorList"]], parseInteractor)
    interaction <- parseChildren(entry[["interactionList"]], parseInteraction)

    ## Concatenating the records in this order fixes the row order of the
    ## tables shared by several record kinds (acInfo, ac2xref).
    records <- c(unname(experiment), unname(interactor), unname(interaction))
    perFileTables[[fileIndex]] <- lapply(intactTableNames,
                                         function(tab) collectTable(records, tab))
    names(perFileTables[[fileIndex]]) <- intactTableNames
}

## Append the rows of all files to whatever tableList already holds.
for(tab in intactTableNames){
    pieces <- lapply(perFileTables, function(tables) tables[[tab]])
    tableList[[tab]] <- do.call(rbind, c(list(tableList[[tab]]), pieces))
}

## Column names of every accumulated table, listed in column order.
tableColumns <- list(
    acInfo                 = c("ac", "type", "shortLabel", "fullName"),
    experimentInfo         = c("ac", "hostOrganism", "interactionDetection", "participantDetection"),
    ac2xref                = c("ac", "db", "id", "secondary"),
    interactionInfo        = c("ac", "interactionType", "confidenceUnit", "confidenceValue"),
    experiment2interaction = c("experiment", "interaction"),
    interaction2interactor = c("interaction", "interactor", "role")
)

## Label the columns of each table.
for(tableName in names(tableColumns)){
    colnames(tableList[[tableName]]) <- tableColumns[[tableName]]
}

## Drop the row names of each table.
for(tableName in names(tableColumns)){
    rownames(tableList[[tableName]]) <- NULL
}

## Write the finished list of tables to a compressed R data file in the
## current working directory.
save(tableList, file="yeastIntActTableList", compress=TRUE)
