Skip to contents

Take NCBI taxa IDs and get all taxonomic ranks from a SQLite database containing names and nodes tables. Ranks that occur more than once (e.g. "no rank") are made unique with a postfix through make.unique.

Usage

getRawTaxonomy(ids, sqlFile = "nameNode.sqlite")

Arguments

ids

a vector of NCBI taxa IDs to find taxonomy for

sqlFile

a string giving the path to a SQLite file containing names and nodes tables

Value

a list with one element per ID, named by the ID. Each element is a vector of taxonomic name strings whose names give the corresponding taxonomic rank (repeated ranks are made unique, e.g. "no rank", "no rank.1").

Examples

# Build a small names/nodes SQLite database from temporary files so the
# example is self-contained.
sqlFile <- tempfile()
# Names rows: fields are separated by "\t|\t"; each row starts with a taxa ID
# followed by a name and ends with a name class such as "scientific name".
namesText <- c(
  "1\t|\tall\t|\t\t|\tsynonym\t|",
  "1\t|\troot\t|\t\t|\tscientific name\t|",
  "2\t|\tBacteria\t|\tBacteria <prokaryotes>\t|\tscientific name\t|",
  "2\t|\tMonera\t|\tMonera <Bacteria>\t|\tin-part\t|",
  "2\t|\tProcaryotae\t|\tProcaryotae <Bacteria>\t|\tin-part\t|",
  "9606\t|\tHomo sapiens\t|\t\t|\tscientific name",
  "9605\t|\tHomo\t|\t\t|\tscientific name",
  "207598\t|\tHomininae\t|\t\t|\tscientific name",
  "9604\t|\tHominidae\t|\t\t|\tscientific name",
  "314295\t|\tHominoidea\t|\t\t|\tscientific name",
  "9526\t|\tCatarrhini\t|\t\t|\tscientific name",
  "314293\t|\tSimiiformes\t|\t\t|\tscientific name",
  "376913\t|\tHaplorrhini\t|\t\t|\tscientific name",
  "9443\t|\tPrimates\t|\t\t|\tscientific name",
  "314146\t|\tEuarchontoglires\t|\t\t|\tscientific name",
  "1437010\t|\tBoreoeutheria\t|\t\t|\tscientific name",
  "9347\t|\tEutheria\t|\t\t|\tscientific name",
  "32525\t|\tTheria\t|\t\t|\tscientific name",
  "40674\t|\tMammalia\t|\t\t|\tscientific name",
  "32524\t|\tAmniota\t|\t\t|\tscientific name",
  "32523\t|\tTetrapoda\t|\t\t|\tscientific name",
  "1338369\t|\tDipnotetrapodomorpha\t|\t\t|\tscientific name",
  "8287\t|\tSarcopterygii\t|\t\t|\tscientific name",
  "117571\t|\tEuteleostomi\t|\t\t|\tscientific name",
  "117570\t|\tTeleostomi\t|\t\t|\tscientific name",
  "7776\t|\tGnathostomata\t|\t\t|\tscientific name",
  "7742\t|\tVertebrata\t|\t\t|\tscientific name",
  "89593\t|\tCraniata\t|\t\t|\tscientific name",
  "7711\t|\tChordata\t|\t\t|\tscientific name",
  "33511\t|\tDeuterostomia\t|\t\t|\tscientific name",
  "33213\t|\tBilateria\t|\t\t|\tscientific name",
  "6072\t|\tEumetazoa\t|\t\t|\tscientific name",
  "33208\t|\tMetazoa\t|\t\t|\tscientific name",
  "33154\t|\tOpisthokonta\t|\t\t|\tscientific name",
  "2759\t|\tEukaryota\t|\t\t|\tscientific name",
  "131567\t|\tcellular organisms\t|\t\t|\tscientific name"
)
tmpFile <- tempfile()
writeLines(namesText, tmpFile)
# Read the names file into the SQLite file given by sqlFile.
taxaNames <- read.names.sql(tmpFile, sqlFile)
# Nodes rows: each row begins with a taxa ID, its parent's ID and its rank.
nodesText <- c(
  "1\t|\t1\t|\tno rank\t|\t\t|\t8\t|\t0\t|\t1\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t|",
  "2\t|\t131567\t|\tdomain\t|\t\t|\t0\t|\t0\t|\t11\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t|",
  "6\t|\t335928\t|\tgenus\t|\t\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t0\t|\t0\t|\t\t|",
  "7\t|\t6\t|\tspecies\t|\tAC\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t1\t|\t0\t|\t\t|",
  "9\t|\t32199\t|\tspecies\t|\tBA\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t1\t|\t0\t|\t\t|",
  "9606\t|\t9605\t|\tspecies", "9605\t|\t207598\t|\tgenus", "207598\t|\t9604\t|\tsubfamily",
  "9604\t|\t314295\t|\tfamily", "314295\t|\t9526\t|\tsuperfamily",
  "9526\t|\t314293\t|\tparvorder", "314293\t|\t376913\t|\tinfraorder",
  "376913\t|\t9443\t|\tsuborder", "9443\t|\t314146\t|\torder",
  "314146\t|\t1437010\t|\tsuperorder", "1437010\t|\t9347\t|\tno rank",
  "9347\t|\t32525\t|\tno rank", "32525\t|\t40674\t|\tno rank",
  "40674\t|\t32524\t|\tclass", "32524\t|\t32523\t|\tno rank", "32523\t|\t1338369\t|\tno rank",
  "1338369\t|\t8287\t|\tno rank", "8287\t|\t117571\t|\tno rank",
  "117571\t|\t117570\t|\tno rank", "117570\t|\t7776\t|\tno rank",
  "7776\t|\t7742\t|\tno rank", "7742\t|\t89593\t|\tno rank", "89593\t|\t7711\t|\tsubphylum",
  "7711\t|\t33511\t|\tphylum", "33511\t|\t33213\t|\tno rank", "33213\t|\t6072\t|\tno rank",
  "6072\t|\t33208\t|\tno rank", "33208\t|\t33154\t|\tkingdom",
  "33154\t|\t2759\t|\tno rank", "2759\t|\t131567\t|\tdomain",
  "131567\t|\t1\t|\tno rank"
)
# tmpFile is reused: the names rows were already read above, so it can be
# overwritten with the nodes rows.
writeLines(nodesText, tmpFile)
taxaNodes <- read.nodes.sql(tmpFile, sqlFile)
# Keep the result in a variable so the temporary files can be removed before
# the result is printed.
rawTaxonomy <- getRawTaxonomy(c(9606, 9605), sqlFile)
# Clean up so the example leaves nothing behind in the temp directory.
unlink(c(tmpFile, sqlFile))
rawTaxonomy
#> $`9606`
#>                species                  genus              subfamily 
#>         "Homo sapiens"                 "Homo"            "Homininae" 
#>                 family            superfamily              parvorder 
#>            "Hominidae"           "Hominoidea"           "Catarrhini" 
#>             infraorder               suborder                  order 
#>          "Simiiformes"          "Haplorrhini"             "Primates" 
#>             superorder                no rank              no rank.1 
#>     "Euarchontoglires"        "Boreoeutheria"             "Eutheria" 
#>              no rank.2                  class              no rank.3 
#>               "Theria"             "Mammalia"              "Amniota" 
#>              no rank.4              no rank.5              no rank.6 
#>            "Tetrapoda" "Dipnotetrapodomorpha"        "Sarcopterygii" 
#>              no rank.7              no rank.8              no rank.9 
#>         "Euteleostomi"           "Teleostomi"        "Gnathostomata" 
#>             no rank.10              subphylum                 phylum 
#>           "Vertebrata"             "Craniata"             "Chordata" 
#>             no rank.11             no rank.12             no rank.13 
#>        "Deuterostomia"            "Bilateria"            "Eumetazoa" 
#>                kingdom             no rank.14                 domain 
#>              "Metazoa"         "Opisthokonta"            "Eukaryota" 
#>             no rank.15 
#>   "cellular organisms" 
#> 
#> $`9605`
#>                  genus              subfamily                 family 
#>                 "Homo"            "Homininae"            "Hominidae" 
#>            superfamily              parvorder             infraorder 
#>           "Hominoidea"           "Catarrhini"          "Simiiformes" 
#>               suborder                  order             superorder 
#>          "Haplorrhini"             "Primates"     "Euarchontoglires" 
#>                no rank              no rank.1              no rank.2 
#>        "Boreoeutheria"             "Eutheria"               "Theria" 
#>                  class              no rank.3              no rank.4 
#>             "Mammalia"              "Amniota"            "Tetrapoda" 
#>              no rank.5              no rank.6              no rank.7 
#> "Dipnotetrapodomorpha"        "Sarcopterygii"         "Euteleostomi" 
#>              no rank.8              no rank.9             no rank.10 
#>           "Teleostomi"        "Gnathostomata"           "Vertebrata" 
#>              subphylum                 phylum             no rank.11 
#>             "Craniata"             "Chordata"        "Deuterostomia" 
#>             no rank.12             no rank.13                kingdom 
#>            "Bilateria"            "Eumetazoa"              "Metazoa" 
#>             no rank.14                 domain             no rank.15 
#>         "Opisthokonta"            "Eukaryota"   "cellular organisms" 
#>