Skip to contents

Take NCBI taxa IDs and get the corresponding taxa ranks from a name and node SQLite database

Usage

getTaxonomy(
  ids,
  sqlFile = "nameNode.sqlite",
  ...,
  desiredTaxa = c("domain", "phylum", "class", "order", "family", "genus", "species"),
  getNames = TRUE
)

Arguments

ids

a vector of ids to find taxonomy for

sqlFile

a string giving the path to a SQLite file containing names and nodes tables

...

legacy additional arguments to original data.table based getTaxonomy function. Used only for support for deprecated function, do not use in new code.

desiredTaxa

a vector of strings giving the desired taxa levels

getNames

a logical indicating whether to convert taxon IDs to names if TRUE or simply return the taxon ID if FALSE

Value

a matrix of taxonomic strings with a row for each id and a column for each desiredTaxa rank

Examples

sqlFile<-tempfile()
namesText<-c(
  "1\t|\tall\t|\t\t|\tsynonym\t|",
  "1\t|\troot\t|\t\t|\tscientific name\t|",
  "2\t|\tBacteria\t|\tBacteria <prokaryotes>\t|\tscientific name\t|",
  "2\t|\tMonera\t|\tMonera <Bacteria>\t|\tin-part\t|",
  "2\t|\tProcaryotae\t|\tProcaryotae <Bacteria>\t|\tin-part\t|",
  "9606\t|\tHomo sapiens\t|\t\t|\tscientific name",
  "9605\t|\tHomo\t|\t\t|\tscientific name",
  "207598\t|\tHomininae\t|\t\t|\tscientific name",
  "9604\t|\tHominidae\t|\t\t|\tscientific name",
  "314295\t|\tHominoidea\t|\t\t|\tscientific name",
  "9526\t|\tCatarrhini\t|\t\t|\tscientific name",
  "314293\t|\tSimiiformes\t|\t\t|\tscientific name",
  "376913\t|\tHaplorrhini\t|\t\t|\tscientific name",
  "9443\t|\tPrimates\t|\t\t|\tscientific name",
  "314146\t|\tEuarchontoglires\t|\t\t|\tscientific name",
  "1437010\t|\tBoreoeutheria\t|\t\t|\tscientific name",
  "9347\t|\tEutheria\t|\t\t|\tscientific name",
  "32525\t|\tTheria\t|\t\t|\tscientific name",
  "40674\t|\tMammalia\t|\t\t|\tscientific name",
  "32524\t|\tAmniota\t|\t\t|\tscientific name",
  "32523\t|\tTetrapoda\t|\t\t|\tscientific name",
  "1338369\t|\tDipnotetrapodomorpha\t|\t\t|\tscientific name",
  "8287\t|\tSarcopterygii\t|\t\t|\tscientific name",
  "117571\t|\tEuteleostomi\t|\t\t|\tscientific name",
  "117570\t|\tTeleostomi\t|\t\t|\tscientific name",
  "7776\t|\tGnathostomata\t|\t\t|\tscientific name",
  "7742\t|\tVertebrata\t|\t\t|\tscientific name",
  "89593\t|\tCraniata\t|\t\t|\tscientific name",
  "7711\t|\tChordata\t|\t\t|\tscientific name",
  "33511\t|\tDeuterostomia\t|\t\t|\tscientific name",
  "33213\t|\tBilateria\t|\t\t|\tscientific name",
  "6072\t|\tEumetazoa\t|\t\t|\tscientific name",
  "33208\t|\tMetazoa\t|\t\t|\tscientific name",
  "33154\t|\tOpisthokonta\t|\t\t|\tscientific name",
  "2759\t|\tEukaryota\t|\t\t|\tscientific name",
  "131567\t|\tcellular organisms\t|\t\t|\tscientific name"
)
tmpFile<-tempfile()
writeLines(namesText,tmpFile)
taxaNames<-read.names.sql(tmpFile,sqlFile)
nodesText<-c(
 "1\t|\t1\t|\tno rank\t|\t\t|\t8\t|\t0\t|\t1\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t|",
  "2\t|\t131567\t|\tdomain\t|\t\t|\t0\t|\t0\t|\t11\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t|",
  "6\t|\t335928\t|\tgenus\t|\t\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t0\t|\t0\t|\t\t|",
  "7\t|\t6\t|\tspecies\t|\tAC\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t1\t|\t0\t|\t\t|",
  "9\t|\t32199\t|\tspecies\t|\tBA\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t1\t|\t0\t|\t\t|",
  "9606\t|\t9605\t|\tspecies", "9605\t|\t207598\t|\tgenus", "207598\t|\t9604\t|\tsubfamily",
  "9604\t|\t314295\t|\tfamily", "314295\t|\t9526\t|\tsuperfamily",
  "9526\t|\t314293\t|\tparvorder", "314293\t|\t376913\t|\tinfraorder",
  "376913\t|\t9443\t|\tsuborder", "9443\t|\t314146\t|\torder",
  "314146\t|\t1437010\t|\tsuperorder", "1437010\t|\t9347\t|\tno rank",
  "9347\t|\t32525\t|\tno rank", "32525\t|\t40674\t|\tno rank",
  "40674\t|\t32524\t|\tclass", "32524\t|\t32523\t|\tno rank", "32523\t|\t1338369\t|\tno rank",
  "1338369\t|\t8287\t|\tno rank", "8287\t|\t117571\t|\tno rank",
  "117571\t|\t117570\t|\tno rank", "117570\t|\t7776\t|\tno rank",
  "7776\t|\t7742\t|\tno rank", "7742\t|\t89593\t|\tno rank", "89593\t|\t7711\t|\tsubphylum",
  "7711\t|\t33511\t|\tphylum", "33511\t|\t33213\t|\tno rank", "33213\t|\t6072\t|\tno rank",
  "6072\t|\t33208\t|\tno rank", "33208\t|\t33154\t|\tkingdom",
  "33154\t|\t2759\t|\tno rank", "2759\t|\t131567\t|\tdomain",
  "131567\t|\t1\t|\tno rank"
)
writeLines(nodesText,tmpFile)
taxaNodes<-read.nodes.sql(tmpFile,sqlFile)
getTaxonomy(c(9606,9605),sqlFile)
#>      domain      phylum     class      order      family      genus 
#> 9606 "Eukaryota" "Chordata" "Mammalia" "Primates" "Hominidae" "Homo"
#> 9605 "Eukaryota" "Chordata" "Mammalia" "Primates" "Hominidae" "Homo"
#>      species       
#> 9606 "Homo sapiens"
#> 9605 NA