Skip to contents

Take NCBI taxa IDs and get the corresponding taxa ranks from name and node data.tables. NOTE: This function is deprecated in favor of getTaxonomy, which uses SQLite rather than data.table.

Usage

getTaxonomy2(
  ids,
  taxaNodes,
  taxaNames,
  desiredTaxa = c("domain", "phylum", "class", "order", "family", "genus", "species"),
  mc.cores = 1,
  debug = FALSE
)

Arguments

ids

a vector of ids to find taxonomy for

taxaNodes

a nodes data.table from read.nodes

taxaNames

a names data.table from read.names

desiredTaxa

a vector of strings giving the desired taxa levels

mc.cores

DEPRECATED: the number of cores to use when processing. This option is deprecated and has no effect. Please switch to getTaxonomy (see taxonomizrSwitch) for much faster processing without requiring multiple cores.

debug

if TRUE, output the node and name vectors with dput for each id (probably useful only for development)

Value

a matrix of taxonomic strings with a row for each id and a column for each desiredTaxa rank

Examples

namesText<-c(
  "1\t|\tall\t|\t\t|\tsynonym\t|",
  "1\t|\troot\t|\t\t|\tscientific name\t|",
  "2\t|\tBacteria\t|\tBacteria <prokaryotes>\t|\tscientific name\t|",
  "2\t|\tMonera\t|\tMonera <Bacteria>\t|\tin-part\t|",
  "2\t|\tProcaryotae\t|\tProcaryotae <Bacteria>\t|\tin-part\t|",
  "9606\t|\tHomo sapiens\t|\t\t|\tscientific name",
  "9605\t|\tHomo\t|\t\t|\tscientific name",
  "207598\t|\tHomininae\t|\t\t|\tscientific name",
  "9604\t|\tHominidae\t|\t\t|\tscientific name",
  "314295\t|\tHominoidea\t|\t\t|\tscientific name",
  "9526\t|\tCatarrhini\t|\t\t|\tscientific name",
  "314293\t|\tSimiiformes\t|\t\t|\tscientific name",
  "376913\t|\tHaplorrhini\t|\t\t|\tscientific name",
  "9443\t|\tPrimates\t|\t\t|\tscientific name",
  "314146\t|\tEuarchontoglires\t|\t\t|\tscientific name",
  "1437010\t|\tBoreoeutheria\t|\t\t|\tscientific name",
  "9347\t|\tEutheria\t|\t\t|\tscientific name",
  "32525\t|\tTheria\t|\t\t|\tscientific name",
  "40674\t|\tMammalia\t|\t\t|\tscientific name",
  "32524\t|\tAmniota\t|\t\t|\tscientific name",
  "32523\t|\tTetrapoda\t|\t\t|\tscientific name",
  "1338369\t|\tDipnotetrapodomorpha\t|\t\t|\tscientific name",
  "8287\t|\tSarcopterygii\t|\t\t|\tscientific name",
  "117571\t|\tEuteleostomi\t|\t\t|\tscientific name",
  "117570\t|\tTeleostomi\t|\t\t|\tscientific name",
  "7776\t|\tGnathostomata\t|\t\t|\tscientific name",
  "7742\t|\tVertebrata\t|\t\t|\tscientific name",
  "89593\t|\tCraniata\t|\t\t|\tscientific name",
  "7711\t|\tChordata\t|\t\t|\tscientific name",
  "33511\t|\tDeuterostomia\t|\t\t|\tscientific name",
  "33213\t|\tBilateria\t|\t\t|\tscientific name",
  "6072\t|\tEumetazoa\t|\t\t|\tscientific name",
  "33208\t|\tMetazoa\t|\t\t|\tscientific name",
  "33154\t|\tOpisthokonta\t|\t\t|\tscientific name",
  "2759\t|\tEukaryota\t|\t\t|\tscientific name",
  "131567\t|\tcellular organisms\t|\t\t|\tscientific name"
)
tmpFile<-tempfile()
writeLines(namesText,tmpFile)
taxaNames<-read.names(tmpFile)
#> Warning: taxonomizr is moving from data.table to SQLite databases to improve performance. This will require changing nodes and names processing. Please see ?read.names.sql or ?taxonomizrSwitch
nodesText<-c(
 "1\t|\t1\t|\tno rank\t|\t\t|\t8\t|\t0\t|\t1\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t|",
  "2\t|\t131567\t|\tdomain\t|\t\t|\t0\t|\t0\t|\t11\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t|",
  "6\t|\t335928\t|\tgenus\t|\t\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t0\t|\t0\t|\t\t|",
  "7\t|\t6\t|\tspecies\t|\tAC\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t1\t|\t0\t|\t\t|",
  "9\t|\t32199\t|\tspecies\t|\tBA\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t1\t|\t0\t|\t\t|",
  "9606\t|\t9605\t|\tspecies", "9605\t|\t207598\t|\tgenus", "207598\t|\t9604\t|\tsubfamily",
  "9604\t|\t314295\t|\tfamily", "314295\t|\t9526\t|\tsuperfamily",
  "9526\t|\t314293\t|\tparvorder", "314293\t|\t376913\t|\tinfraorder",
  "376913\t|\t9443\t|\tsuborder", "9443\t|\t314146\t|\torder",
  "314146\t|\t1437010\t|\tsuperorder", "1437010\t|\t9347\t|\tno rank",
  "9347\t|\t32525\t|\tno rank", "32525\t|\t40674\t|\tno rank",
  "40674\t|\t32524\t|\tclass", "32524\t|\t32523\t|\tno rank", "32523\t|\t1338369\t|\tno rank",
  "1338369\t|\t8287\t|\tno rank", "8287\t|\t117571\t|\tno rank",
  "117571\t|\t117570\t|\tno rank", "117570\t|\t7776\t|\tno rank",
  "7776\t|\t7742\t|\tno rank", "7742\t|\t89593\t|\tno rank", "89593\t|\t7711\t|\tsubphylum",
  "7711\t|\t33511\t|\tphylum", "33511\t|\t33213\t|\tno rank", "33213\t|\t6072\t|\tno rank",
  "6072\t|\t33208\t|\tno rank", "33208\t|\t33154\t|\tkingdom",
  "33154\t|\t2759\t|\tno rank", "2759\t|\t131567\t|\tdomain",
  "131567\t|\t1\t|\tno rank"
)
writeLines(nodesText,tmpFile)
taxaNodes<-read.nodes(tmpFile)
#> Warning: taxonomizr is moving from data.table to SQLite databases to improve performance. This will require changing nodes and names processing. Please see ?read.nodes.sql or ?taxonomizrSwitch
getTaxonomy2(c(9606,9605),taxaNodes,taxaNames,mc.cores=1)
#> Warning: taxonomizr is moving from data.table to SQLite databases to improve performance. This will require changing nodes and names processing. Please see ?getTaxonomy or ?taxonomizrSwitch
#>      domain      phylum     class      order      family      genus 
#> 9606 "Eukaryota" "Chordata" "Mammalia" "Primates" "Hominidae" "Homo"
#> 9605 "Eukaryota" "Chordata" "Mammalia" "Primates" "Hominidae" "Homo"
#>      species       
#> 9606 "Homo sapiens"
#> 9605 NA