gganatogram

gganatogram

https://github.com/jespermaag/gganatogram

Create anatogram images for different organisms.
For now only human male is available.
. The idea for this package came to me after seeing a twitter post for ggseg. I thougt something similar would be good for whole organisms. Since I could not find anything similar, I deided to give creating my first R package a go.

This package uses the tissue coordinates from the figure in ArrayExpress Expression Atlas.
https://www.ebi.ac.uk/gxa/home
https://github.com/ebi-gene-expression-group/anatomogram

Generation of package

Download all svg

To create the package, I first had to retrive the coordinates of all tissues from the Expression Atlas. The anatogram package was downloaded using the following command.

npm install --save anatomogram

Extract coordinates from svg

I used python to extract the coordinates, names, and transformations for each tissue in the homo_sapiens.mal.svg file. This code takes the svg and writes the name, coordinates, and transformation to a file, which is then processed in R.

from xml.dom import minidom
import os
import csv
organism="homo_sapiens.male"
doc = minidom.parse(organism + ".svg")
your_csv_file = open(organism + '_coords.tsv', 'w')
wr = csv.writer(your_csv_file, delimiter='\t')
for path in doc.getElementsByTagName('path'):
    if "outline" in path.getAttribute('id') or "LAYER_OUTLINE" in path.getAttribute('id') :
        wr.writerow([path.getAttribute('id') ,path.getAttribute('d'), str('matrix(1,0,0,1,0,0)')]) 
    if path.getAttribute('id').startswith('UB'):
        wr.writerow([path.getElementsByTagName('title')[0].firstChild.nodeValue, path.getAttribute('d'), str('matrix(1,0,0,1,0,0)')])
    if path.parentNode.attributes['id'].value.startswith('UB'):
        if "transform" not in list(path.parentNode.attributes.keys()): 
            wr.writerow([path.parentNode.attributes['id'].value, path.getAttribute('d'), str('matrix(1,0,0,1,0,0)')])
for path in doc.getElementsByTagName('g')[5:]:
    if len(path.childNodes) >0 :
        for node in path.childNodes:
            if "text" not in node.nodeName:
                print(node.nodeName)
                print(node.attributes.keys())
                if 'd' in list(node.attributes.keys()): 
                    nodeVal = node.attributes['d'].value
                    wr.writerow([path.childNodes[1].attributes['id'].value, nodeVal,  path.attributes['transform'].value])
your_csv_file.close()

Process the coordinates in R, and create a package

I created a function to extract the coordinates into a data frame and transformed the data. Some manual editing was required to get the right coordinates, and remove some tissues that did not work

extractCoords <- function(coords, name, transMatrix) {
    c <- strsplit(coords, " ")
    c[[1]]

    c[[1]][c(grep("M", c[[1]] )+1,grep("M", c[[1]] )+2)] <- NA

    c[[1]] <- c[[1]][grep("[[:alpha:]]", c[[1]], invert=TRUE)]

    anatCoord <- as.data.frame(lapply( c, function(u) 
        matrix(as.numeric(unlist(strsplit(u, ","))),ncol=2,byrow=TRUE) ))
    anatCoord$X2[is.na(anatCoord$X1)] <- NA
    anatCoord$X1[is.na(anatCoord$X2)] <- NA
    anatCoord$id <- name

    if (length(transMatrix[grep('matrix', transMatrix)])>0) {
        transForm <- gsub('matrix\\(|\\)', '', transMatrix)
        transForm <- as.numeric(strsplit(transForm, ",")[[1]])
       
        anatCoord$x <-  (anatCoord$X1* transForm[1]) + (anatCoord$X1* transForm[3]) + transForm[5]
        anatCoord$y <-  (anatCoord$X2* transForm[2]) + (anatCoord$X2* transForm[4]) + transForm[6]
    } else if (grep('translate', transMatrix)) {
        transForm <- gsub('translate\\(|\\)', '', transMatrix)
        transForm <- as.numeric(strsplit(transForm, ",")[[1]])
         if(name =='leukocyte' & transForm[1]==4.5230265) {
            transForm <- c(103.63591+4.5230265,-47.577078+11.586659)
        }
        anatCoord$x <-  anatCoord$X1 + transForm[1]
        anatCoord$y <-  anatCoord$X2 + transForm[2]
    }
    #anatCoord <- anatCoord[complete.cases(anatCoord),]
    if (name == 'bronchus') {
        if (max(anatCoord$x, na.rm=T) >100 ) {
            anatCoord$x <- NA
            anatCoord$y <- NA
        }
    }
    if( any(anatCoord[complete.cases(anatCoord),]$x < -5)) {
            anatCoord$x <- NA
            anatCoord$y <- NA
    }

    if( any(anatCoord[complete.cases(anatCoord),]$x > 150)) {
            anatCoord$x <- NA
            anatCoord$y <- NA
    }
    return(anatCoord)
}

Finally, I processed the python output using the extractCoords function.

hsMale <- read.table('homo_sapiens.male_coords.tsv', sep='\t', stringsAsFactors=F)

hgMale_list <- list()
for (i in 1:nrow(hsMale)) {
    df <- extractCoords(hsMale$V2[i], hsMale$V1[i],  hsMale$V3[i])

    hgMale_list[[i]] <- extractCoords(hsMale$V2[i], hsMale$V1[i],  hsMale$V3[i])
    names(hgMale_list)[i] <-  paste0(hsMale$V1[i],'-', i)
}
names(hgMale_list) <- gsub('-.*', '', names(hgMale_list))

The resulting list was then used as the base for the gganatogram package. The package can be installed from github using the instructions below.

Install

Install from github using devtools.

## install from Github
devtools::install_github("jespermaag/gganatogram")

Usage

This package requires ggplot2 and ggpolypath

library(ggplot2)
library(ggpolypath)
library(gganatogram)
library(dplyr)

In order to use the function gganatogram, you need to have a data frame with organ, colour, and value if you want to.

organPlot <- data.frame(organ = c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"), 
 type = c("circulation", "circulation",  "nervous system", "nervous system", "digestion", "digestion", "digestion"), 
 colour = c("red", "red", "purple", "purple", "orange", "orange", "orange"), 
 value = c(10, 5, 1, 8, 2, 5, 5), 
 stringsAsFactors=F)

head(organPlot)
##       organ           type colour value
## 1     heart    circulation    red    10
## 2 leukocyte    circulation    red     5
## 3     nerve nervous system purple     1
## 4     brain nervous system purple     8
## 5     liver      digestion orange     2
## 6   stomach      digestion orange     5

Using the function gganatogram with the filling the organs based on colour.

gganatogram(data=organPlot, fillOutline='#a6bddb', organism='human', sex='male', fill="colour")

plot of chunk organPlot

We can use the ggplot themes and functions to adjust the plots

gganatogram(data=organPlot, fillOutline='#a6bddb', organism='human', sex='male', fill="colour") + 
theme_void()

plot of chunk organPlotvoid

We can also plot all tissues available using hgMale_key, which is an available object

hgMale_key$organ
##  [1] "bone marrow"               "frontal cortex"           
##  [3] "prefrontal cortex"         "gastroesophageal junction"
##  [5] "caecum"                    "ileum"                    
##  [7] "rectum"                    "nose"                     
##  [9] "tongue"                    "penis"                    
## [11] "nasal pharynx"             "spinal cord"              
## [13] "throat"                    "diaphragm"                
## [15] "liver"                     "stomach"                  
## [17] "spleen"                    "duodenum"                 
## [19] "gall bladder"              "pancreas"                 
## [21] "colon"                     "small intestine"          
## [23] "appendix"                  "urinary bladder"          
## [25] "bone"                      "cartilage"                
## [27] "esophagus"                 "skin"                     
## [29] "brain"                     "heart"                    
## [31] "lymph_node"                "skeletal_muscle"          
## [33] "leukocyte"                 "temporal_lobe"            
## [35] "atrial_appendage"          "coronary_artery"          
## [37] "hippocampus"               "vas_deferens"             
## [39] "seminal_vesicle"           "epididymis"               
## [41] "tonsil"                    "lung"                     
## [43] "trachea"                   "bronchus"                 
## [45] "nerve"                     "kidney"
gganatogram(data=hgMale_key, fillOutline='#a6bddb', organism='human', sex='male', fill="colour") +theme_void()

plot of chunk organPlotAll

To skip the outline of the graph, use outline=F

organPlot %>%
    dplyr::filter(type %in% c('circulation', 'nervous system')) %>%
gganatogram(outline=F, fillOutline='#a6bddb', organism='human', sex='male', fill="colour") + 
theme_void()

plot of chunk organPlotSubset

We can fill the tissues based on the values given to each organ

gganatogram(data=organPlot, fillOutline='#a6bddb', organism='human', sex='male', fill="value") + 
theme_void() +
scale_fill_gradient(low = "white", high = "red")

plot of chunk organPlotValue

We can also use facet_wrap to compare groups.
First create add two data frames together with different values and the conditions in the type column

compareGroups <- rbind(data.frame(organ = c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"), 
  colour = c("red", "red", "purple", "purple", "orange", "orange", "orange"), 
 value = c(10, 5, 1, 8, 2, 5, 5), 
 type = rep('Normal', 7), 
 stringsAsFactors=F),
 data.frame(organ = c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"), 
  colour = c("red", "red", "purple", "purple", "orange", "orange", "orange"), 
 value = c(5, 5, 10, 8, 2, 5, 5), 
 type = rep('Cancer', 7), 
 stringsAsFactors=F))
gganatogram(data=compareGroups, fillOutline='#a6bddb', organism='human', sex='male', fill="value") + 
    theme_void() +
    facet_wrap(~type) +
    scale_fill_gradient(low = "white", high = "red") 

plot of chunk Condition

gganatogram(data=hgMale_key, fillOutline='#a6bddb', organism='human', sex='male', fill="colour") +
    theme_void() +
    facet_wrap(~type)

plot of chunk organPlotAllWrap

gganatogram(data=hgMale_key, outline=F, fillOutline='#a6bddb', organism='human', sex='male', fill="colour") +
    theme_void() +
    facet_wrap(~type, scale='free')

plot of chunk organPlotAllWrapFree

organtype <- organPlot
organtype %>%
    mutate(type=organ) %>%
    gganatogram( outline=F, fillOutline='#a6bddb', organism='human', sex='male', fill="colour") +
        theme_void() +
        facet_wrap(~type, scale='free')

plot of chunk organPlotWrapFree