Tripobiome Humman ANCON-BC

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
Cargando paquete requerido: permute
Cargando paquete requerido: lattice
This is vegan 2.6-8
library(ggstatsplot)
You can cite this package as:
     Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
     Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(BiodiversityR)
Cargando paquete requerido: tcltk
Warning in check_dep_version(): ABI version mismatch: 
lme4 was built with Matrix ABI version 1
Current Matrix ABI version is 2
Please re-install lme4 from source or restore original 'Matrix' package
BiodiversityR 2.16-1: Use command BiodiversityRGUI() to launch the Graphical User Interface; 
to see changes use BiodiversityRGUI(changeLog=TRUE, backward.compatibility.messages=TRUE)
library(mixOmics)
Cargando paquete requerido: MASS

Adjuntando el paquete: 'MASS'

The following object is masked from 'package:dplyr':

    select


Loaded mixOmics 6.28.0
Thank you for using mixOmics!
Tutorials: http://mixomics.org
Bookdown vignette: https://mixomicsteam.github.io/Bookdown
Questions, issues: Follow the prompts at http://mixomics.org/contact-us
Cite us:  citation('mixOmics')


Adjuntando el paquete: 'mixOmics'

The following object is masked from 'package:vegan':

    pca

The following object is masked from 'package:purrr':

    map
library(ggsci)
library(ANCOMBC)
pathabundance_files <- dir("/storage2/tripobiome/original/", recursive = T, pattern = "pathabundance",full.names = T )
pathcoverage_files <- dir("/storage2/tripobiome/original/", recursive = T, pattern = "pathcoverage",full.names = T)



metadata <- read_csv("/storage2/tripobiome/Metadata.csv")
metadata <- metadata %>% dplyr::select(Sample,tto_previo_no_completo,tto_parasiticida_correcto,GRUPO,sexo,bristol) %>% 
  mutate(GRUPO2 = ifelse(GRUPO == "Control no infectado","Control","Chagas")) %>% 
  replace_na(list(tto_parasiticida_correcto = "No")) %>% 
  mutate(tto_parasiticida_correcto = ifelse(GRUPO == "Control no infectado","Control",tto_parasiticida_correcto))

Transform to tables

#load("/storage2/tripobiome/HumannANCOMSession.rds")

pathabundance_table <- tibble()

for(i in 1:length(pathabundance_files))
{
  name = basename(pathabundance_files[i])
  tmp_table <- vroom::vroom(pathabundance_files[i])
  tmp_table <- tmp_table %>% dplyr::rename(Pathway = 1, Abundance = 2) %>% mutate(Sample =name)

  pathabundance_table <- bind_rows(pathabundance_table,tmp_table)

}
pathabundance_table <- pathabundance_table %>% mutate(Sample = str_replace(Sample,"_pathabundance.tsv",""))



pathcoverage_table <- tibble()

for(i in 1:length(pathcoverage_files))
{
  name = basename(pathcoverage_files[i])
  tmp_table <- vroom::vroom(pathcoverage_files[i])
  tmp_table <- tmp_table %>% dplyr::rename(Pathway = 1, Coverage = 2) %>% mutate(Sample =name)

  pathcoverage_table <- bind_rows(pathcoverage_table,tmp_table)

}
pathcoverage_table <- pathcoverage_table %>% mutate(Sample = str_replace(Sample,"_pathcoverage.tsv",""))

path_table <- dplyr::full_join(pathabundance_table,pathcoverage_table)

path_table <- path_table %>% separate(Sample,c("Sample","Position","Lane","Read","number"), sep="_") %>% group_by(Pathway,Sample) %>% summarise(Abundance = sum(Abundance), Coverage = max(Coverage))
path_table %>% 
  group_by(Sample) %>% 
  summarise(Total = sum(Abundance)) %>% 
  ggplot(aes(x = fct_reorder(Sample,Total), y = Total)) + 
  geom_col() + 
  coord_flip() + 
  labs(title = "Total Reads Pathway")

descartes <- c("S3","S27","S2","S7","S75")

Pathway

path_table_summ <- path_table %>% 
  separate(Pathway, c("Pathway","Taxonomy"), sep = "\\|") %>% 
  group_by(Pathway,Sample) %>% 
  summarise(Abundance = sum(Abundance), Coverage = sum(Coverage))
Warning: Expected 2 pieces. Missing pieces filled with `NA` in 22834 rows [1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
`summarise()` has grouped output by 'Pathway'. You can override using the
`.groups` argument.
path_table_summ <- path_table_summ %>% 
  filter(!(Sample %in% descartes)) %>% 
  group_by(Sample) %>% 
  mutate(TotalAbundance = sum(Abundance)) %>% 
  ungroup()

min_abundance <- round(min(path_table_summ$TotalAbundance))

path_table_summ_matrix <- path_table_summ %>% 
  filter(Coverage > 0.5) %>% 
  group_by(Sample,Pathway) %>% 
  summarise(Abundance = round(sum(Abundance))) %>% 
  pivot_wider(names_from = "Pathway", values_from = "Abundance", values_fill = 0)
`summarise()` has grouped output by 'Sample'. You can override using the
`.groups` argument.

pre-processing data

library(mia)
Cargando paquete requerido: SummarizedExperiment
Cargando paquete requerido: MatrixGenerics
Cargando paquete requerido: matrixStats

Adjuntando el paquete: 'matrixStats'
The following object is masked from 'package:dplyr':

    count

Adjuntando el paquete: 'MatrixGenerics'
The following objects are masked from 'package:matrixStats':

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
    rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
    rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
    rowWeightedSds, rowWeightedVars
Cargando paquete requerido: GenomicRanges
Cargando paquete requerido: stats4
Cargando paquete requerido: BiocGenerics

Adjuntando el paquete: 'BiocGenerics'
The following objects are masked from 'package:lubridate':

    intersect, setdiff, union
The following objects are masked from 'package:dplyr':

    combine, intersect, setdiff, union
The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs
The following objects are masked from 'package:base':

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
    tapply, union, unique, unsplit, which.max, which.min
Cargando paquete requerido: S4Vectors

Adjuntando el paquete: 'S4Vectors'
The following objects are masked from 'package:lubridate':

    second, second<-
The following objects are masked from 'package:dplyr':

    first, rename
The following object is masked from 'package:tidyr':

    expand
The following object is masked from 'package:utils':

    findMatches
The following objects are masked from 'package:base':

    expand.grid, I, unname
Cargando paquete requerido: IRanges

Adjuntando el paquete: 'IRanges'
The following object is masked from 'package:lubridate':

    %within%
The following objects are masked from 'package:dplyr':

    collapse, desc, slice
The following object is masked from 'package:purrr':

    reduce
Cargando paquete requerido: GenomeInfoDb
Cargando paquete requerido: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Adjuntando el paquete: 'Biobase'
The following object is masked from 'package:MatrixGenerics':

    rowMedians
The following objects are masked from 'package:matrixStats':

    anyMissing, rowMedians
Cargando paquete requerido: SingleCellExperiment
Cargando paquete requerido: TreeSummarizedExperiment
Cargando paquete requerido: Biostrings
Cargando paquete requerido: XVector

Adjuntando el paquete: 'XVector'
The following object is masked from 'package:purrr':

    compact

Adjuntando el paquete: 'Biostrings'
The following object is masked from 'package:base':

    strsplit
Cargando paquete requerido: MultiAssayExperiment

Adjuntando el paquete: 'mia'
The following objects are masked from 'package:dplyr':

    full_join, inner_join, left_join, right_join
library(ANCOMBC)

path_mat <- path_table_summ %>% 
  mutate(Sample = paste0("S",Sample)) %>% 
  group_by(Pathway,Sample) %>% 
  summarise(Abundance = sum(Abundance)) %>% 
  ungroup() %>% 
  filter(!(Sample %in% descartes)) %>% 
  group_by(Pathway) %>% 
  mutate(vari = var(Abundance), N = n()) %>% 
  filter(N > 5) %>% 
  ungroup() %>% 
  dplyr::select(Sample,Pathway,Abundance) %>% 
  pivot_wider(names_from = Sample, values_from = Abundance, values_fill = 0) %>% 
  column_to_rownames("Pathway") %>% as.matrix()
`summarise()` has grouped output by 'Pathway'. You can override using the
`.groups` argument.
assays = SimpleList(counts = path_mat)

smd <- metadata %>% 
  filter(Sample %in% colnames(path_mat)) %>% 
  arrange(match(Sample,colnames(path_mat))) %>% 
  column_to_rownames("Sample")

smd <- DataFrame(smd)

tse <- TreeSummarizedExperiment(assays = assays,
                                colData = smd)

DA

out_grupo2 <- ancombc2(data = tse, fix_formula = "GRUPO2")
out_tto <- ancombc2(data = tse, fix_formula = "tto_parasiticida_correcto",)

Results

out_grupo2$res %>% dplyr::rename(Pathway = 1) %>% 
  filter(diff_GRUPO2Control ==TRUE) %>% 
    ggplot(aes(x = reorder(Pathway,lfc_GRUPO2Control), y = lfc_GRUPO2Control, fill = lfc_GRUPO2Control)) + 
    geom_bar(stat = "identity", width = 0.7, color = "black", 
             position = position_dodge(width = 0.4)) +
    geom_errorbar(aes(ymin = lfc_GRUPO2Control - se_GRUPO2Control, ymax = lfc_GRUPO2Control + se_GRUPO2Control), 
                  width = 0.2, position = position_dodge(0.05), color = "black") +
    scale_fill_gsea(name= "Log fold Change") +
  labs(x = NULL, y = "Log fold change", 
         title = "Log fold changes as one unit increase of Chagas vs Control") + 
  coord_flip() +
  theme_bw() + 
    theme(plot.title = element_text(hjust = 0.5),
          panel.grid.minor.y = element_blank(),
          axis.text.x = element_text(angle = 60, hjust = 1))

out_tto$res %>% dplyr::rename(Pathway = 1) %>% 
  filter(diff_tto_parasiticida_correctoSi ==TRUE) %>% 
    ggplot(aes(x = reorder(Pathway,lfc_tto_parasiticida_correctoSi), y = lfc_tto_parasiticida_correctoSi, fill = lfc_tto_parasiticida_correctoSi)) + 
    geom_bar(stat = "identity", width = 0.7, color = "black", 
             position = position_dodge(width = 0.4)) +
    geom_errorbar(aes(ymin = lfc_tto_parasiticida_correctoSi - se_tto_parasiticida_correctoSi, ymax = lfc_tto_parasiticida_correctoSi + se_tto_parasiticida_correctoSi), 
                  width = 0.2, position = position_dodge(0.05), color = "black") +
  scale_fill_gsea(name= "Log fold Change") +
  labs(x = NULL, y = "Log fold change", 
         title = "Log fold changes as one unit increase of Tratamiento Parasiticida Correcto") + 
  coord_flip() +
  theme_bw() + 
    theme(plot.title = element_text(hjust = 0.5),
          panel.grid.minor.y = element_blank(),
          axis.text.x = element_text(angle = 60, hjust = 1))

Alpha y Beta basados en la normalización de ANCOMBC

bc_counts <- out_grupo2$bias_correct_log_table

bc_counts[is.na(bc_counts)] = 0

bc_counts = (10^bc_counts)
diversities <- estimateR(round(bc_counts) %>% t(), parallel(40))%>% 
   t() %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  dplyr::inner_join(metadata)
Joining with `by = join_by(Sample)`
ggbetweenstats(diversities, x = GRUPO2, y = S.obs, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = S.chao1, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = se.chao1, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = S.ACE, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = se.ACE, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = S.obs, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = S.chao1, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = se.chao1, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = S.ACE, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = se.ACE, type = "robust")

bc_dist <- vegdist(bc_counts %>% t(), method = "bray")


data_plot_bc <- metaMDS(bc_dist)
Run 0 stress 0.1718587 
Run 1 stress 0.1626342 
... New best solution
... Procrustes: rmse 0.07699866  max resid 0.3605421 
Run 2 stress 0.1835689 
Run 3 stress 0.164288 
Run 4 stress 0.171504 
Run 5 stress 0.1739788 
Run 6 stress 0.1736108 
Run 7 stress 0.1857706 
Run 8 stress 0.1684261 
Run 9 stress 0.1819315 
Run 10 stress 0.1666154 
Run 11 stress 0.1680009 
Run 12 stress 0.1821677 
Run 13 stress 0.1727342 
Run 14 stress 0.183556 
Run 15 stress 0.1820296 
Run 16 stress 0.1654748 
Run 17 stress 0.17482 
Run 18 stress 0.1675087 
Run 19 stress 0.1662152 
Run 20 stress 0.1679837 
*** Best solution was not repeated -- monoMDS stopping criteria:
     1: no. of iterations >= maxit
    18: stress ratio > sratmax
     1: scale factor of the gradient < sfgrmin
data_plot_bc$points %>% 
  as_tibble(rownames ="Sample") %>% 
  dplyr::inner_join(metadata) %>% 
  ggplot(aes(x = MDS1, y = MDS2, label = Sample , fill = GRUPO2)) + geom_label()
Joining with `by = join_by(Sample)`

data_plot_bc$points %>% 
  as_tibble(rownames ="Sample") %>% 
  dplyr::inner_join(metadata) %>% 
  ggplot(aes(x = MDS1, y = MDS2, label = Sample , fill = tto_parasiticida_correcto)) + geom_label()
Joining with `by = join_by(Sample)`

D_meta_bc <- bc_dist %>% as.matrix() %>% as_tibble(rownames= "Sample") %>% dplyr::inner_join(metadata)
Joining with `by = join_by(Sample)`
D_meta_bc_dist = D_meta_bc %>% 
  dplyr::select(Sample:S9) %>% 
  as.data.frame() %>% 
  column_to_rownames("Sample") %>% 
  as.dist()

D_meta_bc_data <- D_meta_bc %>% 
  dplyr::select(Sample,GRUPO2,tto_parasiticida_correcto,GRUPO,sexo,bristol) %>% 
  mutate(GRUPO2 = as.factor(GRUPO2),
         tto_parasiticida_correcto = as.factor(tto_parasiticida_correcto)) %>% column_to_rownames("Sample")

adonis2(D_meta_bc_dist ~ GRUPO2 + tto_parasiticida_correcto, data = D_meta_bc_data)
Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = D_meta_bc_dist ~ GRUPO2 + tto_parasiticida_correcto, data = D_meta_bc_data)
         Df SumOfSqs      R2      F Pr(>F)
Model     2   0.5373 0.02932 1.0422   0.34
Residual 69  17.7862 0.97068              
Total    71  18.3236 1.00000              
anosim(D_meta_bc_dist, D_meta_bc_data$GRUPO2) %>% 
  plot(ylab = "Rank Similarities", xlab = "Grupo2")

anosim(D_meta_bc_dist, D_meta_bc_data$tto_parasiticida_correcto)%>% 
  plot(ylab = "Rank Similarities", xlab = "Tratamiento Correcto")

plsda(D_meta_bc_dist,D_meta_bc_data$GRUPO2) %>% plotIndiv(ellipse = TRUE)

plsda(D_meta_bc_dist,D_meta_bc_data$tto_parasiticida_correcto) %>% plotIndiv(ellipse = TRUE)

Correlations

Load data

OTUS Ruminococcaceae, Faecalibacterium, Prevotella y Bacteroides, Parabacteroides spp, Enterococcus hirae, Prevotella

source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")

files = dir("/storage2/tripobiome/taxonomia/",pattern = "mic.kreport",full.names = T)

OTUs <- import_kraken(files,threads = 40)
Cargando paquete requerido: doParallel
Cargando paquete requerido: iterators
Cargando paquete requerido: parallel
OTUs <- OTUs %>% mutate(Sample = gsub(".mic.kreport","",Sample))

OTU_rare <- OTUs %>%
   filter(grepl("Bacteria",Fullname)) %>%
   filter(grepl("S",TaxRank)) %>%
   dplyr::select(Name2,Reads,Sample) %>% 
   distinct() %>% 
   pivot_wider(names_from = Name2, values_from = Reads, values_fill = 0) %>%
   column_to_rownames("Sample") 

OTU_rare <- rrarefy(OTU_rare, sample = min(rowSums(OTU_rare)))

OTU_rare <- OTU_rare %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>%
  pivot_longer(names_to = "OTU", values_to = "Abundance", -Sample) %>% 
  group_by(OTU) %>% 
  mutate(Var = var(Abundance), Mean = mean(Abundance)) %>% 
  filter(Var > 0.25*Mean, Mean > 10) %>%
  ungroup() %>% 
  dplyr::select(Sample,OTU,Abundance) %>% 
  pivot_wider(names_from = OTU, values_from = Abundance, values_fill = 0) %>%
  column_to_rownames("Sample")

Pathway vs Taxonomy

commons <- path_mat %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  dplyr::select(Sample) %>% 
  semi_join(OTU_rare %>%
              as.data.frame() %>% 
              rownames_to_column("Sample") %>% 
              dplyr::select(Sample))
Joining with `by = join_by(Sample)`
path_mat_comm <- path_mat %>%
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  filter(Sample %in% commons$Sample) %>% 
  arrange(Sample) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix()

OTU_matrix <- OTU_rare %>%
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  filter(Sample %in% commons$Sample) %>% 
  arrange(Sample) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix()

OTU_vs_Path<- cor(path_mat_comm,OTU_matrix, method = "spearman")


library(igraph)

Adjuntando el paquete: 'igraph'

The following object is masked from 'package:Biostrings':

    union

The following object is masked from 'package:XVector':

    path

The following object is masked from 'package:GenomicRanges':

    union

The following object is masked from 'package:IRanges':

    union

The following object is masked from 'package:S4Vectors':

    union

The following objects are masked from 'package:BiocGenerics':

    normalize, path, union

The following object is masked from 'package:vegan':

    diversity

The following object is masked from 'package:permute':

    permute

The following objects are masked from 'package:lubridate':

    %--%, union

The following objects are masked from 'package:dplyr':

    as_data_frame, groups, union

The following objects are masked from 'package:purrr':

    compose, simplify

The following object is masked from 'package:tidyr':

    crossing

The following object is masked from 'package:tibble':

    as_data_frame

The following objects are masked from 'package:stats':

    decompose, spectrum

The following object is masked from 'package:base':

    union
library(ggraph)
library(tidygraph)

Adjuntando el paquete: 'tidygraph'

The following object is masked from 'package:igraph':

    groups

The following objects are masked from 'package:mia':

    full_join, inner_join, left_join, right_join

The following object is masked from 'package:XVector':

    slice

The following objects are masked from 'package:IRanges':

    active, slice

The following objects are masked from 'package:S4Vectors':

    active, rename

The following object is masked from 'package:MASS':

    select

The following object is masked from 'package:stats':

    filter
OTU_vs_Path %>% 
  as.data.frame() %>% 
  rownames_to_column("Pathway") %>% 
  pivot_longer(names_to = "OTU", values_to = "Correlation",-Pathway) %>% 
  filter(Correlation > 0.45 ) %>% 
  rename(from = Pathway, to = OTU) %>% 
  as_tbl_graph(nodes = nodes) %>%
  ggraph(layout = "gem") + 
  geom_node_point()+
  geom_node_text(aes(label = name),repel = TRUE) +
  geom_edge_link() +
  theme_void()