Tripobiome Humman ANCON-BC

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(vegan)

Cargando paquete requerido: permute
Cargando paquete requerido: lattice
This is vegan 2.6-8

library(ggstatsplot)

You can cite this package as:
     Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
     Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167

library(BiodiversityR)

Cargando paquete requerido: tcltk

Warning in check_dep_version(): ABI version mismatch: 
lme4 was built with Matrix ABI version 1
Current Matrix ABI version is 2
Please re-install lme4 from source or restore original 'Matrix' package

BiodiversityR 2.16-1: Use command BiodiversityRGUI() to launch the Graphical User Interface; 
to see changes use BiodiversityRGUI(changeLog=TRUE, backward.compatibility.messages=TRUE)

library(mixOmics)

Cargando paquete requerido: MASS

Adjuntando el paquete: 'MASS'

The following object is masked from 'package:dplyr':

    select


Loaded mixOmics 6.28.0
Thank you for using mixOmics!
Tutorials: http://mixomics.org
Bookdown vignette: https://mixomicsteam.github.io/Bookdown
Questions, issues: Follow the prompts at http://mixomics.org/contact-us
Cite us:  citation('mixOmics')


Adjuntando el paquete: 'mixOmics'

The following object is masked from 'package:vegan':

    pca

The following object is masked from 'package:purrr':

    map

library(ggsci)
library(ANCOMBC)

pathabundance_files <- dir("/storage2/tripobiome/original/", recursive = T, pattern = "pathabundance",full.names = T )
pathcoverage_files <- dir("/storage2/tripobiome/original/", recursive = T, pattern = "pathcoverage",full.names = T)



metadata <- read_csv("/storage2/tripobiome/Metadata.csv")
metadata <- metadata %>% dplyr::select(Sample,tto_previo_no_completo,tto_parasiticida_correcto,GRUPO,sexo,bristol) %>% 
  mutate(GRUPO2 = ifelse(GRUPO == "Control no infectado","Control","Chagas")) %>% 
  replace_na(list(tto_parasiticida_correcto = "No")) %>% 
  mutate(tto_parasiticida_correcto = ifelse(GRUPO == "Control no infectado","Control",tto_parasiticida_correcto))

Transform to tables

#load("/storage2/tripobiome/HumannANCOMSession.rds")

pathabundance_table <- tibble()

for(i in 1:length(pathabundance_files))
{
  name = basename(pathabundance_files[i])
  tmp_table <- vroom::vroom(pathabundance_files[i])
  tmp_table <- tmp_table %>% dplyr::rename(Pathway = 1, Abundance = 2) %>% mutate(Sample =name)

  pathabundance_table <- bind_rows(pathabundance_table,tmp_table)

}
pathabundance_table <- pathabundance_table %>% mutate(Sample = str_replace(Sample,"_pathabundance.tsv",""))



pathcoverage_table <- tibble()

for(i in 1:length(pathcoverage_files))
{
  name = basename(pathcoverage_files[i])
  tmp_table <- vroom::vroom(pathcoverage_files[i])
  tmp_table <- tmp_table %>% dplyr::rename(Pathway = 1, Coverage = 2) %>% mutate(Sample =name)

  pathcoverage_table <- bind_rows(pathcoverage_table,tmp_table)

}
pathcoverage_table <- pathcoverage_table %>% mutate(Sample = str_replace(Sample,"_pathcoverage.tsv",""))

path_table <- dplyr::full_join(pathabundance_table,pathcoverage_table)

path_table <- path_table %>% separate(Sample,c("Sample","Position","Lane","Read","number"), sep="_") %>% group_by(Pathway,Sample) %>% summarise(Abundance = sum(Abundance), Coverage = max(Coverage))

path_table %>% 
  group_by(Sample) %>% 
  summarise(Total = sum(Abundance)) %>% 
  ggplot(aes(x = fct_reorder(Sample,Total), y = Total)) + 
  geom_col() + 
  coord_flip() + 
  labs(title = "Total Reads Pathway")

descartes <- c("S3","S27","S2","S7","S75")

Pathway

path_table_summ <- path_table %>% 
  separate(Pathway, c("Pathway","Taxonomy"), sep = "\\|") %>% 
  group_by(Pathway,Sample) %>% 
  summarise(Abundance = sum(Abundance), Coverage = sum(Coverage))

Warning: Expected 2 pieces. Missing pieces filled with `NA` in 22834 rows [1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

`summarise()` has grouped output by 'Pathway'. You can override using the
`.groups` argument.

path_table_summ <- path_table_summ %>% 
  filter(!(Sample %in% descartes)) %>% 
  group_by(Sample) %>% 
  mutate(TotalAbundance = sum(Abundance)) %>% 
  ungroup()

min_abundance <- round(min(path_table_summ$TotalAbundance))

path_table_summ_matrix <- path_table_summ %>% 
  filter(Coverage > 0.5) %>% 
  group_by(Sample,Pathway) %>% 
  summarise(Abundance = round(sum(Abundance))) %>% 
  pivot_wider(names_from = "Pathway", values_from = "Abundance", values_fill = 0)

`summarise()` has grouped output by 'Sample'. You can override using the
`.groups` argument.

pre-processing data

library(mia)

Cargando paquete requerido: SummarizedExperiment

Cargando paquete requerido: MatrixGenerics

Cargando paquete requerido: matrixStats


Adjuntando el paquete: 'matrixStats'

The following object is masked from 'package:dplyr':

    count


Adjuntando el paquete: 'MatrixGenerics'

The following objects are masked from 'package:matrixStats':

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
    rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
    rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
    rowWeightedSds, rowWeightedVars

Cargando paquete requerido: GenomicRanges

Cargando paquete requerido: stats4

Cargando paquete requerido: BiocGenerics


Adjuntando el paquete: 'BiocGenerics'

The following objects are masked from 'package:lubridate':

    intersect, setdiff, union

The following objects are masked from 'package:dplyr':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs

The following objects are masked from 'package:base':

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
    tapply, union, unique, unsplit, which.max, which.min

Cargando paquete requerido: S4Vectors


Adjuntando el paquete: 'S4Vectors'

The following objects are masked from 'package:lubridate':

    second, second<-

The following objects are masked from 'package:dplyr':

    first, rename

The following object is masked from 'package:tidyr':

    expand

The following object is masked from 'package:utils':

    findMatches

The following objects are masked from 'package:base':

    expand.grid, I, unname

Cargando paquete requerido: IRanges


Adjuntando el paquete: 'IRanges'

The following object is masked from 'package:lubridate':

    %within%

The following objects are masked from 'package:dplyr':

    collapse, desc, slice

The following object is masked from 'package:purrr':

    reduce

Cargando paquete requerido: GenomeInfoDb

Cargando paquete requerido: Biobase

Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.


Adjuntando el paquete: 'Biobase'

The following object is masked from 'package:MatrixGenerics':

    rowMedians

The following objects are masked from 'package:matrixStats':

    anyMissing, rowMedians

Cargando paquete requerido: SingleCellExperiment

Cargando paquete requerido: TreeSummarizedExperiment

Cargando paquete requerido: Biostrings

Cargando paquete requerido: XVector


Adjuntando el paquete: 'XVector'

The following object is masked from 'package:purrr':

    compact


Adjuntando el paquete: 'Biostrings'

The following object is masked from 'package:base':

    strsplit

Cargando paquete requerido: MultiAssayExperiment


Adjuntando el paquete: 'mia'

The following objects are masked from 'package:dplyr':

    full_join, inner_join, left_join, right_join

library(ANCOMBC)

path_mat <- path_table_summ %>% 
  mutate(Sample = paste0("S",Sample)) %>% 
  group_by(Pathway,Sample) %>% 
  summarise(Abundance = sum(Abundance)) %>% 
  ungroup() %>% 
  filter(!(Sample %in% descartes)) %>% 
  group_by(Pathway) %>% 
  mutate(vari = var(Abundance), N = n()) %>% 
  filter(N > 5) %>% 
  ungroup() %>% 
  dplyr::select(Sample,Pathway,Abundance) %>% 
  pivot_wider(names_from = Sample, values_from = Abundance, values_fill = 0) %>% 
  column_to_rownames("Pathway") %>% as.matrix()

`summarise()` has grouped output by 'Pathway'. You can override using the
`.groups` argument.

assays = SimpleList(counts = path_mat)

smd <- metadata %>% 
  filter(Sample %in% colnames(path_mat)) %>% 
  arrange(match(Sample,colnames(path_mat))) %>% 
  column_to_rownames("Sample")

smd <- DataFrame(smd)

tse <- TreeSummarizedExperiment(assays = assays,
                                colData = smd)

DA

out_grupo2 <- ancombc2(data = tse, fix_formula = "GRUPO2")
out_tto <- ancombc2(data = tse, fix_formula = "tto_parasiticida_correcto",)

Results

out_grupo2$res %>% dplyr::rename(Pathway = 1) %>% 
  filter(diff_GRUPO2Control ==TRUE) %>% 
    ggplot(aes(x = reorder(Pathway,lfc_GRUPO2Control), y = lfc_GRUPO2Control, fill = lfc_GRUPO2Control)) + 
    geom_bar(stat = "identity", width = 0.7, color = "black", 
             position = position_dodge(width = 0.4)) +
    geom_errorbar(aes(ymin = lfc_GRUPO2Control - se_GRUPO2Control, ymax = lfc_GRUPO2Control + se_GRUPO2Control), 
                  width = 0.2, position = position_dodge(0.05), color = "black") +
    scale_fill_gsea(name= "Log fold Change") +
  labs(x = NULL, y = "Log fold change", 
         title = "Log fold changes as one unit increase of Chagas vs Control") + 
  coord_flip() +
  theme_bw() + 
    theme(plot.title = element_text(hjust = 0.5),
          panel.grid.minor.y = element_blank(),
          axis.text.x = element_text(angle = 60, hjust = 1))

out_tto$res %>% dplyr::rename(Pathway = 1) %>% 
  filter(diff_tto_parasiticida_correctoSi ==TRUE) %>% 
    ggplot(aes(x = reorder(Pathway,lfc_tto_parasiticida_correctoSi), y = lfc_tto_parasiticida_correctoSi, fill = lfc_tto_parasiticida_correctoSi)) + 
    geom_bar(stat = "identity", width = 0.7, color = "black", 
             position = position_dodge(width = 0.4)) +
    geom_errorbar(aes(ymin = lfc_tto_parasiticida_correctoSi - se_tto_parasiticida_correctoSi, ymax = lfc_tto_parasiticida_correctoSi + se_tto_parasiticida_correctoSi), 
                  width = 0.2, position = position_dodge(0.05), color = "black") +
  scale_fill_gsea(name= "Log fold Change") +
  labs(x = NULL, y = "Log fold change", 
         title = "Log fold changes as one unit increase of Tratamiento Parasiticida Correcto") + 
  coord_flip() +
  theme_bw() + 
    theme(plot.title = element_text(hjust = 0.5),
          panel.grid.minor.y = element_blank(),
          axis.text.x = element_text(angle = 60, hjust = 1))

Alpha y Beta basados en la normalización de ANCOMBC

bc_counts <- out_grupo2$bias_correct_log_table

bc_counts[is.na(bc_counts)] = 0

bc_counts = (10^bc_counts)

diversities <- estimateR(round(bc_counts) %>% t(), parallel(40))%>% 
   t() %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  dplyr::inner_join(metadata)

Joining with `by = join_by(Sample)`

ggbetweenstats(diversities, x = GRUPO2, y = S.obs, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = S.chao1, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = se.chao1, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = S.ACE, type = "robust")

ggbetweenstats(diversities, x = GRUPO2, y = se.ACE, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = S.obs, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = S.chao1, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = se.chao1, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = S.ACE, type = "robust")

ggbetweenstats(diversities, x = tto_parasiticida_correcto, y = se.ACE, type = "robust")

bc_dist <- vegdist(bc_counts %>% t(), method = "bray")


data_plot_bc <- metaMDS(bc_dist)

Run 0 stress 0.1718587 
Run 1 stress 0.1626342 
... New best solution
... Procrustes: rmse 0.07699866  max resid 0.3605421 
Run 2 stress 0.1835689 
Run 3 stress 0.164288 
Run 4 stress 0.171504 
Run 5 stress 0.1739788 
Run 6 stress 0.1736108 
Run 7 stress 0.1857706 
Run 8 stress 0.1684261 
Run 9 stress 0.1819315 
Run 10 stress 0.1666154 
Run 11 stress 0.1680009 
Run 12 stress 0.1821677 
Run 13 stress 0.1727342 
Run 14 stress 0.183556 
Run 15 stress 0.1820296 
Run 16 stress 0.1654748 
Run 17 stress 0.17482 
Run 18 stress 0.1675087 
Run 19 stress 0.1662152 
Run 20 stress 0.1679837 
*** Best solution was not repeated -- monoMDS stopping criteria:
     1: no. of iterations >= maxit
    18: stress ratio > sratmax
     1: scale factor of the gradient < sfgrmin

data_plot_bc$points %>% 
  as_tibble(rownames ="Sample") %>% 
  dplyr::inner_join(metadata) %>% 
  ggplot(aes(x = MDS1, y = MDS2, label = Sample , fill = GRUPO2)) + geom_label()

Joining with `by = join_by(Sample)`

data_plot_bc$points %>% 
  as_tibble(rownames ="Sample") %>% 
  dplyr::inner_join(metadata) %>% 
  ggplot(aes(x = MDS1, y = MDS2, label = Sample , fill = tto_parasiticida_correcto)) + geom_label()

Joining with `by = join_by(Sample)`

D_meta_bc <- bc_dist %>% as.matrix() %>% as_tibble(rownames= "Sample") %>% dplyr::inner_join(metadata)

Joining with `by = join_by(Sample)`

D_meta_bc_dist = D_meta_bc %>% 
  dplyr::select(Sample:S9) %>% 
  as.data.frame() %>% 
  column_to_rownames("Sample") %>% 
  as.dist()

D_meta_bc_data <- D_meta_bc %>% 
  dplyr::select(Sample,GRUPO2,tto_parasiticida_correcto,GRUPO,sexo,bristol) %>% 
  mutate(GRUPO2 = as.factor(GRUPO2),
         tto_parasiticida_correcto = as.factor(tto_parasiticida_correcto)) %>% column_to_rownames("Sample")

adonis2(D_meta_bc_dist ~ GRUPO2 + tto_parasiticida_correcto, data = D_meta_bc_data)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = D_meta_bc_dist ~ GRUPO2 + tto_parasiticida_correcto, data = D_meta_bc_data)
         Df SumOfSqs      R2      F Pr(>F)
Model     2   0.5373 0.02932 1.0422   0.34
Residual 69  17.7862 0.97068              
Total    71  18.3236 1.00000

anosim(D_meta_bc_dist, D_meta_bc_data$GRUPO2) %>% 
  plot(ylab = "Rank Similarities", xlab = "Grupo2")

anosim(D_meta_bc_dist, D_meta_bc_data$tto_parasiticida_correcto)%>% 
  plot(ylab = "Rank Similarities", xlab = "Tratamiento Correcto")

plsda(D_meta_bc_dist,D_meta_bc_data$GRUPO2) %>% plotIndiv(ellipse = TRUE)

plsda(D_meta_bc_dist,D_meta_bc_data$tto_parasiticida_correcto) %>% plotIndiv(ellipse = TRUE)

Correlations

Load data

OTUS Ruminococcaceae, Faecalibacterium, Prevotella y Bacteroides, Parabacteroides spp, Enterococcus hirae, Prevotella

source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")

files = dir("/storage2/tripobiome/taxonomia/",pattern = "mic.kreport",full.names = T)

OTUs <- import_kraken(files,threads = 40)

Cargando paquete requerido: doParallel

Cargando paquete requerido: iterators

Cargando paquete requerido: parallel

OTUs <- OTUs %>% mutate(Sample = gsub(".mic.kreport","",Sample))

OTU_rare <- OTUs %>%
   filter(grepl("Bacteria",Fullname)) %>%
   filter(grepl("S",TaxRank)) %>%
   dplyr::select(Name2,Reads,Sample) %>% 
   distinct() %>% 
   pivot_wider(names_from = Name2, values_from = Reads, values_fill = 0) %>%
   column_to_rownames("Sample") 

OTU_rare <- rrarefy(OTU_rare, sample = min(rowSums(OTU_rare)))

OTU_rare <- OTU_rare %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>%
  pivot_longer(names_to = "OTU", values_to = "Abundance", -Sample) %>% 
  group_by(OTU) %>% 
  mutate(Var = var(Abundance), Mean = mean(Abundance)) %>% 
  filter(Var > 0.25*Mean, Mean > 10) %>%
  ungroup() %>% 
  dplyr::select(Sample,OTU,Abundance) %>% 
  pivot_wider(names_from = OTU, values_from = Abundance, values_fill = 0) %>%
  column_to_rownames("Sample")

Pathway vs Taxonomy

commons <- path_mat %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  dplyr::select(Sample) %>% 
  semi_join(OTU_rare %>%
              as.data.frame() %>% 
              rownames_to_column("Sample") %>% 
              dplyr::select(Sample))

Joining with `by = join_by(Sample)`

path_mat_comm <- path_mat %>%
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  filter(Sample %in% commons$Sample) %>% 
  arrange(Sample) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix()

OTU_matrix <- OTU_rare %>%
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  filter(Sample %in% commons$Sample) %>% 
  arrange(Sample) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix()

OTU_vs_Path<- cor(path_mat_comm,OTU_matrix, method = "spearman")


library(igraph)


Adjuntando el paquete: 'igraph'

The following object is masked from 'package:Biostrings':

    union

The following object is masked from 'package:XVector':

    path

The following object is masked from 'package:GenomicRanges':

    union

The following object is masked from 'package:IRanges':

    union

The following object is masked from 'package:S4Vectors':

    union

The following objects are masked from 'package:BiocGenerics':

    normalize, path, union

The following object is masked from 'package:vegan':

    diversity

The following object is masked from 'package:permute':

    permute

The following objects are masked from 'package:lubridate':

    %--%, union

The following objects are masked from 'package:dplyr':

    as_data_frame, groups, union

The following objects are masked from 'package:purrr':

    compose, simplify

The following object is masked from 'package:tidyr':

    crossing

The following object is masked from 'package:tibble':

    as_data_frame

The following objects are masked from 'package:stats':

    decompose, spectrum

The following object is masked from 'package:base':

    union

library(ggraph)
library(tidygraph)


Adjuntando el paquete: 'tidygraph'

The following object is masked from 'package:igraph':

    groups

The following objects are masked from 'package:mia':

    full_join, inner_join, left_join, right_join

The following object is masked from 'package:XVector':

    slice

The following objects are masked from 'package:IRanges':

    active, slice

The following objects are masked from 'package:S4Vectors':

    active, rename

The following object is masked from 'package:MASS':

    select

The following object is masked from 'package:stats':

    filter

OTU_vs_Path %>% 
  as.data.frame() %>% 
  rownames_to_column("Pathway") %>% 
  pivot_longer(names_to = "OTU", values_to = "Correlation",-Pathway) %>% 
  filter(Correlation > 0.45 ) %>% 
  rename(from = Pathway, to = OTU) %>% 
  as_tbl_graph(nodes = nodes) %>%
  ggraph(layout = "gem") + 
  geom_node_point()+
  geom_node_text(aes(label = name),repel = TRUE) +
  geom_edge_link() +
  theme_void()