Nº of MAGs with AMRs
# AMR presence/absence
amr_presence <- genome_annotations %>%
filter(resistance_type == "AMR") %>%
dplyr::select(genome, resistance_target) %>%
distinct()
#Add the host_type info
amr_with_host_type <- amr_presence %>%
left_join(
genome_metadata %>% dplyr::select(ID, host_type),
by = c("genome" = "ID")
)
# Count how many MAGs in each AMR
amr_mag_counts <- amr_with_host_type %>%
group_by(host_type, resistance_target) %>%
summarise(
n_mags = n(),
.groups = "drop"
)
#Count total MAGs per host_type
total_mags_per_host_type <- genome_metadata %>%
group_by(host_type) %>%
summarise(
total_mags = n_distinct(ID),
.groups = "drop"
)
#Calculate proportions of MAGs from each host_type in each AMR
amr_mag_proportions <- amr_mag_counts %>%
left_join(total_mags_per_host_type, by = "host_type") %>%
mutate(
proportion = n_mags / total_mags,
absent = total_mags - n_mags
)
Statistical testing of MAG proportions
fisher_results <- amr_mag_proportions %>%
dplyr::select(resistance_target, host_type, n_mags, absent) %>%
pivot_wider(
names_from = host_type,
values_from = c(n_mags, absent),
values_fill = 0
) %>%
rowwise() %>%
mutate(
p_value = fisher.test(
matrix(
c(n_mags_animal, absent_animal,
n_mags_human, absent_human),
nrow = 2,
byrow = TRUE
)
)$p.value
) %>%
ungroup() %>%
mutate(p_adj = p.adjust(p_value, method = "BH"))
fisher_results <- fisher_results %>%
mutate(
prop_animal = n_mags_animal / (n_mags_animal + absent_animal),
prop_human = n_mags_human / (n_mags_human + absent_human),
diff_prop = prop_human - prop_animal
)
fisher_results%>% filter(p_adj < 0.05)
# A tibble: 32 × 12
resistance_target n_mags_animal n_mags_human n_mags_NA absent_animal absent_human absent_NA p_value p_adj
<chr> <int> <int> <int> <int> <int> <int> <dbl> <dbl>
1 AMIKACIN/KANAMYCIN… 41 8 91 43 34 -90 0.00173 0.00266
2 AMINOGLYCOSIDE 41 8 91 43 34 -90 0.00173 0.00266
3 AVILAMYCIN 41 8 91 43 34 -90 0.00173 0.00266
4 AZITHROMYCIN/ERYTH… 17 1 43 67 41 -42 0.00610 0.00820
5 BETA-LACTAM 41 8 91 43 34 -90 0.00173 0.00266
6 BLEOMYCIN 39 8 90 45 34 -89 0.00324 0.00449
7 CHLORAMPHENICOL 41 8 91 43 34 -90 0.00173 0.00266
8 CHLORAMPHENICOL/FL… 41 8 91 43 34 -90 0.00173 0.00266
9 CLINDAMYCIN/LINCOM… 41 8 91 43 34 -90 0.00173 0.00266
10 FLORFENICOL/OXAZOL… 41 8 91 43 34 -90 0.00173 0.00266
# ℹ 22 more rows
# ℹ 3 more variables: prop_animal <dbl>, prop_human <dbl>, diff_prop <dbl>
Building an AMR abundance table
# select the AMR annotations
amr_abundance <- genome_annotations %>%
filter(resistance_type == "AMR") %>%
group_by(genome, resistance_target) %>%
summarise(count = n(), .groups="drop") %>%
tidyr::pivot_wider(
names_from = resistance_target,
values_from = count,
values_fill = 0
)
amr_cluster <- amr_abundance %>%
dplyr::full_join(metadata_with_cluster, by= join_by(genome == ID))
amr_cluster %>%
dplyr::select("BETA-LACTAM", "MACROLIDE", genome, cluster) %>%
group_by( cluster) %>%
summarise(mean_beta_lactam = mean(`BETA-LACTAM`, na.rm = TRUE),
mean_macrolide = mean(MACROLIDE, na.rm = TRUE))
# A tibble: 3 × 3
cluster mean_beta_lactam mean_macrolide
<fct> <dbl> <dbl>
1 1 6.08 11.4
2 2 6.08 10.9
3 <NA> 6.13 10.3
amr_matrix <- amr_abundance %>%
column_to_rownames("genome") %>%
as.matrix()
# Abundance heatmap
min_val <- min(amr_matrix, na.rm = TRUE)
max_val <- max(amr_matrix, na.rm = TRUE)
colors <- viridis(100, option = "viridis")
breaks <- seq(min_val, max_val, length.out = 101)
pheatmap( amr_matrix,
color = colors,
cluster_rows = FALSE,
cluster_cols = TRUE,
fontsize = 9,
border_color = NA
)

amr_presence <- amr_abundance %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ ifelse(. > 0, 1, 0)))%>%
as.matrix()
pheatmap(
amr_presence,
cluster_rows = TRUE,
cluster_cols = TRUE,
color = c("white", "darkblue"), # white = absent, black = present
legend_breaks = c(0,1),
legend_labels = c("Absent", "Present"),
border_color = NA
)

pheatmap(
amr_presence,
cluster_rows = FALSE,
cluster_cols = TRUE,
color = c("white", "darkblue"), # white = absent, black = present
legend_breaks = c(0,1),
legend_labels = c("Absent", "Present"),
border_color = NA
)

PERMANOVA presence/absence
# Remove zero-variance AMR features
amr_pa_nz <- amr_presence[, colSums(amr_presence) > 0 & colSums(amr_presence) < nrow(amr_presence), drop = FALSE]
# Determine the common genomes/IDs between AMR matrix and metadata
common_ids <- base::intersect(rownames(amr_pa_nz), genome_metadata$ID)
# Report what will be kept/dropped
message("# common: ", length(common_ids))
# common: 50
message("# in AMR only: ", length(setdiff(rownames(amr_pa_nz), genome_metadata$ID)))
# in AMR only: 90
message("# in metadata only: ", length(setdiff(genome_metadata$ID, rownames(amr_pa_nz))))
# in metadata only: 77
# Subset AMR to the intersection (and keep the identical order)
amr_pa_nz <- amr_pa_nz[common_ids, , drop = FALSE]
# Build aligned metadata (unique IDs, same order as amr_pa_nz)
meta <- genome_metadata %>%
dplyr::filter(ID %in% common_ids) %>%
dplyr::distinct(ID, .keep_all = TRUE) %>%
tibble::column_to_rownames("ID") %>%
.[common_ids, , drop = FALSE]
stopifnot(identical(rownames(meta), rownames(amr_pa_nz)))
# Prepare variables for PERMANOVA
required_vars <- c("genome_size", "completeness", "host_type")
# Coerce types as needed
meta <- meta %>%
dplyr::mutate(
genome_size = as.numeric(genome_size),
completeness = as.numeric(completeness),
host_type = as.factor(host_type)
)
# Align on complete cases for the required variables
ok <- stats::complete.cases(meta[, required_vars, drop = FALSE])
amr_pa_nz <- amr_pa_nz[ok, , drop = FALSE]
meta <- meta[ok, , drop = FALSE]
stopifnot(identical(rownames(meta), rownames(amr_pa_nz)))
# Distance, dispersion, PERMANOVA
amr_dist_pa <- vegan::vegdist(amr_pa_nz, method = "jaccard", binary = TRUE)
disp_pa <- vegan::betadisper(amr_dist_pa, meta$host_type)
Warning in vegan::betadisper(amr_dist_pa, meta$host_type): some squared distances are negative and changed to
zero
Analysis of Variance Table
Response: Distances
Df Sum Sq Mean Sq F value Pr(>F)
Groups 1 0.000057 0.0000566 0.0091 0.9242
Residuals 47 0.290868 0.0061887
vegan::adonis2(
amr_dist_pa ~ genome_size + completeness + host_type,
data = meta,
permutations = 999,
by = "margin"
)
Permutation test for adonis under reduced model
Marginal effects of terms
Permutation: free
Number of permutations: 999
vegan::adonis2(formula = amr_dist_pa ~ genome_size + completeness + host_type, data = meta, permutations = 999, by = "margin")
Df SumOfSqs R2 F Pr(>F)
genome_size 1 0.01571 0.01467 0.7202 0.533
completeness 1 0.01155 0.01079 0.5296 0.644
host_type 1 0.06544 0.06113 3.0001 0.035 *
Residual 45 0.98148 0.91690
Total 48 1.07044 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PCoA presence/absence
pcoa_pa <- cmdscale(amr_dist_pa, eig = TRUE, k = 2)
variance_explained <- pcoa_pa$eig / sum(pcoa_pa$eig)
pcoa_df <- data.frame(
ID = rownames(amr_pa_nz),
PC1 = pcoa_pa$points[,1],
PC2 = pcoa_pa$points[,2],
host_type = meta$host_type
) %>%
left_join(metadata_with_cluster, by = "ID")
pcoa_amr_pa <- ggplot(pcoa_df, aes(PC1, PC2, color = host_type.x)) +
geom_point(size = 2) +
scale_color_manual(values = host_type_colors, name = "host_type") +
theme_minimal() +
labs(
title = "PCoA of AMR annotations across MAGs",
x = paste0("PCoA1 (", round(variance_explained[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(variance_explained[2] * 100, 1), "%)")
)
pcoa_amr_pa

ggplot(pcoa_df, aes(PC1, PC2, color = host_order)) +
geom_point(size = 2) +
scale_color_manual(values = host_order_colors, name = "Host Order")+
theme_minimal() +
labs(
title = "PCoA of AMR annotations across MAGs (colored by host order)",
x = paste0("PCoA1 (", round(variance_explained[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(variance_explained[2] * 100, 1), "%)")
)

ggplot(pcoa_df, aes(PC1, PC2, color = completeness)) +
geom_point(size = 2) +
theme_minimal() +
labs(
title = "PCoA of AMR annotations across MAGs (colored by completeness)",
x = paste0("PCoA1 (", round(variance_explained[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(variance_explained[2] * 100, 1), "%)")
)

ggplot(pcoa_df, aes(PC1, PC2, color = genome_size)) +
geom_point(size = 2) +
theme_minimal() +
labs(
title = "PCoA of AMR annotations across MAGs (colored by genome_size)",
x = paste0("PCoA1 (", round(variance_explained[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(variance_explained[2] * 100, 1), "%)")
)

ggplot(pcoa_df, aes(PC1, PC2, color = country)) +
geom_point(size = 2) +
scale_color_manual(values = country_palette, na.value = "grey70")+
theme_minimal() +
labs(
title = "PCoA of AMR annotations across MAGs (colored by country)",
x = paste0("PCoA1 (", round(variance_explained[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(variance_explained[2] * 100, 1), "%)")
)

ggplot(pcoa_df, aes(PC1, PC2, color = continent)) +
geom_point(size = 2) +
theme_minimal() +
labs(
title = "PCoA of AMR annotations across MAGs (colored by continent)",
x = paste0("PCoA1 (", round(variance_explained[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(variance_explained[2] * 100, 1), "%)")
)

ggplot(pcoa_df, aes(PC1, PC2, color = cluster)) +
geom_point(size = 2) +
theme_minimal() +
labs(
title = "PCoA of AMR annotations across MAGs (colored by functional cluster)",
x = paste0("PCoA1 (", round(variance_explained[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(variance_explained[2] * 100, 1), "%)")
)

Testing proportions (Fisher)
amr_counts <- genome_annotations %>%
filter(resistance_type == "AMR") %>%
dplyr::count(genome, resistance_target) %>%
pivot_wider(names_from = resistance_target, values_from = n, values_fill = 0) %>%
filter(rowSums(dplyr::select(., -genome)) > 0) # Removes MAGs with no AMR genes found
amr_binary <- amr_counts %>%
left_join(genome_metadata %>% dplyr::select(ID, host_type), by = c("genome" = "ID")) %>%
# Convert all numeric columns to 0 or 1
mutate(across(where(is.numeric), ~ ifelse(.x > 0, 1, 0)))
#Function to run Fischer test for each gene
run_fisher <- function(gene_name, df) {
tab <- table(df$host_type, df[[gene_name]])
# Only run if the table is actually 2x2
if(ncol(tab) == 2) {
test <- fisher.test(tab)
return(tidy(test) %>% mutate(AMR_Target = gene_name))
} else {
return(NULL)
}
}
# Apply the function to all AMR columns
amr_names <- colnames(amr_counts)[-1] # everything except mag_id
fisher_results <- map_df(amr_names, ~ run_fisher(.x, amr_binary))
# Adjust for multiple testing (FDR)
fisher_results <- fisher_results %>%
mutate(p.adj = p.adjust(p.value, method = "fdr")) %>%
filter(p.adj < 0.05) %>%
arrange(p.adj)
print(fisher_results)
# A tibble: 0 × 8
# ℹ 8 variables: estimate <dbl>, p.value <dbl>, conf.low <dbl>, conf.high <dbl>, method <chr>,
# alternative <chr>, AMR_Target <chr>, p.adj <dbl>