# Packages
library(tidyverse)
library(here)
library(janitor)
library(waffle)
library(ggh4x)
library(sf)
library(tigris)
library(ggplot2)
# --------------------------
# Load + Clean Data
# --------------------------
# Load data
worms_raw <- read_csv(here("data", "EDI_Data_Metadata_JumpingWorms.csv")) %>%
clean_names()
# Clean data
worms <- worms_raw %>%
mutate(site_id = as.character(site_id),
landcover_type = str_to_title(landcover),
# Fix inconsistent land cover labels
landcover_type = recode(landcover_type,
"Residential_lawn" = "Residential Lawn",
"Residential_garden" = "Residential Garden"),
# Jumping worm presence (YES/NO -> TRUE/FALSE)
jw_present = case_when(
jumping_worm_presence %in% c("YES", "Yes", "yes") ~ TRUE,
jumping_worm_presence %in% c("NO", "No", "no") ~ FALSE,
TRUE ~ NA),
# European worm presence across any of the 3 pours (YES/NO -> TRUE/FALSE)
euro_present = case_when(
pour1_european_earthworm_presence %in% c("YES","Yes","yes") |
pour2_european_earthworm_presence %in% c("YES","Yes","yes") |
pour3_european_earthworm_presence %in% c("YES","Yes","yes") ~ TRUE,
pour1_european_earthworm_presence %in% c("NO","No","no") &
pour2_european_earthworm_presence %in% c("NO","No","no") &
pour3_european_earthworm_presence %in% c("NO","No","no") ~ FALSE,
TRUE ~ NA),
# Mean density across the three pours (invasion intensity metric)
mean_density_3pours = rowMeans(cbind(pour1_density,
pour2_density,
pour3_density),
na.rm = TRUE),
mean_density_3pours = if_else(is.nan(mean_density_3pours),
NA_real_, mean_density_3pours),
# Total jumping worm abundance across species columns
total_worm_count = rowSums(
cbind(abundance_tok, abundance_agr, abundance_unk, abundance_mh),
na.rm = TRUE))
# --------------------------
# Plot 1 — Mean density by Land Cover (low -> high)
# --------------------------
# Summarize mean jumping worm density per habitat, ordered low to high
density_by_landcover <- worms %>%
filter(!is.na(mean_density_3pours), !is.na(landcover_type)) %>%
group_by(landcover_type) %>%
summarize(
mean_density = mean(mean_density_3pours, na.rm = TRUE),
n_sites = n(),
.groups = "drop") %>%
arrange(mean_density)
# Bar chart of mean density by habitat
p_density <- ggplot(
density_by_landcover,
aes(x = reorder(landcover_type, mean_density), y = mean_density)) +
geom_col(fill = "#8B4513") +
labs(title = "Jumping Worm Density Across Landcover Types",
x = "Landcover Type",
y = "Mean Jumping Worm Density") +
scale_x_discrete(labels = scales::label_wrap(12)) +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(just = 0.5),
axis.text.x = element_text(size = 10, hjust = 0.5),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)),
plot.margin = margin(t = 20, r = 20, b = 20, l = 40),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(color = "grey80"))
p_density
# --------------------------
# Plot 2 — Donut chart: share of sites invaded (NA removed)
# --------------------------
# Calculate percentage of sites with jumping worms present vs. not
invasion_share <- worms %>%
filter(!is.na(jw_present)) %>%
count(jw_present) %>%
mutate(pct = 100 * n / sum(n),
site_status = if_else(jw_present, "Invaded", "Not Invaded")) %>%
select(site_status, n, pct)
# Donut chart showing invaded vs. not-invaded site proportions
p_invaded <- ggplot(invasion_share, aes(x = 2, y = pct,
fill = site_status)) +
geom_col(color = "white") +
coord_polar(theta = "y") +
xlim(0.5, 2.5) +
theme_void() +
scale_fill_manual(values = c("Not Invaded" = "#6B3E26", # dark worm/soil
"Invaded" = "#D9D2C5")) + # pale, depleted soil
labs(fill = "Site Status",
title = "Proportion of Sites Invaded by Jumping Worms") +
theme(legend.position = "right",
plot.title = element_text(size = 13, face = "bold",
margin = margin(b = 4)),
plot.margin = margin(t = 20, r = 20, b = 20, l = 40))
p_invaded
# --------------------------
# Plot 3A — Stack Bar
# --------------------------
# Co-occurrence categories
coexistence <- worms %>%
filter(!is.na(jw_present), !is.na(euro_present), !is.na(landcover_type)) %>%
mutate(worm_status =
case_when(jw_present & euro_present ~ "Both present",
jw_present & !euro_present ~ "Jumping worms only",
!jw_present & euro_present ~ "European worms only",
TRUE ~ "Neither")) %>%
count(landcover_type, worm_status) %>%
group_by(landcover_type) %>%
mutate(pct = 100 * n / sum(n)) %>%
ungroup()
p_species <- ggplot(coexistence, aes(x = landcover_type, y = pct,
fill = worm_status)) +
geom_col() +
scale_x_discrete(labels = scales::label_wrap(12)) +
labs(x = "Landcover Type",
y = "Share of Sites (%)",
fill = "Worm Presence") +
theme_minimal(base_size = 12) +
theme(plot.title = element_blank(),
axis.text.x = element_text(size = 10, hjust = 0.5))
p_species
# --------------------------
# Plot 3B — Waffle Bar
# --------------------------
# One row per site with worm status counts coded by habitat
sites <- bind_rows(
tibble(landcover = "Forest",
status = c(rep("Both Worm Types", 0),
rep("Jumping Worms Only", 4),
rep("European Worms Only", 6),
rep("No Worms Found", 6))),
tibble(landcover = "Grassland",
status = c(rep("Both Worm Types", 0),
rep("Jumping Worms Only", 0),
rep("European Worms Only", 7),
rep("No Worms Found", 9))),
tibble(landcover = "Open Space",
status = c(rep("Both Worm Types", 2),
rep("Jumping Worms Only", 0),
rep("European Worms Only", 8),
rep("No Worms Found", 4))),
tibble(landcover = "Residential Garden",
status = c(rep("Both Worm Types", 3),
rep("Jumping Worms Only", 15),
rep("European Worms Only", 12),
rep("No Worms Found", 9))),
tibble(landcover = "Residential Lawn",
status = c(rep("Both Worm Types", 2),
rep("Jumping Worms Only", 6),
rep("European Worms Only", 8),
rep("No Worms Found", 23)))) %>%
mutate(landcover = factor(landcover,
levels = c("Forest",
"Grassland",
"Open Space",
"Residential Garden",
"Residential Lawn")),
status = factor(status,
levels = c("Jumping Worms Only",
"European Worms Only",
"Both Worm Types",
"No Worms Found"))) %>%
group_by(landcover) %>%
arrange(status, .by_group = TRUE) %>%
mutate(col = (row_number() - 1) %% 16,
row = (row_number() - 1) %/% 16) %>%
ungroup()
# Compute panel heights - tiles stay square across habitats with different row n
panel_heights <- sites %>%
group_by(landcover) %>%
summarise(nrows = max(row) + 1, .groups = "drop") %>%
pull(nrows)
# Plot waffle chart with one facet per habitat, sized proportionally
ggplot(sites, aes(x = col, y = -row, fill = status)) +
geom_tile(colour = "white", linewidth = 0.35,
width = 0.9, height = 0.9) +
facet_wrap(~landcover, ncol = 1,
strip.position = "left", scales = "free_y") +
force_panelsizes(rows = panel_heights) +
scale_fill_manual(values = c("Jumping Worms Only" = "#2E7D32",
"European Worms Only" = "#F39C12",
"Both Worm Types" = "#7B3294",
"No Worms Found" = "#E8E5E0"),
breaks = c("Jumping Worms Only",
"European Worms Only",
"Both Worm Types",
"No Worms Found")) +
labs(title = "Jumping Worms Were Found at Many Surveyed Sites",
subtitle = "Each Square Represents One Survey Site",
fill = NULL) +
theme_void() +
theme(plot.title = element_text(size = 13,
face = "bold",
margin = margin(b = 5)),
plot.subtitle = element_text(size = 11,
margin = margin(b = 16)),
strip.text.y.left = element_text(angle = 0,
hjust = 1,
vjust = 0.5,
size = 9,
face = "bold",
margin = margin(r = 8)),
legend.position = "right",
legend.title = element_blank(),
strip.placement = "outside",
panel.spacing = unit(0.4, "lines"),
plot.margin = margin(t = 20, r = 20, b = 20, l = 40))
# --------------------------
# Plot 4 — Map
# --------------------------
# Get Wisconsin state and Madison city boundaries
options(tigris_use_cache = TRUE)
wisconsin <- states(cb = TRUE) %>%
filter(NAME == "Wisconsin")
madison <- places(state = "WI", cb = TRUE) %>%
filter(NAME == "Madison")
# Plot
ggplot() +
geom_sf(data = wisconsin, fill = NA,
color = "#555555", linewidth = 0.8) +
geom_sf(data = madison, fill = "#F0EDE8",
color = "#C0392B", linewidth = 0.6) +
theme_void()
# Get Madison centroid for label placement
madison_centroid <- st_centroid(madison) %>%
st_coordinates()
# Add annotation for Madison
ggplot() +
geom_sf(data = wisconsin, fill = NA,
color = "#555555", linewidth = 0.8) +
geom_sf(data = madison, fill = "#F0EDE8",
color = "#C0392B", linewidth = 0.6) +
annotate("text",
x = madison_centroid[1] + 0.6,
y = madison_centroid[2] + 0.45,
label = "Madison, WI\nStudy Sites",
size = 3.5,
color = "#333333",
hjust = 0,
family = "sans") +
geom_curve(
aes(x = madison_centroid[1] + 0.55,
y = madison_centroid[2] + 0.35,
xend = madison_centroid[1] + 0.05,
yend = madison_centroid[2] + 0.05),
curvature = -0.3, # controls bend
arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
color = "#555555",
linewidth = 1.1) +
theme_void() +
labs(title = "Study Site: Madison, Wisconsin")