TidyTuesday: Bird Sightings at Sea - New Zealand Logbooks

TidyTuesday

Data Viz

Seabird counts from 10-minute ship surveys near New Zealand (1969-1990): species, season, wind, and geography

Author

chokotto

Published

April 14, 2026

Overview

This week we explore seabird observations recorded under the Australasian Seabird Mapping Scheme: counts made from ships during 10-minute periods in and around New Zealand waters. The data links bird records to ship position, weather, and season.

Data Source: TidyTuesday 2026-04-14 / Te Papa Tongarewa
Angle: Which species dominate counts, how observations vary by season and wind, and where surveys cluster in space

Dataset

Code

library(tidyverse)
library(scales)
library(glue)
library(patchwork)
library(ggtext)
library(ggrepel)
library(showtext)
library(colorspace)
library(janitor)
library(forcats)

Code

# Remote TidyTuesday folder, used for any file that is not present locally.
base_url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-04-14"

# Candidate local copies under <working directory>/data.
birds_local <- file.path(getwd(), "data", "birds.csv")
ships_local <- file.path(getwd(), "data", "ships.csv")

# Use the local file when it exists, otherwise fall back to the remote URL.
birds_path <- if (file.exists(birds_local)) {
  birds_local
} else {
  paste0(base_url, "/birds.csv")
}
ships_path <- if (file.exists(ships_local)) {
  ships_local
} else {
  paste0(base_url, "/ships.csv")
}

# Empty strings and the literal "NA" are both read as missing values.
birds <- readr::read_csv(
  birds_path,
  na = c("", "NA"),
  show_col_types = FALSE
)

Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)

Code

# Ship log table; it is matched to the bird records through `record_id`.
ships <- readr::read_csv(ships_path, show_col_types = FALSE)

# One row per bird record that has a matching ship record.
# - species_label: scientific name, else common name, else "Unknown".
# - count: missing counts are replaced by 0.
# Rows whose label is missing or "Unknown" are discarded.
birds_ship <- inner_join(birds, ships, by = "record_id") |>
  mutate(
    species_label = coalesce(
      species_scientific_name,
      species_common_name,
      "Unknown"
    ),
    count = replace_na(count, 0L)
  ) |>
  filter(!(is.na(species_label) | species_label == "Unknown"))

# Beaufort lookup table: local copy when available, remote file otherwise.
beaufort_local <- file.path(getwd(), "data", "beaufort_scale.csv")
beaufort_path <- if (file.exists(beaufort_local)) {
  beaufort_local
} else {
  paste0(base_url, "/beaufort_scale.csv")
}
beaufort <- readr::read_csv(beaufort_path, show_col_types = FALSE)

Code

# ---- Palette ----
# Accent colours shared by the charts.
COL_PRIMARY   <- "#0ea5e9"
COL_SECONDARY <- "#f59e0b"
COL_ALERT     <- "#ef4444"
COL_POSITIVE  <- "#10b981"
COL_BASE      <- "#94a3b8"

# ---- Caption ----
# The note and source strings are interpolated into a single caption line.
NOTE_TEXT   <- "Seabird logbook entries; counts may be censored at 99999 for very large flocks"
SOURCE_TEXT <- "TidyTuesday 2026-04-14 / Te Papa Tongarewa"
CAPTION     <- glue("Note: {NOTE_TEXT}  |  Source: {SOURCE_TEXT}  |  \u00A9 2026 chokotto")

# ---- Theme ----
# Style label; it is not read anywhere else in the visible code.
STYLE_MODE <- "figmamake"

# Light theme: white plot area, pale panel, slate-toned text.
theme_fm <- theme_minimal(base_size = 12) +
  theme(
    # Backgrounds
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "#f8fafc", colour = NA),
    legend.background = element_rect(fill = "white", colour = NA),
    # Grid: thin major lines only
    panel.grid.major = element_line(colour = "#e2e8f0", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    # Text
    text = element_text(colour = "#334155"),
    axis.text = element_text(colour = "#475569"),
    legend.text = element_text(colour = "#475569"),
    strip.text = element_text(colour = "#1e293b", face = "bold"),
    plot.title = element_text(colour = "#1e293b", face = "bold", size = 14),
    plot.subtitle = element_text(colour = "#64748b", size = 10),
    # Caption: left-aligned against the whole plot, with space above it
    plot.caption = element_text(
      colour = "#94a3b8", face = "italic", size = 9,
      hjust = 0, margin = margin(t = 12)
    ),
    plot.caption.position = "plot",
    # Outer margin
    plot.margin = margin(t = 15, r = 15, b = 15, l = 15)
  )

# Theme applied by every chart below.
theme_active <- theme_fm

Analysis

Albatrosses and petrels lead the species list

Code

# Maximum number of taxa to show in the ranking.
top_n_species <- 15L

# Total birds counted and number of records per species label, keeping the
# `top_n_species` largest totals.
# with_ties = FALSE: slice_max() keeps tied rows by default, which could
# return more rows than requested and contradict the subtitle.
species_tot <- birds_ship |>
  group_by(species_label) |>
  summarise(
    total_count = sum(count, na.rm = TRUE),
    n_obs = n(),
    .groups = "drop"
  ) |>
  filter(total_count > 0) |>
  slice_max(total_count, n = top_n_species, with_ties = FALSE) |>
  # Order the factor by total so the largest bar is drawn at the top.
  mutate(species_label = fct_reorder(species_label, total_count))

ggplot(species_tot, aes(x = total_count, y = species_label)) +
  geom_col(fill = COL_PRIMARY, width = 0.7, alpha = 0.85) +
  # Value labels sit just past the end of each bar.
  geom_text(
    aes(label = comma(total_count)),
    hjust = -0.1, size = 3, color = "#475569"
  ) +
  # Extra room on the right so the value labels are not clipped.
  scale_x_continuous(
    labels = comma_format(),
    expand = expansion(mult = c(0, 0.12))
  ) +
  labs(
    title = "Top seabird taxa by summed count in the survey window",
    # The count comes from the data, so it stays correct when fewer than
    # `top_n_species` taxa have a positive total.
    subtitle = glue(
      "{nrow(species_tot)} species with highest total birds recorded ",
      "(all observations combined)"
    ),
    x = "Total birds counted",
    y = NULL,
    caption = CAPTION
  ) +
  theme_active +
  theme(panel.grid.major.y = element_blank())

Summer surveys capture the most activity

Code

# Display order for the x axis. Without explicit levels the seasons are
# drawn alphabetically (autumn, spring, summer, winter).
season_order <- c("summer", "autumn", "winter", "spring")

# Number of bird records per season (rows with a missing season are dropped).
season_n <- birds_ship |>
  filter(!is.na(season)) |>
  count(season, name = "n_records") |>
  mutate(
    season = as.character(season),
    # Known seasons first in display order; any unexpected value is kept
    # and appended after them rather than being turned into NA.
    season = factor(
      season,
      levels = c(
        intersect(season_order, season),
        setdiff(season, season_order)
      )
    )
  )

ggplot(season_n, aes(x = season, y = n_records, fill = season)) +
  geom_col(width = 0.65, show.legend = FALSE) +
  scale_fill_manual(
    values = c(
      "summer" = COL_PRIMARY,
      "autumn" = COL_SECONDARY,
      "winter" = lighten(COL_ALERT, 0.2),
      "spring" = COL_POSITIVE
    )
  ) +
  scale_y_continuous(labels = comma_format()) +
  labs(
    title = "Observation records peak in southern summer",
    subtitle = "Number of bird records linked to ship logs by hemisphere season",
    x = NULL,
    y = "Number of records",
    caption = CAPTION
  ) +
  theme_active +
  theme(panel.grid.major.x = element_blank())

Higher wind classes still show heavy bird activity

Code

# One description per wind class. distinct() guards the join below against
# duplicated classes in the lookup, which would otherwise duplicate bars.
beaufort_labels <- beaufort |>
  select(wind_speed_class, wind_description) |>
  distinct(wind_speed_class, .keep_all = TRUE)

# Mean count and number of records per wind class, with an axis label that
# combines the class and its description (class alone when none is matched).
wind_df <- birds_ship |>
  filter(!is.na(wind_speed_class)) |>
  group_by(wind_speed_class) |>
  summarise(
    mean_count = mean(count, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) |>
  left_join(beaufort_labels, by = "wind_speed_class") |>
  arrange(wind_speed_class) |>
  mutate(
    wind_label = if_else(
      is.na(wind_description),
      as.character(wind_speed_class),
      paste0(wind_speed_class, "\n", str_wrap(wind_description, width = 12))
    ),
    # Levels follow the sorted class order established by arrange() above.
    wind_label = factor(wind_label, levels = wind_label)
  )

ggplot(wind_df, aes(x = wind_label, y = mean_count)) +
  geom_col(fill = lighten(COL_SECONDARY, 0.25), width = 0.65) +
  # Fixed one-decimal accuracy so every bar label is formatted alike.
  geom_text(
    aes(label = comma(mean_count, accuracy = 0.1)),
    vjust = -0.4, size = 3, color = "#475569"
  ) +
  # Stagger the labels over two rows so the descriptions do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  labs(
    title = "Mean flock size by Beaufort wind class",
    subtitle = "Higher classes are rougher seas; counts remain substantial across conditions",
    x = "Beaufort wind class",
    y = "Mean birds per observation",
    caption = CAPTION
  ) +
  theme_active

Surveys trace a belt of observations around New Zealand waters

Code

# Upper bound on plotted points; also quoted in the subtitle.
map_sample_size <- 8000L

# Fixed seed so the random subsample is reproducible between renders.
set.seed(42)

# Records with a usable position, randomly thinned for readability.
# slice_sample() silently truncates `n` to the number of available rows, so
# no manual min(n, nrow()) guard is needed (this includes the empty case).
map_df <- birds_ship |>
  filter(!is.na(latitude), !is.na(longitude)) |>
  slice_sample(n = map_sample_size)

if (nrow(map_df) == 0L) {
  # Nothing to map: show a placeholder message instead of an empty panel.
  ggplot() +
    annotate(
      "text", x = 0.5, y = 0.5, size = 4, color = "#64748b",
      label = "No latitude/longitude available for mapping."
    ) +
    theme_void()
} else {
  ggplot(map_df, aes(x = longitude, y = latitude)) +
    # Colour encodes log1p(count) so very large counts do not dominate.
    geom_point(aes(color = log1p(count)), alpha = 0.25, size = 0.6) +
    scale_color_gradientn(
      colors = c("#e0f2fe", "#0369a1", "#0f172a"),
      name = "log1p(count)"
    ) +
    # Approximate map aspect ratio, so a degree of longitude is not drawn
    # as wide as a degree of latitude at these latitudes.
    coord_quickmap() +
    labs(
      title = "Geographic footprint of sampled 10-minute counts",
      subtitle = glue(
        "Random subsample of up to {comma(map_sample_size)} records ",
        "for readability"
      ),
      x = "Longitude",
      y = "Latitude",
      caption = CAPTION
    ) +
    theme_active +
    theme(legend.position = "right")
}

Key Findings

Large albatross and petrel aggregations drive the highest summed counts, reflecting both abundance and flocking behavior in the Southern Ocean survey corridor.
Southern summer contains the bulk of linked records, consistent with more research and fishing vessel activity in warmer months.
Wind conditions span calm to rough seas; mean counts per observation remain substantial even at higher Beaufort classes.
Spatially, observations concentrate in a band consistent with New Zealand’s offshore and subantarctic routes.

This post is part of the TidyTuesday weekly data visualization project.

Disclaimer

This analysis is for educational and practice purposes only. Data visualizations and interpretations are based on the provided dataset and may not represent complete or current information.