Indianapolis Weather Trends Analysis (2020 - 2025)

Author

Ephraim Retta

1. DATA ACQUISITION ———————

Indianapolis coordinates (latitude: 39.7684°N, longitude: 86.1581°W) are set to retrieve localized weather data. Data is fetched dynamically for the past five years up to the current date, ensuring the analysis is always up-to-date.

Define Indianapolis coordinates

# Indianapolis location in decimal degrees.
# Longitude is negative because the city lies west of the prime meridian.
indy_lon <- -86.1581
indy_lat <- 39.7684

Calculate date range

# Analysis window: the five years ending today.
end_date <- Sys.Date()
# Use lubridate's month-aware subtraction (%m-%) rather than plain `-`.
# `end_date - years(5)` returns NA when today is Feb 29, because the same
# calendar day does not exist five years earlier; %m-% rolls back to the last
# valid day of that month (Feb 28) instead.
start_date <- end_date %m-% years(5)

Get weather data using openmeteo

# Download the historical record for the analysis window.
# Both hourly and daily variables are requested; the returned tibble holds
# them in the same table (see the str() output below).
indy_weather_raw <- weather_history(
  location = c(indy_lat, indy_lon),
  start = start_date,
  end = end_date,
  hourly = c(
    "temperature_2m",
    "relative_humidity_2m",
    "precipitation",
    "wind_speed_10m"
  ),
  daily = c(
    "temperature_2m_max",
    "temperature_2m_min",
    "precipitation_sum"
  )
)

View the structure

# Inspect column names, types and the first few values of the raw download.
utils::str(object = indy_weather_raw)
tibble [45,675 × 9] (S3: tbl_df/tbl/data.frame)
 $ date                       : Date[1:45675], format: "2020-11-26" "2020-11-27" ...
 $ time                       : chr [1:45675] NA NA NA NA ...
 $ daily_temperature_2m_max   : num [1:45675] 9.9 10.1 7.9 10.2 4.7 0.6 5.6 7.3 8.2 4.9 ...
 $ daily_temperature_2m_min   : num [1:45675] 7.1 3.7 -2 -1.8 -1.5 -3.4 -5 -3.6 -1.5 -1.8 ...
 $ daily_precipitation_sum    : num [1:45675] 0.1 0 0 0 5.1 0 0 0 0 0 ...
 $ hourly_temperature_2m      : num [1:45675] NA NA NA NA NA NA NA NA NA NA ...
 $ hourly_relative_humidity_2m: int [1:45675] NA NA NA NA NA NA NA NA NA NA ...
 $ hourly_precipitation       : num [1:45675] NA NA NA NA NA NA NA NA NA NA ...
 $ hourly_wind_speed_10m      : num [1:45675] NA NA NA NA NA NA NA NA NA NA ...

2. DATA PREPARATION ———————

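Extract the relevant daily columns (maximum/minimum temperature and precipitation). Convert Celsius temperatures to Fahrenheit. Generate new informative variables: average temperature, seasonal labels, extreme-temperature flags, rain events, and daily temperature range. Limit the dataset to the daily records for the five-year window (roughly 1,826 days, because any five-year span includes at least one leap day); the raw table also contains hourly rows, which this analysis does not use.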

Clean and structure the data

# Build the tidy daily table that every summary and plot below relies on.
#
# `indy_weather_raw` stacks daily and hourly records in a single tibble: in the
# str() output the leading (daily) rows have NA in `time` and in every
# hourly_* column. Only the daily records are needed here.
indy_weather <- indy_weather_raw %>%
  # Pick the daily rows by content instead of a hard-coded row count.
  # slice(1:1825) assumed five years == 1825 days, but a five-year window
  # includes a leap day (the 45,675 raw rows are consistent with 1,827 daily
  # rows plus 24 hourly rows per day), so the most recent days were silently
  # dropped from the analysis.
  # NOTE(review): assumes hourly rows always carry a non-NA `time` -- confirm
  # against the installed openmeteo version.
  filter(is.na(time)) %>%
  # Keep the three daily measurements under shorter names.
  select(
    date,
    temp_max = daily_temperature_2m_max,
    temp_min = daily_temperature_2m_min,
    precipitation = daily_precipitation_sum
  ) %>%
  mutate(
    date = ymd(date),
    temp_max = as.numeric(temp_max),
    temp_min = as.numeric(temp_min),
    # Calendar components used for grouping.
    year = year(date),
    month = month(date),
    day = day(date),
    # Daily mean approximated as the midpoint of the daily max and min.
    temp_avg = (temp_max + temp_min) / 2,
    # Celsius -> Fahrenheit for reporting.
    temp_avg_f = (temp_avg * 9 / 5) + 32,
    temp_max_f = (temp_max * 9 / 5) + 32,
    temp_min_f = (temp_min * 9 / 5) + 32,
    month_name = month(date, label = TRUE, abbr = TRUE),
    # Meteorological seasons (Dec-Feb, Mar-May, Jun-Aug, Sep-Nov).
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(9, 10, 11) ~ "Fall"
    ),
    # Threshold flags: temperatures in Fahrenheit, precipitation in mm.
    extreme_heat = temp_max_f >= 90,
    extreme_cold = temp_min_f <= 20,
    rain_day = precipitation > 0,
    heavy_rain = precipitation > 25,
    daily_temp_range = temp_max_f - temp_min_f
  )

Create seasonal summaries

Summarize the data by season, including average temperatures and counts of extreme weather events.

# One row per (year, season) with temperature averages, precipitation total
# and counts of flagged days.
#
# December is counted toward the FOLLOWING year so that each "Winter" row is
# one contiguous Dec-Jan-Feb season. Grouping on the plain calendar year
# would put December in the same bucket as the January and February that
# came eleven months earlier, blending two different winters into one row.
# For Winter rows `year` is therefore the year in which the season ends;
# for every other season it is the ordinary calendar year.
seasonal_summary <- indy_weather %>%
  # `month == 12` is TRUE/FALSE, so this adds 1 for December and 0 otherwise.
  mutate(year = year + (month == 12)) %>%
  group_by(year, season) %>%
  summarize(
    avg_temp = mean(temp_avg_f, na.rm = TRUE),
    avg_max_temp = mean(temp_max_f, na.rm = TRUE),
    avg_min_temp = mean(temp_min_f, na.rm = TRUE),
    total_precip = sum(precipitation, na.rm = TRUE),
    # Summing logical flags counts the days on which each flag is TRUE.
    extreme_heat_days = sum(extreme_heat, na.rm = TRUE),
    extreme_cold_days = sum(extreme_cold, na.rm = TRUE),
    rainy_days = sum(rain_day, na.rm = TRUE),
    heavy_rain_days = sum(heavy_rain, na.rm = TRUE),
    .groups = "drop"
  )

Monthly summaries for trend analysis

Monthly summaries to highlight detailed monthly patterns and trends.

# One row per calendar month of each year: mean temperatures (deg F),
# total precipitation and the number of days with any precipitation.
# `month_name` is part of the grouping key only so that the label is carried
# through to the result alongside the numeric `month`.
monthly_summary <- summarise(
  group_by(indy_weather, year, month, month_name),
  avg_temp = mean(temp_avg_f, na.rm = TRUE),
  avg_max_temp = mean(temp_max_f, na.rm = TRUE),
  avg_min_temp = mean(temp_min_f, na.rm = TRUE),
  total_precip = sum(precipitation, na.rm = TRUE),
  rainy_days = sum(rain_day, na.rm = TRUE),
  .groups = "drop"
)

Yearly summaries

Yearly summaries provide an overview for each year for easy comparisons.

# One row per calendar year: mean temperatures (deg F), total precipitation
# and counts of flagged days.
# Note: the analysis window runs from five years ago to today, so the first
# and last calendar years are only partially covered; their totals and day
# counts are not directly comparable with the full years in between.
yearly_summary <- summarise(
  group_by(indy_weather, year),
  avg_temp = mean(temp_avg_f, na.rm = TRUE),
  avg_max_temp = mean(temp_max_f, na.rm = TRUE),
  avg_min_temp = mean(temp_min_f, na.rm = TRUE),
  total_precip = sum(precipitation, na.rm = TRUE),
  extreme_heat_days = sum(extreme_heat, na.rm = TRUE),
  extreme_cold_days = sum(extreme_cold, na.rm = TRUE),
  rainy_days = sum(rain_day, na.rm = TRUE),
  heavy_rain_days = sum(heavy_rain, na.rm = TRUE),
  .groups = "drop"
)

3. ANALYSIS & DATA STORY —————–

Generate visual insights into temperature and precipitation patterns.

Time series plot for daily average temperature

# Line chart of the daily mean temperature over the whole analysis window.
p1 <- indy_weather %>%
  ggplot(mapping = aes(x = date, y = temp_avg_f)) +
  geom_line(colour = "blue") +
  labs(
    title = "Daily Average Temperature (°F)",
    x = "Date",
    y = "Avg Temperature (°F)"
  ) +
  theme_minimal()

Bar plot for daily precipitation

# One bar per day whose height is that day's precipitation total.
# geom_col() plots the y values as given, i.e. it is the shorthand for
# geom_bar(stat = "identity").
p2 <- indy_weather %>%
  ggplot(mapping = aes(x = date, y = precipitation)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Daily Precipitation (mm)",
    x = "Date",
    y = "Precipitation (mm)"
  ) +
  theme_minimal()

Seasonal summary plot for average maximum temperature (grouped by year and season)

# Bar chart of the mean daily maximum temperature for every year/season
# combination in `seasonal_summary`. The x position is the combined
# "year - season" factor; labels are tilted so they do not overlap.
p3 <- seasonal_summary %>%
  ggplot(
    mapping = aes(
      x = interaction(year, season, sep = " - "),
      y = avg_max_temp
    )
  ) +
  geom_col(fill = "coral") +
  labs(
    title = "Seasonal Average Maximum Temperature (°F)",
    x = "Year - Season",
    y = "Avg Max Temperature (°F)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Display the plots

# Draw the three figures in order: temperature series, precipitation bars,
# seasonal maximum temperatures. invisible() suppresses the list that
# lapply() would otherwise echo after the plots.
invisible(lapply(list(p1, p2, p3), print))

4. DASHBOARD —————————

Create an interactive Shiny-powered dashboard to explore weather data dynamically by adjusting date ranges.

Select Date Range

# Two-handled date slider exposed to the server as `input$daterange`.
# It spans the full extent of `indy_weather$date` and starts with both
# handles at the extremes, i.e. with the whole period selected.
shiny::sliderInput(
  inputId = "daterange",
  label = "Select Date Range:",
  min = min(indy_weather$date),
  max = max(indy_weather$date),
  # range() returns c(min, max), the same pair as the two limits above.
  value = range(indy_weather$date),
  timeFormat = "%Y-%m-%d"
)

Seasonal Summary

# Reactive seasonal chart: recomputes the mean daily maximum temperature per
# year/season for the dates selected with the `daterange` slider and redraws
# the bar chart whenever the selection changes.
renderPlot({
  # Do nothing until the slider has produced a value; without this guard the
  # comparison below would run against NULL before the input is initialised.
  shiny::req(input$daterange)
  range_start <- input$daterange[1]
  range_end <- input$daterange[2]

  # Named differently from the global `seasonal_summary` so the dashboard's
  # filtered table cannot be mistaken for the full-period one.
  seasonal_in_range <- indy_weather %>%
    filter(date >= range_start, date <= range_end) %>%
    # Count December toward the following year so each "Winter" bar is one
    # contiguous Dec-Jan-Feb season rather than a mix of two winters that
    # happen to share a calendar year.
    mutate(year = year + (month == 12)) %>%
    group_by(year, season) %>%
    summarize(avg_max_temp = mean(temp_max_f, na.rm = TRUE), .groups = "drop")

  ggplot(
    seasonal_in_range,
    aes(x = interaction(year, season, sep = " - "), y = avg_max_temp)
  ) +
    geom_col(fill = "coral") +
    labs(title = "Seasonal Average Maximum Temperature (°F)",
         x = "Year - Season", y = "Avg Max Temperature (°F)") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
})