library(upscaler)
##add_folder() ## ONLY RUN ONCE
To start, go to this link and scroll down to “Download Data”. From there, sort by site and download the “BART” site dataset for the years 2013-2023. In the compressed folder, you should see a list of six folders, each with the year in its name. Store it somewhere on your desktop for now.
BART: January 2015 - December 2023
Within each year’s folder, you will use only the file with “countdata” in its name. Using a for loop, iterate through the yearly folders to gather the file names of these “countdata” .csv files.
## Locate the "countdata" .csv inside every sub-folder of `directory`.
## Results: `filenames` (bare file names) and `filepaths` (paths relative to
## the working directory), one entry per sub-folder, in list.files() order.
directory <- "NEON_count-landbird"
subfolders <- list.files(directory)
startstring <- "NEON.D01.BART.DP1.10003.001.brd_countdata"
## Number of sub-folders actually present. This used to be hard-coded as
## 2023-2015+1, which only matched the download by coincidence (the sample
## output lists two 2020 files and none for 2023).
years <- length(subfolders)
filenames <- character(years)
filepaths <- character(years)
for (i in seq_along(subfolders)) {
  folder <- subfolders[i]
  files <- list.files(file.path(directory, folder))
  name <- files[startsWith(files, startstring)]
  if (length(name) == 0L) {
    ## Fail with a readable message instead of "replacement has length zero".
    stop("No file starting with '", startstring, "' found in '",
         file.path(directory, folder), "'", call. = FALSE)
  }
  if (length(name) > 1L) {
    ## Same outcome as before (the first match is kept), but stated explicitly.
    warning("Several files start with '", startstring, "' in '",
            file.path(directory, folder), "'; using the first one",
            call. = FALSE)
    name <- name[1L]
  }
  filenames[i] <- name
  filepaths[i] <- file.path(directory, folder, name)
}
print(filenames)
## [1] "NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic.20241118T065914Z.csv"
## [2] "NEON.D01.BART.DP1.10003.001.brd_countdata.2016-06.basic.20241118T142515Z.csv"
## [3] "NEON.D01.BART.DP1.10003.001.brd_countdata.2017-06.basic.20241118T043125Z.csv"
## [4] "NEON.D01.BART.DP1.10003.001.brd_countdata.2018-06.basic.20241118T105926Z.csv"
## [5] "NEON.D01.BART.DP1.10003.001.brd_countdata.2019-06.basic.20241118T064156Z.csv"
## [6] "NEON.D01.BART.DP1.10003.001.brd_countdata.2020-06.basic.20241118T184512Z.csv"
## [7] "NEON.D01.BART.DP1.10003.001.brd_countdata.2020-07.basic.20241118T010504Z.csv"
## [8] "NEON.D01.BART.DP1.10003.001.brd_countdata.2021-06.basic.20241118T105538Z.csv"
## [9] "NEON.D01.BART.DP1.10003.001.brd_countdata.2022-06.basic.20241118T033934Z.csv"
Starting with pseudo-code, generate functions to 1) clean the data for any empty/missing cases, 2) extract the year from each file name, 3) calculate Abundance for each year (total number of individuals found), 4) calculate Species Richness for each year (number of unique species found), 5) run a simple regression model for Species Richness (S) vs. Abundance across all years, and 6) generate histograms for both Abundance and Species Richness (S) and store the plots.
## Names of the six helper functions, listed in pipeline order.
## - - - - - - -
functions <- c(
  "clean_data",      ## 1) Cleaning the data for any empty/missing cases
  "extract_year",    ## 2) Extract the year from each file name
  "calc_abundance",  ## 3) Calculate Abundance for each year (Total number of individuals found)
  "calc_richness",   ## 4) Calculate Species Richness for each year (Number of unique species found)
  "run_regression",  ## 5) Run a simple regression model for Species Richness (S) vs. Abundance for every year
  "make_histograms"  ## 6) Generate histograms for both Abundance and Species Richness (S) and store the plots
)
## - - - - - - -
##build_function(functions) ## ONLY RUN ONCE
## - - - - - - -
## Source every script stored in the "Functions" folder so the helper
## functions are available below.
fxs <- list.files("Functions")
## seq_along() gives an empty sequence when the folder is empty or missing;
## 1:length(fxs) would iterate over c(1, 0) and try to source "Functions/NA".
for (i in seq_along(fxs)) {
  fx <- file.path("Functions", fxs[i])
  source(fx)
}
## Warning: package 'ggplot2' was built under R version 4.3.3
Create an initial empty data frame to hold the above summary statistics. You should have a column for the file name, one for abundance, one for species richness, one for year, and the regression model summary statistics.
## Pre-allocate the per-file summary table: `years` rows, one per count-data
## file, filled in by the batch loop. Columns are created with their eventual
## types instead of all-logical NA, and no scratch variable named `.` is left
## behind in the global environment.
stats <- data.frame(
  filename = rep(NA_character_, years),
  abundance = rep(NA_real_, years),
  richness = rep(NA_real_, years),
  ## extract_year() is defined elsewhere and its return type is not visible
  ## here, so this column is left as plain NA and takes its type on assignment.
  year = rep(NA, years)
)
## Pre-allocate the 2 x 4 coefficient table for the richness ~ abundance
## regression: one row per model term, one column per summary statistic.
regression <- data.frame(
  estimate = rep(NA_real_, 2),
  std.err = rep(NA_real_, 2),
  t.value = rep(NA_real_, 2),
  p.value = rep(NA_real_, 2)
)
rownames(regression) <- c("intercept", "abundance")
Using a for loop, run your created functions as a batch process for each folder, changing the working directory as necessary to read in the correct files, calculating summary statistics with your created functions, and then writing them out into your summary statistics data frame.
## Batch process: for every count-data file, read and clean it, then store its
## file name, abundance, richness and year in row i of `stats`.
## clean_data(), calc_abundance(), calc_richness() and extract_year() come from
## the sourced "Functions" scripts and are not visible in this file.
## seq_along() skips the loop entirely when no files were found;
## 1:length(filenames) would run with i = 1 and i = 0 on an empty vector.
for (i in seq_along(filenames)) {
  name_i <- filenames[i]
  df_i <- clean_data(filepaths[i])
  stats[i, ] <- list(
    name_i,
    calc_abundance(df_i),
    calc_richness(df_i),
    extract_year(name_i)
  )
}
## Show everything except the long file-name column.
stats[, 2:4]
## abundance richness year
## 1 454 40 2015
## 2 883 39 2016
## 3 685 35 2017
## 4 772 37 2018
## 5 628 44 2019
## 6 626 46 2020
## 7 89 18 2020
## 8 1015 50 2021
## 9 699 39 2022
## - - - - - - -
## Fit richness against abundance across all files and copy the coefficient
## matrix of the model summary into the pre-allocated `regression` table.
## run_regression() is defined in the sourced "Functions" scripts.
regression[, ] <- summary(
  run_regression(stats[["richness"]], stats[["abundance"]])
)[["coefficients"]]
regression
## estimate std.err t.value p.value
## intercept 21.33140361 5.644479398 3.779162 0.006899822
## abundance 0.02666508 0.008105909 3.289585 0.013310138
## - - - - - - -
## Draw the histograms from the per-file summary table. make_histograms() is
## defined in the sourced "Functions" scripts, not in this file; judging by the
## printed output it returns a list of two plots -- presumably one for
## abundance and one for richness (TODO confirm against the helper).
make_histograms(stats)
## [[1]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##
## [[2]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Scatter plot of richness against abundance, one point per count-data file,
## each labelled with its year. The theme tweaks only reposition the axis
## titles and caption and widen the plot margins.
ggplot(data = stats, mapping = aes(x = abundance, y = richness, label = year)) +
  geom_point(colour = "seagreen", shape = 18, size = 2.5) +
  geom_text(colour = "grey25", size = 2.5, hjust = 0.25, nudge_x = 25) +
  labs(
    x = "Abundance",
    y = "Richness",
    caption = "Site: BART"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 4),
    plot.subtitle = element_text(hjust = 0.5, vjust = 4),
    plot.caption = element_text(vjust = -7),
    axis.title.x = element_text(vjust = -4),
    axis.title.y = element_text(vjust = 4),
    plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm")
  )