Get Barracudar ready
# load packages ----
library(log4r)
##
## Attaching package: 'log4r'
## The following object is masked from 'package:base':
##
## debug
library(TeachingDemos)
## Warning: package 'TeachingDemos' was built under R version 4.3.2
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pracma)
##
## Attaching package: 'pracma'
##
## The following object is masked from 'package:purrr':
##
## cross
library(ggmosaic)
library(stringr)
# load any additional packages here...
# source function files ----
# Every barracudar helper script is sourced from its absolute path, so the
# working directory only matters to code that uses relative paths.
setwd("~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar")
barracudar_scripts <- c(
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/DataTableTemplate.R",
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/AddFolder.R",
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/BuildFunction.R",
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/MetaDataTemplate.R",
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/CreatePaddedLabel.R",
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/InitiateSeed.R",
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/SetUpLog.R",
  "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/barracudar/SourceBatch.R"
)
# Source the scripts one at a time, in the order listed above.
for (script_path in barracudar_scripts) {
  source(script_path)
}
# Finish in the folder that holds the downloaded data.
setwd("~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/OriginalData")
Question 2
# Gather the names of the files we actually want to look at in a character
# vector called filenames (one count-data csv per BART folder).
# list.files() returns character strings of file/folder names.
data_dir <- "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/OriginalData"
filelist <- list.files(data_dir, pattern = "BART")
# paste() / paste0() concatenate strings; file.path() does the same with "/"
# paste0("Here is ","the ","filepath: ", filelist[1]) # example
# Look inside every folder in filelist (however many there are) and pull out
# the file whose name contains "countdata". The folder is passed to
# list.files() directly, so the working directory is never changed.
filenames <- vapply(
  filelist,
  function(folder) {
    matches <- list.files(file.path(data_dir, folder), pattern = "countdata")
    if (length(matches) == 0) {
      stop("No 'countdata' file found in folder: ", folder, call. = FALSE)
    }
    if (length(matches) > 1) {
      # keep the first match (as before) but say so explicitly
      warning("Several 'countdata' files in folder ", folder,
              "; using the first one", call. = FALSE)
    }
    matches[1]
  },
  character(1),
  USE.NAMES = FALSE # plain unnamed vector, same as the loop-built version
)
filenames
## [1] "NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic.20231226T232626Z.csv"
## [2] "NEON.D01.BART.DP1.10003.001.brd_countdata.2016-06.basic.20231227T013428Z.csv"
## [3] "NEON.D01.BART.DP1.10003.001.brd_countdata.2017-06.basic.20231227T094709Z.csv"
## [4] "NEON.D01.BART.DP1.10003.001.brd_countdata.2018-06.basic.20231228T172744Z.csv"
## [5] "NEON.D01.BART.DP1.10003.001.brd_countdata.2019-06.basic.20231227T184129Z.csv"
## [6] "NEON.D01.BART.DP1.10003.001.brd_countdata.2020-06.basic.20231227T224944Z.csv"
## [7] "NEON.D01.BART.DP1.10003.001.brd_countdata.2020-07.basic.20231227T225020Z.csv"
## [8] "NEON.D01.BART.DP1.10003.001.brd_countdata.2021-06.basic.20231228T010546Z.csv"
## [9] "NEON.D01.BART.DP1.10003.001.brd_countdata.2022-06.basic.20231229T053256Z.csv"
## [10] "NEON.D01.BART.DP1.10003.001.brd_countdata.2023-06.basic.20240131T234742Z.csv"
Questions 3 & 4
# getinfo(): for each folder/csv pair, drop incomplete records, extract the
# year from the file name, count the remaining records, and count the number
# of unique species.
#
# Arguments:
#   filelist  - character vector of folder names located inside data_dir
#   filenames - character vector (same length as filelist) giving the csv file
#               to read from the corresponding folder
#   data_dir  - directory that contains the folders; defaults to the
#               Homework11 OriginalData folder used throughout this script
# Returns:
#   a character matrix with one row per folder and the columns
#   "File", "Year", "Total # Individuals", "Species Richness"
getinfo <- function(filelist, filenames,
                    data_dir = "~/Desktop/GitHub/ComputationalBiology/OppenheimerBio6100/Homework11/OriginalData") {
  # each folder needs exactly one csv name
  stopifnot(length(filelist) == length(filenames))

  metadata <- matrix(NA_character_, length(filelist), 4)
  colnames(metadata) <- c("File", "Year", "Total # Individuals", "Species Richness")

  for (i in seq_along(filelist)) { # one csv per folder
    # read the csv through its full path so the caller's working directory
    # is left untouched
    dfcsv <- read.csv(file.path(data_dir, filelist[i], filenames[i]))

    # get rid of empty/missing cases; only the first 20 columns are kept
    # because the columns after 20 are either all NA or have no NA
    dfcsv <- na.omit(dfcsv[, 1:20])

    # extract the year from the file name: characters 43-46 are the YYYY of
    # the YYYY-MM stamp in names like
    # "NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic....csv"
    year <- substr(filenames[i], 43, 46)

    # total individuals found = number of complete records
    # NOTE(review): this counts rows; confirm whether a cluster-size column
    # should be summed instead
    total_individuals <- nrow(dfcsv)

    # number of unique species; column 12 is presumably the species
    # identifier - TODO confirm against the csv header
    species_richness <- length(unique(dfcsv[, 12]))

    # fill in row i; c() coerces the two counts to character
    metadata[i, ] <- c(filelist[i], year, total_individuals, species_richness)
  }

  metadata
}
# Summarise every BART folder; the returned character matrix is printed below.
getinfo(filelist,filenames)
## File
## [1,] "NEON.D01.BART.DP1.10003.001.2015-06.basic.20240127T000425Z.RELEASE-2024"
## [2,] "NEON.D01.BART.DP1.10003.001.2016-06.basic.20240127T000425Z.RELEASE-2024"
## [3,] "NEON.D01.BART.DP1.10003.001.2017-06.basic.20240127T000425Z.RELEASE-2024"
## [4,] "NEON.D01.BART.DP1.10003.001.2018-06.basic.20240127T000425Z.RELEASE-2024"
## [5,] "NEON.D01.BART.DP1.10003.001.2019-06.basic.20240127T000425Z.RELEASE-2024"
## [6,] "NEON.D01.BART.DP1.10003.001.2020-06.basic.20240127T000425Z.RELEASE-2024"
## [7,] "NEON.D01.BART.DP1.10003.001.2020-07.basic.20240127T000425Z.RELEASE-2024"
## [8,] "NEON.D01.BART.DP1.10003.001.2021-06.basic.20240127T000425Z.RELEASE-2024"
## [9,] "NEON.D01.BART.DP1.10003.001.2022-06.basic.20240127T000425Z.RELEASE-2024"
## [10,] "NEON.D01.BART.DP1.10003.001.2023-06.basic.20240131T234742Z.PROVISIONAL"
## Year Total # Individuals Species Richness
## [1,] "2015" "453" "40"
## [2,] "2016" "680" "38"
## [3,] "2017" "411" "34"
## [4,] "2018" "512" "36"
## [5,] "2019" "372" "39"
## [6,] "2020" "447" "43"
## [7,] "2020" "50" "16"
## [8,] "2021" "869" "45"
## [9,] "2022" "578" "37"
## [10,] "2023" "504" "33"