# Fixing the error with the dates & missing rows
library(tidyverse)
library(lubridate)
# Input file and the path the corrected copy will be written to
data_file <- "data_workshop/WaterLevelsData_UploadExercise_ID/data/PHY_Castaneda_001.csv"
new_data_file <- "data_workshop/WaterLevelsData_UploadExercise_ID/data/PHY_Castaneda_001_updated.csv"

# First, read it without specifying column types, letting readr guess them.
# This will trigger some warnings (due to the date issues).
data_raw <- read_csv(data_file)

# Display the parsing problems recorded for this read. The object is passed
# explicitly (rather than relying on `.Last.value`) and printed explicitly so
# this also works when the script is run via source() instead of line by line.
print(problems(data_raw))
# Note that the row numbers below are off by 1 because they're counting the header
#     row   col expected        actual     file 
# 1 12260     2 date in ISO8601 30-Jul-20… /hom…
# 2 12261     2 date in ISO8601 30-Jul-20… /hom…
# 3 12262     2 date in ISO8601 30-Jul-20… /hom…
# 4 12263     2 date in ISO8601 30-Jul-20… /hom…
# 5 12264     2 date in ISO8601 31-Jul-20… /hom…


# Now, we're going to re-import them as characters,
# Remove a few empty rows, and add an ID column

# Read every column as character ("ccccc" = five character columns) so the
# date strings are kept exactly as written in the file.
data <- read_csv(data_file, col_types = "ccccc") |>
  # Number the rows *before* dropping blanks, so ID reflects the position of
  # each row as read. row_number() is used instead of 1:n() because 1:n()
  # becomes c(1, 0) on an empty table and makes mutate() fail.
  mutate(ID = row_number()) |>
  filter(!is.na(SITENAME))  # Remove blanks

# IDs of the rows whose Date is written day-month-year instead of ISO 8601.
# Defined once so the "fix" and "remove" steps below cannot drift apart.
bad_date_ids <- 12259:12263

# Re-format the wrong dates: parse as day-month-year, then store the result
# back as text (ISO 8601) so the column stays character like the other rows
data_wrong <- data |>
  filter(ID %in% bad_date_ids) |>
  mutate(Date = as.character(dmy(Date)))

# Rebuild the full table with the corrected rows swapped in
data_new <- data |>
  # Remove the rows with the wrong dates ...
  filter(!(ID %in% bad_date_ids)) |>
  # ... replace them with the corrected versions ...
  bind_rows(data_wrong) |>
  # ... then restore the original row order and drop the helper ID column
  arrange(ID) |>
  select(-ID)
# Write the corrected table to the new file, leaving the original untouched
data_new |>
  write_csv(new_data_file)