# Fix the mis-formatted dates and remove the blank rows in the water-level
# CSV, then write the cleaned table to a new file.
library(tidyverse)
library(lubridate)

data_file = "data_workshop/WaterLevelsData_UploadExercise_ID/data/PHY_Castaneda_001.csv"
new_data_file = "data_workshop/WaterLevelsData_UploadExercise_ID/data/PHY_Castaneda_001_updated.csv"

# Data rows (1-based, header NOT counted) whose Date is written as
# day-month-year (e.g. "30-Jul-20…") instead of ISO 8601. Kept in one place so
# the two filters below cannot drift apart.
bad_date_rows = 12259:12263

# First, read the file without specifying column types.
# This triggers some warnings (due to the date issues).
data_raw = read_csv(data_file)

# Display the parsing problems. The data frame is passed explicitly because a
# bare problems() falls back to .Last.value, which is not reliable when the
# script is run with source(); print() makes the table show up there as well.
print(problems(data_raw))
# Note that the row numbers below are off by 1 because they're counting the header
#     row   col expected        actual     file
# 1 12260     2 date in ISO8601 30-Jul-20… /hom…
# 2 12261     2 date in ISO8601 30-Jul-20… /hom…
# 3 12262     2 date in ISO8601 30-Jul-20… /hom…
# 4 12263     2 date in ISO8601 30-Jul-20… /hom…
# 5 12264     2 date in ISO8601 31-Jul-20… /hom…

# Now re-import every column as character (so the odd dates survive untouched),
# add an ID column, and remove a few empty rows.
# The ID is assigned BEFORE the blank rows are dropped so that it keeps
# matching the row positions of the raw file (and therefore bad_date_rows).
data = read_csv(data_file, col_types = cols(.default = col_character())) |>
  mutate(ID = row_number()) |>   # row_number() is also safe on an empty table
  filter(!is.na(SITENAME))       # Remove blanks

# Re-format the wrong dates: parse them as day-month-year and convert back to
# character so the column type matches the rest of the data.
data_wrong = data |>
  filter(ID %in% bad_date_rows) |>
  mutate(Date = dmy(Date) |> as.character())

# Remove the rows with wrong dates from the data,
data_new = data |>
  filter(!(ID %in% bad_date_rows)) |>
  # replace them with the corrected rows,
  bind_rows(data_wrong) |>
  # then restore the original row order and drop the helper ID column.
  arrange(ID) |>
  select(-ID)

# Export the data
write_csv(data_new, new_data_file)