Rotorua QC

Load data

The raw data was downloaded from the Bay of Plenty hydroTel SFTP server. The data were then collated into a CSV file which can be downloaded from GitHub or within R using the piggyback package. The data are stored in the data-raw folder.

download <- TRUE

if (download) {
  dir.create("data-raw", showWarnings = FALSE)
  # Hydrotel data
  piggyback::pb_download(
    file = "rotorua_data_raw.zip",
    dest = ".",
    repo = "limnotrack/f_rotorua",
    tag = "v0.0.1", overwrite = TRUE
  )
  
  # Unzip the file to tempdir
  tmp_dir <- tempdir()
  unzip("rotorua_data_raw.zip", exdir = tmp_dir)
  
  # Move files into data-raw
  file.copy(list.files(file.path(tmp_dir, "rotorua_data_raw"), full.names = TRUE),
            "data-raw", overwrite = TRUE)
  
  # Delete zip file
  unlink("rotorua_data_raw.zip", force = TRUE)
}

# Read in raw data
dat <- read_csv("data-raw/rotorua_sftp_compiled_raw.csv") |> 
  mutate(datetime = as_datetime(Date)) |> 
  select(-Date) |> 
  arrange(datetime)

Load metadata

Load metadata about the site, site events, devices, and variables. This includes the site location, device IDs, variable names, and sensor calibration data.

site_sel <- "f_Rotorua"
site <- read_csv("data-raw/sites.csv", col_types = cols())
site_events <- read_csv("data-raw/site_events.csv", col_types = cols())
site_devices <- read_csv("data-raw/site_devices.csv", col_types = cols())

device_var <- read_csv("data-raw/device_variable.csv", col_types = cols())

device_position <- read_csv("data-raw/device_position.csv", col_types = cols())

sensor_reference <- read_csv("data-raw/sensor_reference.csv", col_types = cols())

sensor_calibrations <- read_csv("data-raw/sensor_calibrations.csv", col_types = cols())
sensor_scaling <- read_csv("data-raw/sensor_scaling.csv", col_types = cols())

variable_ref <- read_csv("data-raw/variables.csv", col_types = cols())

qc_filters <- read_csv("data-raw/qc_filters.csv", col_types = cols())

qc_update_df <- read_csv("data-raw/qc_update_table.csv", col_types = cols())

sensor_map <- map_sensors(site_devices = site_devices, 
                          device_var = device_var, 
                          device_position = device_position)
sensor_map
# A tibble: 65 × 8
   site     device_id date_from           date_to             var_abbr reference
   <chr>    <chr>     <dttm>              <dttm>              <chr>    <chr>    
 1 f_Rotor… wip_ZT_h… 2010-02-05 18:15:00 2022-01-18 04:00:00 <NA>     <NA>     
 2 f_Rotor… sns_ZT_d… 2007-07-13 12:00:00 2015-08-21 12:00:00 c_do     d        
 3 f_Rotor… sns_IS_r… 2015-10-23 10:00:00 2022-01-18 04:00:00 c_do     d        
 4 f_Rotor… sns_ZT_d… 2007-07-13 12:00:00 2010-11-09 12:15:00 c_do     d        
 5 f_Rotor… sns_ZT_d… 2010-11-09 12:15:00 2012-04-29 06:09:00 c_do     d        
 6 f_Rotor… sns_ZT_d… 2012-04-29 06:09:00 2022-01-22 13:54:00 c_do     d        
 7 f_Rotor… sns_ZT_d… 2007-07-13 12:00:00 2015-08-21 12:00:00 c_do_sat d        
 8 f_Rotor… sns_IS_r… 2015-10-23 10:00:00 2022-01-18 04:00:00 c_do_sat d        
 9 f_Rotor… sns_ZT_d… 2007-07-13 12:00:00 2010-11-09 12:15:00 c_do_sat d        
10 f_Rotor… sns_ZT_d… 2010-11-09 12:15:00 2012-04-29 06:09:00 c_do_sat d        
# ℹ 55 more rows
# ℹ 2 more variables: z_relative <dbl>, var_ref_id <chr>
Figure 1: Sensor timeline.

1 Site location

Lake Rotorua is located in the Bay of Plenty region of New Zealand. The monitoring site is located near the centre of the lake around its deepest point.

Figure 2: Site location on Lake Rotorua.

2 Compile raw hydroTel data from BoP

Check no variables are time-aggregated

2.1 Standardise to 15-minute data (i.e. modify any periods where data were logged every 5-minutes.

  • Circular average for wind direction, sum for rainfall, mean for everything else.
rain <- dat |> 
  select(datetime, contains("rainfall")) |>
  standardise(15, FUN = sum, na.rm = TRUE)

wind_dir <- dat |> 
  select(datetime, contains("wind_direction")) |>
  standardise(15, FUN = avg_circ)

mean_vars <- c("datetime", 
               "do_cont_0.5m", "do_cont_20m", "do_sat_0.5m", 
               "do_sat_20m", "do_temp_0.5m", "do_temp_20m", 
               # "light_level_1", "light_level_2",
               "ph_decommissioned", "ph_raw",
               "temp_0.5m", "temp_2.5m", "temp_4.5m", 
               "temp_6.5m", "temp_8.5m",  "temp_10.5m", "temp_12.5m",
               "temp_14.5m", "temp_16.5m", "temp_18.5m",  "temp_20.5m", 
               # "temphion_3", 
               "phyco", 
               # "trios_chl_a",
               "chlorophyll", 
               "air_temp", "humidity", "barometric_pressure",
               "wind_speed_m-s", "wind_speed")
means <- dat |>
  select(contains(mean_vars)) |>
  standardise(15, FUN = mean, na.rm = TRUE)

date_df <- data.frame(datetime = seq.POSIXt(round_date(min(dat$datetime), "15 mins"),
                                            round_date(max(dat$datetime), "15 mins"),
                                            by = 15*60))

dat2 <- list(date_df, means, rain, wind_dir) |> 
  # left_join(means, by = "datetime") |>
  purrr::reduce(function(x, y) {left_join(x, y, by = "datetime")}) |>
  arrange(datetime)

2.2 Check and correct any variable name misalignments

(e.g., temperature depths incorrectly assigned , Temperature & DO swapped (e.g., Rotorua around 2016)

DO at 0.5m

plotly_data(dat2, y1 = "do_cont_20m", y2 = "do_temp_20m", sub = 50)

Swap DO & temperature back into their correct columns.

swap_start <- "2021-03-23 19:00:00"
swap_end <- "2022-02-02 15:00:00"

dat2 <- swap_cols(dat2, var1 = "do_cont_20m", var2 = "do_temp_20m", 
                  date_range = c(swap_start, swap_end))
plotly_data(dat2, y1 = "do_cont_20m", y2 = "do_temp_20m",
            date_range = c(swap_start, swap_end), buffer = 240, sub = 20)
ggplot() +
  geom_scattermore(data = dat2, aes(x = datetime, y = chlorophyll, colour = "chl1")) +
  geom_scattermore(data = dat2, aes(x = datetime, y = trios_chl_a, colour = "chl2")) +
  coord_cartesian(ylim = c(0, 500)) +
  theme_bw()

Sensors were all 0.5-1m, we will call them 1m.

Map column names to standard names used in the database.

col_mapping <- c(
  # "battery_voltage", 
  "f_chl_d100" = "chlorophyll",
  # "f_chl_d110" = "trios_chl_a",
  "f_phyc_d100" = "phyco",
  "c_do_d100" = "do_cont_0.5m",
  "c_do_d2000" = "do_cont_20m",
  "c_do_sat_d100" = "do_sat_0.5m",
  "c_do_sat_d2000" = "do_sat_20m",
  "t_wtr_d50" = "temp_0.5m",
  "t_wtr_d100" = "do_temp_0.5m",
  "t_wtr_d250" = "temp_2.5m",
  "t_wtr_d450" = "temp_4.5m",
  "t_wtr_d650" = "temp_6.5m",
  "t_wtr_d850" = "temp_8.5m",
  "t_wtr_d1050" = "temp_10.5m",
  "t_wtr_d1250" = "temp_12.5m",
  "t_wtr_d1450" = "temp_14.5m",
  "t_wtr_d1650" = "temp_16.5m",
  "t_wtr_d1850" = "temp_18.5m",
  "t_wtr_d2000" = "do_temp_20m",
  "t_wtr_d2050" = "temp_20.5m",
  "c_ph_d100" = "ph_decommissioned",
  "c_ph_raw_d100" = "ph_raw",
  
  "t_air_h150" = "air_temp",
  "pr_baro_h150" = "barometric_pressure", 
  "pp_rain_h150" = "rainfall",
  "h_rh_h150" = "humidity",
  "r_swd_h150" = "solar_radiation",
  "w_spd_h150" = "wind_speed_m-s",
  "w_dir_h150" = "wind_direction")


dat3 <- standardise_columns(dat2, col_mapping)

Write level one raw data to file.

write_csv(dat3, "data-raw/rotorua_sftp_compiled_raw_level1.csv")

3 Sensor Attribution

Map each sensor onto each data point.

Convert wide dataframe to standardised long format.

raw_long <- raw |> 
  pivot_longer(cols = -datetime, names_to = "var_ref_id", values_to = "raw_value") 

# Conditional join raw_long with sensor_map using var_ref_id and when datetime is between date_from and date_to
raw_long_device <- sensor_map |> 
  # slice(1:3) |>
  group_by(device_id) |> 
  left_join(raw_long, by = "var_ref_id") |>
  filter(datetime >= date_from & datetime <= date_to) |> 
  ungroup() |> 
  arrange(device_id, datetime) |>
  mutate(qc_value = round(raw_value, 2),
         qc_flag = "", 
         qc_code = case_when(
           is.na(raw_value) ~ "QC 100",
           TRUE ~ "QC 200"
         )) |> 
  select(site, datetime, device_id, var_abbr, var_ref_id, raw_value, qc_value, 
         qc_flag, qc_code) |> 
  distinct(datetime, var_ref_id, .keep_all = TRUE) 

head(raw_long_device)
# A tibble: 6 × 9
  site      datetime            device_id var_abbr var_ref_id raw_value qc_value
  <chr>     <dttm>              <chr>     <chr>    <chr>          <dbl>    <dbl>
1 f_Rotorua 2007-07-13 14:45:00 sns_Ap_t… t_wtr    t_wtr_d10…      10.6     10.6
2 f_Rotorua 2007-07-13 14:45:00 sns_Ap_t… t_wtr    t_wtr_d12…      12.9     12.9
3 f_Rotorua 2007-07-13 14:45:00 sns_Ap_t… t_wtr    t_wtr_d14…      11.2     11.2
4 f_Rotorua 2007-07-13 14:45:00 sns_Ap_t… t_wtr    t_wtr_d16…      10.1     10.1
5 f_Rotorua 2007-07-13 14:45:00 sns_Ap_t… t_wtr    t_wtr_d18…      10.7     10.7
6 f_Rotorua 2007-07-13 14:45:00 sns_Ap_t… t_wtr    t_wtr_d20…      11.4     11.4
# ℹ 2 more variables: qc_flag <chr>, qc_code <chr>

Sensor plots

3.1 Remove based on site events

raw_long_device <- raw_long_device |> 
  remove_site_events(site_events = site_events, sensor_map = sensor_map) 
# A tibble: 39 × 5
   site      device_id               var_ref        qc_flag        
   <chr>     <chr>                   <chr>          <chr>          
 1 f_Rotorua all                     all            buoy_out       
 2 f_Rotorua all                     all            buoy_out       
 3 f_Rotorua all                     all            buoy_out       
 4 f_Rotorua all                     all            buoy_out       
 5 f_Rotorua all                     all            buoy_out       
 6 f_Rotorua all                     all            buoy_out       
 7 f_Rotorua all                     all            buoy_out       
 8 f_Rotorua all                     all            buoy_out       
 9 f_Rotorua all                     all            buoy_out       
10 f_Rotorua all                     all            buoy_out       
11 f_Rotorua all                     all            buoy_out       
12 f_Rotorua all                     all            buoy_out       
13 f_Rotorua all                     all            battery_failure
14 f_Rotorua all                     all            battery_failure
15 f_Rotorua all                     all            battery_failure
16 f_Rotorua all                     all            battery_failure
17 f_Rotorua all                     all            battery_failure
18 f_Rotorua sns_Sp_chlor_2779       f_chl          sensor_fault   
19 f_Rotorua sns_Sp_chlor_2779       f_chl          sensor_fault   
20 f_Rotorua sns_Sp_chlor_2779       f_chl          sensor_fault   
21 f_Rotorua sns_Sp_chlor_9999-rua   f_chl          sensor_fault   
22 f_Rotorua sns_Sp_chlor_2779       f_chl          sensor_fault   
23 f_Rotorua sns_Sp_chlor_2779       f_chl          sensor_fault   
24 f_Rotorua sns_Sp_chlor_2779       f_chl          sensor_fault   
25 f_Rotorua sns_Sp_chlor_2779       f_chl          sensor_fault   
26 f_Rotorua sns_TD_c7c_2103928      f_chl          sensor_fault   
27 f_Rotorua sns_TD_c7c_2103928      f_chl          sensor_fault   
28 f_Rotorua sns_Vs_wxt510_9999-rua2 w_spd          sensor_fault   
29 f_Rotorua sns_Vs_wxt510_9999-rua2 w_dir          sensor_fault   
30 f_Rotorua sns_ZT_dopto_2338       c_do_d2000     sensor_fault   
31 f_Rotorua sns_ZT_dopto_2338       c_do_d2000     sensor_fault   
32 f_Rotorua sns_ZT_dopto_2338       c_do_sat_d2000 sensor_fault   
33 f_Rotorua sns_ZT_dopto_2338       c_do_sat_d2000 sensor_fault   
34 f_Rotorua sns_Tr_microp_1035      f_phyc         sensor_fault   
35 f_Rotorua sns_Tr_microp_1035      f_phyc         sensor_fault   
36 f_Rotorua sns_TD_c7p_2103927      f_phyc         sensor_fault   
37 f_Rotorua sns_ZT_dopto_2378       c_do_d100      sensor_fault   
38 f_Rotorua sns_ZT_dopto_2378       c_do_sat_d100  sensor_fault   
39 f_Rotorua sns_St_tstringds_017    t_wtr_d1850    sensor_fault   
# ℹ 1 more variable: date_interval <Interval>
raw_long_device |> 
  filter(qc_flag == "sensor_fault") 
# A tibble: 94,640 × 9
   site     datetime            device_id var_abbr var_ref_id raw_value qc_value
   <chr>    <dttm>              <chr>     <chr>    <chr>          <dbl>    <dbl>
 1 f_Rotor… 2013-07-18 17:00:00 sns_ZT_d… c_do     c_do_d100       11.4       NA
 2 f_Rotor… 2013-07-18 17:15:00 sns_ZT_d… c_do     c_do_d100       11.4       NA
 3 f_Rotor… 2013-07-18 17:30:00 sns_ZT_d… c_do     c_do_d100       11.4       NA
 4 f_Rotor… 2013-07-18 17:45:00 sns_ZT_d… c_do     c_do_d100       11.4       NA
 5 f_Rotor… 2013-07-18 18:00:00 sns_ZT_d… c_do     c_do_d100       11.4       NA
 6 f_Rotor… 2013-07-18 18:15:00 sns_ZT_d… c_do     c_do_d100       11.4       NA
 7 f_Rotor… 2013-07-18 18:30:00 sns_ZT_d… c_do     c_do_d100       11.3       NA
 8 f_Rotor… 2013-07-18 18:45:00 sns_ZT_d… c_do     c_do_d100       11.3       NA
 9 f_Rotor… 2013-07-18 19:00:00 sns_ZT_d… c_do     c_do_d100       11.3       NA
10 f_Rotor… 2013-07-18 19:15:00 sns_ZT_d… c_do     c_do_d100       11.2       NA
# ℹ 94,630 more rows
# ℹ 2 more variables: qc_flag <chr>, qc_code <chr>
# plot_qc_data(data = raw_long_device_filt)

4 Meteorology

4.1 Relative humidity drift

Over the entire period, the relative humidity sensor appears to have drifted in the middle of the time period.

raw_long_device |> 
  filter(var_ref_id == "h_rh_h150") |> 
  summary()
     site              datetime                    device_id        
 Length:499268      Min.   :2007-07-13 14:45:00   Length:499268     
 Class :character   1st Qu.:2011-05-15 15:26:15   Class :character  
 Mode  :character   Median :2014-12-05 19:37:30   Mode  :character  
                    Mean   :2014-11-17 02:43:05                     
                    3rd Qu.:2018-06-27 23:48:45                     
                    Max.   :2022-01-18 04:00:00                     
                                                                    
   var_abbr          var_ref_id          raw_value        qc_value    
 Length:499268      Length:499268      Min.   :  0.0   Min.   :  0.0  
 Class :character   Class :character   1st Qu.: 63.0   1st Qu.: 63.0  
 Mode  :character   Mode  :character   Median : 75.0   Median : 75.0  
                                       Mean   : 65.7   Mean   : 65.7  
                                       3rd Qu.: 83.0   3rd Qu.: 83.0  
                                       Max.   :100.0   Max.   :100.0  
                                       NA's   :36489   NA's   :38250  
   qc_flag            qc_code         
 Length:499268      Length:499268     
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
plot_sensor(data = raw_long_device, var_ref_id = "h_rh_h150",
            variable_ref =  variable_ref, 
            sensor_calibrations = sensor_calibrations,
            sensor_reference = sensor_reference,
            sensor_scaling = sensor_scaling) +
  geom_hline(yintercept = 100, linetype = "dashed", colour = "red") +
  coord_cartesian(ylim = c(0, 100)) 
Figure 3: Relative humidity drift.

5 Quality control

We used a set of quality control codes to assess the quality of the data. The codes are from the National Environmental Monitoring Standards (NEMS). The codes are as follows:

qc_codes <- c("Missing Record" = "QC 100",
              "No Quality or Non Verified" = "QC 200",
              "Synthetic" = "QC 300",
              "Poor Quality" = "QC 400",
              "Fair Quality" = "QC 500",
              "Good Quality" = "QC 600")

qc_code_col_scale = c(
  "QC 100" = "#FF0000",
  "QC 200" = "#8B5A00",
  "QC 300" = "#D3D3D3",
  "QC 400" = "#FFA500",
  "QC 500" = "#00BFFF",
  "QC 600" = "#006400"
)

qc_df <- data.frame(
  qc_code = qc_codes,
  qc_zone = names(qc_codes),
  qc_code_col = unname(qc_code_col_scale),
  stringsAsFactors = FALSE
)

qc_df |> 
  datatable(rownames = FALSE,
            options = list(
              pageLength = 6,
              dom = "t",
              columnDefs = list(list(className = 'dt-center', targets = "_all"))
            )
  ) |> 
  formatStyle(
    'qc_code_col',
    target = 'row',
    backgroundColor = styleEqual(qc_code_col_scale, qc_code_col_scale)
  )

6 pH standardisation

To standardise the pH data, we apply a sensor scaling based on reference pH measurements of 4, 7, and 10. The pH sensor is a ISFET sensor, which is a type of pH sensor that uses a field-effect transistor (FET) to measure the pH of a solution.

We use the raw pH signal which is a value between 0 and 1000. The pH sensor is then mapped onto the reference points and corrected for drift.

Drift corrected pH values

ph_raw <- raw_long_device |> 
  filter(var_abbr %in% c("c_ph_raw")) |> 
  mutate(new_val = qc_value) |> 
  select(datetime, new_val)

ph_dc <- ph_count |> 
  filter(var_abbr %in% c("c_ph")) |> 
  left_join(ph_raw, by = "datetime") |>
  mutate(
    qc_value = case_when(
      is.na(qc_value) ~ new_val, .default = qc_value
    )
  )

date_range <- raw_long_device |> 
  filter(var_abbr %in% c("c_ph", "c_ph_raw")) |>
  pull(datetime) |>
  range()

# ph_dc <- ph #ph_count |>
# filter(!is.na(qc_value) & !duplicated(datetime)) |>
# mutate(var_ref_id = "c_ph_d100", var_abbr = "c_ph") 

any(duplicated(ph_dc$datetime))
[1] FALSE
data <- ph_dc
ph_dc <- ph_dc |> 
  adjust_linear_drift(var_ref_id = "c_ph_d100", sensor_refs = sensor_refs, 
                      date_range = date_range, expand_coeffs = TRUE) |> 
  apply_adjustment(var_ref_id = "c_ph_d100",
                   # date_range = c("2013-07-19", "2017-07-13"),
                   FUN = \(x) ifelse(x <= 5 | x >= 10, NA, x)) |> 
  mutate(
    qc_code = case_when(
      is.na(raw_value) ~ "QC 100",
      is.na(qc_value) ~ "QC 200",
      .default = qc_code
    )
  )
summary(ph_dc)
    datetime                    var_ref_id            site          
 Min.   :2014-02-20 12:00:00   Length:136033      Length:136033     
 1st Qu.:2015-02-09 18:00:00   Class :character   Class :character  
 Median :2016-01-30 00:00:00   Mode  :character   Mode  :character  
 Mean   :2016-01-30 00:00:00                                        
 3rd Qu.:2017-01-18 06:00:00                                        
 Max.   :2018-01-07 12:00:00                                        
                                                                    
  device_id           var_abbr           raw_value         qc_value    
 Length:136033      Length:136033      Min.   :-0.970   Min.   :5.013  
 Class :character   Class :character   1st Qu.: 6.690   1st Qu.:6.520  
 Mode  :character   Mode  :character   Median : 7.160   Median :6.847  
                                       Mean   : 7.338   Mean   :6.932  
                                       3rd Qu.: 7.790   3rd Qu.:7.225  
                                       Max.   :14.070   Max.   :9.980  
                                       NA's   :12393    NA's   :18829  
   qc_flag            qc_code         
 Length:136033      Length:136033     
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
# viz_data(data = ph_dc, var_abbr = "var_ref_id",
#          site_events = site_events)

# plotly_data(ph_dc, y1 = "c_ph_d100", sub = 20)


ph_orig <- raw_long_device |> 
  filter(var_abbr %in% c( "c_ph")) |>
  mutate(
    raw_value = case_when(
      # datetime > as_datetime("2019-04-01") & 
      #   var_ref_id == "c_phraw_d100" ~ NA,
      datetime > as_datetime("2017-07-13") & 
        var_ref_id == "c_ph_d100" ~ NA, 
      .default = raw_value
    )
  ) |> 
  filter(!is.na(raw_value)) 


# plotly_data(data = ph_dc, y1 = "drift_correct", sub = 20)

ggplot() +
  geom_point(data = ph_orig, aes(datetime, raw_value, colour = "Original"),
             size = 0.2) +
  geom_point(data = ph_dc, aes(datetime, qc_value, 
                               colour = "Drift corrected"), size = 0.2) +
  coord_cartesian(ylim = c(5, 11)) +
  theme_bw()
Figure 4: Drift corrected pH values.

6.1 Basic QC - filters

  1. Filter extreme outliers (order of magnitude plus)
  2. Filter repetitions/stuck values (careful genuine repeats e.g., rainfall == 0)
  3. Remove data for any periods where buoy was clearly out of the water (see notes or visual check of data)
raw_long_device_filt <- apply_filters(raw_long_device2, filters = qc_filters)

plot_flag_ts(raw_long_device_filt)
Figure 5: Plot raw data with basic QC filters applied.

7 Fluorescence sensors

fluor_df <- raw_long_device_filt |> 
  filter(var_abbr %in% c("f_chl", "f_phyc")) |> 
  apply_adjustment("f_chl_d100",
                   FUN = \(x) ifelse(x <= 0, NA, x)) |> 
  apply_adjustment("f_chl_d100",
                   FUN = \(x) ifelse(x >= 1000, NA, x))

# viz_data(data = fluor_df, var_abbr = "var_ref_id",
#          site_events = site_events,
#          raw_value = "qc_value")


fluor_sensors <- sensor_map |> 
  filter(device_id %in% fluor_df$device_id) |> 
  select(device_id, var_ref_id, date_from, date_to)

sensor_refs <- sensor_reference |> 
  left_join(fluor_sensors, by = "device_id") |>
  filter(device_id %in% fluor_sensors$device_id,
         !is.na(value_actual)
         # date > "2013-06-28",
         # value_actual %in% c(4, 7, 10), 
         # units_sensor == "counts"
  ) 

sensor_scales <- sensor_scaling |> 
  arrange(date) |>
  filter(device_id %in% fluor_sensors$device_id) 

plot_sensor(data = fluor_df, var_ref_id = c("f_chl_d100"),#, "f_phyc_d100"),
            variable_ref =  variable_ref, 
            sensor_calibrations = sensor_calibrations, 
            sensor_reference = sensor_reference,
            sensor_scaling = sensor_scaling, clip_ylim = F) 
Figure 6: Fluorescence sensors an their associated device ids.

Correct fluorescence data by removing bad data and applying corrections to get the chlorophyll at it its raw 0-5V raw signal, then recorrecting to get the corrected chlorophyll value inrelative fluorescene units (RFU).

raw_chl <- fluor_df |> 
  filter(var_ref_id == "f_chl_d100")

# viz_data(data = raw_chl, site_events = site_events, variable = "var_ref_id", 
#          value = "qc_value")

sensor_scales <- sensor_scaling |> 
  filter(device_id %in% raw_chl$device_id) |> 
  select(device_id, date, offset, multiplier, range) 

raw_v <- raw_chl |> 
  mutate(var_ref_id = "f_chl_raw_v_d100",
         var_abbr = "f_chl_raw_v",
         # raw_value = NA_real_
  ) |>
  left_join(sensor_scales, by = c("device_id", "datetime" = "date")) |>
  tidyr::fill(offset, multiplier, .direction = "down") |>
  mutate(qc_value = (qc_value - offset) / multiplier,
         qc_code = case_when(
           is.na(raw_value) ~ "QC 100",
           is.na(qc_value) ~ "QC 200",
           .default = "QC 500"
         )
  ) |>
  # apply_adjustment("f_chl_raw_v_d100",
  #                  date_range = c("2009-12-02 10:53:01", "2010-01-15 11:51:31"),
  #                  FUN = \(x) x / 10
  # ) |>
  apply_adjustment("f_chl_raw_v_d100",
                   date_range = c("2013-04-10 11:59:16", "2014-08-28 04:09:46"),
                   FUN = \(x) x / 5
  ) |>
  apply_adjustment("f_chl_raw_v_d100",
                   date_range = c("2015-01-30 14:02:14", "2016-01-23 10:57:48"),
                   FUN = \(x) x / 10
  ) |>
  apply_adjustment("f_chl_raw_v_d100",
                   date_range = c("2019-01-29 09:34:58", "2019-08-01 15:16:35"),
                   FUN = \(x) x / 10
  ) |>
  apply_adjustment("f_chl_raw_v_d100",
                   date_range = c("2019-08-01 15:11:20", "2022-01-18 08:54:32"),
                   FUN = \(x) x * 3
  ) #|>
# # brief period where it was 20-fold higher, NYE issue across HydroTel??
# apply_adjustment("f_chl_raw_v_d100",
#                  date_range = c("2007-12-31 23:59:00", "2008-01-04 10:25"),
#                  FUN = \(x) x / 20
# ) |>
# # looks like changed to lower range on this date\
# # still seapoint sensor, values approx x3 so guess was changed from 0-50 to 0-15
# apply_adjustment("f_chl_raw_v_d100",
#                  date_range = c("2013-04-10 16:14", "2015-12-24 08:24"),
#                  FUN = \(x) x * 15 / 50
# ) |>
# # values higher... /10?
# apply_adjustment("f_chl_raw_v_d100",
#                  date_range = c("2015-12-24 08:24", "2016-01-20 11:53"),
#                  FUN = \(x) x / 10
# ) |>
# # later period where it was also 10-fold higher for some reason?
# apply_adjustment("f_chl_raw_v_d100",
#                  date_range = c("2019-01-29 09:00", "2019-08-01 15:13"),
#                  FUN = \(x) x / 10
# ) |>
# # seapoint sensor was always logged at 10x proper range, but applying an
# # approx correction to match to Turner sensor values..
# apply_adjustment("f_chl_raw_v_d100",
#                  date_range = c("2007-07-13 14:45", "2016-02-05 05:14"),
#                  FUN = \(x) x / 10 * 3
# ) |>
#     # first few years of Turner was logged with multiplier = 20, when should have been 10
# apply_adjustment("f_chl_raw_v_d100",
#                  date_range = c("2016-02-05 05:14", "2019-06-04 14:41"),
#                  FUN = \(x) x / 2
# ) #|>
# update_qc_code_vectorized(qc_update_df = qc_update_df)

# chl_adj <- viz_data(data = raw_v, site_events = site_events, variable = "var_ref_id",
#          value = "qc_value")
# 
# write_csv(chl_adj, "qc_adj_f_chl_d100_raw_V.csv")

# plot_raw_qc(data = raw_v, variable_ref = variable_ref)

# chl_adj <- read_csv("qc_adj_f_chl_d100_raw_V.csv", col_types = cols()) 
# qc_update_df <- bind_rows(qc_update_df, chl_adj)
# write_csv(qc_update_df, "qc_update_df.csv")

chla_corr <- raw_v |>
  update_qc_code_vectorized(qc_update_df = qc_update_df) |>
  mutate(qc_value = qc_value * 10,
         var_ref_id = "f_chl_d100",
         var_abbr = "f_chl",
  )


chla_corr |> 
  plot_sensor(var_ref_id = "f_chl_d100", variable_ref =  variable_ref, 
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE,
              colour = "qc_code")
Figure 7: Corrected chlorophyll data. Points are coloured according to the quality control codes.

7.1 Load in field measurements

Compare field measurements to sensor data.

field <- read_csv("data-raw/rotorua_field_data.csv") |> 
  mutate(Date2 = as.Date(Date))

chla <- field |> 
  filter(grepl("Chla", Parameter)) |> 
  select(Date2, LocationName, DepthFrom, Sample_Depth, Value) |> 
  rename(Date = Date2)

ref_times <- raw_long_device_filt |> 
  mutate(hour = hour(datetime)) |> 
  filter(var_abbr %in% c("f_chl")) |>
  filter(hour %in% c(0:6, 21:23)) |> 
  filter(as.Date(datetime) %in% as.Date(chla$Date))

sub_sensor <- raw_v |> 
  mutate(Date = as.Date(datetime)) |>
  filter(datetime %in% ref_times$datetime) |> 
  group_by(Date, device_id) |>
  summarise(qc_value = mean(qc_value, na.rm = TRUE),
            median = median(qc_value, na.rm = TRUE), .groups = "drop")



df <- sub_sensor |> 
  left_join(chla, by = c("Date" = "Date")) |> 
  mutate(year = year(Date)) |> 
  filter(!is.na(qc_value))

ggplot() +
  geom_point(data = df, aes(qc_value, Value, colour = LocationName)) +
  geom_smooth(data = df, aes(qc_value, Value, colour = LocationName), method = "lm") +
  # facet_wrap(year~device_id, scales = "free") +
  labs(x = "Sensor value (Volts)",
       y = "Field value (ug/L)") +
  coord_cartesian(ylim = c(0, 50)) +
  theme_bw() +
  theme(legend.position = "bottom") 

ggplot() +
  geom_point(data = df, aes(qc_value, Value, colour = device_id)) +
  geom_smooth(data = df, aes(qc_value, Value, colour = device_id), method = "lm") +
  facet_wrap(~device_id, scales = "free") +
  labs(x = "Sensor value (Volts)",
       y = "Field value (ug/L)") +
  # coord_cartesian(ylim = c(0, 50)) +
  theme_bw() +
  theme(legend.position = "bottom") 
Figure 8: Compare field measurements to sensor data.
Figure 9: Compare field measurements to sensor data.

7.2 Phycocyanin data

Repeat similar procedure to phycocyanin data.

phyc_corr <- fluor_df |> 
  filter(var_ref_id == "f_phyc_d100",
         duplicated(datetime))

# viz_data(data = phyc_corr, qc_update_df = qc_update_df, 
#          site_events = site_events, variable = "var_ref_id",
#          value = "qc_value")

sensor_scales <- sensor_scaling |> 
  filter(device_id %in% phyc_corr$device_id) |> 
  arrange(date) |> 
  select(device_id, date, offset, multiplier, range) 

phyc_corr <- phyc_corr |>
  left_join(sensor_scales, by = c("device_id", "datetime" = "date")) |> 
  tidyr::fill(offset, multiplier, .direction = "down") |>
  mutate(qc_value = (qc_value - offset) / multiplier,
         var_ref_id = "f_phyc_raw_v_d100",
         var_abbr = "f_phyc_raw_v"
  ) |> 
  drift_correction(var_ref_id = "f_phyc_raw_v_d100", 
                   date_range = c("2013-02-21 12:04:53", "2013-06-26 23:55:06"), 
                   low = c(0, 0, 0), high = c(0.2, 0.2, 0.39)) 
phyc_corr <- phyc_corr |> 
  mutate(qc_value = qc_value, # * 10,
         var_ref_id = "f_phyc_d100",
         var_abbr = "f_phyc"
  )

phyc_corr |> 
  plot_sensor(var_ref_id = "f_phyc_d100", variable_ref =  variable_ref, 
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling,
              clip_ylim = FALSE, colour = "qc_code")
Figure 10: Phycocyanin data quality control.

8 Oxygen sensors

Remove bad data and correct for linear drift

do_1m <- raw_long_device_filt |> 
  filter(var_ref_id %in% c("c_do_sat_d100"))

plot_raw_qc(data = do_1m, variable_ref = variable_ref, ylim = c(80, 150))
# do_1m |> 
#   viz_data(qc_update_df = qc_update_df, site_events = site_events,
#            variable = "var_ref_id", value = "qc_value")
do_1m |> 
  # plotly_data(y1 = "c_do_sat_d100", sub = 20)
  plot_sensor(var_ref_id = c("c_do_sat_d100"), variable_ref =  variable_ref, 
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE,
              colour = "qc_code") 
Figure 11: Oxygen sensor data quality control.
do_20m <- raw_long_device_filt |> 
  filter(var_ref_id %in% c("c_do_sat_d2000")) |> 
  update_qc_code_vectorized(qc_update_df = qc_update_df)
do_20m <- do_20m 


do_20m |> 
  # plotly_data(y1 = "c_do_sat_d1000", sub = 20)
  plot_sensor(var_ref_id = c("c_do_sat_d2000"), variable_ref =  variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = F, colour = "qc_code") +
  geom_hline(yintercept = 0)
Figure 12: Oxygen sensor data at 20m quality control.

9 Recalculate DO concentrations using temperature and DO saturation

temp_1m <- raw_long_device_filt |> 
  filter(var_ref_id == "t_wtr_d100") |> 
  select(datetime, raw_value) |> 
  rename(temp = raw_value)

pr_baro <- raw_long_device_filt |> 
  filter(var_ref_id == "pr_baro_h150") |> 
  select(datetime, raw_value) |> 
  rename(pr_baro = raw_value)

do_conc2_1m <- left_join(do_1m, temp_1m, by = "datetime") |> 
  left_join(pr_baro, by = "datetime") |> 
  mutate(do_sat_mgL = calc_DOsat_mg(temp, pr_baro),
         var_abbr = "c_do",
         var_ref_id = "c_do2_d100",
         qc_value = qc_value / 100 * do_sat_mgL
  ) |> 
  select(datetime, var_ref_id, site, device_id, var_abbr, qc_value, qc_flag)

do_conc_1m <- raw_long_device_filt |> 
  filter(var_ref_id == "c_do_d100") |> 
  select(datetime, var_ref_id, site, device_id, var_abbr, raw_value)


do_conc2_1m <- do_conc2_1m |> 
  mutate(var_ref_id = "c_do_d100") |> 
  select(datetime, var_ref_id, site, device_id, var_abbr, qc_value,
         qc_flag)

do_conc3_1m <- left_join(do_conc_1m, do_conc2_1m, by = c("datetime", "var_ref_id", "site", "device_id", "var_abbr")) |> 
  mutate(
    qc_code = case_when(
      !is.na(qc_value) ~ "QC 300",
      is.na(raw_value) ~ "QC 100",
      is.na(qc_value) ~ "QC 200"
    )
  )
do_conc3_1m <- do_conc3_1m |> 
  update_qc_code_vectorized(qc_update_df = qc_update_df)


temp_20m <- raw_long_device_filt |> 
  filter(var_ref_id == "t_wtr_d2000") |> 
  select(datetime, raw_value) |> 
  rename(temp = raw_value)

pr_baro <- raw_long_device_filt |> 
  filter(var_ref_id == "pr_baro_h150") |> 
  select(datetime, raw_value) |> 
  rename(pr_baro = raw_value)

do_conc2_20m <- left_join(do_20m, temp_20m, by = "datetime") |> 
  left_join(pr_baro, by = "datetime") |> 
  mutate(do_sat_mgL = calc_DOsat_mg(temp, pr_baro),
         var_abbr = "c_do",
         var_ref_id = "c_do2_d100",
         qc_value = qc_value / 100 * do_sat_mgL
  ) |> 
  select(datetime, var_ref_id, site, device_id, var_abbr, qc_value, qc_flag)

do_conc_20m <- raw_long_device_filt |> 
  filter(var_ref_id == "c_do_d2000") |> 
  select(datetime, var_ref_id, site, device_id, var_abbr, raw_value)


do_conc2_20m <- do_conc2_20m |> 
  mutate(var_ref_id = "c_do_d2000") |> 
  select(datetime, var_ref_id, site, device_id, var_abbr, qc_value,
         qc_flag)

do_conc3_20m <- left_join(do_conc_20m, do_conc2_20m, by = c("datetime", "var_ref_id", "site", "device_id", "var_abbr")) |> 
  mutate(
    qc_code = case_when(
      !is.na(qc_value) ~ "QC 300",
      is.na(raw_value) ~ "QC 100",
      is.na(qc_value) ~ "QC 200"
    )
  )
do_conc3_20m <- do_conc3_20m |> 
  update_qc_code_vectorized(qc_update_df = qc_update_df)


do <- bind_rows(do_conc3_1m, do_conc3_20m, do_1m, do_20m) 
# qc_update_df <- viz_data(data = do, qc_update_df = qc_update_df, 
#                           site_events = site_events, variable = "var_ref_id",
#                           value = "qc_value")

9.1 Water temperature

wtemp <- raw_long_device_filt |> 
  filter(var_abbr %in% c("t_wtr")) |>
  update_qc_code_vectorized(qc_update_df = qc_update_df)
plot_raw_qc(data = wtemp, variable_ref = variable_ref, ylim = c(7, 30))
# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df)


wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d50"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 13: Temperature sensor data at 1m quality control.

Water temperature high in the bottom waters

high_temp <- wtemp |> 
  filter(var_ref_id %in% c("t_wtr_d2050"), raw_value >= 22,raw_value <= 35) |> 
  mutate(over_22 = raw_value > 22,
         tdiff = datetime - lag(datetime, default = first(datetime)),
         group = data.table::rleid(tdiff)
  ) |> 
  group_by(var_ref_id, group) |>
  mutate(n = n(), tstep = 1:n() * 15, date = datetime[1]) |> 
  filter(n > 1, max(tstep) >= 60) |> 
  ungroup() |> 
  filter(group %in% sample(unique(group), size = 20))
high_temp
# A tibble: 1,669 × 15
   datetime            var_ref_id  site    device_id var_abbr raw_value qc_value
   <dttm>              <chr>       <chr>   <chr>     <chr>        <dbl>    <dbl>
 1 2007-12-21 12:45:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.2     NA  
 2 2007-12-21 13:00:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.4     NA  
 3 2007-12-21 13:15:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.5     NA  
 4 2007-12-21 13:30:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.6     NA  
 5 2008-01-19 16:15:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.2     22.2
 6 2008-01-19 16:30:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.2     22.2
 7 2008-01-19 16:45:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.2     22.2
 8 2008-01-19 17:00:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.2     22.2
 9 2008-01-19 17:15:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.2     22.2
10 2008-01-19 17:30:00 t_wtr_d2050 f_Roto… sns_Ap_t… t_wtr         22.2     22.2
# ℹ 1,659 more rows
# ℹ 8 more variables: qc_flag <chr>, qc_code <chr>, over_22 <lgl>,
#   tdiff <drtn>, group <int>, n <int>, tstep <dbl>, date <dttm>
nxt_temp <- wtemp |> 
  filter(var_ref_id %in% c("t_wtr_d2000"), datetime %in% high_temp$datetime) |> 
  left_join(high_temp |> 
              select(datetime, group, date), by = c("datetime")) 

ggplot() +
  geom_point(data = high_temp, aes(datetime, raw_value, colour = var_ref_id), size = 0.2) +
  geom_line(data = high_temp, aes(datetime, raw_value, colour = var_ref_id, group = var_ref_id)) +
  geom_line(data = nxt_temp, aes(datetime, raw_value, colour = var_ref_id), size = 0.2) +
  facet_wrap(~date, scales = "free") +
  # coord_cartesian(xlim = c(0, 60 * 24), ylim = c(22, 30)) +
  # labs(x = "Minutes") +
  theme_bw()

Water temperature at 1m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d100")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d100"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 14: Temperature sensor data at 1m quality control.

Water temperature at 2.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d250")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d250"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 15: Temperature sensor data at 2.5m quality control.

Water temperature at 4.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d450")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d450"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 16: Temperature sensor data at 4.5m quality control.

Water temperature at 6.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d650")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d650"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 17: Temperature sensor data at 6.5m quality control.

Water temperature at 8.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d850")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d850"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 18: Temperature sensor data at 8.5m quality control.

Water temperature at 10.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d1050")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d1050"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 19: Temperature sensor data at 10.5m quality control.

Water temperature at 12.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d1250")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d1250"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 20: Temperature sensor data at 12.5m quality control.

Water temperature at 14.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d1450")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d1450"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE,
              colour = "qc_code")
Figure 21: Temperature sensor data at 14.5m quality control.

Water temperature at 16.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d1650")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d1650"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 22: Temperature sensor data at 16.5m quality control.

Water temperature at 18.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d1850")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d1850"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 23: Temperature sensor data at 18.5m quality control.

Water temperature at 20.0m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d2000")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d2000"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 24: Temperature sensor data at 20.0m quality control.

Water temperature at 20.5m

# wtemp <- wtemp |>
#   update_qc_code_vectorized(qc_update_df = qc_update_df, 
#                             var_ref_id = "t_wtr_d2050")

wtemp |> 
  plot_sensor(var_ref_id = c("t_wtr_d2050"), variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 25: Temperature sensor data at 20.5m quality control.

9.2 Check for offsets between temperature nodes

Generally the temperature data falls within the expected range, but there are some offsets between the sensors. The following plot shows the estimated temperatures for 10 and 25 degrees C, using calculated offsets based on when the temperature difference between the sensors are in 5% of the distribution.

The temperature from the DO sensors (1m and 10m) have a lower accuracy (0.5 C).

temp_strings <- wtemp |>
  # Filter out oxygen depths
  filter(!var_ref_id %in% c("t_wtr_d50", "t_wtr_d100", "t_wtr_d2000")) |> 
  pull(device_id) |> 
  unique()


temp_devices <- site_devices |> 
  filter(device_id %in% temp_strings)
Figure 26: Check for offsets between temperature nodes
Figure 27: Check for offsets between temperature nodes

10 Assign QC codes for met variables

met <- raw_long_device_filt |> 
  filter(var_abbr %in% c("pp_rain", "pr_baro", "t_air", "h_rh", 
                         "w_dir", "w_spd"))
met <- met |>
  update_qc_code_vectorized(qc_update_df = qc_update_df, 
                            var_ref_id = c("pp_rain_h150", "pr_baro_h150", 
                                           "t_air_h150", "h_rh_h150", 
                                           "w_dir_h150", "w_spd_h150"))

met |> 
  plot_sensor(var_ref_id = c("pp_rain_h150", "pr_baro_h150", "t_air_h150",
                             "h_rh_h150", "w_dir_h150", "w_spd_h150"), 
              variable_ref = variable_ref,
              sensor_calibrations = sensor_calibrations, 
              sensor_reference = sensor_reference,
              sensor_scaling = sensor_scaling, clip_ylim = FALSE, colour = "qc_code")
Figure 28: Temperature sensor data at 20.5m quality control.
plot_raw_qc(data = met, variable_ref = variable_ref)

11 Visual summaries

# heck for duplicate values
# dups1 <- data |> 
#   group_by(datetime, var_ref_id) |> 
#   summarise(n = n()) |> 
#   filter(n > 1)
# dups1

Plot each variable

11.1 Temperature

plot_var_ts_qc(data = data, var_ref_id = c("t_wtr_d50", "t_wtr_d100",
                                           "t_wtr_d250", "t_wtr_d450",
                                           "t_wtr_d650", "t_wtr_d850",
                                           "t_wtr_d1050", "t_wtr_d1250",
                                           "t_wtr_d1450", "t_wtr_d1650", 
                                           "t_wtr_d1850", "t_wtr_d2000",
                                           "t_wtr_d2050"))
Figure 29: Temperature data at various depths quality control.

11.2 Oxygen

plot_var_ts_qc(data = data, var_ref_id = c("c_do_sat_d100", "c_do_sat_d2000"))
Figure 30: Oxygen saturation data at 1 and 10m depths quality control.
plot_var_ts_qc(data = data, var_ref_id = c("c_do_d100", "c_do_d2000"))
Figure 31: Oxygen data at 1 and 10m depths quality control.

11.3 Chlorophyll

plot_var_ts_qc(data = data, var_ref_id = c("f_chl_d100"))
Figure 32: Chlorophyll data at 1.0m quality control.

11.4 Phycocyanin

plot_var_ts_qc(data = data, var_ref_id = c("f_phyc_d100"))
Figure 33: Phycocyanin data at 1.0m quality control.

11.5 pH

plot_var_ts_qc(data = data, var_ref_id = c("c_ph_d100"))
Figure 34: pH data at 1.0m quality control.

11.6 Meteorological variables

12 Download data

Download the data as a zip folder. The data is in wide format an comes with metadata files. The metadata files contain the information needed to reconstruct the quality control.

Download Data