Skip to contents

This function aggregates numeric columns in a data frame or sf object by rows based on a specified aggregation function (e.g., mean(), sum(), median()). It allows column inclusion/exclusion using string matching and supports rational rounding.

Usage

ngr_str_df_col_agg(
  dat,
  col_str_match,
  col_result,
  fun = "mean",
  col_str_negate = NULL,
  decimal_places = 1
)

Arguments

dat

data.frame or sf object. The input data.

col_str_match

character A string to match column names for aggregation.

col_result

character The name of the resulting column with aggregated values.

fun

character The aggregation function to use. Options are "mean", "sum", "median", "min", or "max". Default is "mean".

col_str_negate

character (Optional) A string to exclude matching columns from aggregation. Default is NULL.

decimal_places

numeric Number of decimal places for rounding. Default is 1.

Value

The input dat with a new column named col_result containing aggregated values.

Details

This function identifies columns matching col_str_match while excluding those matching col_str_negate, applies the specified aggregation function row-wise. It attaches the result as a new column in the input data. If the result column exists within the dataframe it is updated with the aggregated values.

Rounding is performed using rational rounding (e.g., 1.55 rounds to 1.6).

See also

Examples

# Load example data

dat_raw <- data.frame(
  channel_width_m_1 = c(5.0, 4.2, 6.3, NA, 5.5),
  channel_width_m_2 = c(5.5, 4.0, 6.1, NA, 5.8),
  wetted_width_m_1 = c(3.0, 2.8, 3.5, NA, 3.3),
  wetted_width_m_2 = c(3.2, 2.9, 3.4, NA, 3.6),
  residual_pool_m_1 = c(0.8, 0.7, 0.9, NA, 0.85),
  residual_pool_m_2 = c(0.75, 0.65, 0.92, NA, 0.88),
  gradient_percent_1 = c(2.5, 2.0, 3.0, NA, 2.7),
  gradient_percent_2 = c(2.6, 2.1, 3.1, NA, 2.8),
  bankfull_depth_m_1 = c(1.2, 1.1, 1.3, NA, 1.25),
  bankfull_depth_m_2 = c(1.15, 1.05, 1.35, NA, 1.3),
  wetted_width_m_2.channel_width_m_2_time = c("2023-12-01", "2023-12-02", "2023-12-03", NA, "2023-12-05"),
  method_for_wetted_width = c("manual", "manual", "manual", NA, "manual"),
  method_for_channel_width = c("automatic", "automatic", "automatic", NA, "automatic"),
  avg_channel_width_m = NA_real_,
  avg_wetted_width_m = NA_real_,
  average_residual_pool_depth_m = NA_real_,
  average_gradient_percent = NA_real_,
  average_bankfull_depth_m = NA_real_
)

col_str_negate = "time|method|avg|average"
col_str_to_agg <- c("channel_width", "wetted_width", "residual_pool", "gradient", "bankfull_depth")
columns_result <- c("avg_channel_width_m", "avg_wetted_width_m", "average_residual_pool_depth_m", "average_gradient_percent", "average_bankfull_depth_m")

# Initialize dat as a copy of dat_raw to preserve the original and allow cumulative updates
dat <- dat_raw

# Use mapply with cumulative updates
# Suppress mapply output by assigning it to invisible
invisible(mapply(
  FUN = function(col_str_match, col_result) {
    # Update dat cumulatively using double assignment operator to update each already updated version of the dataframe
    dat <<- ngr_str_df_col_agg(
      dat = dat,  # Use the updated dat for each iteration
      col_str_match = col_str_match,
      col_result = col_result,
      col_str_negate = col_str_negate,
      decimal_places = 1
    )
  },
  col_str_match = col_str_to_agg,
  col_result = columns_result
))

# Print the first few rows of the resulting data after subsetting for clarity
dat_subset <- dat[1:5, grep("average|avg", names(dat))]
head(dat_subset)
#>   avg_channel_width_m avg_wetted_width_m average_residual_pool_depth_m
#> 1                 5.3                3.1                           0.8
#> 2                 4.1                2.8                           0.7
#> 3                 6.2                3.5                           0.9
#> 4                  NA                 NA                            NA
#> 5                 5.7                3.5                           0.9
#>   average_gradient_percent average_bankfull_depth_m
#> 1                      2.6                      1.2
#> 2                      2.1                      1.1
#> 3                      3.1                      1.3
#> 4                       NA                       NA
#> 5                      2.8                      1.3
if (FALSE) { # \dontrun{
#'Load the geopackage shipped with the package
path <- system.file("extdata", "form_fiss_site_2024.gpkg", package = "ngr")

dat_raw <- sf::st_read(path)

# Use purrr::reduce with cumulative updates
dat <- purrr::reduce(
  .x = seq_along(col_str_to_agg),
  .f = function(acc_df, i) {
    ngr_str_df_col_agg(
      dat = acc_df,
      col_str_match = col_str_to_agg[i],
      col_result = columns_result[i],
      col_str_negate = col_str_negate,
      decimal_places = 1
    )
  },
  .init = dat_raw
)

# Print the first few rows of the resulting data
# Convert to a plain data.frame
dat_subset <- dat[1:5, grep("average|avg", names(dat))]
head(dat_subset)
} # }