This function aggregates numeric columns in a data frame or sf
object by rows based on a specified aggregation function (e.g., mean()
, sum()
, median()
). It allows column inclusion/exclusion using string matching and supports rational rounding.
Usage
ngr_str_df_col_agg(
dat,
col_str_match,
col_result,
fun = "mean",
col_str_negate = NULL,
decimal_places = 1
)
Arguments
- dat
data.frame or sf object. The input data.
- col_str_match
character A string to match column names for aggregation.
- col_result
character The name of the resulting column with aggregated values.
- fun
character The aggregation function to use. Options are "mean", "sum", "median", "min", or "max". Default is "mean".
- col_str_negate
character (Optional) A string to exclude matching columns from aggregation. Default is
NULL
.- decimal_places
numeric Number of decimal places for rounding. Default is 1.
Details
This function identifies columns matching col_str_match
while excluding those matching col_str_negate
,
applies the specified aggregation function row-wise. It attaches the result as a new column in the input data. If
the result column exists within the dataframe it is updated with the aggregated values.
Rounding is performed using rational rounding (e.g., 1.55 rounds to 1.6).
Examples
# Load example data
dat_raw <- data.frame(
channel_width_m_1 = c(5.0, 4.2, 6.3, NA, 5.5),
channel_width_m_2 = c(5.5, 4.0, 6.1, NA, 5.8),
wetted_width_m_1 = c(3.0, 2.8, 3.5, NA, 3.3),
wetted_width_m_2 = c(3.2, 2.9, 3.4, NA, 3.6),
residual_pool_m_1 = c(0.8, 0.7, 0.9, NA, 0.85),
residual_pool_m_2 = c(0.75, 0.65, 0.92, NA, 0.88),
gradient_percent_1 = c(2.5, 2.0, 3.0, NA, 2.7),
gradient_percent_2 = c(2.6, 2.1, 3.1, NA, 2.8),
bankfull_depth_m_1 = c(1.2, 1.1, 1.3, NA, 1.25),
bankfull_depth_m_2 = c(1.15, 1.05, 1.35, NA, 1.3),
wetted_width_m_2.channel_width_m_2_time = c("2023-12-01", "2023-12-02", "2023-12-03", NA, "2023-12-05"),
method_for_wetted_width = c("manual", "manual", "manual", NA, "manual"),
method_for_channel_width = c("automatic", "automatic", "automatic", NA, "automatic"),
avg_channel_width_m = NA_real_,
avg_wetted_width_m = NA_real_,
average_residual_pool_depth_m = NA_real_,
average_gradient_percent = NA_real_,
average_bankfull_depth_m = NA_real_
)
col_str_negate = "time|method|avg|average"
col_str_to_agg <- c("channel_width", "wetted_width", "residual_pool", "gradient", "bankfull_depth")
columns_result <- c("avg_channel_width_m", "avg_wetted_width_m", "average_residual_pool_depth_m", "average_gradient_percent", "average_bankfull_depth_m")
# Initialize dat as a copy of dat_raw to preserve the original and allow cumulative updates
dat <- dat_raw
# Use mapply with cumulative updates
# Suppress mapply output by assigning it to invisible
invisible(mapply(
FUN = function(col_str_match, col_result) {
# Update dat cumulatively using double assignment operator to update each already updated version of the dataframe
dat <<- ngr_str_df_col_agg(
dat = dat, # Use the updated dat for each iteration
col_str_match = col_str_match,
col_result = col_result,
col_str_negate = col_str_negate,
decimal_places = 1
)
},
col_str_match = col_str_to_agg,
col_result = columns_result
))
# Print the first few rows of the resulting data after subsetting for clarity
dat_subset <- dat[1:5, grep("average|avg", names(dat))]
head(dat_subset)
#> avg_channel_width_m avg_wetted_width_m average_residual_pool_depth_m
#> 1 5.3 3.1 0.8
#> 2 4.1 2.8 0.7
#> 3 6.2 3.5 0.9
#> 4 NA NA NA
#> 5 5.7 3.5 0.9
#> average_gradient_percent average_bankfull_depth_m
#> 1 2.6 1.2
#> 2 2.1 1.1
#> 3 3.1 1.3
#> 4 NA NA
#> 5 2.8 1.3
if (FALSE) { # \dontrun{
#'Load the geopackage shipped with the package
path <- system.file("extdata", "form_fiss_site_2024.gpkg", package = "ngr")
dat_raw <- sf::st_read(path)
# Use purrr::reduce with cumulative updates
dat <- purrr::reduce(
.x = seq_along(col_str_to_agg),
.f = function(acc_df, i) {
ngr_str_df_col_agg(
dat = acc_df,
col_str_match = col_str_to_agg[i],
col_result = columns_result[i],
col_str_negate = col_str_negate,
decimal_places = 1
)
},
.init = dat_raw
)
# Print the first few rows of the resulting data
# Convert to a plain data.frame
dat_subset <- dat[1:5, grep("average|avg", names(dat))]
head(dat_subset)
} # }