Given a set of bin ranges, assign each value to a bin and provide the label

set_bins_df(
  .df,
  .x,
  breaks = stats::quantile(.df[[.x]], na.rm = TRUE),
  .name = NULL,
  .label = NULL,
  lower_bound = -Inf,
  upper_bound = Inf,
  quiet = TRUE,
  between = NULL,
  inclusive = TRUE
)

Arguments

.df

data frame

.x

name of column

breaks

breaks for each bin, defaults to quantiles

.name

name of the new binned column, defaults to appending _bins to the column name (e.g. conc_bins)

.label

name of the new label column, defaults to appending _label to bin column name

lower_bound

set a lower bound for the first bin, defaults to -Inf

upper_bound

set an upper bound for the last bin, defaults to Inf

quiet

whether to suppress additional information regarding the bins and the range assigned to each, defaults to TRUE; set to FALSE to give this information

between

defaults to NULL; a special case in which a two-value range, e.g. c(3, 7), is given and each value is binned as below (0), inside (1), or above (2) the specified range

inclusive

include the max value of the largest user-defined bin in that bin, even though the lower bins are non-inclusive of their upper break

Details

set_bins_df offers the ability to create bins from a column of a data frame and returns both the binning column and a label column giving the range of values associated with each bin.

See also

set_bins: This function creates bins from a data frame. The output is the new assigned bin columns.

Examples

x <- Theoph$conc

head(x)
#> [1]  0.74  2.84  6.57 10.50  9.66  8.58

#basic example
res <- set_bins_df(.df = Theoph, .x= "conc")

head(res[,5:7],3)
#>   conc conc_bins conc_bins_label
#> 1 0.74         1      [0_2.8775)
#> 2 2.84         1      [0_2.8775)
#> 3 6.57         3    [5.275_7.14)

table(res$conc_bins_label)
#> 
#>       [-Inf_0)     [0_2.8775) [2.8775_5.275)   [5.275_7.14)    [7.14_11.4) 
#>              0             33             33             32             34 
#>     [11.4_Inf) 
#>              0 

#assign all obs < lower bound to NA
res <- set_bins_df(
  .df = Theoph,
  .x = "conc",
  breaks = stats::quantile(x, na.rm = TRUE, probs = c(0.1, 0.5, 1)),
  lower_bound = 1)

head(res[,5:7],3)
#>   conc conc_bins conc_bins_label
#> 1 0.74        NA            <NA>
#> 2 2.84         0   [0.864_5.275)
#> 3 6.57         1    [5.275_11.4)

table(res$conc_bins_label)
#> 
#> [0.864_5.275)  [5.275_11.4)    [11.4_Inf) 
#>            52            66             0 

#use inclusive argument to get desired bins
## include max value of largest user defined bin
xbreak <- stats::quantile(x, na.rm = TRUE, probs= c(0, 0.5, 1))
xupper = Inf

res1 <- set_bins_df(
  .df = Theoph,
  .x = "conc",
  breaks = xbreak,
  upper_bound = xupper,
  inclusive = TRUE)

table(res1$conc_bins_label)
#> 
#>     [-Inf_0)    [0_5.275) [5.275_11.4)   [11.4_Inf) 
#>            0           66           66            0 

## do not include the max value in the largest user-defined bin; a new bin is created for it instead
res2 <- set_bins_df(
  .df = Theoph,
  .x = "conc",
  breaks = xbreak,
  upper_bound = xupper,
  inclusive = FALSE)

table(res2$conc_bins_label)
#> 
#>     [-Inf_0)    [0_5.275) [5.275_11.4)   [11.4_Inf) 
#>            0           66           65            1 

# use the between argument to cut observations at specific values, e.g. to get a bin for conc between 3 and 7
res <- set_bins_df(.df = Theoph, .x= "conc",  between = c(3, 7)) 

head(res)
#> [1] 0 0 1 2 2 2

table(res)
#> res
#>  0  1  2 
#> 34 62 36 
res
#>   [1] 0 0 1 2 2 2 2 2 1 1 1 0 0 2 2 2 1 1 1 1 1 0 0 1 1 2 2 2 1 1 1 1 0 0 0 1 2
#>  [38] 2 2 1 1 1 1 0 0 0 1 2 2 2 2 2 1 1 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 1 1 2 1 1
#>  [75] 1 1 0 0 1 1 2 2 1 1 1 1 1 0 0 2 2 2 1 1 1 1 1 1 0 0 0 1 1 2 2 2 2 2 1 0 0
#> [112] 1 2 2 1 1 1 1 1 0 0 0 0 1 2 2 2 2 1 1 1 0