Use this to scavenge a data frame for the columns that you want to keep. Additional columns can be selected through the ... argument. Do not use this for final column selection; use dplyr::select() instead.

ys_prune(data, spec, ..., .report = FALSE)

Arguments

data

A data frame with at least one column that is found in spec.

spec

A yspec object.

...

Additional columns to carry into the output, specified using tidy-select syntax.

.report

If TRUE, report columns in spec that are not found in data.

Value

A data frame containing the columns that data shares with spec, in the order they appear in spec, plus any additional columns selected through the ... argument.

Details

An error is generated if there are no columns in common between data and spec.

Examples

data <- ys_help$data()
spec <- ys_help$spec()
data$STUDY <- NULL

head(ys_prune(data, spec))
#>    C NUM ID SUBJ TIME SEQ CMT EVID AMT      DV   AGE    WT   CRCL ALB   BMI
#> 1 NA   1  1    1 0.00   0   1    1   5   0.000 28.03 55.16 114.45 4.4 21.67
#> 2 NA   2  1    1 0.61   1   2    0  NA  61.005 28.03 55.16 114.45 4.4 21.67
#> 3 NA   3  1    1 1.15   1   2    0  NA  90.976 28.03 55.16 114.45 4.4 21.67
#> 4 NA   4  1    1 1.73   1   2    0  NA 122.210 28.03 55.16 114.45 4.4 21.67
#> 5 NA   5  1    1 2.15   1   2    0  NA 126.090 28.03 55.16 114.45 4.4 21.67
#> 6 NA   6  1    1 3.19   1   2    0  NA  84.682 28.03 55.16 114.45 4.4 21.67
#>      AAG  SCR   AST   ALT     HT CP TAFD  TAD LDOS MDV BLQ PHASE   RF
#> 1 106.36 1.14 11.88 12.66 159.55  0 0.00 0.00    5   1   0     1 norm
#> 2 106.36 1.14 11.88 12.66 159.55  0 0.61 0.61    5   0   0     1 norm
#> 3 106.36 1.14 11.88 12.66 159.55  0 1.15 1.15    5   0   0     1 norm
#> 4 106.36 1.14 11.88 12.66 159.55  0 1.73 1.73    5   0   0     1 norm
#> 5 106.36 1.14 11.88 12.66 159.55  0 2.15 2.15    5   0   0     1 norm
#> 6 106.36 1.14 11.88 12.66 159.55  0 3.19 3.19    5   0   0     1 norm
head(ys_prune(data, spec, .report = TRUE))
#> Column not found: STUDY
#>    C NUM ID SUBJ TIME SEQ CMT EVID AMT      DV   AGE    WT   CRCL ALB   BMI
#> 1 NA   1  1    1 0.00   0   1    1   5   0.000 28.03 55.16 114.45 4.4 21.67
#> 2 NA   2  1    1 0.61   1   2    0  NA  61.005 28.03 55.16 114.45 4.4 21.67
#> 3 NA   3  1    1 1.15   1   2    0  NA  90.976 28.03 55.16 114.45 4.4 21.67
#> 4 NA   4  1    1 1.73   1   2    0  NA 122.210 28.03 55.16 114.45 4.4 21.67
#> 5 NA   5  1    1 2.15   1   2    0  NA 126.090 28.03 55.16 114.45 4.4 21.67
#> 6 NA   6  1    1 3.19   1   2    0  NA  84.682 28.03 55.16 114.45 4.4 21.67
#>      AAG  SCR   AST   ALT     HT CP TAFD  TAD LDOS MDV BLQ PHASE   RF
#> 1 106.36 1.14 11.88 12.66 159.55  0 0.00 0.00    5   1   0     1 norm
#> 2 106.36 1.14 11.88 12.66 159.55  0 0.61 0.61    5   0   0     1 norm
#> 3 106.36 1.14 11.88 12.66 159.55  0 1.15 1.15    5   0   0     1 norm
#> 4 106.36 1.14 11.88 12.66 159.55  0 1.73 1.73    5   0   0     1 norm
#> 5 106.36 1.14 11.88 12.66 159.55  0 2.15 2.15    5   0   0     1 norm
#> 6 106.36 1.14 11.88 12.66 159.55  0 3.19 3.19    5   0   0     1 norm

data$FOO <- 1
data$BAR <- 2
data$YAK <- 3

head(ys_prune(data, spec, YAK, FOO))
#>    C NUM ID SUBJ TIME SEQ CMT EVID AMT      DV   AGE    WT   CRCL ALB   BMI
#> 1 NA   1  1    1 0.00   0   1    1   5   0.000 28.03 55.16 114.45 4.4 21.67
#> 2 NA   2  1    1 0.61   1   2    0  NA  61.005 28.03 55.16 114.45 4.4 21.67
#> 3 NA   3  1    1 1.15   1   2    0  NA  90.976 28.03 55.16 114.45 4.4 21.67
#> 4 NA   4  1    1 1.73   1   2    0  NA 122.210 28.03 55.16 114.45 4.4 21.67
#> 5 NA   5  1    1 2.15   1   2    0  NA 126.090 28.03 55.16 114.45 4.4 21.67
#> 6 NA   6  1    1 3.19   1   2    0  NA  84.682 28.03 55.16 114.45 4.4 21.67
#>      AAG  SCR   AST   ALT     HT CP TAFD  TAD LDOS MDV BLQ PHASE   RF YAK FOO
#> 1 106.36 1.14 11.88 12.66 159.55  0 0.00 0.00    5   1   0     1 norm   3   1
#> 2 106.36 1.14 11.88 12.66 159.55  0 0.61 0.61    5   0   0     1 norm   3   1
#> 3 106.36 1.14 11.88 12.66 159.55  0 1.15 1.15    5   0   0     1 norm   3   1
#> 4 106.36 1.14 11.88 12.66 159.55  0 1.73 1.73    5   0   0     1 norm   3   1
#> 5 106.36 1.14 11.88 12.66 159.55  0 2.15 2.15    5   0   0     1 norm   3   1
#> 6 106.36 1.14 11.88 12.66 159.55  0 3.19 3.19    5   0   0     1 norm   3   1

# Use dplyr::select() for final subsetting
# It will fail if any of the spec columns are missing from data
data <- ys_help$data()
head(dplyr::select(data, names(spec)))
#>    C NUM ID SUBJ TIME SEQ CMT EVID AMT      DV   AGE    WT   CRCL ALB   BMI
#> 1 NA   1  1    1 0.00   0   1    1   5   0.000 28.03 55.16 114.45 4.4 21.67
#> 2 NA   2  1    1 0.61   1   2    0  NA  61.005 28.03 55.16 114.45 4.4 21.67
#> 3 NA   3  1    1 1.15   1   2    0  NA  90.976 28.03 55.16 114.45 4.4 21.67
#> 4 NA   4  1    1 1.73   1   2    0  NA 122.210 28.03 55.16 114.45 4.4 21.67
#> 5 NA   5  1    1 2.15   1   2    0  NA 126.090 28.03 55.16 114.45 4.4 21.67
#> 6 NA   6  1    1 3.19   1   2    0  NA  84.682 28.03 55.16 114.45 4.4 21.67
#>      AAG  SCR   AST   ALT     HT CP TAFD  TAD LDOS MDV BLQ PHASE STUDY   RF
#> 1 106.36 1.14 11.88 12.66 159.55  0 0.00 0.00    5   1   0     1     1 norm
#> 2 106.36 1.14 11.88 12.66 159.55  0 0.61 0.61    5   0   0     1     1 norm
#> 3 106.36 1.14 11.88 12.66 159.55  0 1.15 1.15    5   0   0     1     1 norm
#> 4 106.36 1.14 11.88 12.66 159.55  0 1.73 1.73    5   0   0     1     1 norm
#> 5 106.36 1.14 11.88 12.66 159.55  0 2.15 2.15    5   0   0     1     1 norm
#> 6 106.36 1.14 11.88 12.66 159.55  0 3.19 3.19    5   0   0     1     1 norm