diff --git a/.Rbuildignore b/.Rbuildignore index 5b8e1e5..5b23720 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,4 @@ ^extras$ ^revdep$ ^Examples$ +^CRAN-SUBMISSION$ diff --git a/DESCRIPTION b/DESCRIPTION index 97639d6..4074604 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: cdata Type: Package Title: Fluid Data Transformations -Version: 1.2.0 -Date: 2021-06-11 +Version: 1.2.1 +Date: 2023-08-19 Authors@R: c( person("John", "Mount", email = "jmount@win-vector.com", role = c("aut", "cre")), person("Nina", "Zumel", email = "nzumel@win-vector.com", role = c("aut")), @@ -17,13 +17,13 @@ Description: Supplies higher-order coordinatized data specification and fluid tr Works on in-memory data or on remote data using 'rquery' and 'SQL' database interfaces. License: GPL-2 | GPL-3 Encoding: UTF-8 -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.3 Depends: R (>= 3.4.0), - wrapr (>= 2.0.2) + wrapr (>= 2.0.9) Imports: - rquery (>= 1.4.5), - rqdatatable (>= 1.2.8), + rquery (>= 1.4.9), + rqdatatable (>= 1.3.2), methods, stats Suggests: diff --git a/NEWS.md b/NEWS.md index 981f38e..d8d86cd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ +# cdata 1.2.1 2023/08/19 + + * Work around https://github.com/r-lib/roxygen2/issues/1491 + # cdata 1.2.0 2021/06/11 * Remove LazyData decl. 
diff --git a/R/checks.R b/R/checks.R index 841e83f..f104ab3 100644 --- a/R/checks.R +++ b/R/checks.R @@ -45,7 +45,7 @@ checkControlTable <- function(controlTable, controlTableKeys, strict) { return("control table key values must not be NA") } if(!check_cols_form_unique_keys(controlTable, controlTableKeys)) { - return("controlTable rows must be uniquely keyed by controlTableKeys key columns") + return("controlTable rows must be uniquely keyed by declared controlTableKeys key columns (controlTableKeys defaults to first column)") } toCheck <- list( "column names" = colnames(controlTable), diff --git a/R/package.R b/R/package.R index 26672ab..8244b9c 100644 --- a/R/package.R +++ b/R/package.R @@ -1,3 +1,4 @@ + #' \code{cdata}: Fluid Data Transformations. #' #' Supplies implementations of higher order "fluid data" transforms. These @@ -12,9 +13,7 @@ #' \url{https://github.com/WinVector/cdata} and \url{https://winvector.github.io/FluidData/FluidData.html}. #' #' -#' @docType package -#' @name cdata -NULL +"_PACKAGE" # make sure dot doesn't look like an unbound ref . <- NULL diff --git a/cran-comments.md b/cran-comments.md index 3734710..87ba069 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -2,44 +2,16 @@ ## Test environments -### OSX - - - R CMD check --as-cran cdata_1.2.0.tar.gz - * using R version 4.0.2 (2020-06-22) - * using platform: x86_64-apple-darwin17.0 (64-bit) - * using session charset: UTF-8 - * using option ‘--as-cran’ - * checking for file ‘cdata/DESCRIPTION’ ... OK - * checking extension type ... Package - * this is package ‘cdata’ version ‘1.2.0’ - * package encoding: UTF-8 - * checking CRAN incoming feasibility ... Note_to_CRAN_maintainers - Maintainer: ‘John Mount ’ - ... 
- Status: OK - -### Windows + R CMD check --as-cran cdata_1.2.1.tar.gz + * using R version 4.3.0 (2023-04-21) + * using platform: x86_64-apple-darwin20 (64-bit) devtools::check_win_devel() - -#### Linux - - rhub::check_for_cran() - 2290#> About to run xvfb-run R CMD check --as-cran cdata_1.2.0.tar.gz - 2296#> * using R version 4.1.0 (2021-05-18) - 2297#> * using platform: x86_64-pc-linux-gnu (64-bit) - 2304#> * checking CRAN incoming feasibility ... Note_to_CRAN_maintainers - 2305#> Maintainer: ‘John Mount ’ - 2362#> Status: OK + * using R Under development (unstable) (2023-08-18 r84986 ucrt) + * using platform: x86_64-w64-mingw32 rhub::check_for_cran() - 1946#> About to run xvfb-run R CMD check --as-cran cdata_1.2.0.tar.gz - 1950#> * using R Under development (unstable) (2021-06-10 r80480) - 1951#> * using platform: x86_64-pc-linux-gnu (64-bit) - 1958#> * checking CRAN incoming feasibility ... Note_to_CRAN_maintainers - 1959#> Maintainer: ‘John Mount ’ - 2016#> Status: OK + skipped ## Downstream dependencies diff --git a/docs/404.html b/docs/404.html index 8f288c2..545e16a 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - - - + + + + - Page not found (404) • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - - - - -
-
- + +
+ + + - - -
+
+
-
- +
+ + - - diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index c22e77c..3722ecb 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -1,66 +1,12 @@ - - - - - - - -License • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -License • cdata + + - - - - -
-
- -
- -
+
+
-
- +
- - + + diff --git a/docs/articles/blocksrecs.html b/docs/articles/blocksrecs.html index 9ea3706..0901f4b 100644 --- a/docs/articles/blocksrecs.html +++ b/docs/articles/blocksrecs.html @@ -19,6 +19,8 @@ + +
+
-
-

-Block Records and Row Records

-

The theory of cdata data transforms is based on the principles:

+
+

Block Records and Row Records +

+

The theory of cdata data +transforms is based on the principles:

  • data has coordinates
  • data is naturally grouped into records.
-

The idea of data coordinates is related to Codd’s 2nd rule:

+

The idea of data coordinates is related to Codd’s 2nd +rule:

-

Each and every datum (atomic value) in a relational data base is guaranteed to be logically accessible by resorting to a combination of table name, primary key value and column name.

+

Each and every datum (atomic value) in a relational data base is +guaranteed to be logically accessible by resorting to a combination of +table name, primary key value and column name.

-

The coordinatized data concept is that the exact current data realization is incidental. One can perform a data change of basis to get the data into the right format (where the physical layout of records is altered to match the desired logical layout of the data).

-

The idea of data records (and these records possibly being different than simple rows) is a staple of computer science: harking at least back to record-oriented filesystems.

-

The core of the cdata package is to supply transforms between what we call “row records” (records that happen to be implemented as a single row) and block records (records that span multiple rows). These two methods are:

+

The coordinatized data concept is that the exact current +data realization is incidental. One can perform a data change of +basis to get the data into the right format (where the physical +layout of records is altered to match the desired logical layout of the +data).

+

The idea of data records (and these records possibly being different +than simple rows) is a staple of computer science: harking at least back +to record-oriented +filesystems.

+

The core of the cdata package is to supply transforms +between what we call “row records” (records that happen to be +implemented as a single row) and block records (records that span +multiple rows). These two methods are:

-

All the other cdata functions are helpers allowing abbreviated notation in special cases (such as unpivot_to_blocks() pivot_to_rowrecs()) and adapters (allowing these operations to be performed directly in databases and large data systems such as Apache Spark).

-

The current favored idiomatic interfaces to cdata are:

+

All the other cdata functions are helpers allowing +abbreviated notation in special cases (such as +unpivot_to_blocks() and pivot_to_rowrecs()) and +adapters (allowing these operations to be performed directly in +databases and large data systems such as Apache Spark).

+

The current favored idiomatic interfaces to cdata +are:

  • -pivot_to_rowrecs(), a convenience function for moving data from multi-row block records with one value per row to single row records.
  • +pivot_to_rowrecs(), +a convenience function for moving data from multi-row block records with +one value per row to single row records.
  • -unpivot_to_blocks(), a convenience function for moving data from single-row records to possibly multi row block records with one row per value.
  • +unpivot_to_blocks(), +a convenience function for moving data from single-row records to +possibly multi row block records with one row per value.
  • -rowrecs_to_blocks_spec(), for specifying how single row records relate to general multi-row (or block) records.
  • +rowrecs_to_blocks_spec(), +for specifying how single row records relate to general multi-row (or +block) records.
  • -blocks_to_rowrecs_spec(), for specifying how multi-row block records relate to single-row records.
  • +blocks_to_rowrecs_spec(), +for specifying how multi-row block records relate to single-row +records.
  • -layout_by() or the wrapr dot arrow pipe for applying a layout to re-arrange data.
  • +layout_by() +or the wrapr +dot arrow pipe for applying a layout to re-arrange data.
  • -t() (transpose/adjoint) to invert or reverse layout specifications.
  • +t() (transpose/adjoint) to invert or reverse layout +specifications.
  • -wrapr::qchar_frame() a helper in specifying record control table layout specifications.
  • +wrapr::qchar_frame() +a helper in specifying record control table layout specifications.
  • -layout_specification(), for specifying transforms from multi-row records to other multi-row records.
  • +layout_specification(), +for specifying transforms from multi-row records to other multi-row +records.

Let’s look at cdata with some specific data.

-

For our example let’s take the task of re-organizing the iris data for a faceted plot, as discussed here.

+

For our example let’s take the task of re-organizing the +iris data for a faceted plot, as discussed here.

-library(cdata)
-#> Loading required package: wrapr
-
-iris <- data.frame(iris)
-iris$iris_id <- seq_len(nrow(iris))
-
-head(iris, n=1)
-#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species iris_id
-#> 1          5.1         3.5          1.4         0.2  setosa       1
-

To transform this data into a format ready for our ggplot2 task we design (as taught here) a “transform control table” that shows how to move from our row-oriented form into a block oriented form. Which in this case looks like the following.

+library(cdata) +#> Loading required package: wrapr + +iris <- data.frame(iris) +iris$iris_id <- seq_len(nrow(iris)) + +head(iris, n=1) +#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species iris_id +#> 1 5.1 3.5 1.4 0.2 setosa 1
+

To transform this data into a format ready for our +ggplot2 task we design (as taught +here) a “transform control table” that shows how to move from our +row-oriented form into a block oriented form. Which in this case looks +like the following.

In R the transform table is specified as follows.

-controlTable <- wrapr::qchar_frame(
-  "flower_part", "Length"      , "Width"     |
-  "Petal"      , Petal.Length  , Petal.Width |
-  "Sepal"      , Sepal.Length  , Sepal.Width )
-
-layout <- rowrecs_to_blocks_spec(
-  controlTable,
-  recordKeys = c("iris_id", "Species"))
-
-print(layout)
-#> {
-#>  row_record <- wrapr::qchar_frame(
-#>    "iris_id"  , "Species", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width" |
-#>      .        , .        , Petal.Length  , Petal.Width  , Sepal.Length  , Sepal.Width   )
-#>  row_keys <- c('iris_id', 'Species')
-#> 
-#>  # becomes
-#> 
-#>  block_record <- wrapr::qchar_frame(
-#>    "iris_id"  , "Species", "flower_part", "Length"    , "Width"     |
-#>      .        , .        , "Petal"      , Petal.Length, Petal.Width |
-#>      .        , .        , "Sepal"      , Sepal.Length, Sepal.Width )
-#>  block_keys <- c('iris_id', 'Species', 'flower_part')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-

And then applying it converts rows from our iris data into ready to plot 2-row blocks.

+controlTable <- wrapr::qchar_frame( + "flower_part", "Length" , "Width" | + "Petal" , Petal.Length , Petal.Width | + "Sepal" , Sepal.Length , Sepal.Width ) + +layout <- rowrecs_to_blocks_spec( + controlTable, + recordKeys = c("iris_id", "Species")) + +print(layout) +#> { +#> row_record <- wrapr::qchar_frame( +#> "iris_id" , "Species", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width" | +#> . , . , Petal.Length , Petal.Width , Sepal.Length , Sepal.Width ) +#> row_keys <- c('iris_id', 'Species') +#> +#> # becomes +#> +#> block_record <- wrapr::qchar_frame( +#> "iris_id" , "Species", "flower_part", "Length" , "Width" | +#> . , . , "Petal" , Petal.Length, Petal.Width | +#> . , . , "Sepal" , Sepal.Length, Sepal.Width ) +#> block_keys <- c('iris_id', 'Species', 'flower_part') +#> +#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) +#> }
+

And then applying it converts rows from our iris data +into ready to plot 2-row blocks.

-iris %.>%
-  head(., n = 1) %.>%
-  knitr::kable(.)
+iris %.>% + head(., n = 1) %.>% + knitr::kable(.)
++++++++ @@ -214,13 +258,13 @@

Sepal.Length Sepal.Width
-
-iris_aug <- iris %.>%
-  head(., n = 1) %.>%
-  layout
-
-iris_aug %.>%
-  knitr::kable(.)
+ +iris_aug <- iris %.>% + head(., n = 1) %.>% + layout + +iris_aug %.>% + knitr::kable(.)
@@ -246,16 +290,19 @@

iris_id
-

To perform the reverse transform we use the same transform control table, but we think of it as specifying the reverse transform (from its own block form into a row). The reverse can be specified using the t() transpose/adjoint method.

+

To perform the reverse transform we use the same transform control +table, but we think of it as specifying the reverse transform (from its +own block form into a row). The reverse can be specified using the +t() transpose/adjoint method.

-# re-do the forward transform, this time
-# with more records so we can see more
-iris_aug <- iris %.>%
-  head(., n = 3) %.>%
-  layout
-
-knitr::kable(iris_aug)
+# re-do the forward transform, this time +# with more records so we can see more +iris_aug <- iris %.>% + head(., n = 3) %.>% + layout + +knitr::kable(iris_aug)
@@ -310,33 +357,41 @@

iris_id
-
-inv_layout <- t(layout)
-
-print(inv_layout)
-#> {
-#>  block_record <- wrapr::qchar_frame(
-#>    "iris_id"  , "Species", "flower_part", "Length"    , "Width"     |
-#>      .        , .        , "Petal"      , Petal.Length, Petal.Width |
-#>      .        , .        , "Sepal"      , Sepal.Length, Sepal.Width )
-#>  block_keys <- c('iris_id', 'Species', 'flower_part')
-#> 
-#>  # becomes
-#> 
-#>  row_record <- wrapr::qchar_frame(
-#>    "iris_id"  , "Species", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width" |
-#>      .        , .        , Petal.Length  , Petal.Width  , Sepal.Length  , Sepal.Width   )
-#>  row_keys <- c('iris_id', 'Species')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-
-# demonstrate the reverse transform
-iris_back <- iris_aug %.>%
-  inv_layout
-
-knitr::kable(iris_back)
+ +inv_layout <- t(layout) + +print(inv_layout) +#> { +#> block_record <- wrapr::qchar_frame( +#> "iris_id" , "Species", "flower_part", "Length" , "Width" | +#> . , . , "Petal" , Petal.Length, Petal.Width | +#> . , . , "Sepal" , Sepal.Length, Sepal.Width ) +#> block_keys <- c('iris_id', 'Species', 'flower_part') +#> +#> # becomes +#> +#> row_record <- wrapr::qchar_frame( +#> "iris_id" , "Species", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width" | +#> . , . , Petal.Length , Petal.Width , Sepal.Length , Sepal.Width ) +#> row_keys <- c('iris_id', 'Species') +#> +#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) +#> } + +# demonstrate the reverse transform +iris_back <- iris_aug %.>% + inv_layout + +knitr::kable(iris_back)
++++++++ @@ -372,29 +427,48 @@

iris_id Species
-

cdata considers the row-record a universal intermediate form, and this has the advantage of being able to represent a different type per value (as each value per-record is in a different column)

-

This differs from reshape2 where the melt() to “molten” (or thin RDF-triple-like) is used as the universal intermediate form that one then dcast()s into desired arrangements.

-

As we have said, a tutorial on how to design a controlTable can be found here and here.

-

Some additional (older) tutorials on cdata data transforms can are given below:

+

cdata considers the row-record a universal intermediate +form, and this has the advantage of being able to represent a different +type per value (as each value per-record is in a different column).

+

This differs from reshape2 where the melt() +to “molten” (or thin RDF-triple-like) +is used as the universal intermediate form that one then +dcast()s into desired arrangements.

+

As we have said, a tutorial on how to design a +controlTable can be found here +and here.

+

Some additional (older) tutorials on cdata data +transforms are given below:

-
-

-Appendix

-

The cdata operators can be related to Codd’s relational operators as follows:

+
+

Appendix +

+

The cdata operators can be related to Codd’s relational +operators as follows:

  • -rowrecs_to_blocks() is a variation of a relational-join of the data with the control table. This is why you get one row per pair of original data rows and control table rows.
  • +rowrecs_to_blocks() is a variation of a relational-join +of the data with the control table. This is why you get one row per pair +of original data rows and control table rows.
  • -blocks_to_rowrecs() is essentially an aggregation or relational-projection (actually even a coalesce) over a widened table. This is why this operation decreases the number of rows.
  • +blocks_to_rowrecs() is essentially an aggregation or +relational-projection (actually even a coalesce) over a widened table. +This is why this operation decreases the number of rows.
@@ -410,11 +484,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -423,5 +499,7 @@

+ + diff --git a/docs/articles/cdata.html b/docs/articles/cdata.html index 4c25b04..d7d0253 100644 --- a/docs/articles/cdata.html +++ b/docs/articles/cdata.html @@ -19,6 +19,8 @@ + +
+
-

The cdata package is a demonstration of the “coordinatized data” theory and includes an implementation of the “fluid data” methodology.

-

Briefly cdata supplies data transform operators that:

+

The cdata package +is a demonstration of the “coordinatized +data” theory and includes an implementation of the “fluid data” +methodology.

+

Briefly cdata supplies data transform operators +that:

  • Work on local data or with any DBI data source.
  • -
  • Are powerful generalizations of the operators commonly called pivot and un-pivot.
  • +
  • Are powerful generalizations of the operators commonly called +pivot and un-pivot.
  • Can be specified by drawing an example.

A quick example:

-
## Loading required package: wrapr
+library("cdata")
+
## Loading required package: wrapr
-# first few rows of the iris data as an example
-d <- wrapr::build_frame(
-   "Sepal.Length"  , "Sepal.Width", "Petal.Length", "Petal.Width", "Species" |
-     5.1           , 3.5          , 1.4           , 0.2          , "setosa"  |
-     4.9           , 3            , 1.4           , 0.2          , "setosa"  |
-     4.7           , 3.2          , 1.3           , 0.2          , "setosa"  |
-     4.6           , 3.1          , 1.5           , 0.2          , "setosa"  |
-     5             , 3.6          , 1.4           , 0.2          , "setosa"  |
-     5.4           , 3.9          , 1.7           , 0.4          , "setosa"  )
-d$iris_id <- seq_len(nrow(d))
-
-knitr::kable(d)
+# first few rows of the iris data as an example +d <- wrapr::build_frame( + "Sepal.Length" , "Sepal.Width", "Petal.Length", "Petal.Width", "Species" | + 5.1 , 3.5 , 1.4 , 0.2 , "setosa" | + 4.9 , 3 , 1.4 , 0.2 , "setosa" | + 4.7 , 3.2 , 1.3 , 0.2 , "setosa" | + 4.6 , 3.1 , 1.5 , 0.2 , "setosa" | + 5 , 3.6 , 1.4 , 0.2 , "setosa" | + 5.4 , 3.9 , 1.7 , 0.4 , "setosa" ) +d$iris_id <- seq_len(nrow(d)) + +knitr::kable(d)
++++++++ @@ -193,16 +203,18 @@

2021-06-11

Sepal.Length Sepal.Width
-

Now suppose we want to take the above “all facts about each iris are in a single row” representation and convert it into a per-iris record block with the following structure.

+

Now suppose we want to take the above “all facts about each iris are +in a single row” representation and convert it into a per-iris record +block with the following structure.

-record_example <- wrapr::qchar_frame(
-   "plant_part"  , "measurement", "value"      |
-     "sepal"     , "width"      , Sepal.Width  |
-     "sepal"     , "length"     , Sepal.Length |
-     "petal"     , "width"      , Petal.Width  |
-     "petal"     , "length"     , Petal.Length )
-
-knitr::kable(record_example)
+record_example <- wrapr::qchar_frame( + "plant_part" , "measurement", "value" | + "sepal" , "width" , Sepal.Width | + "sepal" , "length" , Sepal.Length | + "petal" , "width" , Petal.Width | + "petal" , "length" , Petal.Length ) + +knitr::kable(record_example)
@@ -232,40 +244,48 @@

2021-06-11

plant_part
-

The above sort of transformation may seem exotic, but it is fairly common when we want to plot many aspects of a record at the same time.

-

To specify our transformation we combine the record example with information about how records are keyed (recordKeys showing which rows go together to form a record, and controlTableKeys specifying the internal structure of a data record).

+

The above sort of transformation may seem exotic, but it is fairly +common when we want to plot many aspects of a record at the same +time.

+

To specify our transformation we combine the record example with +information about how records are keyed (recordKeys showing which rows +go together to form a record, and controlTableKeys specifying the +internal structure of a data record).

-layout <- rowrecs_to_blocks_spec(
-  record_example,
-  controlTableKeys = c("plant_part", "measurement"),
-  recordKeys = c("iris_id", "Species"))
-
-print(layout)
-
## {
-##  row_record <- wrapr::qchar_frame(
-##    "iris_id"  , "Species", "Sepal.Width", "Sepal.Length", "Petal.Width", "Petal.Length" |
-##      .        , .        , Sepal.Width  , Sepal.Length  , Petal.Width  , Petal.Length   )
-##  row_keys <- c('iris_id', 'Species')
-## 
-##  # becomes
-## 
-##  block_record <- wrapr::qchar_frame(
-##    "iris_id"  , "Species", "plant_part", "measurement", "value"      |
-##      .        , .        , "sepal"     , "width"      , Sepal.Width  |
-##      .        , .        , "sepal"     , "length"     , Sepal.Length |
-##      .        , .        , "petal"     , "width"      , Petal.Width  |
-##      .        , .        , "petal"     , "length"     , Petal.Length )
-##  block_keys <- c('iris_id', 'Species', 'plant_part', 'measurement')
-## 
-##  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-## }
-

In the above we have used the common useful data organizing trick of specifying a dependent column (Species being a function of iris_id) as an additional key.

-

This layout then specifies and implements the data transform. We can transform the data by sending it to the layout.

+layout <- rowrecs_to_blocks_spec( + record_example, + controlTableKeys = c("plant_part", "measurement"), + recordKeys = c("iris_id", "Species")) + +print(layout)
+
## {
+##  row_record <- wrapr::qchar_frame(
+##    "iris_id"  , "Species", "Sepal.Width", "Sepal.Length", "Petal.Width", "Petal.Length" |
+##      .        , .        , Sepal.Width  , Sepal.Length  , Petal.Width  , Petal.Length   )
+##  row_keys <- c('iris_id', 'Species')
+## 
+##  # becomes
+## 
+##  block_record <- wrapr::qchar_frame(
+##    "iris_id"  , "Species", "plant_part", "measurement", "value"      |
+##      .        , .        , "sepal"     , "width"      , Sepal.Width  |
+##      .        , .        , "sepal"     , "length"     , Sepal.Length |
+##      .        , .        , "petal"     , "width"      , Petal.Width  |
+##      .        , .        , "petal"     , "length"     , Petal.Length )
+##  block_keys <- c('iris_id', 'Species', 'plant_part', 'measurement')
+## 
+##  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
+## }
+

In the above we have used the common useful data organizing trick of +specifying a dependent column (Species being a function of iris_id) as +an additional key.

+

This layout then specifies and implements the data transform. We can +transform the data by sending it to the layout.

-d_transformed <- d %.>% 
-  layout
-
-knitr::kable(d_transformed)
+d_transformed <- d %.>% + layout + +knitr::kable(d_transformed) @@ -445,34 +465,43 @@

2021-06-11

iris_id
-

And it is easy to invert these transforms using the t() transpose/adjoint notation.

+

And it is easy to invert these transforms using the t() +transpose/adjoint notation.

-inv_layout <- t(layout)
-
-print(inv_layout)
-
## {
-##  block_record <- wrapr::qchar_frame(
-##    "iris_id"  , "Species", "plant_part", "measurement", "value"      |
-##      .        , .        , "sepal"     , "width"      , Sepal.Width  |
-##      .        , .        , "sepal"     , "length"     , Sepal.Length |
-##      .        , .        , "petal"     , "width"      , Petal.Width  |
-##      .        , .        , "petal"     , "length"     , Petal.Length )
-##  block_keys <- c('iris_id', 'Species', 'plant_part', 'measurement')
-## 
-##  # becomes
-## 
-##  row_record <- wrapr::qchar_frame(
-##    "iris_id"  , "Species", "Sepal.Width", "Sepal.Length", "Petal.Width", "Petal.Length" |
-##      .        , .        , Sepal.Width  , Sepal.Length  , Petal.Width  , Petal.Length   )
-##  row_keys <- c('iris_id', 'Species')
-## 
-##  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-## }
+inv_layout <- t(layout) + +print(inv_layout) +
## {
+##  block_record <- wrapr::qchar_frame(
+##    "iris_id"  , "Species", "plant_part", "measurement", "value"      |
+##      .        , .        , "sepal"     , "width"      , Sepal.Width  |
+##      .        , .        , "sepal"     , "length"     , Sepal.Length |
+##      .        , .        , "petal"     , "width"      , Petal.Width  |
+##      .        , .        , "petal"     , "length"     , Petal.Length )
+##  block_keys <- c('iris_id', 'Species', 'plant_part', 'measurement')
+## 
+##  # becomes
+## 
+##  row_record <- wrapr::qchar_frame(
+##    "iris_id"  , "Species", "Sepal.Width", "Sepal.Length", "Petal.Width", "Petal.Length" |
+##      .        , .        , Sepal.Width  , Sepal.Length  , Petal.Width  , Petal.Length   )
+##  row_keys <- c('iris_id', 'Species')
+## 
+##  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
+## }
-d_transformed %.>%
-  inv_layout %.>%
-  knitr::kable(.)
+d_transformed %.>% + inv_layout %.>% + knitr::kable(.) ++++++++ @@ -532,60 +561,85 @@

2021-06-11

iris_id Species
-

The layout specifications themselves are just simple lists with “pretty print methods” (the control table being simply and example record in the form of a data.frame).

+

The layout specifications themselves are just simple lists with +“pretty print methods” (the control table being simply an example +record in the form of a data.frame).

-unclass(layout)
-
## $controlTable
-##   plant_part measurement        value
-## 1      sepal       width  Sepal.Width
-## 2      sepal      length Sepal.Length
-## 3      petal       width  Petal.Width
-## 4      petal      length Petal.Length
-## 
-## $recordKeys
-## [1] "iris_id" "Species"
-## 
-## $controlTableKeys
-## [1] "plant_part"  "measurement"
-## 
-## $checkNames
-## [1] TRUE
-## 
-## $checkKeys
-## [1] FALSE
-## 
-## $strict
-## [1] FALSE
-## 
-## $allow_rqdatatable
-## [1] FALSE
-

Notice that almost all of the time and space in using cdata is spent in specifying how your data is structured and is to be structured.

-

The main cdata interfaces are given by the following set of methods:

+unclass(layout) +
## $controlTable
+##   plant_part measurement        value
+## 1      sepal       width  Sepal.Width
+## 2      sepal      length Sepal.Length
+## 3      petal       width  Petal.Width
+## 4      petal      length Petal.Length
+## 
+## $recordKeys
+## [1] "iris_id" "Species"
+## 
+## $controlTableKeys
+## [1] "plant_part"  "measurement"
+## 
+## $checkNames
+## [1] TRUE
+## 
+## $checkKeys
+## [1] FALSE
+## 
+## $strict
+## [1] FALSE
+## 
+## $allow_rqdatatable
+## [1] FALSE
+

Notice that almost all of the time and space in using cdata is spent +in specifying how your data is structured and is to be structured.

+

The main cdata interfaces are given by the following set +of methods:

Some convenience functions include:

-

The package vignettes can be found in the “Articles” tab of the cdata documentation site.

-

The (older) recommended tutorial is: Fluid data reshaping with cdata. We also have an (older) short free cdata screencast (and another example can be found here).

+

The package vignettes can be found in the “Articles” tab of the cdata +documentation site.

+

The (older) recommended tutorial is: Fluid +data reshaping with cdata. We also have an (older) short free cdata screencast (and +another example can be found here).

@@ -396,19 +415,32 @@

2021-06-11

id
-

Recle Etino Vibal (who asked for this feature in an issue) has an interesting article https://amateurdatasci.rbind.io/post/table-another-back-again-cdata/ trying some variations on the data shaping concepts.

-

The cdata unit test include the following variations of the above example:

+

Recle Etino Vibal (who asked for this +feature in an issue) has an interesting article +https://amateurdatasci.rbind.io/post/table-another-back-again-cdata/ +trying some variations on the data shaping concepts.

+

The cdata unit tests include the following variations of +the above example:

-

We think cdata (and the accompanying fluid data methodology, plus extensions) is a very deep and powerful way of wrangling data. Once you take the time to learn the methodology (which is “draw what you want to happen to one record, type that in as your control table, and you are done!”) it is very easy to use.

+

We think cdata (and the accompanying fluid data +methodology, plus extensions) +is a very deep and powerful way of wrangling data. Once you take the +time to learn the methodology (which is “draw what you want to happen to +one record, type that in as your control table, and you are done!”) it +is very easy to use.

++++++++ @@ -385,22 +459,41 @@

2021-06-11

iris_id Species
-

Notice in both cases that having examples of the before and after form of the transform is the guide to building the transform specification, that is, the transform control table. In practice: we highly recommend looking at your data, writing down what a single record on each side of the transform would look like, and then using that to fill out the control table on paper.

-

The exercise of designing a control table really opens your eyes to how data is moving in such transforms and exposes a lot of structure of data transforms. For example:

+

Notice in both cases that having examples of the before and after +form of the transform is the guide to building the transform +specification, that is, the transform control table. In practice: we +highly recommend looking at your data, writing down what a +single record on each side of the transform would look like, and then +using that to fill out the control table on paper.

+

The exercise of designing a control table really opens your eyes to +how data is moving in such transforms and exposes a lot of structure of +data transforms. For example:

-

Some discussion of the nature of block records and row records in cdata can be found here.

-

Some additional tutorials on cdata data transforms can are given below:

+

Some discussion of the nature of block records and row records in +cdata can be found here.

+

Some additional tutorials on cdata data transforms +are given below:

@@ -413,11 +506,13 @@

2021-06-11

@@ -426,5 +521,7 @@

2021-06-11

+ + diff --git a/docs/articles/exercises.html b/docs/articles/exercises.html index db7fbb9..2723a83 100644 --- a/docs/articles/exercises.html +++ b/docs/articles/exercises.html @@ -19,6 +19,8 @@ + +
+
-

In this note we will use five real life examples to demonstrate data layout transforms using the cdata R package. The examples for this note are all demo-examples from tidyr:demo/ (current when we shared this note on 2019-04-27, removed 2019-04-28), and are mostly based on questions posted to StackOverflow. They represent a good cross-section of data layout problems, so they are a good set of examples or exercises to work through.

-

For each of these examples we will show how to re-layout data using cdata.

-
-

-Introduction

+

In this +note we will use five real life examples to demonstrate data layout +transforms using the cdata +R package. The +examples for this note are all demo-examples from tidyr:demo/ +(current when we shared +this note on 2019-04-27, removed +2019-04-28), and are mostly based on questions posted to +StackOverflow. They represent a good cross-section of data layout +problems, so they are a good set of examples or exercises to work +through.

+

For each of these examples we will show how to re-layout data using +cdata.

+
+

Introduction +

Each of these five problems will be solved using the same steps:

  • Examine example input data and desired result data.

  • -

    Check if either the incoming or outgoing data format is in “row-record” format: is all the data for a single record contained in one row? This determines which data layout transform specification you will use:

    +

    Check if either the incoming or outgoing data format is in +“row-record” format: is all the data for a single record contained in +one row? This determines which data layout transform specification you +will use:

  • -
  • Identify which columns form the record ids (group sets of rows into records), which we call the “record keys.”

  • -
  • Draw the shape of the incoming record without the record key columns.

  • -
  • Draw the shape of the outgoing record without the record key columns.

  • -
  • Combine the above information as one of the above data layout transform specifications.

  • -
  • Print the layout transform to confirm it is what you want.

  • +
  • Identify which columns form the record ids (group sets of rows +into records), which we call the “record keys.”

  • +
  • Draw the shape of the incoming record without the record key +columns.

  • +
  • Draw the shape of the outgoing record without the record key +columns.

  • +
  • Combine the above information as one of the above data layout +transform specifications.

  • +
  • Print the layout transform to confirm it is what you +want.

  • Apply the layout transform.

-

This may seem like a lot of steps, but it is only because we are taking the problems very slowly. The important point is that we want to minimize additional problem solving when applying the cdata methodology. Usually when you need to transform data you are in the middle of some other more important task, so you want to delegate the details of how the layout transform is implemented. With cdata the user is not asked to perform additional puzzle solving to guess a sequence of operators that may implement the desired data layout transform. The cdata solution pattern is always the same, which can help in mastering it.

-

With cdata, record layout transforms are simple R objects with detailed print() methods- so they are convenient to alter, save, and re-use later. The record layout transform also documents the expected columns and constants of the incoming data.

-

We will work some examples with the hope that practice brings familiarity. We have some notes on how to graphically solve exercise like this here and here, but let’s dive into working the exercises.

+

This may seem like a lot of steps, but it is only because we are +taking the problems very slowly. The important point is that we want to +minimize additional problem solving when applying the +cdata methodology. Usually when you need to transform data +you are in the middle of some other more important task, so you want to +delegate the details of how the layout transform is implemented. With +cdata the user is not asked to perform additional puzzle +solving to guess a sequence of operators that may implement the desired +data layout transform. The cdata solution pattern is always +the same, which can help in mastering it.

+

With cdata, record layout transforms are simple +R objects with detailed print() methods- so +they are convenient to alter, save, and re-use later. The record layout +transform also documents the expected columns and constants of the +incoming data.

+

We will work some examples with the hope that practice brings +familiarity. We have some notes on how to graphically solve exercises +like these here +and here, +but let’s dive into working the exercises.

-
-

-Example 1

-

(From: tidyr:demo/dadmom.R.)

-

From https://stats.idre.ucla.edu/stata/modules/reshaping-data-wide-to-long/ we can get get a copy of the data and the question or “transform ask”:

+
+

Example 1 +

+

(From: tidyr:demo/dadmom.R.)

+

From https://stats.oarc.ucla.edu/stata/modules/reshaping-data-wide-to-long/ +we can get a copy of the data and the question or “transform +ask”:

-# convert from this format
-dadmomw <- wrapr::build_frame(
-   "famid"  , "named", "incd", "namem", "incm" |
-     1      , "Bill" , 30000 , "Bess" , 15000  |
-     2      , "Art"  , 22000 , "Amy"  , 18000  |
-     3      , "Paul" , 25000 , "Pat"  , 50000  )
-
-# to this format
-dadmomt <- wrapr::build_frame(
-   "famid"  , "dadmom", "name", "inc" |
-     1      , "d"     , "Bill", 30000 |
-     1      , "m"     , "Bess", 15000 |
-     2      , "d"     , "Art" , 22000 |
-     2      , "m"     , "Amy" , 18000 |
-     3      , "d"     , "Paul", 25000 |
-     3      , "m"     , "Pat" , 50000 )
-

Each incoming record represents a family, and is designated by the record key famid. The data starts with each record in a single row (a row record):

+# convert from this format +dadmomw <- wrapr::build_frame( + "famid" , "named", "incd", "namem", "incm" | + 1 , "Bill" , 30000 , "Bess" , 15000 | + 2 , "Art" , 22000 , "Amy" , 18000 | + 3 , "Paul" , 25000 , "Pat" , 50000 ) + +# to this format +dadmomt <- wrapr::build_frame( + "famid" , "dadmom", "name", "inc" | + 1 , "d" , "Bill", 30000 | + 1 , "m" , "Bess", 15000 | + 2 , "d" , "Art" , 22000 | + 2 , "m" , "Amy" , 18000 | + 3 , "d" , "Paul", 25000 | + 3 , "m" , "Pat" , 50000 )
+

Each incoming record represents a family, and is designated by the +record key famid. The data starts with each record in a +single row (a row record):

@@ -178,60 +218,81 @@

famid15000
-

So we are going from a row record to a general block record: this means we want to use rowrecs_to_blocks_spec(), and we only have to describe the outgoing record shape.

+

So we are going from a row record to a general block record: this +means we want to use rowrecs_to_blocks_spec(), and we only +have to describe the outgoing record shape.

-library("cdata")
-#> Loading required package: wrapr
-
-# identify the record key
-recordKeys <- "famid"
-
-# specify the outgoing record shape
-outgoing_record <- wrapr::qchar_frame(
-   "dadmom"  , "name", "inc" |
-     "d"     , named , incd |
-     "m"     , namem , incm )
-

Notice we take the column names from the incoming row-record and use them as cell-names in the outgoing record; this is how we show where the data goes. In specifying the record with wrapr::qchar_frame(), we use the convention that quoted entities are values we know (values that specify column names, or keys that describe the interior of the block record structure), and un-quoted entities are values we expect to be in the record.

-

outgoing_record is just a data.frame, you can create it however you like – you don’t need to use qchar_frame().

+library("cdata") +#> Loading required package: wrapr + +# identify the record key +recordKeys <- "famid" + +# specify the outgoing record shape +outgoing_record <- wrapr::qchar_frame( + "dadmom" , "name", "inc" | + "d" , named , incd | + "m" , namem , incm )
+

Notice we take the column names from the incoming row-record and use +them as cell-names in the outgoing record; this is how we show where the +data goes. In specifying the record with +wrapr::qchar_frame(), we use the convention that quoted +entities are values we know (values that specify column names, or keys +that describe the interior of the block record structure), and un-quoted +entities are values we expect to be in the record.

+

outgoing_record is just a data.frame, you +can create it however you like – you don’t need to use +qchar_frame().

Now create the layout specification, and print it.

-# put it all together into a layout
-layout <- rowrecs_to_blocks_spec(
-  outgoing_record,
-  recordKeys = recordKeys)
-
-# confirm we have the right layout
-print(layout)
-#> {
-#>  row_record <- wrapr::qchar_frame(
-#>    "famid"  , "named", "incd", "namem", "incm" |
-#>      .      , named  , incd  , namem  , incm   )
-#>  row_keys <- c('famid')
-#> 
-#>  # becomes
-#> 
-#>  block_record <- wrapr::qchar_frame(
-#>    "famid"  , "dadmom", "name", "inc" |
-#>      .      , "d"     , named , incd  |
-#>      .      , "m"     , namem , incm  )
-#>  block_keys <- c('famid', 'dadmom')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-

The print() method fully documents what columns are expected and the intent of the data layout transform. It uses the same quoted/unquoted convention that we used in specifying outgoing_record above.

-

The block_keys of the outgoing record shape specify the unique identifier of each row of the transformed data: that is, each row of dadmomt will be uniquely identified by the values of the columns famid and dadmom (which family, which parent). One of the block keys is always the record key; by default, rowrecs_to_blocks_spec() takes the other one from the first column of the outgoing_record shape. You can specify the block key (or keys) explicitly with the controlTableKeys argument:

+# put it all together into a layout +layout <- rowrecs_to_blocks_spec( + outgoing_record, + recordKeys = recordKeys) + +# confirm we have the right layout +print(layout) +#> { +#> row_record <- wrapr::qchar_frame( +#> "famid" , "named", "incd", "namem", "incm" | +#> . , named , incd , namem , incm ) +#> row_keys <- c('famid') +#> +#> # becomes +#> +#> block_record <- wrapr::qchar_frame( +#> "famid" , "dadmom", "name", "inc" | +#> . , "d" , named , incd | +#> . , "m" , namem , incm ) +#> block_keys <- c('famid', 'dadmom') +#> +#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) +#> }
+

The print() method fully documents what columns are +expected and the intent of the data layout transform. It uses the same +quoted/unquoted convention that we used in specifying +outgoing_record above.

+

The block_keys of the outgoing record shape specify the +unique identifier of each row of the transformed data: that is, each row +of dadmomt will be uniquely identified by the values of the +columns famid and dadmom (which family, which +parent). One of the block keys is always the record key; by default, +rowrecs_to_blocks_spec() takes the other one from the first +column of the outgoing_record shape. You can specify the +block key (or keys) explicitly with the controlTableKeys +argument:

-# this is equivalent to the above call
-rowrecs_to_blocks_spec(
-  outgoing_record,
-  recordKeys = recordKeys,
-  controlTableKeys = 'dadmom')
+# this is equivalent to the above call +rowrecs_to_blocks_spec( + outgoing_record, + recordKeys = recordKeys, + controlTableKeys = 'dadmom')

Now apply the layout to get the new data shape:

-# apply the layout
-dadmomw %.>% 
-  layout %.>%
-  knitr::kable(.)
+# apply the layout +dadmomw %.>% + layout %.>% + knitr::kable(.)
@@ -279,10 +340,12 @@

famid
-
-

-Example 2

-

(From: tidyr:demo/so-15668870.R, https://stackoverflow.com/questions/15668870/reshape-wide-format-to-multi-column-long-format, .)

+
+

Example 2 +

+

(From: tidyr:demo/so-15668870.R, +https://stackoverflow.com/questions/15668870/reshape-wide-format-to-multi-column-long-format, +.)

The original question was:

I want to reshape a wide format dataset that has multiple tests which are measured at 3 time points:
 
@@ -305,78 +368,84 @@ 

... I have unsuccessfully tried to use reshape and melt. Existing posts address transforming to single column outcome.

-

First, notice that neither the incoming nor outgoing forms are single-row records; a single record corresponds to a single ID and Year, and has three measurements (Fall, Spring, Winter) of two tests (1 and 2). So an example single row record would look something like:

+

First, notice that neither the incoming nor outgoing forms are +single-row records; a single record corresponds to a single ID and Year, +and has three measurements (Fall, Spring, Winter) of two tests (1 and +2). So an example single row record would look something like:

  ID Year Fall1 Fall2 Spring1 Spring2 Winter1 Winter2
    1 2008    15    22      16      22      19      24
 
-

and the record key is formed from the ID and the Year (sometimes what the record keys are is not obvious, and is in fact domain knowledge).

-

Since neither the incoming nor outgoing shapes are row records, we use the general layout_specification().

+

and the record key is formed from the ID and the Year (sometimes what +the record keys are is not obvious, and is in fact domain +knowledge).

+

Since neither the incoming nor outgoing shapes are row records, we +use the general layout_specification().

-library("cdata")
-
-# identify the record keys
-recordKeys <- c("ID", "Year")
-
-# specify the incoming record shape
-incoming_record <- wrapr::qchar_frame(
-  "Test"  , "Fall"   , "Spring"     , "Winter" |
-    "1"   , Fall1    , Spring1      , Winter1  |
-    "2"   , Fall2    , Spring2      , Winter2  )
-
-# specify the outgoing record shape
-outgoing_record <- wrapr::qchar_frame(
-  "Time"     , "Test1" ,  "Test2"   |
-    "Fall"   , Fall1   ,   Fall2    |
-    "Spring" , Spring1 ,   Spring2  |
-    "Winter" , Winter1 ,   Winter2  )
-
-# put it all together into a layout
-layout <- layout_specification(
-  incoming_shape = incoming_record,
-  outgoing_shape = outgoing_record,
-  recordKeys = recordKeys)
-
-# confirm we have the right layout
-print(layout)
-#> {
-#>  in_record <- wrapr::qchar_frame(
-#>    "ID"  , "Year", "Test", "Fall", "Spring", "Winter" |
-#>      .   , .     , "1"   , Fall1 , Spring1 , Winter1  |
-#>      .   , .     , "2"   , Fall2 , Spring2 , Winter2  )
-#>  in_keys <- c('ID', 'Year', 'Test')
-#> 
-#>  # becomes
-#> 
-#>  out_record <- wrapr::qchar_frame(
-#>    "ID"  , "Year", "Time"  , "Test1", "Test2" |
-#>      .   , .     , "Fall"  , Fall1  , Fall2   |
-#>      .   , .     , "Spring", Spring1, Spring2 |
-#>      .   , .     , "Winter", Winter1, Winter2 )
-#>  out_keys <- c('ID', 'Year', 'Time')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-
-# example data
-grades <- wrapr::build_frame(
-   "ID"  , "Test", "Year", "Fall", "Spring", "Winter" |
-     1   , 1     , 2008  , 15    , 16      , 19       |
-     1   , 1     , 2009  , 12    , 13      , 27       |
-     1   , 2     , 2008  , 22    , 22      , 24       |
-     1   , 2     , 2009  , 10    , 14      , 20       |
-     2   , 1     , 2008  , 12    , 13      , 25       |
-     2   , 1     , 2009  , 16    , 14      , 21       |
-     2   , 2     , 2008  , 13    , 11      , 29       |
-     2   , 2     , 2009  , 23    , 20      , 26       |
-     3   , 1     , 2008  , 11    , 12      , 22       |
-     3   , 1     , 2009  , 13    , 11      , 27       |
-     3   , 2     , 2008  , 17    , 12      , 23       |
-     3   , 2     , 2009  , 14    ,  9      , 31       )
-
-# apply the layout
-grades %.>% 
-  layout %.>%
-  knitr::kable(.)
+library("cdata") + +# identify the record keys +recordKeys <- c("ID", "Year") + +# specify the incoming record shape +incoming_record <- wrapr::qchar_frame( + "Test" , "Fall" , "Spring" , "Winter" | + "1" , Fall1 , Spring1 , Winter1 | + "2" , Fall2 , Spring2 , Winter2 ) + +# specify the outgoing record shape +outgoing_record <- wrapr::qchar_frame( + "Time" , "Test1" , "Test2" | + "Fall" , Fall1 , Fall2 | + "Spring" , Spring1 , Spring2 | + "Winter" , Winter1 , Winter2 ) + +# put it all together into a layout +layout <- layout_specification( + incoming_shape = incoming_record, + outgoing_shape = outgoing_record, + recordKeys = recordKeys) + +# confirm we have the right layout +print(layout) +#> { +#> in_record <- wrapr::qchar_frame( +#> "ID" , "Year", "Test", "Fall", "Spring", "Winter" | +#> . , . , "1" , Fall1 , Spring1 , Winter1 | +#> . , . , "2" , Fall2 , Spring2 , Winter2 ) +#> in_keys <- c('ID', 'Year', 'Test') +#> +#> # becomes +#> +#> out_record <- wrapr::qchar_frame( +#> "ID" , "Year", "Time" , "Test1", "Test2" | +#> . , . , "Fall" , Fall1 , Fall2 | +#> . , . , "Spring", Spring1, Spring2 | +#> . , . , "Winter", Winter1, Winter2 ) +#> out_keys <- c('ID', 'Year', 'Time') +#> +#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) +#> } + +# example data +grades <- wrapr::build_frame( + "ID" , "Test", "Year", "Fall", "Spring", "Winter" | + 1 , 1 , 2008 , 15 , 16 , 19 | + 1 , 1 , 2009 , 12 , 13 , 27 | + 1 , 2 , 2008 , 22 , 22 , 24 | + 1 , 2 , 2009 , 10 , 14 , 20 | + 2 , 1 , 2008 , 12 , 13 , 25 | + 2 , 1 , 2009 , 16 , 14 , 21 | + 2 , 2 , 2008 , 13 , 11 , 29 | + 2 , 2 , 2009 , 23 , 20 , 26 | + 3 , 1 , 2008 , 11 , 12 , 22 | + 3 , 1 , 2009 , 13 , 11 , 27 | + 3 , 2 , 2008 , 17 , 12 , 23 | + 3 , 2 , 2009 , 14 , 9 , 31 ) + +# apply the layout +grades %.>% + layout %.>% + knitr::kable(.)
@@ -515,24 +584,28 @@

ID
-
-

-Example 3

-

(From: tidyr:demo/so-16032858.R , https://stackoverflow.com/questions/16032858/reshape-data-from-long-to-a-short-format-by-a-variable-and-rename-columns.)

-

Question: given data such as below how does one move treatment and control values for each individual into columns? Or how does one take a to b?

+
+

Example 3 +

+

(From: tidyr:demo/so-16032858.R +, https://stackoverflow.com/questions/16032858/reshape-data-from-long-to-a-short-format-by-a-variable-and-rename-columns.)

+

Question: given data such as below how does one move treatment and +control values for each individual into columns? Or how does one take +a to b?

-a <- wrapr::build_frame(
-   "Ind"   , "Treatment", "value" |
-     "Ind1", "Treat"    , 1       |
-     "Ind2", "Treat"    , 2       |
-     "Ind1", "Cont"     , 3       |
-     "Ind2", "Cont"     , 4       )
-
-b <- wrapr::build_frame(
-   "Ind"   , "Treat" , "Cont"|
-     "Ind1", 1       , 3     |
-     "Ind2", 2       , 4     )
-

In this case, a record corresponds to an individual, and the outgoing data is in row record form:

+a <- wrapr::build_frame( + "Ind" , "Treatment", "value" | + "Ind1", "Treat" , 1 | + "Ind2", "Treat" , 2 | + "Ind1", "Cont" , 3 | + "Ind2", "Cont" , 4 ) + +b <- wrapr::build_frame( + "Ind" , "Treat" , "Cont"| + "Ind1", 1 , 3 | + "Ind2", 2 , 4 )
+

In this case, a record corresponds to an individual, and the outgoing +data is in row record form:

@@ -548,46 +621,46 @@

That means we will use blocks_to_rowrecs_spec().

The cdata solution is as follows.

-library("cdata")
-
-# identify the record key
-recordKeys <- "Ind"
-
-# specify the incoming record shape
-incoming_record <- wrapr::qchar_frame(
-   "Treatment"  , "value" |
-    "Treat"     , Treat   |
-    "Cont"      , Cont    )
-
-
-# put it all together into a layout
-layout <- blocks_to_rowrecs_spec(
-  incoming_record,
-  recordKeys = recordKeys)
-
-# confirm we have the right layout
-print(layout)
-#> {
-#>  block_record <- wrapr::qchar_frame(
-#>    "Ind"  , "Treatment", "value" |
-#>      .    , "Treat"    , Treat   |
-#>      .    , "Cont"     , Cont    )
-#>  block_keys <- c('Ind', 'Treatment')
-#> 
-#>  # becomes
-#> 
-#>  row_record <- wrapr::qchar_frame(
-#>    "Ind"  , "Treat", "Cont" |
-#>      .    , Treat  , Cont   )
-#>  row_keys <- c('Ind')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-
-# apply the layout
-a %.>% 
-  layout %.>%
-  knitr::kable(.)
+library("cdata") + +# identify the record key +recordKeys <- "Ind" + +# specify the incoming record shape +incoming_record <- wrapr::qchar_frame( + "Treatment" , "value" | + "Treat" , Treat | + "Cont" , Cont ) + + +# put it all together into a layout +layout <- blocks_to_rowrecs_spec( + incoming_record, + recordKeys = recordKeys) + +# confirm we have the right layout +print(layout) +#> { +#> block_record <- wrapr::qchar_frame( +#> "Ind" , "Treatment", "value" | +#> . , "Treat" , Treat | +#> . , "Cont" , Cont ) +#> block_keys <- c('Ind', 'Treatment') +#> +#> # becomes +#> +#> row_record <- wrapr::qchar_frame( +#> "Ind" , "Treat", "Cont" | +#> . , Treat , Cont ) +#> row_keys <- c('Ind') +#> +#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) +#> } + +# apply the layout +a %.>% + layout %.>% + knitr::kable(.)

Ind
@@ -607,38 +680,55 @@

Ind
-

This particular transform, from a block consisting of a single column of values (and the rest of the columns being keys) to a row record, is the transform typically referred to as spread, dcast, or pivot. The tidyr package has a convenient call for this transform: spread(); cdata also has a similar convenience call: pivot_to_rowrecs().

-

Don’t worry if you didn’t notice that this example is a spread; one of the values of cdata is that you shouldn’t have to think about it. Most of the examples we show here are neither a simple spread/pivot nor a simple gather/unpivot.

-

By now you should be able to see the cdata solution always follows a very similar path. We try not to let the nature of the data layout transform (“easy” versus “hard”) dictate the solution method. Always slow down and draw out the “before” and “after” shapes before attempting to solve the problem.

+

This particular transform, from a block consisting of a single column +of values (and the rest of the columns being keys) to a row record, is +the transform typically referred to as spread, dcast, +or pivot. The tidyr package has a convenient call +for this transform: spread(); cdata also has a +similar convenience call: pivot_to_rowrecs().

+

Don’t worry if you didn’t notice that this example is a spread; one +of the values of cdata is that you shouldn’t have to think +about it. Most of the examples we show here are neither a simple +spread/pivot nor a simple gather/unpivot.

+

By now you should be able to see the cdata solution +always follows a very similar path. We try not to let the nature of the +data layout transform (“easy” versus “hard”) dictate the solution +method. Always slow down and draw out the “before” and “after” shapes +before attempting to solve the problem.

-
-

-Example 4

-

(From: tidyr:demo/so-17481212.R , https://stackoverflow.com/questions/17481212/rearranging-data-frame-in-r.)

-

Convert data that has one different observation for each column to a data that has all observations in rows. That is take a to b in the following.

+
+

Example 4 +

+

(From: tidyr:demo/so-17481212.R +, https://stackoverflow.com/questions/17481212/rearranging-data-frame-in-r.)

+

Convert data that has one different observation for each column to +data that has all observations in rows. That is, take a to +b in the following.

-a <- wrapr::build_frame(
-   "Name"   , "50", "100", "150", "200", "250", "300", "350" |
-     "Carla", 1.2 , 1.8  , 2.2  , 2.3  , 3    , 2.5  , 1.8   |
-     "Mace" , 1.5 , 1.1  , 1.9  , 2    , 3.6  , 3    , 2.5   )
-
-b <- wrapr::build_frame(
-   "Name"   , "Time", "Score" |
-     "Carla", 50    , 1.2     |
-     "Carla", 100   , 1.8     |
-     "Carla", 150   , 2.2     |
-     "Carla", 200   , 2.3     |
-     "Carla", 250   , 3       |
-     "Carla", 300   , 2.5     |
-     "Carla", 350   , 1.8     |
-     "Mace" , 50    , 1.5     |
-     "Mace" , 100   , 1.1     |
-     "Mace" , 150   , 1.9     |
-     "Mace" , 200   , 2       |
-     "Mace" , 250   , 3.6     |
-     "Mace" , 300   , 3       |
-     "Mace" , 350   , 2.5     )
-

Here a record corresponds to a single observation (keyed by Name), and the incoming data is arranged in row records:

+a <- wrapr::build_frame( + "Name" , "50", "100", "150", "200", "250", "300", "350" | + "Carla", 1.2 , 1.8 , 2.2 , 2.3 , 3 , 2.5 , 1.8 | + "Mace" , 1.5 , 1.1 , 1.9 , 2 , 3.6 , 3 , 2.5 ) + +b <- wrapr::build_frame( + "Name" , "Time", "Score" | + "Carla", 50 , 1.2 | + "Carla", 100 , 1.8 | + "Carla", 150 , 2.2 | + "Carla", 200 , 2.3 | + "Carla", 250 , 3 | + "Carla", 300 , 2.5 | + "Carla", 350 , 1.8 | + "Mace" , 50 , 1.5 | + "Mace" , 100 , 1.1 | + "Mace" , 150 , 1.9 | + "Mace" , 200 , 2 | + "Mace" , 250 , 3.6 | + "Mace" , 300 , 3 | + "Mace" , 350 , 2.5 )
+

Here a record corresponds to a single observation (keyed by +Name), and the incoming data is arranged in row +records:

@@ -661,65 +751,74 @@

Name1.8
-

This particular transformation, from a single row of values to a single column of values (with multiple key columns), is the transform commonly called gather, melt, or unpivot. This is a very common transformation—probably the most common one, by far. Again, cdata has a convenience function, pivot_to_blocks() (or its alias unpivot_to_blocks()).

-

Here, we will do the transform “the long way” with rowrecs_to_blocks_spec(). As we have a large number of columns we will use a helper function to specify the data layout transform.

+

This particular transformation, from a single row of values to a +single column of values (with multiple key columns), is the transform +commonly called gather, melt, or unpivot. +This is a very common transformation—probably the most common one, by +far. Again, cdata has a convenience function, +pivot_to_blocks() (or its alias +unpivot_to_blocks()).

+

Here, we will do the transform “the long way” with +rowrecs_to_blocks_spec(). As we have a large number of +columns we will use a helper function to specify the data layout +transform.

-library("cdata")
-
-# how to find records
-recordKeys <- "Name"
-
-# specify the outgoing record shape, using a helper function
-# (and print it --  notice that it's a data frame)
-( outgoing_record <- build_unpivot_control(
-  nameForNewKeyColumn = "Time",
-  nameForNewValueColumn = "Score",
-  columnsToTakeFrom = setdiff(colnames(a), recordKeys)) )
-#>   Time Score
-#> 1   50    50
-#> 2  100   100
-#> 3  150   150
-#> 4  200   200
-#> 5  250   250
-#> 6  300   300
-#> 7  350   350
-
-# put it all together into a layout
-layout <- rowrecs_to_blocks_spec(
-  outgoing_record,
-  recordKeys = recordKeys)
-
-# confirm we have the right layout
-print(layout)
-#> {
-#>  row_record <- wrapr::qchar_frame(
-#>    "Name"  , "50", "100", "150", "200", "250", "300", "350" |
-#>      .     , 50  , 100  , 150  , 200  , 250  , 300  , 350   )
-#>  row_keys <- c('Name')
-#> 
-#>  # becomes
-#> 
-#>  block_record <- wrapr::qchar_frame(
-#>    "Name"  , "Time", "Score" |
-#>      .     , "50"  , 50      |
-#>      .     , "100" , 100     |
-#>      .     , "150" , 150     |
-#>      .     , "200" , 200     |
-#>      .     , "250" , 250     |
-#>      .     , "300" , 300     |
-#>      .     , "350" , 350     )
-#>  block_keys <- c('Name', 'Time')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-
-# apply the layout
-a %.>% 
-  layout %.>%
-  transform(., Time = as.numeric(Time)) %.>%
-  # sort the data frame by Name and then Time
-  .[order(.$Name, .$Time), , drop = FALSE] %.>%
-  knitr::kable(., row.names = FALSE)
+library("cdata") + +# how to find records +recordKeys <- "Name" + +# specify the outgoing record shape, using a helper function +# (and print it -- notice that it's a data frame) +( outgoing_record <- build_unpivot_control( + nameForNewKeyColumn = "Time", + nameForNewValueColumn = "Score", + columnsToTakeFrom = setdiff(colnames(a), recordKeys)) ) +#> Time Score +#> 1 50 50 +#> 2 100 100 +#> 3 150 150 +#> 4 200 200 +#> 5 250 250 +#> 6 300 300 +#> 7 350 350 + +# put it all together into a layout +layout <- rowrecs_to_blocks_spec( + outgoing_record, + recordKeys = recordKeys) + +# confirm we have the right layout +print(layout) +#> { +#> row_record <- wrapr::qchar_frame( +#> "Name" , "50", "100", "150", "200", "250", "300", "350" | +#> . , 50 , 100 , 150 , 200 , 250 , 300 , 350 ) +#> row_keys <- c('Name') +#> +#> # becomes +#> +#> block_record <- wrapr::qchar_frame( +#> "Name" , "Time", "Score" | +#> . , "50" , 50 | +#> . , "100" , 100 | +#> . , "150" , 150 | +#> . , "200" , 200 | +#> . , "250" , 250 | +#> . , "300" , 300 | +#> . , "350" , 350 ) +#> block_keys <- c('Name', 'Time') +#> +#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) +#> } + +# apply the layout +a %.>% + layout %.>% + transform(., Time = as.numeric(Time)) %.>% + # sort the data frame by Name and then Time + .[order(.$Name, .$Time), , drop = FALSE] %.>% + knitr::kable(., row.names = FALSE)
@@ -800,25 +899,39 @@

Name
-
-

-Example 5

-

(From: tidyr:demo/so-9684671.R , https://stackoverflow.com/questions/9684671/wide-to-long-multiple-measures-each-time.)

+
+

Example 5 +

+

(From: tidyr:demo/so-9684671.R +, https://stackoverflow.com/questions/9684671/wide-to-long-multiple-measures-each-time.)

Convert from a to b.

-a <- wrapr::build_frame(
-   "id"    , "trt", "work.T1", "play.T1", "talk.T1", "total.T1", "work.T2", "play.T2", "talk.T2", "total.T2" |
-     "x1.1", "cnt", 0.3443   , 0.7842   , 0.1079   , 0.888     , 0.6484   , 0.8795   , 0.7234   , 0.5631     |
-     "x1.2", "tr" , 0.06132  , 0.8427   , 0.3339   , 0.04686   , 0.2348   , 0.1971   , 0.5164   , 0.7618     )
-
-b <- wrapr::build_frame(
-   "id"    , "trt", "time", "work" , "play", "talk", "total" |
-     "x1.1", "cnt", "T1"  , 0.3443 , 0.7842, 0.1079, 0.888   |
-     "x1.1", "cnt", "T2"  , 0.6484 , 0.8795, 0.7234, 0.5631  |
-     "x1.2", "tr" , "T1"  , 0.06132, 0.8427, 0.3339, 0.04686 |
-     "x1.2", "tr" , "T2"  , 0.2348 , 0.1971, 0.5164, 0.7618  )
-

A record is an observation, keyed by id (plus trt, which is a function of id).

+a <- wrapr::build_frame( + "id" , "trt", "work.T1", "play.T1", "talk.T1", "total.T1", "work.T2", "play.T2", "talk.T2", "total.T2" | + "x1.1", "cnt", 0.3443 , 0.7842 , 0.1079 , 0.888 , 0.6484 , 0.8795 , 0.7234 , 0.5631 | + "x1.2", "tr" , 0.06132 , 0.8427 , 0.3339 , 0.04686 , 0.2348 , 0.1971 , 0.5164 , 0.7618 ) + +b <- wrapr::build_frame( + "id" , "trt", "time", "work" , "play", "talk", "total" | + "x1.1", "cnt", "T1" , 0.3443 , 0.7842, 0.1079, 0.888 | + "x1.1", "cnt", "T2" , 0.6484 , 0.8795, 0.7234, 0.5631 | + "x1.2", "tr" , "T1" , 0.06132, 0.8427, 0.3339, 0.04686 | + "x1.2", "tr" , "T2" , 0.2348 , 0.1971, 0.5164, 0.7618 )
+

A record is an observation, keyed by id (plus +trt, which is a function of id).

++++++++++++ @@ -844,49 +957,50 @@

id trt0.5631
-

The incoming data is in row record format, so we can use rowrecs_to_blocks_spec().

+

The incoming data is in row record format, so we can use +rowrecs_to_blocks_spec().

-library("cdata")
-
-# identify the record keys
-recordKeys <- c("id", "trt")
-
-# specify the outgoing record shape
-outgoing_record <- wrapr::qchar_frame(
-    "time"  , "work" , "play" , "talk" , "total"  |
-    "T1"    , work.T1, play.T1, talk.T1, total.T1 |
-    "T2"    , work.T2, play.T2, talk.T2, total.T2 )
-
-# put it all together into a layout
-layout <- rowrecs_to_blocks_spec(
-  outgoing_record,
-  recordKeys = recordKeys)
-
-# confirm we have the right layout
-print(layout)
-#> {
-#>  row_record <- wrapr::qchar_frame(
-#>    "id"  , "trt", "work.T1", "play.T1", "talk.T1", "total.T1", "work.T2", "play.T2", "talk.T2", "total.T2" |
-#>      .   , .    , work.T1  , play.T1  , talk.T1  , total.T1  , work.T2  , play.T2  , talk.T2  , total.T2   )
-#>  row_keys <- c('id', 'trt')
-#> 
-#>  # becomes
-#> 
-#>  block_record <- wrapr::qchar_frame(
-#>    "id"  , "trt", "time", "work" , "play" , "talk" , "total"  |
-#>      .   , .    , "T1"  , work.T1, play.T1, talk.T1, total.T1 |
-#>      .   , .    , "T2"  , work.T2, play.T2, talk.T2, total.T2 )
-#>  block_keys <- c('id', 'trt', 'time')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-
-# apply the layout
-a %.>% 
-  layout %.>%
-  # reorder the frame by the record keys plus time
-  .[wrapr::orderv(.[ , c(recordKeys, "time"), drop = FALSE]), , drop = FALSE] %.>%
-  knitr::kable(., row.names = FALSE)
+library("cdata") + +# identify the record keys +recordKeys <- c("id", "trt") + +# specify the outgoing record shape +outgoing_record <- wrapr::qchar_frame( + "time" , "work" , "play" , "talk" , "total" | + "T1" , work.T1, play.T1, talk.T1, total.T1 | + "T2" , work.T2, play.T2, talk.T2, total.T2 ) + +# put it all together into a layout +layout <- rowrecs_to_blocks_spec( + outgoing_record, + recordKeys = recordKeys) + +# confirm we have the right layout +print(layout) +#> { +#> row_record <- wrapr::qchar_frame( +#> "id" , "trt", "work.T1", "play.T1", "talk.T1", "total.T1", "work.T2", "play.T2", "talk.T2", "total.T2" | +#> . , . , work.T1 , play.T1 , talk.T1 , total.T1 , work.T2 , play.T2 , talk.T2 , total.T2 ) +#> row_keys <- c('id', 'trt') +#> +#> # becomes +#> +#> block_record <- wrapr::qchar_frame( +#> "id" , "trt", "time", "work" , "play" , "talk" , "total" | +#> . , . , "T1" , work.T1, play.T1, talk.T1, total.T1 | +#> . , . , "T2" , work.T2, play.T2, talk.T2, total.T2 ) +#> block_keys <- c('id', 'trt', 'time') +#> +#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) +#> } + +# apply the layout +a %.>% + layout %.>% + # reorder the frame by the record keys plus time + .[wrapr::orderv(.[ , c(recordKeys, "time"), drop = FALSE]), , drop = FALSE] %.>% + knitr::kable(., row.names = FALSE)
@@ -936,37 +1050,54 @@

id
-
-

-Reversing Transforms

-

cdata transform specifications are usually reversible or invertible (and this can be enforced). So in solving any one of the above problems the user has complete freedom to try and solve “moving from a to b” or “moving from b to a” (and can pick whichever they find easier).

-

For example continuing with example 5, we can reverse the data layout transform using the t() function.

+
+

Reversing Transforms +

+

cdata transform specifications are usually reversible or +invertible (and this can be enforced). So in solving any one of the +above problems the user has complete freedom to try and solve “moving +from a to b” or “moving from b to a” (and can pick whichever they find +easier).

+

For example continuing with example 5, we can reverse the data layout +transform using the t() function.

-inv_layout <- t(layout)
-
-print(inv_layout)
-#> {
-#>  block_record <- wrapr::qchar_frame(
-#>    "id"  , "trt", "time", "work" , "play" , "talk" , "total"  |
-#>      .   , .    , "T1"  , work.T1, play.T1, talk.T1, total.T1 |
-#>      .   , .    , "T2"  , work.T2, play.T2, talk.T2, total.T2 )
-#>  block_keys <- c('id', 'trt', 'time')
-#> 
-#>  # becomes
-#> 
-#>  row_record <- wrapr::qchar_frame(
-#>    "id"  , "trt", "work.T1", "play.T1", "talk.T1", "total.T1", "work.T2", "play.T2", "talk.T2", "total.T2" |
-#>      .   , .    , work.T1  , play.T1  , talk.T1  , total.T1  , work.T2  , play.T2  , talk.T2  , total.T2   )
-#>  row_keys <- c('id', 'trt')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-
-# apply the inverse layout
-b %.>% 
-  inv_layout %.>%
-  knitr::kable(.)
+inv_layout <- t(layout) + +print(inv_layout) +#> { +#> block_record <- wrapr::qchar_frame( +#> "id" , "trt", "time", "work" , "play" , "talk" , "total" | +#> . , . , "T1" , work.T1, play.T1, talk.T1, total.T1 | +#> . , . , "T2" , work.T2, play.T2, talk.T2, total.T2 ) +#> block_keys <- c('id', 'trt', 'time') +#> +#> # becomes +#> +#> row_record <- wrapr::qchar_frame( +#> "id" , "trt", "work.T1", "play.T1", "talk.T1", "total.T1", "work.T2", "play.T2", "talk.T2", "total.T2" | +#> . , . , work.T1 , play.T1 , talk.T1 , total.T1 , work.T2 , play.T2 , talk.T2 , total.T2 ) +#> row_keys <- c('id', 'trt') +#> +#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) +#> } + +# apply the inverse layout +b %.>% + inv_layout %.>% + knitr::kable(.)
++++++++++++ @@ -1006,43 +1137,81 @@

id trt
-

In this case, the inverse transform recovered the original row and column order of a, but this is not guaranteed.

+

In this case, the inverse transform recovered the original row and +column order of a, but this is not guaranteed.

-
-

-Package entry points

-

The main cdata interfaces are given by the following set of methods:

+
+

Package entry points +

+

The main cdata interfaces are given by the following set +of methods:

Some convenience functions include:

  • -pivot_to_rowrecs(), for moving data from multi-row block records with one value per row (a single column of values) to single-row records [spread or dcast].
  • +pivot_to_rowrecs(), +for moving data from multi-row block records with one value per row (a +single column of values) to single-row records [spread or +dcast].
  • -pivot_to_blocks()/unpivot_to_blocks(), for moving data from single-row records to possibly multi row block records with one row per value (a single column of values) [gather or melt].
  • +pivot_to_blocks()/unpivot_to_blocks(), +for moving data from single-row records to possibly multi row block +records with one row per value (a single column of values) +[gather or melt].
  • -wrapr::qchar_frame() a helper function for specifying record control table layout specifications.
  • +wrapr::qchar_frame() +a helper function for specifying record control table layout +specifications.
  • -wrapr::build_frame() a helper function for specifying data frames.
  • +wrapr::build_frame() +a helper function for specifying data frames.
-

The package vignettes can be found in the “Articles” tab of the cdata documentation site.

+

The package vignettes can be found in the “Articles” tab of the cdata +documentation site.

-
-

-Conclusion

-

The key step in using cdata is to understand the record structure: what constitutes a record, what it would look like in a single row, and how the records are keyed. This is not always easy. However, understanding your data record layout is worth the effort. Once you understand the record structure of your data, the rest is relatively straightforward. Really all one is doing when using cdata is formalizing the transform “ask” into a machine readable example.

-

To make your own solutions, we suggest trying one of the above example solutions as a template. The idea of having the data layout transform be simple data (a list of a couple of data.frames) means one can use the full power of R and other R packages to build the data layout transform specification (one isn’t limited to some interface grammar specified by the data layout transform package). The idea of using arbitrary code to build up a data layout transform was used to good end in the grid scatter-plot example here.

-

We also note the value of being able to print and review the bulk of data layout transform, as it documents expected incoming data columns and interior block record key values.

+
+

Conclusion +

+

The key step in using cdata is to understand the record +structure: what constitutes a record, what it would look like in a +single row, and how the records are keyed. This is not always +easy. However, understanding your data record layout +is worth the effort. Once you understand the record structure +of your data, the rest is relatively straightforward. Really all one is +doing when using cdata is formalizing the transform “ask” +into a machine readable example.

+

To make your own solutions, we suggest trying one of the above +example solutions as a template. The idea of having the data layout +transform be simple data (a list of a couple of +data.frames) means one can use the full power of +R and other R packages to build the data +layout transform specification (one isn’t limited to some interface +grammar specified by the data layout transform package). The idea of +using arbitrary code to build up a data layout transform was used to +good end in the grid scatter-plot example here.

+

We also note the value of being able to print and review the bulk of +a data layout transform, as it documents expected incoming data columns +and interior block record key values.

@@ -1058,11 +1227,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -1071,5 +1242,7 @@

+ + diff --git a/docs/articles/general_transform.html b/docs/articles/general_transform.html index d442cf5..743f9f4 100644 --- a/docs/articles/general_transform.html +++ b/docs/articles/general_transform.html @@ -19,6 +19,8 @@ + +
+
-

One of the design goals of the cdata R package is that data occurs in records, and records may be a pattern of cells in a groups of rows.

-

The allows cdata to support very powerful and arbitrary record transforms in one or two steps. Using “row records” (that is records that are exactly one row) as an intermediate lets us take just about any record shape to just about any record shape: first convert to row-records, then re-block the data into arbitrary record shapes (please see here and here for the concepts).

-

But as with all general ideas, it is much easier to see what we mean by the above with a concrete example. Let’s consider the following artificial (but simple) example. Suppose we have the following data.

+

One of the design goals of the cdata R package is that data +occurs in records, and records may be a pattern of cells in a group of +rows.

+

This allows cdata to support very powerful and arbitrary +record transforms in one or two steps. Using “row records” (that is +records that are exactly one row) as an intermediate lets us take just +about any record shape to just about any record shape: first convert to +row-records, then re-block the data into arbitrary record shapes (please +see here +and here +for the concepts).

+

But as with all general ideas, it is much easier to see what we mean +by the above with a concrete example. Let’s consider the following +artificial (but simple) example. Suppose we have the following data.

-library("cdata")
-#> Loading required package: wrapr
-
-data <- wrapr::build_frame(
-   "record_id"  , "row" , "col1", "col2", "col3" |
-     1          , "row1", 1     , 2     , 3      |
-     1          , "row2", 4     , 5     , 6      |
-     1          , "row3", 7     , 8     , 9      |
-     2          , "row1", 11    , 12    , 13     |
-     2          , "row2", 14    , 15    , 16     |
-     2          , "row3", 17    , 18    , 19     )
-
-knitr::kable(data)
+library("cdata") +#> Loading required package: wrapr + +data <- wrapr::build_frame( + "record_id" , "row" , "col1", "col2", "col3" | + 1 , "row1", 1 , 2 , 3 | + 1 , "row2", 4 , 5 , 6 | + 1 , "row3", 7 , 8 , 9 | + 2 , "row1", 11 , 12 , 13 | + 2 , "row2", 14 , 15 , 16 | + 2 , "row3", 17 , 18 , 19 ) + +knitr::kable(data)
@@ -179,55 +187,72 @@

2021-06-11

record_id
-

In the above the records are the triples of rows with matching record_id and the different rows within the record are identified by the value in the row column. So The data items are named by the triplet record_id, row and renaming column name (col1, col2, or col2). This sort of naming of values is essentially Codd’s “guaranteed access rule”.

-

Suppose we want to transpose each of the records- swapping the row and column notions. With cdata this is easy. First you design a transform to flatten each complex record into a single wide row (using the design steps taught here). Essentially that is just specifying the following control variables. We define how to identify records (the key columns) and the structure of the records (giving the interior of the record arbitrary names we will re-use later).

+

In the above the records are the triples of rows with matching +record_id and the different rows within the record are +identified by the value in the row column. So the data +items are named by the triplet record_id, row +and remaining column name (col1, col2, or +col3). This sort of naming of values is essentially Codd’s +“guaranteed access rule”.

+

Suppose we want to transpose each of the records- swapping the row +and column notions. With cdata this is easy. First you +design a transform to flatten each complex record into a single wide row +(using the design steps taught here). +Essentially that is just specifying the following control variables. We +define how to identify records (the key columns) and the structure of +the records (giving the interior of the record arbitrary names we will +re-use later).

-recordKeys = 'record_id'
-
-incoming_shape <- wrapr::qchar_frame(
-   "row"   , "col1", "col2", "col3" |
-     "row1", v11   , v12   , v13    |
-     "row2", v21   , v22   , v23    |
-     "row3", v31   , v32   , v33    )
-

And we specify (using the same principles) the desired final record shape, re-using the interior names from the first step to show where values are to be mapped.

+recordKeys = 'record_id' + +incoming_shape <- wrapr::qchar_frame( + "row" , "col1", "col2", "col3" | + "row1", v11 , v12 , v13 | + "row2", v21 , v22 , v23 | + "row3", v31 , v32 , v33 )
+

And we specify (using the same principles) the desired final record +shape, re-using the interior names from the first step to show where +values are to be mapped.

-outgoing_shape <- wrapr::qchar_frame(
-   "column_label"  , "c_row1", "c_row2", "c_row3" |
-     "rec_col1"    , v11     , v21     , v31      |
-     "rec_col2"    , v12     , v22     , v32      |
-     "rec_col3"    , v13     , v23     , v33      )
-

Once you have done this you specify the overall transform by building a layout specifying the incoming and outgoing record shapes.

+outgoing_shape <- wrapr::qchar_frame( + "column_label" , "c_row1", "c_row2", "c_row3" | + "rec_col1" , v11 , v21 , v31 | + "rec_col2" , v12 , v22 , v32 | + "rec_col3" , v13 , v23 , v33 )
+

Once you have done this you specify the overall transform by building +a layout specifying the incoming and outgoing record shapes.

-layout <- layout_specification(
-  incoming_shape = incoming_shape,
-  outgoing_shape = outgoing_shape,
-  recordKeys = recordKeys)
-
-print(layout)
-#> {
-#>  in_record <- wrapr::qchar_frame(
-#>    "record_id"  , "row" , "col1", "col2", "col3" |
-#>      .          , "row1", v11   , v12   , v13    |
-#>      .          , "row2", v21   , v22   , v23    |
-#>      .          , "row3", v31   , v32   , v33    )
-#>  in_keys <- c('record_id', 'row')
-#> 
-#>  # becomes
-#> 
-#>  out_record <- wrapr::qchar_frame(
-#>    "record_id"  , "column_label", "c_row1", "c_row2", "c_row3" |
-#>      .          , "rec_col1"    , v11     , v21     , v31      |
-#>      .          , "rec_col2"    , v12     , v22     , v32      |
-#>      .          , "rec_col3"    , v13     , v23     , v33      )
-#>  out_keys <- c('record_id', 'column_label')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-

This layout specification or controller can then perform the transform.

+layout <- layout_specification( + incoming_shape = incoming_shape, + outgoing_shape = outgoing_shape, + recordKeys = recordKeys) + +print(layout) +#> { +#> in_record <- wrapr::qchar_frame( +#> "record_id" , "row" , "col1", "col2", "col3" | +#> . , "row1", v11 , v12 , v13 | +#> . , "row2", v21 , v22 , v23 | +#> . , "row3", v31 , v32 , v33 ) +#> in_keys <- c('record_id', 'row') +#> +#> # becomes +#> +#> out_record <- wrapr::qchar_frame( +#> "record_id" , "column_label", "c_row1", "c_row2", "c_row3" | +#> . , "rec_col1" , v11 , v21 , v31 | +#> . , "rec_col2" , v12 , v22 , v32 | +#> . , "rec_col3" , v13 , v23 , v33 ) +#> out_keys <- c('record_id', 'column_label') +#> +#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) +#> }
+

This layout specification or controller can then perform the +transform.

-data %.>%
-  layout %.>%
-  knitr::kable(.)
+data %.>% + layout %.>% + knitr::kable(.) @@ -282,12 +307,19 @@

2021-06-11

record_id

And the transform is done, each record has been transposed.

-

The principle is “draw a picture.” First we draw a picture of the block record structure we have, and then we draw a picture of the block record structure we want.

-

As you have seen, we have complete freedom to re-name columns and record-piece labels (the labels that tell us which portion of a block-record each row fits into).

-

If you don’t want to use pipe notation, you can use the method layout_by() (which takes a layout specification as an argument) or the method convert_records() (which takes the components of the transform specification as separate arguments).

+

The principle is “draw a picture.” First we draw a picture of the +block record structure we have, and then we draw a picture of the block +record structure we want.

+

As you have seen, we have complete freedom to re-name columns and +record-piece labels (the labels that tell us which portion of a +block-record each row fits into).

+

If you don’t want to use pipe notation, you can use the method +layout_by() (which takes a layout specification as an +argument) or the method convert_records() (which takes the +components of the transform specification as separate arguments).

-lr <- layout_by(layout, data)
-knitr::kable(lr)
+lr <- layout_by(layout, data) +knitr::kable(lr) @@ -342,14 +374,14 @@

2021-06-11

record_id
-
-cr <- convert_records(
-  data,
-  keyColumns = recordKeys,
-  incoming_shape = incoming_shape,
-  outgoing_shape = outgoing_shape)
-
-knitr::kable(cr)
+ +cr <- convert_records( + data, + keyColumns = recordKeys, + incoming_shape = incoming_shape, + outgoing_shape = outgoing_shape) + +knitr::kable(cr) @@ -403,35 +435,37 @@

2021-06-11

record_id
-

A nifty bonus is: if the transformation is “faithful” (preserves enough cells and labels), then it is invertible and in fact easy to invert (by the t() transpose/adjoint function).

+

A nifty bonus is: if the transformation is “faithful” (preserves +enough cells and labels), then it is invertible and in fact easy to +invert (by the t() transpose/adjoint function).

-inv_layout <- t(layout)
-
-print(inv_layout)
-#> {
-#>  in_record <- wrapr::qchar_frame(
-#>    "record_id"  , "column_label", "c_row1", "c_row2", "c_row3" |
-#>      .          , "rec_col1"    , v11     , v21     , v31      |
-#>      .          , "rec_col2"    , v12     , v22     , v32      |
-#>      .          , "rec_col3"    , v13     , v23     , v33      )
-#>  in_keys <- c('record_id', 'column_label')
-#> 
-#>  # becomes
-#> 
-#>  out_record <- wrapr::qchar_frame(
-#>    "record_id"  , "row" , "col1", "col2", "col3" |
-#>      .          , "row1", v11   , v12   , v13    |
-#>      .          , "row2", v21   , v22   , v23    |
-#>      .          , "row3", v31   , v32   , v33    )
-#>  out_keys <- c('record_id', 'row')
-#> 
-#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
-#> }
-
-data %.>%
-  layout %.>%
-  inv_layout %.>%
-  knitr::kable(.)
+inv_layout <- t(layout) + +print(inv_layout) +#> { +#> in_record <- wrapr::qchar_frame( +#> "record_id" , "column_label", "c_row1", "c_row2", "c_row3" | +#> . , "rec_col1" , v11 , v21 , v31 | +#> . , "rec_col2" , v12 , v22 , v32 | +#> . , "rec_col3" , v13 , v23 , v33 ) +#> in_keys <- c('record_id', 'column_label') +#> +#> # becomes +#> +#> out_record <- wrapr::qchar_frame( +#> "record_id" , "row" , "col1", "col2", "col3" | +#> . , "row1", v11 , v12 , v13 | +#> . , "row2", v21 , v22 , v23 | +#> . , "row3", v31 , v32 , v33 ) +#> out_keys <- c('record_id', 'row') +#> +#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) +#> } + +data %.>% + layout %.>% + inv_layout %.>% + knitr::kable(.) @@ -485,55 +519,58 @@

2021-06-11

record_id
-

Also these conversions can also be translated into rquery operators, and therefore saved to be run either in memory or directly on a database.

+

These conversions can also be translated into rquery +operators, and therefore saved to be run either in memory or directly on +a database.

-table_desciption <- rquery::local_td(data)
-ops <- table_desciption %.>%
-  layout
-
-cat(format(ops))
-#> mk_td("data", c(
-#>   "record_id",
-#>   "row",
-#>   "col1",
-#>   "col2",
-#>   "col3")) %.>%
-#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT a."record_id" "record_id", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col1" ELSE NULL END ) "v11", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col2" ELSE NULL END ) "v12", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col3" ELSE NULL END ) "v13", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col1" ELSE NULL END ) "v21", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col2" ELSE NULL END ) "v22", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col3" ELSE NULL END ) "v23", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col1" ELSE NULL END ) "v31", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col2" ELSE NULL END ) "v32", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col3" ELSE NULL END ) "v33" FROM "IN" a GROUP BY a."record_id") %.>%
-#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT a."record_id", b."column_label", CASE  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col1' THEN a."v11"  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col2' THEN a."v12"  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col3' THEN a."v13" ELSE NULL END AS "c_row1", CASE  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col1' THEN a."v21"  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col2' THEN a."v22"  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col3' THEN a."v23" ELSE NULL END AS "c_row2", CASE  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col1' THEN a."v31"  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col2' THEN a."v32"  WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col3' THEN a."v33" ELSE NULL END AS "c_row3" FROM "IN" a CROSS JOIN "rrtbl_39702684778083857069_0000000002" b )
-
-rquery::column_names(ops) 
-#> [1] "record_id"    "column_label" "c_row1"       "c_row2"       "c_row3"
-
-if(requireNamespace("DBI", quietly = TRUE) &&
-   requireNamespace("RSQLite", quietly = TRUE)) {
-  raw_connection <- DBI::dbConnect(RSQLite::SQLite(), 
-                                   ":memory:")
-  RSQLite::initExtension(raw_connection)
-  db <- rquery::rquery_db_info(
-    connection = raw_connection,
-    is_dbi = TRUE,
-    connection_options = rquery::rq_connection_tests(raw_connection))
-  
-  db_td <- rquery::rq_copy_to(db, "data", data)
-  
-  ops %.>% 
-    db %.>% 
-    knitr::kable(.) %.>%
-    print(.)
-  
-  DBI::dbDisconnect(raw_connection)
-}
-#> 
-#> 
-#> | record_id|column_label | c_row1| c_row2| c_row3|
-#> |---------:|:------------|------:|------:|------:|
-#> |         1|rec_col1     |      1|      4|      7|
-#> |         1|rec_col2     |      2|      5|      8|
-#> |         1|rec_col3     |      3|      6|      9|
-#> |         2|rec_col1     |     11|     14|     17|
-#> |         2|rec_col2     |     12|     15|     18|
-#> |         2|rec_col3     |     13|     16|     19|
-

And that is some of the generality of cdata transforms.

+table_desciption <- rquery::local_td(data) +ops <- table_desciption %.>% + layout + +cat(format(ops)) +#> mk_td("data", c( +#> "record_id", +#> "row", +#> "col1", +#> "col2", +#> "col3")) %.>% +#> non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT a."record_id" "record_id", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col1" ELSE NULL END ) "v11", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col2" ELSE NULL END ) "v12", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col3" ELSE NULL END ) "v13", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col1" ELSE NULL END ) "v21", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col2" ELSE NULL END ) "v22", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col3" ELSE NULL END ) "v23", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col1" ELSE NULL END ) "v31", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col2" ELSE NULL END ) "v32", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col3" ELSE NULL END ) "v33" FROM "IN" a GROUP BY a."record_id") %.>% +#> non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT a."record_id", b."column_label", CASE WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col1' THEN a."v11" WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col2' THEN a."v12" WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col3' THEN a."v13" ELSE NULL END AS "c_row1", CASE WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col1' THEN a."v21" WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col2' THEN a."v22" WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col3' THEN a."v23" ELSE NULL END AS "c_row2", CASE WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col1' THEN a."v31" WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col2' THEN a."v32" WHEN CAST(b."column_label" AS VARCHAR) = 'rec_col3' THEN a."v33" ELSE NULL END AS "c_row3" FROM "IN" a CROSS JOIN "rrtbl_07038396495688323042_0000000002" b ) + +rquery::column_names(ops) +#> [1] "record_id" "column_label" 
"c_row1" "c_row2" "c_row3" + +if(requireNamespace("DBI", quietly = TRUE) && + requireNamespace("RSQLite", quietly = TRUE)) { + raw_connection <- DBI::dbConnect(RSQLite::SQLite(), + ":memory:") + RSQLite::initExtension(raw_connection) + db <- rquery::rquery_db_info( + connection = raw_connection, + is_dbi = TRUE, + connection_options = rquery::rq_connection_tests(raw_connection)) + + db_td <- rquery::rq_copy_to(db, "data", data) + + ops %.>% + db %.>% + knitr::kable(.) %.>% + print(.) + + DBI::dbDisconnect(raw_connection) +} +#> +#> +#> | record_id|column_label | c_row1| c_row2| c_row3| +#> |---------:|:------------|------:|------:|------:| +#> | 1|rec_col1 | 1| 4| 7| +#> | 1|rec_col2 | 2| 5| 8| +#> | 1|rec_col3 | 3| 6| 9| +#> | 2|rec_col1 | 11| 14| 17| +#> | 2|rec_col2 | 12| 15| 18| +#> | 2|rec_col3 | 13| 16| 19| +

And that is some of the generality of cdata +transforms.

@@ -263,9 +267,10 @@

2021-06-11

model_id
-

We can expand record rows into blocks by a “multiplication” (or join) step.

+

We can expand record rows into blocks by a “multiplication” (or join) +step.

-knitr::kable(d2)
+knitr::kable(d2) @@ -286,10 +291,10 @@

2021-06-11

model_id
-
-d3 <- d2 %**% transform
-
-knitr::kable(d3)
+ +d3 <- d2 %**% transform + +knitr::kable(d3) @@ -320,11 +325,11 @@

2021-06-11

model_id
-
-# (or using general pipe notation)
-d2 %.>% 
-  transform %.>% 
-  knitr::kable(.)
+ +# (or using general pipe notation) +d2 %.>% + transform %.>% + knitr::kable(.) @@ -354,10 +359,12 @@

2021-06-11

model_id
-

(%//% and %**% being two operators introduced by the cdata package.)

-

And the two specialized operators have an inverse/adjoint relation.

+

(%//% and %**% being two operators +introduced by the cdata package.)

+

And the two specialized operators have an inverse/adjoint +relation.

-knitr::kable(d)
+knitr::kable(d) @@ -388,11 +395,11 @@

2021-06-11

model_id
-
-# identity
-d4 <- d %//% t(transform) %**% transform
-
-knitr::kable(d4)
+ +# identity +d4 <- d %//% t(transform) %**% transform + +knitr::kable(d4) @@ -422,14 +429,14 @@

2021-06-11

model_id
-

We can also pipe into the spec (and into its adjoint) using the wrapr dot pipe operator.

+

We can also pipe into the spec (and into its adjoint) using the wrapr dot pipe operator.

-# reverse or adjoint/transpose operation specification
-t_record_spec <- t(transform)
-
-d %.>% 
-  t_record_spec %.>%
-  knitr::kable(.)
+# reverse or adjoint/transpose operation specification +t_record_spec <- t(transform) + +d %.>% + t_record_spec %.>% + knitr::kable(.) @@ -450,11 +457,11 @@

2021-06-11

model_id
-
-# using dot-pipe's bquote style .() execute immediate notation
-d %.>% 
-  .(t(transform)) %.>%
-  knitr::kable(.)
+ +# using dot-pipe's bquote style .() execute immediate notation +d %.>% + .(t(transform)) %.>% + knitr::kable(.) @@ -475,12 +482,12 @@

2021-06-11

model_id
-
-# identity
-d %.>% 
-  .(t(transform)) %.>% 
-  transform %.>%
-  knitr::kable(.)
+ +# identity +d %.>% + .(t(transform)) %.>% + transform %.>% + knitr::kable(.) @@ -512,29 +519,29 @@

2021-06-11

model_id

And, of course, the exact same functionality for database tables.

-have_db <- requireNamespace("DBI", quietly = TRUE) &&
-   requireNamespace("RSQLite", quietly = TRUE)
+have_db <- requireNamespace("DBI", quietly = TRUE) && + requireNamespace("RSQLite", quietly = TRUE)
-raw_connection <- DBI::dbConnect(RSQLite::SQLite(), 
-                                 ":memory:")
-RSQLite::initExtension(raw_connection)
-db <- rquery::rquery_db_info(
-  connection = raw_connection,
-  is_dbi = TRUE,
-  connection_options = rquery::rq_connection_tests(raw_connection))
-
-d_td <- rquery::rq_copy_to(db, "d", d)
+raw_connection <- DBI::dbConnect(RSQLite::SQLite(), + ":memory:") +RSQLite::initExtension(raw_connection) +db <- rquery::rquery_db_info( + connection = raw_connection, + is_dbi = TRUE, + connection_options = rquery::rq_connection_tests(raw_connection)) + +d_td <- rquery::rq_copy_to(db, "d", d)
-ops <- d_td %//% t(transform)
-cat(format(ops))
-#> mk_td("d", c(
-#>   "model_id",
-#>   "measure",
-#>   "value")) %.>%
-#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT a."model_id" "model_id", MAX( CASE WHEN CAST(a."measure" AS VARCHAR) = 'AUC' THEN a."value" ELSE NULL END ) "AUC", MAX( CASE WHEN CAST(a."measure" AS VARCHAR) = 'R2' THEN a."value" ELSE NULL END ) "R2" FROM "IN" a GROUP BY a."model_id")
-
-rquery::execute(db, ops) %.>%
-  knitr::kable(.)
+ops <- d_td %//% t(transform) +cat(format(ops)) +#> mk_td("d", c( +#> "model_id", +#> "measure", +#> "value")) %.>% +#> non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT a."model_id" "model_id", MAX( CASE WHEN CAST(a."measure" AS VARCHAR) = 'AUC' THEN a."value" ELSE NULL END ) "AUC", MAX( CASE WHEN CAST(a."measure" AS VARCHAR) = 'R2' THEN a."value" ELSE NULL END ) "R2" FROM "IN" a GROUP BY a."model_id") + +rquery::execute(db, ops) %.>% + knitr::kable(.) @@ -555,11 +562,11 @@

2021-06-11

model_id
-
-d_td %.>% 
-  .(t(transform)) %.>%
-  rquery::execute(db, .) %.>%
-  knitr::kable(.)
+ +d_td %.>% + .(t(transform)) %.>% + rquery::execute(db, .) %.>% + knitr::kable(.) @@ -580,7 +587,7 @@

2021-06-11

model_id
-DBI::dbDisconnect(raw_connection)
+DBI::dbDisconnect(raw_connection)
-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -384,5 +390,7 @@

Dev status

+ + diff --git a/docs/news/index.html b/docs/news/index.html index 1dcc190..35efab4 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,66 +1,12 @@ - - - - - - - -Changelog • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Changelog • cdata - - + + - - -
-
- -
- -
+
-
-

-cdata 1.2.0 2021/06/11 Unreleased -

-
    -
  • Remove LazyData decl.
  • -
-
-
-

-cdata 1.1.9 2020/10/17 2020-10-17 -

-
    -
  • Move to tinytest.
  • -
-
-
-

-cdata 1.1.8 2020/08/24 2020-08-25 -

-
    -
  • Deal with data.table change in date type treatment.
  • +
    + +
    +
    + +
    • Remove LazyData decl.
    • +
    +
    + +
    • Move to tinytest.
    • +
    +
    + +
    • Deal with data.table change in date type treatment.
    • More links to https.
    • -
    -
    -
    -

    -cdata 1.1.7 2020/08/12 2020-08-12 -

    -
      -
    • Documentation improvements.
    • -
    -
    -
    -

    -cdata 1.1.6 2020/02/01 2020-02-01 -

    -
      -
    • fix noSuggests CRAN issue.
    • +
    +
    + +
    • Documentation improvements.
    • +
    +
    + +
    • fix noSuggests CRAN issue.
    • Move wrapr to Depends.
    • Add rmarkdown suggests.
    • -
    -
    -
    -

    -cdata 1.1.5 2020/01/21 2020-01-21 -

    -
      -
    • Fix SQL path and improve queries tolerance to types.
    • +
    +
    + +
    • Fix SQL path and improve queries tolerance to types.
    • Allow more trivial cases in transform construction.
    • -
    -
    -
    -

    -cdata 1.1.4 2020/01/07 2020-01-08 -

    -
      -
    • Tolerate nonexistent column value.
    • -
    -
    -
    -

    -cdata 1.1.3 2019/10/29 2019-10-29 -

    -
      -
    • Decorate transform.
    • +
    +
    + +
    • Tolerate nonexistent column value.
    • +
    +
    + +
    • Decorate transform.
    • Tune transform speed a bit (control column visibility and lifetime).
    • -
    -
    -
    -

    -cdata 1.1.2 2019/09/15 2019-09-15 -

    -
      -
    • Add data_algebra interop.
    • -
    -
    -
    -

    -cdata 1.1.1 2019/07/24 2019-07-24 -

    -
      -
    • Fix some newly dead links.
    • +
    +
    + +
    • Add data_algebra interop.
    • +
    +
    + +
    • Fix some newly dead links.
    • Make rqdatatable a dependency.
    • Start to schematize.
    • Move to newer f_db signature.
    • Adjust license.
    • -
    -
    -
    -

    -cdata 1.1.0 2019/04/27 2019-04-27 -

    -
      -
    • Switch to rqdatatable implementation.
    • +
    +
    + +
    • Switch to rqdatatable implementation.
    • General transform specification.
    • More care with factors.
    • Update vignettes.
    • More tests with factors and dates/times.
    • -
    -
    -
    -

    -cdata 1.0.9 2019/04/20 2019-04-20 -

    -
      -
    • “layout” commands.
    • +
    +
    + +
    • “layout” commands.
    • Deal better with duplicate entries in db-version of blocks to rows.
    • Move to wrapr draw_framec().
    • Fix typo in general transform example code.
    • -
    -
    -
    -

    -cdata 1.0.8 2019/03/30 2019-03-30 -

    -
      -
    • More column collision checks.
    • +
    +
    + +
    • More column collision checks.
    • Operator notation.
    • -
    -
    -
    -

    -cdata 1.0.7 2019/03/23 2019-03-23 -

    -
      -
    • Move to wrapr tests.
    • +
    +
    + +
    • Move to wrapr tests.
    • Better error messages.
    • Better handling of NA in row-dup check.
    • -
    -
    -
    -

    -cdata 1.0.6 2019/02/14 2019-02-14 -

    -
      -
    • More generality in control table keys.
    • +
    +
    + +
    • More generality in control table keys.
    • Move to RUnit.
    • Less direct data.table.
    • -
    -
    -
    -

    -cdata 1.0.5 2019/01/20 2019-01-20 -

    -
      -
    • Unify S3 method signatures to allow generic programming over them.
    • +
    +
    + +
    • Unify S3 method signatures to allow generic programming over them.
    • Generic record to record transform.
    • Move more functions from DBI to rquery.
    • -
    -
    -
    -

    -cdata 1.0.4 2019/01/04 2019-01-07 -

    -
      -
    • More vignettes.
    • +
    +
    + +
    • More vignettes.
    • Improve doc cross-linking.
    • Switch to new f_df signature.
    • -
    -
    -
    -

    -cdata 1.0.3 2018/10/20 2018-10-21 -

    -
      -
    • Fix ragged gather bug.
    • +
    +
    + +
    • Fix ragged gather bug.
    • More argument checking.
    • -
    -
    -
    -

    -cdata 1.0.2 2018/10/08 2018-10-08 -

    -
      -
    • Change defaults.
    • +
    +
    + +
    • Change defaults.
    • Some bug fixes.
    • -
    -
    -
    -

    -cdata 1.0.1 2018/09/22 2018-09-22 -

    -
      -
    • Clean up suggests.
    • -
    -
    -
    -

    -cdata 1.0.0 2018/09/08 2018-09-10 -

    -
      -
    • Neaten up uniqueness checking.
    • -
    -
    -
    -

    -cdata 0.7.4 2018/08/16 2018-08-17 -

    -
      -
    • rquery extension (moving methods to S3).
    • +
    +
    + +
    • Clean up suggests.
    • +
    +
    + +
    • Neaten up uniqueness checking.
    • +
    +
    + +
    • rquery extension (moving methods to S3).
    • Documentation fixes.
    • -
    -
    -
    -

    -cdata 0.7.3 2018/07/20 2018-07-20 -

    -
      -
    • Documentation fixes.
    • -
    -
    -
    -

    -cdata 0.7.2 2018/07/07 2018-07-07 -

    -
      -
    • switch local ops to data.table implementation.
    • +
    +
    + +
    • Documentation fixes.
    • +
    +
    + +
    • switch local ops to data.table implementation.
    • re-export more of wrapr
    • move db fns to rquery.
    • -
    -
    -
    -

    -cdata 0.7.1 2018/06/16 2018-06-16 -

    -
      -
    • Documentation fixes.
    • +
    +
    + +
    • Documentation fixes.
    • Don’t export cols().
    • Reduce wrapr re-export.
    • More rows in qlook().
    • -
    -
    -
    -

    -cdata 0.7.0 2018/04/09 2018-04-10 -

    -
      -
    • Narrow dependencies.
    • +
    +
    + +
    • Narrow dependencies.
    • Switch to dbExecute() (sparklyr seems to have that now).
    • Non-DB implementations for local data case.
    • Remove deprecated fns.
    • -
    -
    -
    -

    -cdata 0.6.0 2018/03/12 2018-03-13 -

    -
      -
    • Add cols() method.
    • +
    +
    + +
    • Add cols() method.
    • Add doi link in DESCRIPTION (CRAN request).
    • Use build_frame(), draw_frame(), and qchar_frame (quoted frame) from wrapr 1.3.0.
    • -
    -
    -
    -

    -cdata 0.5.2 2018/01/20 2018-01-20 -

    -
      -
    • Remove append based row binding (seems to have some issues on Spark).
    • +
    +
    + +
    • Remove append based row binding (seems to have some issues on Spark).
    • Deprecate old methods.
    • -
    -
    -
    -

    -cdata 0.5.1 2018/01/03 2018-01-04 -

    -
      -
    • New naming convention.
    • +
    +
    + +
    • New naming convention.
    • Doc fixes.
    • Better table lifetime controls.
    • Move to wrapr 1.0.2.
    • @@ -417,87 +238,58 @@

    • Add row binder.
    • Add map_fields.
    • Add winvector_temp_db_handle support.
    • -

    -
    -
    -

    -cdata 0.5.0 2017/11/13 2017-11-12 -

    -
      -
    • query-based re-implementation
    • +
    +
    + +
    • query-based re-implementation
    • fluid data workflow.
    • remove dplyr and tidyr dependence
    • -
    -
    -
    -

    -cdata 0.1.7 2017/10/31 Unreleased -

    -
      -
    • Better error msgs.
    • -
    -
    -
    -

    -cdata 0.1.6 2017/10/12 2017-10-12 -

    -
      -
    • work around empty keyset issues.
    • +
    +
    + +
    • Better error msgs.
    • +
    +
    + +
    • work around empty keyset issues.
    • add column control.
    • -
    -
    -
    -

    -cdata 0.1.5 2017/07/04 2017-07-04 -

    -
      -
    • Allow NA in key columns.
    • +
    +
    + +
    • Allow NA in key columns.
    • Add optional class annotation when moving values to rows.
    • -
    -
    -
    -

    -cdata 0.1.1 2017/05/05 2017-05-05 -

    -
      -
    • ungroup before calculating distinct.
    • -
    -
    -
    -

    -cdata 0.1.0 2017/03/28 2017-03-29 -

    -
      -
    • First release.
    • -
    -
    +
+
+ +
  • ungroup before calculating distinct.
  • +
+
+ +
  • First release.
  • +
+
-
- +
- - + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index 1273238..80ea5b8 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -56,8 +56,10 @@ img.icon { float: right; } -img { +/* Ensure in-page images don't run outside their container */ +.contents img { max-width: 100%; + height: auto; } /* Fix bug in bootstrap (only seen in firefox) */ @@ -78,11 +80,10 @@ dd { /* Section anchors ---------------------------------*/ a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; + display: none; + margin-left: 5px; + width: 20px; + height: 20px; background-image: url(./link.svg); background-repeat: no-repeat; @@ -90,17 +91,15 @@ a.anchor { background-position: center center; } -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; } - /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { @@ -264,31 +263,26 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -pre { - word-wrap: normal; - word-break: normal; - border: 1px solid #eee; -} - -pre, code { +pre, code, pre code { background-color: #f8f8f8; color: #333; } +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} -pre code { - overflow: auto; - word-wrap: normal; - white-space: pre; +pre { + border: 1px solid #eee; } -pre .img { +pre .img, pre .r-plt { margin: 5px 0; } -pre .img img { +pre .img img, pre .r-plt img { background-color: #fff; - display: block; - height: auto; } code a, pre a { @@ -305,9 +299,8 @@ a.sourceLine:hover { .kw {color: #264D66;} /* keyword */ .co {color: #888888;} /* comment */ -.message { color: black; font-weight: bolder;} -.error { color: orange; font-weight: bolder;} 
-.warning { color: #6A0366; font-weight: bolder;} +.error {font-weight: bolder;} +.warning {font-weight: bolder;} /* Clipboard --------------------------*/ @@ -365,3 +358,27 @@ mark { content: ""; } } + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 7e7048f..6f0eee4 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -80,7 +80,7 @@ $(document).ready(function() { var copyButton = ""; - $(".examples, div.sourceCode").addClass("hasCopyButton"); + $("div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); @@ -91,7 +91,7 @@ // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index bce4e4c..978f508 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,5 +1,5 @@ -pandoc: 2.11.4 -pkgdown: 1.6.1 +pandoc: 3.1.1 +pkgdown: 2.0.7 pkgdown_sha: ~ articles: blocksrecs: blocksrecs.html @@ -9,8 +9,8 @@ articles: exercises: exercises.html general_transform: general_transform.html operators: operators.html -last_built: 2021-06-11T23:26Z +last_built: 2023-08-19T23:33Z urls: - reference: https://winvector.github.io/cdata//reference - article: https://winvector.github.io/cdata//articles + reference: https://winvector.github.io/cdata/reference + article: https://winvector.github.io/cdata/articles diff --git a/docs/reference/blocks_to_rowrecs.html 
b/docs/reference/blocks_to_rowrecs.html index 8e99167..ccba11a 100644 --- a/docs/reference/blocks_to_rowrecs.html +++ b/docs/reference/blocks_to_rowrecs.html @@ -1,68 +1,13 @@ - - - - - - - -Map data records from block records to row records. — blocks_to_rowrecs • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map data records from block records to row records. — blocks_to_rowrecs • cdata - + + - - - -
-
- -
- -
+
@@ -149,111 +79,113 @@

Map data records from block records to row records.

row records (where each record is a single row).

-
blocks_to_rowrecs(
-  tallTable,
-  keyColumns,
-  controlTable,
-  ...,
-  columnsToCopy = NULL,
-  checkNames = TRUE,
-  checkKeys = TRUE,
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  tmp_name_source = wrapr::mk_tmp_name_source("bltrr"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
-
-# S3 method for default
-blocks_to_rowrecs(
-  tallTable,
-  keyColumns,
-  controlTable,
-  ...,
-  columnsToCopy = NULL,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  tmp_name_source = wrapr::mk_tmp_name_source("btrd"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
-
-# S3 method for relop
-blocks_to_rowrecs(
-  tallTable,
-  keyColumns,
-  controlTable,
-  ...,
-  columnsToCopy = NULL,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  tmp_name_source = wrapr::mk_tmp_name_source("bltrr"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
tallTable

data.frame containing data to be mapped (in-memory data.frame).

keyColumns

character vector of columns defining row groups

controlTable

table specifying mapping (local data frame)

...

force later arguments to be by name.

columnsToCopy

character, extra columns to copy.

checkNames

logical, if TRUE check names.

checkKeys

logical, if TRUE check keyColumns uniquely identify blocks (required).

strict

logical, if TRUE check control table name forms

controlTableKeys

character, which column names of the control table are considered to be keys.

tmp_name_source

a tempNameGenerator from cdata::mk_tmp_name_source()

temporary

logical, if TRUE use temporary tables

allow_rqdatatable

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

wide table built by mapping key-grouped tallTable rows to one row per group

-

Details

+
+
blocks_to_rowrecs(
+  tallTable,
+  keyColumns,
+  controlTable,
+  ...,
+  columnsToCopy = NULL,
+  checkNames = TRUE,
+  checkKeys = TRUE,
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  tmp_name_source = wrapr::mk_tmp_name_source("bltrr"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+# S3 method for default
+blocks_to_rowrecs(
+  tallTable,
+  keyColumns,
+  controlTable,
+  ...,
+  columnsToCopy = NULL,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  tmp_name_source = wrapr::mk_tmp_name_source("btrd"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+# S3 method for relop
+blocks_to_rowrecs(
+  tallTable,
+  keyColumns,
+  controlTable,
+  ...,
+  columnsToCopy = NULL,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  tmp_name_source = wrapr::mk_tmp_name_source("bltrr"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+ +
+

Arguments

+
tallTable
+

data.frame containing data to be mapped (in-memory data.frame).

+ + +
keyColumns
+

character vector of columns defining row groups

+ + +
controlTable
+

table specifying mapping (local data frame)

+ + +
...
+

force later arguments to be by name.

+ + +
columnsToCopy
+

character, extra columns to copy.

+ + +
checkNames
+

logical, if TRUE check names.

+ + +
checkKeys
+

logical, if TRUE check keyColumns uniquely identify blocks (required).

+ + +
strict
+

logical, if TRUE check control table name forms

+ +
controlTableKeys
+

character, which column names of the control table are considered to be keys.

+ + +
tmp_name_source
+

a tempNameGenerator from cdata::mk_tmp_name_source()

+ + +
temporary
+

logical, if TRUE use temporary tables

+ + +
allow_rqdatatable
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

wide table built by mapping key-grouped tallTable rows to one row per group

+
+
+

Details

The controlTable defines the names of each data element in the two notations: the notation of the tall table (which is row oriented) and the notation of the wide table (which is column oriented). @@ -264,91 +196,96 @@

Details To get behavior similar to tidyr::gather/spread one builds the control table by running an appropriate query over the data.

Some discussion and examples can be found here: -https://winvector.github.io/FluidData/FluidData.html and -here https://github.com/WinVector/cdata.

-

See also

- - - -

Examples

-
- # pivot example - d <- data.frame(meas = c('AUC', 'R2'), - val = c(0.6, 0.2)) - - cT <- build_pivot_control(d, - columnToTakeKeysFrom= 'meas', - columnToTakeValuesFrom= 'val') - blocks_to_rowrecs(d, - keyColumns = NULL, - controlTable = cT) -
#> AUC R2 -#> 1 0.6 0.2
- -d <- data.frame(meas = c('AUC', 'R2'), - val = c(0.6, 0.2)) -cT <- build_pivot_control( - d, - columnToTakeKeysFrom= 'meas', - columnToTakeValuesFrom= 'val') - -ops <- rquery::local_td(d) %.>% - blocks_to_rowrecs(., - keyColumns = NULL, - controlTable = cT) -cat(format(ops)) -
#> mk_td("d", c( -#> "meas", -#> "val")) %.>% -#> non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT MAX( CASE WHEN CAST(a."meas" AS VARCHAR) = 'AUC' THEN a."val" ELSE NULL END ) "AUC", MAX( CASE WHEN CAST(a."meas" AS VARCHAR) = 'R2' THEN a."val" ELSE NULL END ) "R2" FROM "IN" a )
-if(requireNamespace("rqdatatable", quietly = TRUE)) { - library("rqdatatable") - d %.>% - ops %.>% - print(.) -} -
#> Loading required package: rquery
#> AUC R2 -#> 1 0.6 0.2
-if(requireNamespace("RSQLite", quietly = TRUE)) { - db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - DBI::dbWriteTable(db, - 'd', - d, - overwrite = TRUE, - temporary = TRUE) - db %.>% - ops %.>% - print(.) - DBI::dbDisconnect(db) -} -
#> AUC R2 -#> 1 0.6 0.2
-
+https://winvector.github.io/FluidData/FluidData.html and +here https://github.com/WinVector/cdata.

+
+ + +
+

Examples

+

+  # pivot example
+  d <- data.frame(meas = c('AUC', 'R2'),
+                  val = c(0.6, 0.2))
+
+  cT <- build_pivot_control(d,
+                            columnToTakeKeysFrom= 'meas',
+                            columnToTakeValuesFrom= 'val')
+  blocks_to_rowrecs(d,
+                    keyColumns = NULL,
+                    controlTable = cT)
+#>   AUC  R2
+#> 1 0.6 0.2
+
+
+d <- data.frame(meas = c('AUC', 'R2'),
+                val = c(0.6, 0.2))
+cT <- build_pivot_control(
+  d,
+  columnToTakeKeysFrom= 'meas',
+  columnToTakeValuesFrom= 'val')
+
+ops <- rquery::local_td(d) %.>%
+  blocks_to_rowrecs(.,
+                    keyColumns = NULL,
+                    controlTable = cT)
+cat(format(ops))
+#> mk_td("d", c(
+#>   "meas",
+#>   "val")) %.>%
+#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT MAX( CASE WHEN CAST(a."meas" AS VARCHAR) = 'AUC' THEN a."val" ELSE NULL END ) "AUC", MAX( CASE WHEN CAST(a."meas" AS VARCHAR) = 'R2' THEN a."val" ELSE NULL END ) "R2" FROM "IN" a )
+
+if(requireNamespace("rqdatatable", quietly = TRUE)) {
+  library("rqdatatable")
+  d %.>%
+    ops %.>%
+    print(.)
+}
+#> Loading required package: rquery
+#>   AUC  R2
+#> 1 0.6 0.2
+
+if(requireNamespace("RSQLite", quietly = TRUE)) {
+  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  DBI::dbWriteTable(db,
+                    'd',
+                    d,
+                    overwrite = TRUE,
+                    temporary = TRUE)
+  db %.>%
+    ops %.>%
+    print(.)
+  DBI::dbDisconnect(db)
+}
+#>   AUC  R2
+#> 1 0.6 0.2
+
+
+
+
- - - + + diff --git a/docs/reference/blocks_to_rowrecs_q.html b/docs/reference/blocks_to_rowrecs_q.html index f18ff3c..613716a 100644 --- a/docs/reference/blocks_to_rowrecs_q.html +++ b/docs/reference/blocks_to_rowrecs_q.html @@ -1,68 +1,13 @@ - - - - - - - -Map sets rows to columns (query based, take name of table). — blocks_to_rowrecs_q • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map sets rows to columns (query based, take name of table). — blocks_to_rowrecs_q • cdata + + - - - - -
-
- -
- -
+
@@ -149,116 +79,118 @@

Map sets rows to columns (query based, take name of table).

and controlTable.

-
blocks_to_rowrecs_q(
-  tallTable,
-  keyColumns,
-  controlTable,
-  my_db,
-  ...,
-  columnsToCopy = NULL,
-  tempNameGenerator = mk_tmp_name_source("mvtcq"),
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  showQuery = FALSE,
-  defaultValue = NULL,
-  dropDups = TRUE,
-  temporary = FALSE,
-  resultName = NULL,
-  incoming_qualifiers = NULL,
-  outgoing_qualifiers = NULL,
-  executeQuery = TRUE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
tallTable

name of table containing data to be mapped (db/Spark data)

keyColumns

character list of columns defining row groups

controlTable

table specifying mapping (local data frame)

my_db

db handle

...

force later arguments to be by name.

columnsToCopy

character list of column names to copy

tempNameGenerator

a tempNameGenerator from cdata::mk_tmp_name_source()

strict

logical, if TRUE check control table name forms

controlTableKeys

character, which column names of the control table are considered to be keys.

checkNames

logical, if TRUE check names

checkKeys

logical, if TRUE check keying of tallTable

showQuery

if TRUE print query

defaultValue

if not NULL literal to use for non-match values.

dropDups

logical, if TRUE suppress duplicate columns (duplicate determined by name, not content).

temporary

logical, if TRUE make result temporary.

resultName

character, name for result table.

incoming_qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

outgoing_qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

executeQuery

logical, if TRUE execute the query and return result.

- -

Value

- -

wide table built by mapping key-grouped tallTable rows to one row per group

-

Details

+
+
blocks_to_rowrecs_q(
+  tallTable,
+  keyColumns,
+  controlTable,
+  my_db,
+  ...,
+  columnsToCopy = NULL,
+  tempNameGenerator = mk_tmp_name_source("mvtcq"),
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  showQuery = FALSE,
+  defaultValue = NULL,
+  dropDups = TRUE,
+  temporary = FALSE,
+  resultName = NULL,
+  incoming_qualifiers = NULL,
+  outgoing_qualifiers = NULL,
+  executeQuery = TRUE
+)
+
+ +
+

Arguments

+
tallTable
+

name of table containing data to be mapped (db/Spark data)

+ + +
keyColumns
+

character list of columns defining row groups

+ + +
controlTable
+

table specifying mapping (local data frame)

+ + +
my_db
+

db handle

+ +
...
+

force later arguments to be by name.

+ + +
columnsToCopy
+

character list of column names to copy

+ + +
tempNameGenerator
+

a tempNameGenerator from cdata::mk_tmp_name_source()

+ + +
strict
+

logical, if TRUE check control table name forms

+ + +
controlTableKeys
+

character, which column names of the control table are considered to be keys.

+ + +
checkNames
+

logical, if TRUE check names

+ + +
checkKeys
+

logical, if TRUE check keying of tallTable

+ + +
showQuery
+

if TRUE print query

+ + +
defaultValue
+

if not NULL literal to use for non-match values.

+ + +
dropDups
+

logical, if TRUE suppress duplicate columns (duplicate determined by name, not content).

+ + +
temporary
+

logical, if TRUE make result temporary.

+ + +
resultName
+

character, name for result table.

+ + +
incoming_qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ + +
outgoing_qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ + +
executeQuery
+

logical, if TRUE execute the query and return result.

+ +
+
+

Value

+ + +

wide table built by mapping key-grouped tallTable rows to one row per group

+
+
+

Details

This is using the theory of "fluid data" -(https://github.com/WinVector/cdata), which includes the +(https://github.com/WinVector/cdata), which includes the principle that each data cell has coordinates independent of the storage details and storage detail dependent coordinates (usually row-id, column-id, and group-id) can be re-derived at will (the @@ -275,65 +207,66 @@

Details To get behavior similar to tidyr::gather/spread one builds the control table by running an appropriate query over the data.

Some discussion and examples can be found here: -https://winvector.github.io/FluidData/FluidData.html and -here https://github.com/WinVector/cdata.

-

See also

- - - -

Examples

-
-if (requireNamespace("DBI", quietly = TRUE) && - requireNamespace("RSQLite", quietly = TRUE)) { - my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - # pivot example - d <- data.frame(meas = c('AUC', 'R2'), val = c(0.6, 0.2)) - rquery::rq_copy_to(my_db, - 'd', - d, - temporary = TRUE) - cT <- build_pivot_control_q('d', - columnToTakeKeysFrom= 'meas', - columnToTakeValuesFrom= 'val', - my_db = my_db) - tab <- blocks_to_rowrecs_q('d', - keyColumns = NULL, - controlTable = cT, - my_db = my_db) - qlook(my_db, tab) - DBI::dbDisconnect(my_db) -} -
#> table `mvtcq_04050014603667178285_0000000000` SQLiteConnection -#> nrow: 1 -#> 'data.frame': 1 obs. of 2 variables: -#> $ AUC: num 0.6 -#> $ R2 : num 0.2
-
+https://winvector.github.io/FluidData/FluidData.html and +here https://github.com/WinVector/cdata.

+
+ + +
+

Examples

+

+if (requireNamespace("DBI", quietly = TRUE) &&
+  requireNamespace("RSQLite", quietly = TRUE)) {
+  my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  # pivot example
+  d <- data.frame(meas = c('AUC', 'R2'), val = c(0.6, 0.2))
+  rquery::rq_copy_to(my_db,
+                    'd',
+                    d,
+                    temporary = TRUE)
+  cT <- build_pivot_control_q('d',
+                              columnToTakeKeysFrom= 'meas',
+                              columnToTakeValuesFrom= 'val',
+                              my_db = my_db)
+  tab <- blocks_to_rowrecs_q('d',
+                             keyColumns = NULL,
+                             controlTable = cT,
+                             my_db = my_db)
+  qlook(my_db, tab)
+  DBI::dbDisconnect(my_db)
+}
+#> table `mvtcq_75430306308458576643_0000000000` SQLiteConnection 
+#>  nrow: 1 
+#> 'data.frame':	1 obs. of  2 variables:
+#>  $ AUC: num 0.6
+#>  $ R2 : num 0.2
+
+
+
+
- - - + + diff --git a/docs/reference/blocks_to_rowrecs_spec.html b/docs/reference/blocks_to_rowrecs_spec.html index 183a3d7..f6fac78 100644 --- a/docs/reference/blocks_to_rowrecs_spec.html +++ b/docs/reference/blocks_to_rowrecs_spec.html @@ -1,68 +1,13 @@ - - - - - - - -Create a block records to row records transform specification. — blocks_to_rowrecs_spec • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a block records to row records transform specification. — blocks_to_rowrecs_spec • cdata - - + + - - -
-
- -
- -
+
@@ -149,156 +79,161 @@

Create a block records to row records transform specification.

extra row keys, and control table keys.

-
blocks_to_rowrecs_spec(
-  controlTable,
-  ...,
-  recordKeys = character(0),
-  controlTableKeys = colnames(controlTable)[[1]],
-  checkNames = TRUE,
-  checkKeys = TRUE,
-  strict = FALSE,
-  allow_rqdatatable = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
controlTable

an all character data frame or cdata pivot control.

...

not used, force later arguments to bind by name.

recordKeys

vector of columns identifying records.

controlTableKeys

vector of keying columns of the controlTable.

checkNames

passed to blocks_to_rowrecs.

checkKeys

passed to blocks_to_rowrecs.

strict

passed to blocks_to_rowrecs.

allow_rqdatatable

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

a record specification object

- -

Examples

-
-d <- wrapr::build_frame( - "id", "measure", "value" | - 1 , "AUC" , 0.7 | - 1 , "R2" , 0.4 | - 2 , "AUC" , 0.8 | - 2 , "R2" , 0.5 ) - -transform <- blocks_to_rowrecs_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") - -print(transform) -
#> { -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # becomes -#> -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
-d %.>% transform -
#> id AUC R2 -#> 1 1 0.7 0.4 -#> 2 2 0.8 0.5
-inv_transform <- t(transform) -print(inv_transform) -
#> { -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # becomes -#> -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
-# identity (in structure) -d %.>% transform %.>% inv_transform -
#> id measure value -#> 1 1 AUC 0.7 -#> 2 1 R2 0.4 -#> 3 2 AUC 0.8 -#> 4 2 R2 0.5
-# identity again (using .() "immediate" notation) -d %.>% transform %.>% .(t(transform)) -
#> id measure value -#> 1 1 AUC 0.7 -#> 2 1 R2 0.4 -#> 3 2 AUC 0.8 -#> 4 2 R2 0.5
- -
+
+
blocks_to_rowrecs_spec(
+  controlTable,
+  ...,
+  recordKeys = character(0),
+  controlTableKeys = colnames(controlTable)[[1]],
+  checkNames = TRUE,
+  checkKeys = TRUE,
+  strict = FALSE,
+  allow_rqdatatable = FALSE
+)
+
+ +
+

Arguments

+
controlTable
+

an all character data frame or cdata pivot control.

+ + +
...
+

not used, force later arguments to bind by name.

+ + +
recordKeys
+

vector of columns identifying records.

+ + +
controlTableKeys
+

vector of keying columns of the controlTable.

+ + +
checkNames
+

passed to blocks_to_rowrecs.

+ + +
checkKeys
+

passed to blocks_to_rowrecs.

+ + +
strict
+

passed to blocks_to_rowrecs.

+ + +
allow_rqdatatable
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

a record specification object

+
+ +
+

Examples

+

+d <- wrapr::build_frame(
+  "id", "measure", "value" |
+  1   , "AUC"    , 0.7     |
+  1   , "R2"     , 0.4     |
+  2   , "AUC"    , 0.8     |
+  2   , "R2"     , 0.5     )
+
+transform <- blocks_to_rowrecs_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+
+print(transform)
+#> {
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # becomes
+#> 
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+
+d %.>% transform
+#>   id AUC  R2
+#> 1  1 0.7 0.4
+#> 2  2 0.8 0.5
+
+inv_transform <- t(transform)
+print(inv_transform)
+#> {
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # becomes
+#> 
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+
+# identity (in structure)
+d %.>% transform %.>% inv_transform
+#>   id measure value
+#> 1  1     AUC   0.7
+#> 2  1      R2   0.4
+#> 3  2     AUC   0.8
+#> 4  2      R2   0.5
+
+# identity again (using .() "immediate" notation)
+d %.>% transform %.>% .(t(transform))
+#>   id measure value
+#> 1  1     AUC   0.7
+#> 2  1      R2   0.4
+#> 3  2     AUC   0.8
+#> 4  2      R2   0.5
+
+
+
+
+
- - - + + diff --git a/docs/reference/build_pivot_control.html b/docs/reference/build_pivot_control.html index 70f090b..587cc4d 100644 --- a/docs/reference/build_pivot_control.html +++ b/docs/reference/build_pivot_control.html @@ -1,67 +1,12 @@ - - - - - - - -Build a blocks_to_rowrecs()/rowrecs_to_blocks() control table that specifies a pivot from a data.frame. — build_pivot_control • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Build a blocks_to_rowrecs()/rowrecs_to_blocks() control table that specifies a pivot from a data.frame. — build_pivot_control • cdata - - + + - - -
-
- -
- -
+
-

Some discussion and examples can be found here: https://winvector.github.io/FluidData/FluidData.html.

+

Some discussion and examples can be found here: https://winvector.github.io/FluidData/FluidData.html.

-
build_pivot_control(
-  table,
-  columnToTakeKeysFrom,
-  columnToTakeValuesFrom,
-  ...,
-  prefix = columnToTakeKeysFrom,
-  sep = NULL,
-  tmp_name_source = wrapr::mk_tmp_name_source("bpc"),
-  temporary = FALSE
-)
-
-# S3 method for default
-build_pivot_control(
-  table,
-  columnToTakeKeysFrom,
-  columnToTakeValuesFrom,
-  ...,
-  prefix = columnToTakeKeysFrom,
-  sep = NULL,
-  tmp_name_source = wrapr::mk_tmp_name_source("bpcd"),
-  temporary = TRUE
-)
-
-# S3 method for relop
-build_pivot_control(
-  table,
-  columnToTakeKeysFrom,
-  columnToTakeValuesFrom,
-  ...,
-  prefix = columnToTakeKeysFrom,
-  sep = NULL,
-  tmp_name_source = wrapr::mk_tmp_name_source("bpc"),
-  temporary = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
table

data.frame to scan for new column names (in-memory data.frame).

columnToTakeKeysFrom

character name of column build new column names from.

columnToTakeValuesFrom

character name of column to get values from.

...

not used, force later args to be by name

prefix

column name prefix (only used when sep is not NULL)

sep

separator to build complex column names.

tmp_name_source

a tempNameGenerator from cdata::mk_tmp_name_source()

temporary

logical, if TRUE use temporary tables

- -

Value

- -

control table

-

See also

- - - -

Examples

-
- d <- data.frame(measType = c("wt", "ht"), - measValue = c(150, 6), - stringsAsFactors = FALSE) - build_pivot_control(d, - 'measType', 'measValue', - sep = '_') -
#> measType measValue -#> 1 wt measType_wt -#> 2 ht measType_ht
- -d <- data.frame(measType = c("wt", "ht"), - measValue = c(150, 6), - stringsAsFactors = FALSE) - -ops <- rquery::local_td(d) %.>% - build_pivot_control(., - 'measType', 'measValue', - sep = '_') -cat(format(ops)) -
#> mk_td("d", c( -#> "measType", -#> "measValue")) %.>% -#> non_sql_node(., build_pivot_control(., columnToTakeKeysFrom="measType", columnToTakeValuesFrom="measValue"))
-if(requireNamespace("rqdatatable", quietly = TRUE)) { - library("rqdatatable") - d %.>% - ops %.>% - print(.) -} -
#> measType measValue -#> 1 wt measType_wt -#> 2 ht measType_ht
-if(requireNamespace("RSQLite", quietly = TRUE)) { - db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - DBI::dbWriteTable(db, - 'd', - d, - overwrite = TRUE, - temporary = TRUE) - db %.>% - ops %.>% - print(.) - DBI::dbDisconnect(db) -} -
#> measType measValue -#> 1 ht measType_ht -#> 2 wt measType_wt
-
+
+
build_pivot_control(
+  table,
+  columnToTakeKeysFrom,
+  columnToTakeValuesFrom,
+  ...,
+  prefix = columnToTakeKeysFrom,
+  sep = NULL,
+  tmp_name_source = wrapr::mk_tmp_name_source("bpc"),
+  temporary = FALSE
+)
+
+# S3 method for default
+build_pivot_control(
+  table,
+  columnToTakeKeysFrom,
+  columnToTakeValuesFrom,
+  ...,
+  prefix = columnToTakeKeysFrom,
+  sep = NULL,
+  tmp_name_source = wrapr::mk_tmp_name_source("bpcd"),
+  temporary = TRUE
+)
+
+# S3 method for relop
+build_pivot_control(
+  table,
+  columnToTakeKeysFrom,
+  columnToTakeValuesFrom,
+  ...,
+  prefix = columnToTakeKeysFrom,
+  sep = NULL,
+  tmp_name_source = wrapr::mk_tmp_name_source("bpc"),
+  temporary = FALSE
+)
+
+ +
+

Arguments

+
table
+

data.frame to scan for new column names (in-memory data.frame).

+ + +
columnToTakeKeysFrom
+

character name of column to build new column names from.

+ + +
columnToTakeValuesFrom
+

character name of column to get values from.

+ + +
...
+

not used, force later args to be by name

+ + +
prefix
+

column name prefix (only used when sep is not NULL)

+ + +
sep
+

separator to build complex column names.

+ + +
tmp_name_source
+

a tempNameGenerator from cdata::mk_tmp_name_source()

+ + +
temporary
+

logical, if TRUE use temporary tables

+ +
+
+

Value

+ + +

control table

+
+
+

See also

+ +
+ +
+

Examples

+

+  d <- data.frame(measType = c("wt", "ht"),
+                  measValue = c(150, 6),
+                  stringsAsFactors = FALSE)
+  build_pivot_control(d,
+                      'measType', 'measValue',
+                      sep = '_')
+#>   measType   measValue
+#> 1       wt measType_wt
+#> 2       ht measType_ht
+
+
+d <- data.frame(measType = c("wt", "ht"),
+                measValue = c(150, 6),
+                stringsAsFactors = FALSE)
+
+ops <- rquery::local_td(d) %.>%
+  build_pivot_control(.,
+                      'measType', 'measValue',
+                      sep = '_')
+cat(format(ops))
+#> mk_td("d", c(
+#>   "measType",
+#>   "measValue")) %.>%
+#>  non_sql_node(., build_pivot_control(., columnToTakeKeysFrom="measType", columnToTakeValuesFrom="measValue"))
+
+if(requireNamespace("rqdatatable", quietly = TRUE)) {
+  library("rqdatatable")
+  d %.>%
+    ops %.>%
+    print(.)
+}
+#>   measType   measValue
+#> 1       wt measType_wt
+#> 2       ht measType_ht
+
+if(requireNamespace("RSQLite", quietly = TRUE)) {
+  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  DBI::dbWriteTable(db,
+                    'd',
+                    d,
+                    overwrite = TRUE,
+                    temporary = TRUE)
+  db %.>%
+    ops %.>%
+    print(.)
+  DBI::dbDisconnect(db)
+}
+#>   measType   measValue
+#> 1       ht measType_ht
+#> 2       wt measType_wt
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/build_pivot_control_q.html b/docs/reference/build_pivot_control_q.html index f2e38b0..3df85e6 100644 --- a/docs/reference/build_pivot_control_q.html +++ b/docs/reference/build_pivot_control_q.html @@ -1,67 +1,12 @@ - - - - - - - -Build a blocks_to_rowrecs_q() control table that specifies a pivot (query based, takes name of table). — build_pivot_control_q • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Build a blocks_to_rowrecs_q() control table that specifies a pivot (query based, takes name of table). — build_pivot_control_q • cdata - - + + - - -
-
- -
- -
+
-

Some discussion and examples can be found here: https://winvector.github.io/FluidData/FluidData.html.

+

Some discussion and examples can be found here: https://winvector.github.io/FluidData/FluidData.html.

-
build_pivot_control_q(
-  tableName,
-  columnToTakeKeysFrom,
-  columnToTakeValuesFrom,
-  my_db,
-  ...,
-  prefix = columnToTakeKeysFrom,
-  sep = NULL,
-  qualifiers = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
tableName

Name of table to scan for new column names.

columnToTakeKeysFrom

character name of column build new column names from.

columnToTakeValuesFrom

character name of column to get values from.

my_db

db handle

...

not used, force later args to be by name

prefix

column name prefix (only used when sep is not NULL)

sep

separator to build complex column names.

qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

- -

Value

- -

control table

-

See also

- - - -

Examples

-
-if (requireNamespace("DBI", quietly = TRUE) && - requireNamespace("RSQLite", quietly = TRUE)) { - my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - d <- data.frame(measType = c("wt", "ht"), - measValue = c(150, 6), - stringsAsFactors = FALSE) - rquery::rq_copy_to(my_db, - 'd', - d, - overwrite = TRUE, - temporary = TRUE) - build_pivot_control_q('d', 'measType', 'measValue', - my_db = my_db, - sep = '_') %.>% - print(.) - DBI::dbDisconnect(my_db) -} -
#> measType measValue -#> 1 ht measType_ht -#> 2 wt measType_wt
-
+
+
build_pivot_control_q(
+  tableName,
+  columnToTakeKeysFrom,
+  columnToTakeValuesFrom,
+  my_db,
+  ...,
+  prefix = columnToTakeKeysFrom,
+  sep = NULL,
+  qualifiers = NULL
+)
+
+ +
+

Arguments

+
tableName
+

Name of table to scan for new column names.

+ + +
columnToTakeKeysFrom
+

character name of column to build new column names from.

+ + +
columnToTakeValuesFrom
+

character name of column to get values from.

+ + +
my_db
+

db handle

+ + +
...
+

not used, force later args to be by name

+ + +
prefix
+

column name prefix (only used when sep is not NULL)

+ + +
sep
+

separator to build complex column names.

+ + +
qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ +
+
+

Value

+ + +

control table

+
+ + +
+

Examples

+

+if (requireNamespace("DBI", quietly = TRUE) &&
+  requireNamespace("RSQLite", quietly = TRUE)) {
+  my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  d <- data.frame(measType = c("wt", "ht"),
+                  measValue = c(150, 6),
+                  stringsAsFactors = FALSE)
+  rquery::rq_copy_to(my_db,
+                    'd',
+                    d,
+                    overwrite = TRUE,
+                    temporary = TRUE)
+  build_pivot_control_q('d', 'measType', 'measValue',
+                        my_db = my_db,
+                        sep = '_') %.>%
+     print(.)
+  DBI::dbDisconnect(my_db)
+}
+#>   measType   measValue
+#> 1       ht measType_ht
+#> 2       wt measType_wt
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/build_unpivot_control.html b/docs/reference/build_unpivot_control.html index b595707..da1ce6e 100644 --- a/docs/reference/build_unpivot_control.html +++ b/docs/reference/build_unpivot_control.html @@ -1,69 +1,14 @@ - - - - - - - -Build a rowrecs_to_blocks() control table that specifies a un-pivot (or "shred"). — build_unpivot_control • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Build a rowrecs_to_blocks() control table that specifies a un-pivot (or "shred"). — build_unpivot_control • cdata - - - - - - - - - - - + + - - -
-
- -
- -
+
+ +
+
build_unpivot_control(
+  nameForNewKeyColumn,
+  nameForNewValueColumn,
+  columnsToTakeFrom,
+  ...
+)
-
build_unpivot_control(
-  nameForNewKeyColumn,
-  nameForNewValueColumn,
-  columnsToTakeFrom,
-  ...
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
nameForNewKeyColumn

character name of column to write new keys in.

nameForNewValueColumn

character name of column to write new values in.

columnsToTakeFrom

character array names of columns to take values from.

...

not used, force later args to be by name

- -

Value

- -

control table

-

See also

- - - -

Examples

-
-build_unpivot_control("measurmentType", "measurmentValue", c("c1", "c2")) -
#> measurmentType measurmentValue -#> 1 c1 c1 -#> 2 c2 c2
-
+
+

Arguments

+
nameForNewKeyColumn
+

character name of column to write new keys in.

+ + +
nameForNewValueColumn
+

character name of column to write new values in.

+ + +
columnsToTakeFrom
+

character array names of columns to take values from.

+ + +
...
+

not used, force later args to be by name

+ +
+
+

Value

+ + +

control table

+
+
+

See also

+ +
+ +
+

Examples

+

+build_unpivot_control("measurmentType", "measurmentValue", c("c1", "c2"))
+#>   measurmentType measurmentValue
+#> 1             c1              c1
+#> 2             c2              c2
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/cdata-package.html b/docs/reference/cdata-package.html new file mode 100644 index 0000000..e5adc81 --- /dev/null +++ b/docs/reference/cdata-package.html @@ -0,0 +1,136 @@ + +cdata: Fluid Data Transformations. — cdata-package • cdata + + +
+
+ + + +
+
+ + +
+

Supplies implementations of higher order "fluid data" transforms. These +transforms move data between rows and columns, are controlled by a graphical +transformation specification, and have pivot and un-pivot as special cases. +Large scale implementation is based on 'rquery', so should be usable on +'SQL' compliant data sources (include large systems such as 'PostgreSQL' and +'Spark'). +This package introduces the idea of control table specification of data transforms (later also adapted from 'cdata' by 'tidyr'). +A theory of fluid data transforms can be found in the following articles: +https://winvector.github.io/FluidData/FluidDataReshapingWithCdata.html, +https://github.com/WinVector/cdata and https://winvector.github.io/FluidData/FluidData.html.

+
+ + + +
+

Author

+

Maintainer: John Mount jmount@win-vector.com

+

Authors:

Other contributors:

  • Win-Vector LLC [copyright holder]

  • +
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/check_cols_form_unique_keys.html b/docs/reference/check_cols_form_unique_keys.html index 3637b46..c7c57f8 100644 --- a/docs/reference/check_cols_form_unique_keys.html +++ b/docs/reference/check_cols_form_unique_keys.html @@ -1,67 +1,12 @@ - - - - - - - -Check columns form unique keys — check_cols_form_unique_keys • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Check columns form unique keys — check_cols_form_unique_keys • cdata - - + + - - -
-
- -
- -
+
@@ -147,50 +77,48 @@

Check columns form unique keys

Check columns form unique keys

-
check_cols_form_unique_keys(data, keyColNames)
+
+
check_cols_form_unique_keys(data, keyColNames)
+
+ +
+

Arguments

+
data
+

data.frame to check

-

Arguments

- - - - - - - - - - -
data

data.frame to check

keyColNames

character, names of columns to consider as keys

-

Value

+
keyColNames
+

character, names of columns to consider as keys

-

logical TRUE if rows are uniquely keyed by named columns

+
+
+

Value

+ + +

logical TRUE if rows are uniquely keyed by named columns

+
+
- - - + + diff --git a/docs/reference/convert_cdata_spec_to_yaml.html b/docs/reference/convert_cdata_spec_to_yaml.html index 8cbae43..0803045 100644 --- a/docs/reference/convert_cdata_spec_to_yaml.html +++ b/docs/reference/convert_cdata_spec_to_yaml.html @@ -1,67 +1,12 @@ - - - - - - - -Convert a layout_specification, blocks_to_rowrecs_spec, or rowrecs_to_blocks_spec to a simple object. — convert_cdata_spec_to_yaml • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Convert a layout_specification, blocks_to_rowrecs_spec, or rowrecs_to_blocks_spec to a simple object. — convert_cdata_spec_to_yaml • cdata - - - - + + -
-
- -
- -
+
@@ -147,46 +77,44 @@

Convert a layout_specification, blocks_to_rowrecs_spec, or rowrecs_to_blocks

Convert a layout_specification, blocks_to_rowrecs_spec, or rowrecs_to_blocks_spec to a simple object.

-
convert_cdata_spec_to_yaml(spec)
+
+
convert_cdata_spec_to_yaml(spec)
+
-

Arguments

- - - - - - -
spec

a layout_specification, blocks_to_rowrecs_spec, or rowrecs_to_blocks_spec

+
+

Arguments

+
spec
+

a layout_specification, blocks_to_rowrecs_spec, or rowrecs_to_blocks_spec

-

Value

+
+
+

Value

+ -

a simple object suitable for YAML serialization

+

a simple object suitable for YAML serialization

+
+
- - - + + diff --git a/docs/reference/convert_records.html b/docs/reference/convert_records.html index ab64a5a..3edcb43 100644 --- a/docs/reference/convert_records.html +++ b/docs/reference/convert_records.html @@ -1,67 +1,12 @@ - - - - - - - -General transform from arbitrary record shape to arbitrary record shape. — convert_records • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -General transform from arbitrary record shape to arbitrary record shape. — convert_records • cdata - - - - + + -
-
- -
- -
+
@@ -147,183 +77,186 @@

General transform from arbitrary record shape to arbitrary record shape.

General transform from arbitrary record shape to arbitrary record shape.

-
convert_records(
-  table,
-  incoming_shape = NULL,
-  outgoing_shape = NULL,
-  ...,
-  keyColumns = NULL,
-  columnsToCopy_in = NULL,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  incoming_controlTableKeys = colnames(incoming_shape)[[1]],
-  outgoing_controlTableKeys = colnames(outgoing_shape)[[1]],
-  tmp_name_source = wrapr::mk_tmp_name_source("crec"),
-  temporary = TRUE,
-  allow_rqdatatable_in = FALSE,
-  allow_rqdatatable_out = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
table

data.frame or relop.

incoming_shape

data.frame, definition of incoming record shape.

outgoing_shape

data.frame, defintion of outgoing record shape.

...

force later arguments to bind by name.

keyColumns

character vector of column defining incoming row groups

columnsToCopy_in

character array of incoming column names to copy.

checkNames

logical, if TRUE check names.

checkKeys

logical, if TRUE check columnsToCopy form row keys (not a requirement, unless you want to be able to invert the operation).

strict

logical, if TRUE check control table name forms.

incoming_controlTableKeys

character, which column names of the incoming control table are considered to be keys.

outgoing_controlTableKeys

character, which column names of the outgoing control table are considered to be keys.

tmp_name_source

a tempNameGenerator from cdata::mk_tmp_name_source()

temporary

logical, if TRUE use temporary tables

allow_rqdatatable_in

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

allow_rqdatatable_out

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

processing pipeline or transformed table

- -

Examples

-
- -incoming_shape <- qchar_frame( - "row", "col1", "col2", "col3" | - "row1", v11, v12, v13 | - "row2", v21, v22, v23 | - "row3", v31, v32, v33 ) - - -outgoing_shape <- qchar_frame( - "column", "row1", "row2", "row3" | - "col1", v11, v21 , v31 | - "col2", v12, v22 , v32 | - "col3", v13, v23 , v33 ) - -data <- build_frame( - 'record_id', 'row', 'col1', 'col2', 'col3' | - 1, 'row1', 1, 2, 3 | - 1, 'row2', 4, 5, 6 | - 1, 'row3', 7, 8, 9 | - 2, 'row1', 11, 12, 13 | - 2, 'row2', 14, 15, 16 | - 2, 'row3', 17, 18, 19 ) - -print(data) -
#> record_id row col1 col2 col3 -#> 1 1 row1 1 2 3 -#> 2 1 row2 4 5 6 -#> 3 1 row3 7 8 9 -#> 4 2 row1 11 12 13 -#> 5 2 row2 14 15 16 -#> 6 2 row3 17 18 19
-convert_records( - data, - keyColumns = 'record_id', - incoming_shape = incoming_shape, - outgoing_shape = outgoing_shape) -
#> record_id column row1 row2 row3 -#> 1 1 col1 1 4 7 -#> 2 1 col2 2 5 8 -#> 3 1 col3 3 6 9 -#> 4 2 col1 11 14 17 -#> 5 2 col2 12 15 18 -#> 6 2 col3 13 16 19
-td <- rquery::local_td(data) - -ops <- convert_records( - td, - keyColumns = 'record_id', - incoming_shape = incoming_shape, - outgoing_shape = outgoing_shape) - -cat(format(ops)) -
#> mk_td("data", c( -#> "record_id", -#> "row", -#> "col1", -#> "col2", -#> "col3")) %.>% -#> non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT a."record_id" "record_id", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col1" ELSE NULL END ) "v11", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col2" ELSE NULL END ) "v12", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col3" ELSE NULL END ) "v13", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col1" ELSE NULL END ) "v21", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col2" ELSE NULL END ) "v22", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col3" ELSE NULL END ) "v23", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col1" ELSE NULL END ) "v31", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col2" ELSE NULL END ) "v32", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col3" ELSE NULL END ) "v33" FROM "IN" a GROUP BY a."record_id") %.>% -#> non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT a."record_id", b."column", CASE WHEN CAST(b."column" AS VARCHAR) = 'col1' THEN a."v11" WHEN CAST(b."column" AS VARCHAR) = 'col2' THEN a."v12" WHEN CAST(b."column" AS VARCHAR) = 'col3' THEN a."v13" ELSE NULL END AS "row1", CASE WHEN CAST(b."column" AS VARCHAR) = 'col1' THEN a."v21" WHEN CAST(b."column" AS VARCHAR) = 'col2' THEN a."v22" WHEN CAST(b."column" AS VARCHAR) = 'col3' THEN a."v23" ELSE NULL END AS "row2", CASE WHEN CAST(b."column" AS VARCHAR) = 'col1' THEN a."v31" WHEN CAST(b."column" AS VARCHAR) = 'col2' THEN a."v32" WHEN CAST(b."column" AS VARCHAR) = 'col3' THEN a."v33" ELSE NULL END AS "row3" FROM "IN" a CROSS JOIN "crec_63088935191991423144_0000000004" b )
- - -
+
+
convert_records(
+  table,
+  incoming_shape = NULL,
+  outgoing_shape = NULL,
+  ...,
+  keyColumns = NULL,
+  columnsToCopy_in = NULL,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  incoming_controlTableKeys = colnames(incoming_shape)[[1]],
+  outgoing_controlTableKeys = colnames(outgoing_shape)[[1]],
+  tmp_name_source = wrapr::mk_tmp_name_source("crec"),
+  temporary = TRUE,
+  allow_rqdatatable_in = FALSE,
+  allow_rqdatatable_out = FALSE
+)
+
+ +
+

Arguments

+
table
+

data.frame or relop.

+ + +
incoming_shape
+

data.frame, definition of incoming record shape.

+ + +
outgoing_shape
+

data.frame, definition of outgoing record shape.

+ + +
...
+

force later arguments to bind by name.

+ + +
keyColumns
+

character vector of column defining incoming row groups

+ + +
columnsToCopy_in
+

character array of incoming column names to copy.

+ + +
checkNames
+

logical, if TRUE check names.

+ + +
checkKeys
+

logical, if TRUE check columnsToCopy form row keys (not a requirement, unless you want to be able to invert the operation).

+ + +
strict
+

logical, if TRUE check control table name forms.

+ + +
incoming_controlTableKeys
+

character, which column names of the incoming control table are considered to be keys.

+ + +
outgoing_controlTableKeys
+

character, which column names of the outgoing control table are considered to be keys.

+ + +
tmp_name_source
+

a tempNameGenerator from cdata::mk_tmp_name_source()

+ + +
temporary
+

logical, if TRUE use temporary tables

+ + +
allow_rqdatatable_in
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ + +
allow_rqdatatable_out
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

processing pipeline or transformed table

+
+ +
+

Examples

+

+
+incoming_shape <- qchar_frame(
+  "row",  "col1", "col2", "col3" |
+  "row1",   v11,     v12,  v13   |
+  "row2",   v21,     v22,  v23   |
+  "row3",   v31,     v32,  v33   )
+
+
+outgoing_shape <- qchar_frame(
+  "column", "row1", "row2", "row3" |
+  "col1",      v11,  v21  ,  v31   |
+  "col2",      v12,  v22  ,  v32   |
+  "col3",      v13,  v23  ,  v33   )
+
+data <- build_frame(
+  'record_id', 'row',  'col1', 'col2', 'col3'  |
+  1,           'row1',  1,      2,      3      |
+  1,           'row2',  4,      5,      6      |
+  1,           'row3',  7,      8,      9      |
+  2,           'row1',  11,     12,     13     |
+  2,           'row2',  14,     15,     16     |
+  2,           'row3',  17,     18,     19     )
+
+print(data)
+#>   record_id  row col1 col2 col3
+#> 1         1 row1    1    2    3
+#> 2         1 row2    4    5    6
+#> 3         1 row3    7    8    9
+#> 4         2 row1   11   12   13
+#> 5         2 row2   14   15   16
+#> 6         2 row3   17   18   19
+
+convert_records(
+  data,
+  keyColumns = 'record_id',
+  incoming_shape = incoming_shape,
+  outgoing_shape = outgoing_shape)
+#>   record_id column row1 row2 row3
+#> 1         1   col1    1    4    7
+#> 2         1   col2    2    5    8
+#> 3         1   col3    3    6    9
+#> 4         2   col1   11   14   17
+#> 5         2   col2   12   15   18
+#> 6         2   col3   13   16   19
+
+td <- rquery::local_td(data)
+
+ops <- convert_records(
+  td,
+  keyColumns = 'record_id',
+  incoming_shape = incoming_shape,
+  outgoing_shape = outgoing_shape)
+
+cat(format(ops))
+#> mk_td("data", c(
+#>   "record_id",
+#>   "row",
+#>   "col1",
+#>   "col2",
+#>   "col3")) %.>%
+#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT a."record_id" "record_id", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col1" ELSE NULL END ) "v11", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col2" ELSE NULL END ) "v12", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row1' THEN a."col3" ELSE NULL END ) "v13", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col1" ELSE NULL END ) "v21", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col2" ELSE NULL END ) "v22", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row2' THEN a."col3" ELSE NULL END ) "v23", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col1" ELSE NULL END ) "v31", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col2" ELSE NULL END ) "v32", MAX( CASE WHEN CAST(a."row" AS VARCHAR) = 'row3' THEN a."col3" ELSE NULL END ) "v33" FROM "IN" a GROUP BY a."record_id") %.>%
+#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT a."record_id", b."column", CASE  WHEN CAST(b."column" AS VARCHAR) = 'col1' THEN a."v11"  WHEN CAST(b."column" AS VARCHAR) = 'col2' THEN a."v12"  WHEN CAST(b."column" AS VARCHAR) = 'col3' THEN a."v13" ELSE NULL END AS "row1", CASE  WHEN CAST(b."column" AS VARCHAR) = 'col1' THEN a."v21"  WHEN CAST(b."column" AS VARCHAR) = 'col2' THEN a."v22"  WHEN CAST(b."column" AS VARCHAR) = 'col3' THEN a."v23" ELSE NULL END AS "row2", CASE  WHEN CAST(b."column" AS VARCHAR) = 'col1' THEN a."v31"  WHEN CAST(b."column" AS VARCHAR) = 'col2' THEN a."v32"  WHEN CAST(b."column" AS VARCHAR) = 'col3' THEN a."v33" ELSE NULL END AS "row3" FROM "IN" a CROSS JOIN "crec_58641414306686443206_0000000004" b )
+
+
+
+
+
+
- - - + + diff --git a/docs/reference/convert_yaml_to_cdata_spec.html b/docs/reference/convert_yaml_to_cdata_spec.html index 80ca36e..8d88aab 100644 --- a/docs/reference/convert_yaml_to_cdata_spec.html +++ b/docs/reference/convert_yaml_to_cdata_spec.html @@ -1,67 +1,12 @@ - - - - - - - -Read a cdata record transform from a simple object (such as is imported from YAML). — convert_yaml_to_cdata_spec • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Read a cdata record transform from a simple object (such as is imported from YAML). — convert_yaml_to_cdata_spec • cdata - - - - + + -
-
- -
- -
+
@@ -147,46 +77,44 @@

Read a cdata record transform from a simple object (such as is imported from

Read a cdata record transform from a simple object (such as is imported from YAML).

-
convert_yaml_to_cdata_spec(obj)
+
+
convert_yaml_to_cdata_spec(obj)
+
-

Arguments

- - - - - - -
obj

object to convert

+
+

Arguments

+
obj
+

object to convert

-

Value

+
+
+

Value

+ -

cdata transform specification

+

cdata transform specification

+
+
- - - + + diff --git a/docs/reference/get_transform_details.html b/docs/reference/get_transform_details.html index 6c1fcbf..a116b60 100644 --- a/docs/reference/get_transform_details.html +++ b/docs/reference/get_transform_details.html @@ -1,67 +1,12 @@ - - - - - - - -Upack details of a cdata record transform. — get_transform_details • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Upack details of a cdata record transform. — get_transform_details • cdata - - - - + + -
-
- -
- -
+
@@ -147,46 +77,44 @@

Unpack details of a cdata record transform.

Unpack details, especially: generate data frames representing both sides of a transform.

-
get_transform_details(x)
+
+
get_transform_details(x)
+
-

Arguments

- - - - - - -
x

blocks_to_rowrecs_spec or rowrecs_to_blocks_spec

+
+

Arguments

+
x
+

blocks_to_rowrecs_spec or rowrecs_to_blocks_spec

-

Value

+
+
+

Value

+ -

detailed fields

+

detailed fields

+
+
- - - + + diff --git a/docs/reference/grapes-slash-slash-grapes.html b/docs/reference/grapes-slash-slash-grapes.html index 902715f..04b5e8b 100644 --- a/docs/reference/grapes-slash-slash-grapes.html +++ b/docs/reference/grapes-slash-slash-grapes.html @@ -1,67 +1,12 @@ - - - - - - - -Factor-out (aggregate/project) block records into row records. — %//% • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Factor-out (aggregate/project) block records into row records. — %//% • cdata - - + + - - -
-
- -
- -
+
+ +
+
table %//% transform
+
+ +
+

Arguments

+
table
+

data (data.frame or relop).

+ + +
transform
+

a blocks_to_rowrecs_spec.

+ +
+
+

Value

+ + +

blocks_to_rowrecs() result.

-
table %//% transform
- -

Arguments

- - - - - - - - - - -
table

data (data.frame or relop).

transform

a blocks_to_rowrecs_spec.

- -

Value

- -

blocks_to_rowrecs() result.

- -

Examples

-
-d <- wrapr::build_frame( - "id", "measure", "value" | - 1 , "AUC" , 0.7 | - 1 , "R2" , 0.4 | - 2 , "AUC" , 0.8 | - 2 , "R2" , 0.5 ) - -transform <- blocks_to_rowrecs_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") - -d %//% transform -
#> id AUC R2 -#> 1 1 0.7 0.4 -#> 2 2 0.8 0.5
-# identity (in structure) -d %//% transform %**% t(transform) -
#> id measure value -#> 1 1 AUC 0.7 -#> 2 1 R2 0.4 -#> 3 2 AUC 0.8 -#> 4 2 R2 0.5
-
+
+

Examples

+

+d <- wrapr::build_frame(
+  "id", "measure", "value" |
+  1   , "AUC"    , 0.7     |
+  1   , "R2"     , 0.4     |
+  2   , "AUC"    , 0.8     |
+  2   , "R2"     , 0.5     )
+
+transform <- blocks_to_rowrecs_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+
+d %//% transform
+#>   id AUC  R2
+#> 1  1 0.7 0.4
+#> 2  2 0.8 0.5
+
+# identity (in structure)
+d %//% transform %**% t(transform)
+#>   id measure value
+#> 1  1     AUC   0.7
+#> 2  1      R2   0.4
+#> 3  2     AUC   0.8
+#> 4  2      R2   0.5
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/grapes-times-times-grapes.html b/docs/reference/grapes-times-times-grapes.html index e7a433c..5d49a4b 100644 --- a/docs/reference/grapes-times-times-grapes.html +++ b/docs/reference/grapes-times-times-grapes.html @@ -1,67 +1,12 @@ - - - - - - - -Multiply/join row records into block records. — %**% • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Multiply/join row records into block records. — %**% • cdata - - + + - - -
-
- -
- -
+
+ +
+
table %**% transform
+
+ +
+

Arguments

+
table
+

data (data.frame or relop).

+ + +
transform
+

a rowrecs_to_blocks_spec.

+ +
+
+

Value

+ + +

rowrecs_to_blocks() result.

-
table %**% transform
- -

Arguments

- - - - - - - - - - -
table

data (data.frame or relop).

transform

a rowrecs_to_blocks_spec.

- -

Value

- -

rowrecs_to_blocks() result.

- -

Examples

-
-d <- wrapr::build_frame( - "id", "AUC", "R2" | - 1 , 0.7 , 0.4 | - 2 , 0.8 , 0.5 ) - -transform <- rowrecs_to_blocks_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") - -d %**% transform -
#> id measure value -#> 1 1 AUC 0.7 -#> 2 1 R2 0.4 -#> 3 2 AUC 0.8 -#> 4 2 R2 0.5
-# identity (in structure) -d %**% transform %//% t(transform) -
#> id AUC R2 -#> 1 1 0.7 0.4 -#> 2 2 0.8 0.5
-
+
+

Examples

+

+d <- wrapr::build_frame(
+  "id", "AUC", "R2" |
+  1   , 0.7  , 0.4  |
+  2   , 0.8  , 0.5  )
+
+transform <- rowrecs_to_blocks_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+
+d %**% transform
+#>   id measure value
+#> 1  1     AUC   0.7
+#> 2  1      R2   0.4
+#> 3  2     AUC   0.8
+#> 4  2      R2   0.5
+
+# identity (in structure)
+d %**% transform %//% t(transform)
+#>   id AUC  R2
+#> 1  1 0.7 0.4
+#> 2  2 0.8 0.5
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/index.html b/docs/reference/index.html index f44ae1a..26706b2 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,66 +1,12 @@ - - - - - - - -Function reference • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • cdata + + - - - - -
-
- -
- -
+
- - - - - - - - - - -
-

All functions

+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+

All functions

+

blocks_to_rowrecs()

Map data records from block records to row records.

+

blocks_to_rowrecs_spec()

Create a block records to row records transform specification.

+

build_pivot_control()

Build a blocks_to_rowrecs()/rowrecs_to_blocks() control table that specifies a pivot from a data.frame.

+

build_unpivot_control()

Build a rowrecs_to_blocks() control table that specifies an un-pivot (or "shred").

-

cdata

+
+

cdata cdata-package

cdata: Fluid Data Transformations.

+

convert_cdata_spec_to_yaml()

Convert a layout_specification, blocks_to_rowrecs_spec, or rowrecs_to_blocks_spec to a simple object.

+

convert_records()

General transform from arbitrary record shape to arbitrary record shape.

+

convert_yaml_to_cdata_spec()

Read a cdata record transform from a simple object (such as is imported from YAML).

+

`%//%`

Factor-out (aggregate/project) block records into row records.

+

`%**%`

Multiply/join row records into block records.

+

layout_by()

Use transform spec to layout data.

+

layout_by(<blocks_to_rowrecs_spec>)

Use transform spec to layout data.

+

layout_by(<cdata_general_transform_spec>)

Use transform spec to layout data.

+

layout_by(<rowrecs_to_blocks_spec>)

Use transform spec to layout data.

+

layout_specification()

Create a record to record spec.

+

map_fields()

Map field values from one column into new derived columns (takes a data.frame).

+

map_fields_q()

Map field values from one column into new derived columns (query based, takes name of table).

+

pivot_to_rowrecs() layout_to_rowrecs()

Map data records from block records that have one row per measurement value to row records.

+

rowrecs_to_blocks()

Map data records from row records to block records.

+

rowrecs_to_blocks_spec()

Create a row records to block records transform specification.

+

unpivot_to_blocks() layout_to_blocks() pivot_to_blocks()

Map data records from row records to block records with one record row per columnsToTakeFrom value.

- +
+
-
- +
- - + + diff --git a/docs/reference/layout_by.blocks_to_rowrecs_spec.html b/docs/reference/layout_by.blocks_to_rowrecs_spec.html index 55b6ac2..4c3607e 100644 --- a/docs/reference/layout_by.blocks_to_rowrecs_spec.html +++ b/docs/reference/layout_by.blocks_to_rowrecs_spec.html @@ -1,67 +1,12 @@ - - - - - - - -Use transform spec to layout data. — layout_by.blocks_to_rowrecs_spec • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Use transform spec to layout data. — layout_by.blocks_to_rowrecs_spec • cdata - - + + - - -
-
- -
- -
+
@@ -147,90 +77,92 @@

Use transform spec to layout data.

Use transform spec to layout data.

-
# S3 method for blocks_to_rowrecs_spec
-layout_by(transform, table)
- -

Arguments

- - - - - - - - - - -
transform

object of class blocks_to_rowrecs_spec.

table

data.frame or relop.

- -

Value

- -

re-arranged data or data reference (relop).

- -

Examples

-
-d <- wrapr::build_frame( - "id", "measure", "value" | - 1 , "AUC" , 0.7 | - 1 , "R2" , 0.4 | - 2 , "AUC" , 0.8 | - 2 , "R2" , 0.5 ) - -transform <- blocks_to_rowrecs_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") - -print(transform) -
#> { -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # becomes -#> -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
-layout_by(transform, d) -
#> id AUC R2 -#> 1 1 0.7 0.4 -#> 2 2 0.8 0.5
-
+
+
# S3 method for blocks_to_rowrecs_spec
+layout_by(transform, table)
+
+ +
+

Arguments

+
transform
+

object of class blocks_to_rowrecs_spec.

+ + +
table
+

data.frame or relop.

+ +
+
+

Value

+ + +

re-arranged data or data reference (relop).

+
+ +
+

Examples

+

+d <- wrapr::build_frame(
+  "id", "measure", "value" |
+  1   , "AUC"    , 0.7     |
+  1   , "R2"     , 0.4     |
+  2   , "AUC"    , 0.8     |
+  2   , "R2"     , 0.5     )
+
+transform <- blocks_to_rowrecs_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+
+print(transform)
+#> {
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # becomes
+#> 
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+
+layout_by(transform, d)
+#>   id AUC  R2
+#> 1  1 0.7 0.4
+#> 2  2 0.8 0.5
+
+
+
+
- - - + + diff --git a/docs/reference/layout_by.cdata_general_transform_spec.html b/docs/reference/layout_by.cdata_general_transform_spec.html index 0b07703..3d58d97 100644 --- a/docs/reference/layout_by.cdata_general_transform_spec.html +++ b/docs/reference/layout_by.cdata_general_transform_spec.html @@ -1,67 +1,12 @@ - - - - - - - -Use transform spec to layout data. — layout_by.cdata_general_transform_spec • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Use transform spec to layout data. — layout_by.cdata_general_transform_spec • cdata - - + + - - -
-
- -
- -
+
@@ -147,51 +77,49 @@

Use transform spec to layout data.

Use transform spec to layout data.

-
# S3 method for cdata_general_transform_spec
-layout_by(transform, table)
+
+
# S3 method for cdata_general_transform_spec
+layout_by(transform, table)
+
+ +
+

Arguments

+
transform
+

object of class blocks_to_rowrecs_spec.

-

Arguments

- - - - - - - - - - -
transform

object of class blocks_to_rowrecs_spec.

table

data.frame or relop.

-

Value

+
table
+

data.frame or relop.

-

re-arranged data or data reference (relop).

+
+
+

Value

+ + +

re-arranged data or data reference (relop).

+
+
- - - + + diff --git a/docs/reference/layout_by.html b/docs/reference/layout_by.html index 29e61de..e4ad554 100644 --- a/docs/reference/layout_by.html +++ b/docs/reference/layout_by.html @@ -1,67 +1,12 @@ - - - - - - - -Use transform spec to layout data. — layout_by • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Use transform spec to layout data. — layout_by • cdata + + - - - - -
-
- -
- -
+
@@ -147,119 +77,123 @@

Use transform spec to layout data.

Use transform spec to layout data.

-
layout_by(transform, table)
+
+
layout_by(transform, table)
+
-

Arguments

- - - - - - - - - - -
transform

object of class rowrecs_to_blocks_spec

table

data.frame or relop.

+
+

Arguments

+
transform
+

object of class rowrecs_to_blocks_spec

-

Value

-

re-arranged data or data reference (relop).

+
table
+

data.frame or relop.

-

Examples

-
-d <- wrapr::build_frame( - "id" , "AUC", "R2" | - 1 , 0.7 , 0.4 | - 2 , 0.8 , 0.5 ) -transform <- rowrecs_to_blocks_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") -print(transform) -
#> { -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # becomes -#> -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
layout_by(transform, d) -
#> id measure value -#> 1 1 AUC 0.7 -#> 2 1 R2 0.4 -#> 3 2 AUC 0.8 -#> 4 2 R2 0.5
-d <- wrapr::build_frame( - "id", "measure", "value" | - 1 , "AUC" , 0.7 | - 1 , "R2" , 0.4 | - 2 , "AUC" , 0.8 | - 2 , "R2" , 0.5 ) -transform <- blocks_to_rowrecs_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") -print(transform) -
#> { -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # becomes -#> -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
layout_by(transform, d) -
#> id AUC R2 -#> 1 1 0.7 0.4 -#> 2 2 0.8 0.5
-
+
+
+

Value

+ + +

re-arranged data or data reference (relop).

+
+ +
+

Examples

+

+d <- wrapr::build_frame(
+  "id"  , "AUC", "R2" |
+    1   , 0.7  , 0.4  |
+    2   , 0.8  , 0.5  )
+transform <- rowrecs_to_blocks_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+print(transform)
+#> {
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # becomes
+#> 
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+layout_by(transform, d)
+#>   id measure value
+#> 1  1     AUC   0.7
+#> 2  1      R2   0.4
+#> 3  2     AUC   0.8
+#> 4  2      R2   0.5
+
+d <- wrapr::build_frame(
+  "id", "measure", "value" |
+  1   , "AUC"    , 0.7     |
+  1   , "R2"     , 0.4     |
+  2   , "AUC"    , 0.8     |
+  2   , "R2"     , 0.5     )
+transform <- blocks_to_rowrecs_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+print(transform)
+#> {
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # becomes
+#> 
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+layout_by(transform, d)
+#>   id AUC  R2
+#> 1  1 0.7 0.4
+#> 2  2 0.8 0.5
+
+
+
+
- - - + + diff --git a/docs/reference/layout_by.rowrecs_to_blocks_spec.html b/docs/reference/layout_by.rowrecs_to_blocks_spec.html index 2e6e373..aab5e98 100644 --- a/docs/reference/layout_by.rowrecs_to_blocks_spec.html +++ b/docs/reference/layout_by.rowrecs_to_blocks_spec.html @@ -1,67 +1,12 @@ - - - - - - - -Use transform spec to layout data. — layout_by.rowrecs_to_blocks_spec • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Use transform spec to layout data. — layout_by.rowrecs_to_blocks_spec • cdata - - + + - - -
-
- -
- -
+
@@ -147,89 +77,91 @@

Use transform spec to layout data.

Use transform spec to layout data.

-
# S3 method for rowrecs_to_blocks_spec
-layout_by(transform, table)
- -

Arguments

- - - - - - - - - - -
transform

object of class rowrecs_to_blocks_spec

table

data.frame or relop.

- -

Value

- -

re-arranged data or data reference (relop).

- -

Examples

-
-d <- wrapr::build_frame( - "id" , "AUC", "R2" | - 1 , 0.7 , 0.4 | - 2 , 0.8 , 0.5 ) - -transform <- rowrecs_to_blocks_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") - -print(transform) -
#> { -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # becomes -#> -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
layout_by(transform, d) -
#> id measure value -#> 1 1 AUC 0.7 -#> 2 1 R2 0.4 -#> 3 2 AUC 0.8 -#> 4 2 R2 0.5
-
+
+
# S3 method for rowrecs_to_blocks_spec
+layout_by(transform, table)
+
+ +
+

Arguments

+
transform
+

object of class rowrecs_to_blocks_spec

+ + +
table
+

data.frame or relop.

+ +
+
+

Value

+ + +

re-arranged data or data reference (relop).

+
+ +
+

Examples

+

+d <- wrapr::build_frame(
+  "id"  , "AUC", "R2" |
+    1   , 0.7  , 0.4  |
+    2   , 0.8  , 0.5  )
+
+transform <- rowrecs_to_blocks_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+
+print(transform)
+#> {
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # becomes
+#> 
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+layout_by(transform, d)
+#>   id measure value
+#> 1  1     AUC   0.7
+#> 2  1      R2   0.4
+#> 3  2     AUC   0.8
+#> 4  2      R2   0.5
+
+
+
+
- - - + + diff --git a/docs/reference/layout_specification.html b/docs/reference/layout_specification.html index c76e97a..be5f6bb 100644 --- a/docs/reference/layout_specification.html +++ b/docs/reference/layout_specification.html @@ -1,67 +1,12 @@ - - - - - - - -Create a record to record spec. — layout_specification • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a record to record spec. — layout_specification • cdata + + - - - - -
-
- -
- -
+
@@ -147,174 +77,178 @@

Create a record to record spec.

Create a general record to record transform specification.

-
layout_specification(
-  incoming_shape = NULL,
-  outgoing_shape = NULL,
-  ...,
-  recordKeys = character(0),
-  incoming_controlTableKeys = colnames(incoming_shape)[[1]],
-  outgoing_controlTableKeys = colnames(outgoing_shape)[[1]],
-  checkNames = TRUE,
-  checkKeys = TRUE,
-  strict = FALSE,
-  allow_rqdatatable_in = FALSE,
-  allow_rqdatatable_out = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
incoming_shape

data.frame, definition of incoming record shape.

outgoing_shape

data.frame, definition of outgoing record shape.

...

not used, force later arguments to bind by name.

recordKeys

vector of columns identifying records.

incoming_controlTableKeys

character, which column names of the incoming control table are considered to be keys.

outgoing_controlTableKeys

character, which column names of the outgoing control table are considered to be keys.

checkNames

passed to rowrecs_to_blocks.

checkKeys

passed to rowrecs_to_blocks.

strict

passed to rowrecs_to_blocks.

allow_rqdatatable_in

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

allow_rqdatatable_out

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

a record specification object

- -

Examples

-
- -incoming_shape <- qchar_frame( - "row", "col1", "col2", "col3" | - "row1", v11, v12, v13 | - "row2", v21, v22, v23 | - "row3", v31, v32, v33 ) - - -outgoing_shape <- qchar_frame( - "column", "row1", "row2", "row3" | - "col1", v11, v21 , v31 | - "col2", v12, v22 , v32 | - "col3", v13, v23 , v33 ) - -data <- build_frame( - 'record_id', 'row', 'col1', 'col2', 'col3' | - 1, 'row1', 1, 2, 3 | - 1, 'row2', 4, 5, 6 | - 1, 'row3', 7, 8, 9 | - 2, 'row1', 11, 12, 13 | - 2, 'row2', 14, 15, 16 | - 2, 'row3', 17, 18, 19 ) - -print(data) -
#> record_id row col1 col2 col3 -#> 1 1 row1 1 2 3 -#> 2 1 row2 4 5 6 -#> 3 1 row3 7 8 9 -#> 4 2 row1 11 12 13 -#> 5 2 row2 14 15 16 -#> 6 2 row3 17 18 19
-layout <- layout_specification( - incoming_shape = incoming_shape, - outgoing_shape = outgoing_shape, - recordKeys = 'record_id') - -print(layout) -
#> { -#> in_record <- wrapr::qchar_frame( -#> "record_id" , "row" , "col1", "col2", "col3" | -#> . , "row1", v11 , v12 , v13 | -#> . , "row2", v21 , v22 , v23 | -#> . , "row3", v31 , v32 , v33 ) -#> in_keys <- c('record_id', 'row') -#> -#> # becomes -#> -#> out_record <- wrapr::qchar_frame( -#> "record_id" , "column", "row1", "row2", "row3" | -#> . , "col1" , v11 , v21 , v31 | -#> . , "col2" , v12 , v22 , v32 | -#> . , "col3" , v13 , v23 , v33 ) -#> out_keys <- c('record_id', 'column') -#> -#> # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
-data %.>% layout -
#> record_id column row1 row2 row3 -#> 1 1 col1 1 4 7 -#> 2 1 col2 2 5 8 -#> 3 1 col3 3 6 9 -#> 4 2 col1 11 14 17 -#> 5 2 col2 12 15 18 -#> 6 2 col3 13 16 19
-data %.>% layout %.>% .(t(layout)) -
#> record_id row col1 col2 col3 -#> 1 1 row1 1 2 3 -#> 2 1 row2 4 5 6 -#> 3 1 row3 7 8 9 -#> 4 2 row1 11 12 13 -#> 5 2 row2 14 15 16 -#> 6 2 row3 17 18 19
-
+
+
layout_specification(
+  incoming_shape = NULL,
+  outgoing_shape = NULL,
+  ...,
+  recordKeys = character(0),
+  incoming_controlTableKeys = colnames(incoming_shape)[[1]],
+  outgoing_controlTableKeys = colnames(outgoing_shape)[[1]],
+  checkNames = TRUE,
+  checkKeys = TRUE,
+  strict = FALSE,
+  allow_rqdatatable_in = FALSE,
+  allow_rqdatatable_out = FALSE
+)
+
+ +
+

Arguments

+
incoming_shape
+

data.frame, definition of incoming record shape.

+ + +
outgoing_shape
+

data.frame, definition of outgoing record shape.

+ + +
...
+

not used, force later arguments to bind by name.

+ + +
recordKeys
+

vector of columns identifying records.

+ + +
incoming_controlTableKeys
+

character, which column names of the incoming control table are considered to be keys.

+ + +
outgoing_controlTableKeys
+

character, which column names of the outgoing control table are considered to be keys.

+ + +
checkNames
+

passed to rowrecs_to_blocks.

+ + +
checkKeys
+

passed to rowrecs_to_blocks.

+ + +
strict
+

passed to rowrecs_to_blocks.

+ + +
allow_rqdatatable_in
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ + +
allow_rqdatatable_out
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

a record specification object

+
+ +
+

Examples

+

+
+incoming_shape <- qchar_frame(
+  "row",  "col1", "col2", "col3" |
+  "row1",   v11,     v12,  v13   |
+  "row2",   v21,     v22,  v23   |
+  "row3",   v31,     v32,  v33   )
+
+
+outgoing_shape <- qchar_frame(
+  "column", "row1", "row2", "row3" |
+  "col1",      v11,  v21  ,  v31   |
+  "col2",      v12,  v22  ,  v32   |
+  "col3",      v13,  v23  ,  v33   )
+
+data <- build_frame(
+  'record_id', 'row',  'col1', 'col2', 'col3'  |
+  1,           'row1',  1,      2,      3      |
+  1,           'row2',  4,      5,      6      |
+  1,           'row3',  7,      8,      9      |
+  2,           'row1',  11,     12,     13     |
+  2,           'row2',  14,     15,     16     |
+  2,           'row3',  17,     18,     19     )
+
+print(data)
+#>   record_id  row col1 col2 col3
+#> 1         1 row1    1    2    3
+#> 2         1 row2    4    5    6
+#> 3         1 row3    7    8    9
+#> 4         2 row1   11   12   13
+#> 5         2 row2   14   15   16
+#> 6         2 row3   17   18   19
+
+layout <- layout_specification(
+  incoming_shape = incoming_shape,
+  outgoing_shape = outgoing_shape,
+  recordKeys = 'record_id')
+
+print(layout)
+#> {
+#>  in_record <- wrapr::qchar_frame(
+#>    "record_id"  , "row" , "col1", "col2", "col3" |
+#>      .          , "row1", v11   , v12   , v13    |
+#>      .          , "row2", v21   , v22   , v23    |
+#>      .          , "row3", v31   , v32   , v33    )
+#>  in_keys <- c('record_id', 'row')
+#> 
+#>  # becomes
+#> 
+#>  out_record <- wrapr::qchar_frame(
+#>    "record_id"  , "column", "row1", "row2", "row3" |
+#>      .          , "col1"  , v11   , v21   , v31    |
+#>      .          , "col2"  , v12   , v22   , v32    |
+#>      .          , "col3"  , v13   , v23   , v33    )
+#>  out_keys <- c('record_id', 'column')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = TRUE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+
+data %.>% layout
+#>   record_id column row1 row2 row3
+#> 1         1   col1    1    4    7
+#> 2         1   col2    2    5    8
+#> 3         1   col3    3    6    9
+#> 4         2   col1   11   14   17
+#> 5         2   col2   12   15   18
+#> 6         2   col3   13   16   19
+
+data %.>% layout %.>% .(t(layout))
+#>   record_id  row col1 col2 col3
+#> 1         1 row1    1    2    3
+#> 2         1 row2    4    5    6
+#> 3         1 row3    7    8    9
+#> 4         2 row1   11   12   13
+#> 5         2 row2   14   15   16
+#> 6         2 row3   17   18   19
+
+
+
+
- - - + + diff --git a/docs/reference/map_fields.html b/docs/reference/map_fields.html index 4732b77..e907095 100644 --- a/docs/reference/map_fields.html +++ b/docs/reference/map_fields.html @@ -1,67 +1,12 @@ - - - - - - - -Map field values from one column into new derived columns (takes a data.frame). — map_fields • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map field values from one column into new derived columns (takes a data.frame). — map_fields • cdata - - - - + + -
-
- -
- -
+
@@ -147,74 +77,75 @@

Map field values from one column into new derived columns (takes a dat

Map field values from one column into new derived columns (takes a data.frame).

-
map_fields(d, cname, m)
- -

Arguments

- - - - - - - - - - - - - - -
d

data.frame to re-map.

cname

name of column to re-map.

m

data.frame describing the mapping (cname column is source, derived columns are destinations).

- -

Value

- -

re-mapped table

- -

Examples

-
-d <- data.frame(what = c("acc", "loss", - "val_acc", "val_loss"), - score = c(0.8, 1.2, - 0.7, 1.7), - stringsAsFactors = FALSE) -m <- data.frame(what = c("acc", "loss", - "val_acc", "val_loss"), - measure = c("accuracy", "log-loss", - "accuracy", "log-loss"), - dataset = c("train", "train", "validation", "validation"), - stringsAsFactors = FALSE) -map_fields(d, 'what', m) -
#> what score measure dataset -#> 1 acc 0.8 accuracy train -#> 2 loss 1.2 log-loss train -#> 3 val_acc 0.7 accuracy validation -#> 4 val_loss 1.7 log-loss validation
-
+
+
map_fields(d, cname, m)
+
+ +
+

Arguments

+
d
+

data.frame to re-map.

+ + +
cname
+

name of column to re-map.

+ + +
m
+

data.frame describing the mapping (cname column is source, derived columns are destinations).

+ +
+
+

Value

+ + +

re-mapped table

+
+ +
+

Examples

+

+d <- data.frame(what = c("acc", "loss",
+                         "val_acc", "val_loss"),
+                score = c(0.8, 1.2,
+                       0.7, 1.7),
+                stringsAsFactors = FALSE)
+m <- data.frame(what = c("acc", "loss",
+                         "val_acc", "val_loss"),
+                measure = c("accuracy", "log-loss",
+                            "accuracy", "log-loss"),
+                dataset = c("train", "train", "validation", "validation"),
+                stringsAsFactors = FALSE)
+map_fields(d, 'what', m)
+#>       what score  measure    dataset
+#> 1      acc   0.8 accuracy      train
+#> 2     loss   1.2 log-loss      train
+#> 3  val_acc   0.7 accuracy validation
+#> 4 val_loss   1.7 log-loss validation
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/map_fields_q.html b/docs/reference/map_fields_q.html index 6232abb..3b49c68 100644 --- a/docs/reference/map_fields_q.html +++ b/docs/reference/map_fields_q.html @@ -1,67 +1,12 @@ - - - - - - - -Map field values from one column into new derived columns (query based, takes name of table). — map_fields_q • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map field values from one column into new derived columns (query based, takes name of table). — map_fields_q • cdata - - + + - - -
-
- -
- -
+
@@ -147,123 +77,124 @@

Map field values from one column into new derived columns (query based, take

Map field values from one column into new derived columns (query based, takes name of table).

-
map_fields_q(
-  dname,
-  cname,
-  mname,
-  my_db,
-  rname,
-  ...,
-  d_qualifiers = NULL,
-  m_qualifiers = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dname

name of table to re-map.

cname

name of column to re-map.

mname

name of table of data describing the mapping (cname column is source, derived columns are destinations).

my_db

database handle.

rname

name of result table.

...

force later arguments to be by name.

d_qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

m_qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

- -

Value

- -

re-mapped table

- -

Examples

-
-if (requireNamespace("DBI", quietly = TRUE) && - requireNamespace("RSQLite", quietly = TRUE)) { - my_db <- DBI::dbConnect(RSQLite::SQLite(), - ":memory:") - DBI::dbWriteTable( - my_db, - 'd', - data.frame(what = c("acc", "loss", - "val_acc", "val_loss"), - score = c(0.8, 1.2, - 0.7, 1.7), - stringsAsFactors = FALSE), - overwrite = TRUE, - temporary = TRUE) - DBI::dbWriteTable( - my_db, - 'm', - data.frame(what = c("acc", "loss", - "val_acc", "val_loss"), - measure = c("accuracy", "log-loss", - "accuracy", "log-loss"), - dataset = c("train", "train", "validation", "validation"), - stringsAsFactors = FALSE), - overwrite = TRUE, - temporary = TRUE) - - map_fields_q('d', 'what', 'm', my_db, "dm") - cdata::qlook(my_db, 'dm') - DBI::dbDisconnect(my_db) -} -
#> table `dm` SQLiteConnection -#> nrow: 4 -#> 'data.frame': 4 obs. of 4 variables: -#> $ what : chr "acc" "loss" "val_acc" "val_loss" -#> $ score : num 0.8 1.2 0.7 1.7 -#> $ measure: chr "accuracy" "log-loss" "accuracy" "log-loss" -#> $ dataset: chr "train" "train" "validation" "validation"
-
+
+
map_fields_q(
+  dname,
+  cname,
+  mname,
+  my_db,
+  rname,
+  ...,
+  d_qualifiers = NULL,
+  m_qualifiers = NULL
+)
+
+ +
+

Arguments

+
dname
+

name of table to re-map.

+ + +
cname
+

name of column to re-map.

+ + +
mname
+

name of table of data describing the mapping (cname column is source, derived columns are destinations).

+ + +
my_db
+

database handle.

+ + +
rname
+

name of result table.

+ + +
...
+

force later arguments to be by name.

+ + +
d_qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ + +
m_qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ +
+
+

Value

+ + +

re-mapped table

+
+ +
+

Examples

+

+if (requireNamespace("DBI", quietly = TRUE) &&
+  requireNamespace("RSQLite", quietly = TRUE)) {
+  my_db <- DBI::dbConnect(RSQLite::SQLite(),
+                          ":memory:")
+  DBI::dbWriteTable(
+    my_db,
+    'd',
+    data.frame(what = c("acc", "loss",
+                        "val_acc", "val_loss"),
+               score = c(0.8, 1.2,
+                         0.7, 1.7),
+               stringsAsFactors = FALSE),
+    overwrite = TRUE,
+    temporary = TRUE)
+  DBI::dbWriteTable(
+    my_db,
+    'm',
+    data.frame(what = c("acc", "loss",
+                        "val_acc", "val_loss"),
+               measure = c("accuracy", "log-loss",
+                           "accuracy", "log-loss"),
+               dataset = c("train", "train", "validation", "validation"),
+               stringsAsFactors = FALSE),
+    overwrite = TRUE,
+    temporary = TRUE)
+
+  map_fields_q('d', 'what', 'm', my_db, "dm")
+  cdata::qlook(my_db, 'dm')
+  DBI::dbDisconnect(my_db)
+}
+#> table `dm` SQLiteConnection 
+#>  nrow: 4 
+#> 'data.frame':	4 obs. of  4 variables:
+#>  $ what   : chr  "acc" "loss" "val_acc" "val_loss"
+#>  $ score  : num  0.8 1.2 0.7 1.7
+#>  $ measure: chr  "accuracy" "log-loss" "accuracy" "log-loss"
+#>  $ dataset: chr  "train" "train" "validation" "validation"
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/pivot_to_rowrecs.html b/docs/reference/pivot_to_rowrecs.html index a98c7c1..e3465a8 100644 --- a/docs/reference/pivot_to_rowrecs.html +++ b/docs/reference/pivot_to_rowrecs.html @@ -1,69 +1,14 @@ - - - - - - - -Map data records from block records that have one row per measurement value to row records. — pivot_to_rowrecs • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map data records from block records that have one row per measurement value to row records. — pivot_to_rowrecs • cdata - - - - - - - - - - - - - + + -
-
- -
- -
+
@@ -151,120 +81,122 @@

Map data records from block records that have one row per measurement value determine which sets of rows build up records and are copied into the result.

-
pivot_to_rowrecs(
-  data,
-  columnToTakeKeysFrom,
-  columnToTakeValuesFrom,
-  rowKeyColumns,
-  ...,
-  sep = NULL,
-  checkNames = TRUE,
-  checkKeys = TRUE,
-  strict = FALSE,
-  allow_rqdatatable = FALSE
-)
-
-layout_to_rowrecs(
-  data,
-  columnToTakeKeysFrom,
-  columnToTakeValuesFrom,
-  rowKeyColumns,
-  ...,
-  sep = NULL,
-  checkNames = TRUE,
-  checkKeys = TRUE,
-  strict = FALSE,
-  allow_rqdatatable = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
data

data.frame to work with (must be local, for remote please try moveValuesToColumns*).

columnToTakeKeysFrom

character name of column build new column names from.

columnToTakeValuesFrom

character name of column to get values from.

rowKeyColumns

character array names columns that should be table keys.

...

force later arguments to bind by name.

sep

character if not null build more detailed column names.

checkNames

logical, if TRUE check names.

checkKeys

logical, if TRUE check keyColumns uniquely identify blocks (required).

strict

logical, if TRUE check control table name forms

allow_rqdatatable

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

new data.frame with values moved to columns.

-

See also

- - - -

Examples

-
- d <- data.frame(model_id = c("m1", "m1"), meas = c('AUC', 'R2'), val= c(0.6, 0.2)) - pivot_to_rowrecs(d, - columnToTakeKeysFrom= 'meas', - columnToTakeValuesFrom= 'val', - rowKeyColumns= "model_id") %.>% - print(.) -
#> model_id AUC R2 -#> 1 m1 0.6 0.2
-
+
+
pivot_to_rowrecs(
+  data,
+  columnToTakeKeysFrom,
+  columnToTakeValuesFrom,
+  rowKeyColumns,
+  ...,
+  sep = NULL,
+  checkNames = TRUE,
+  checkKeys = TRUE,
+  strict = FALSE,
+  allow_rqdatatable = FALSE
+)
+
+layout_to_rowrecs(
+  data,
+  columnToTakeKeysFrom,
+  columnToTakeValuesFrom,
+  rowKeyColumns,
+  ...,
+  sep = NULL,
+  checkNames = TRUE,
+  checkKeys = TRUE,
+  strict = FALSE,
+  allow_rqdatatable = FALSE
+)
+
+ +
+

Arguments

+
data
+

data.frame to work with (must be local, for remote please try moveValuesToColumns*).

+ + +
columnToTakeKeysFrom
+

character name of column to build new column names from.

+ + +
columnToTakeValuesFrom
+

character name of column to get values from.

+ + +
rowKeyColumns
+

character array naming the columns that should be table keys.

+ + +
...
+

force later arguments to bind by name.

+ + +
sep
+

character if not null build more detailed column names.

+ + +
checkNames
+

logical, if TRUE check names.

+ + +
checkKeys
+

logical, if TRUE check keyColumns uniquely identify blocks (required).

+ + +
strict
+

logical, if TRUE check control table name forms

+ + +
allow_rqdatatable
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

new data.frame with values moved to columns.

+
+ + +
+

Examples

+

+  d <- data.frame(model_id = c("m1", "m1"), meas = c('AUC', 'R2'), val= c(0.6, 0.2))
+  pivot_to_rowrecs(d,
+                   columnToTakeKeysFrom= 'meas',
+                   columnToTakeValuesFrom= 'val',
+                   rowKeyColumns= "model_id") %.>%
+     print(.)
+#>   model_id AUC  R2
+#> 1       m1 0.6 0.2
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/qlook.html b/docs/reference/qlook.html index ee17a56..37fd150 100644 --- a/docs/reference/qlook.html +++ b/docs/reference/qlook.html @@ -1,67 +1,12 @@ - - - - - - - -Quick look at remote data — qlook • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Quick look at remote data — qlook • cdata + + - - - - -
-
- -
- -
+
@@ -147,92 +77,93 @@

Quick look at remote data

Quick look at remote data

-
qlook(
-  my_db,
-  tableName,
-  ...,
-  displayRows = 10,
-  countRows = TRUE,
-  qualifiers = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
my_db

database handle

tableName

name of table to look at

...

force later arguments to be by name.

displayRows

number of rows to sample

countRows

logical, if TRUE return row count.

qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

- -

Value

- -

str view of data

- -

Examples

-
-if ( requireNamespace("DBI", quietly = TRUE) && - requireNamespace("RSQLite", quietly = TRUE)) { - my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - rquery::rq_copy_to(my_db, - 'd', - data.frame(AUC = 0.6, R2 = 0.2), - overwrite = TRUE, - temporary = TRUE) - qlook(my_db, 'd') - DBI::dbDisconnect(my_db) -} -
#> table `d` SQLiteConnection -#> nrow: 1 -#> 'data.frame': 1 obs. of 2 variables: -#> $ AUC: num 0.6 -#> $ R2 : num 0.2
-
+
+
qlook(
+  my_db,
+  tableName,
+  ...,
+  displayRows = 10,
+  countRows = TRUE,
+  qualifiers = NULL
+)
+
+ +
+

Arguments

+
my_db
+

database handle

+ + +
tableName
+

name of table to look at

+ + +
...
+

force later arguments to be by name.

+ + +
displayRows
+

number of rows to sample

+ + +
countRows
+

logical, if TRUE return row count.

+ + +
qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ +
+
+

Value

+ + +

str view of data

+
+ +
+

Examples

+

+if ( requireNamespace("DBI", quietly = TRUE) &&
+  requireNamespace("RSQLite", quietly = TRUE)) {
+  my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  rquery::rq_copy_to(my_db,
+                    'd',
+                    data.frame(AUC = 0.6, R2 = 0.2),
+                    overwrite = TRUE,
+                    temporary = TRUE)
+  qlook(my_db, 'd')
+  DBI::dbDisconnect(my_db)
+}
+#> table `d` SQLiteConnection 
+#>  nrow: 1 
+#> 'data.frame':	1 obs. of  2 variables:
+#>  $ AUC: num 0.6
+#>  $ R2 : num 0.2
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/rowrecs_to_blocks.html b/docs/reference/rowrecs_to_blocks.html index 5d8a734..17c43d5 100644 --- a/docs/reference/rowrecs_to_blocks.html +++ b/docs/reference/rowrecs_to_blocks.html @@ -1,68 +1,13 @@ - - - - - - - -Map a data records from row records to block records. — rowrecs_to_blocks • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map a data records from row records to block records. — rowrecs_to_blocks • cdata + + - - - - -
-
- -
- -
+
@@ -149,104 +79,106 @@

Map a data records from row records to block records.

(records that may be more than one row).

-
rowrecs_to_blocks(
-  wideTable,
-  controlTable,
-  ...,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  columnsToCopy = NULL,
-  tmp_name_source = wrapr::mk_tmp_name_source("rrtbl"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
-
-# S3 method for default
-rowrecs_to_blocks(
-  wideTable,
-  controlTable,
-  ...,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  columnsToCopy = NULL,
-  tmp_name_source = wrapr::mk_tmp_name_source("rrtobd"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
-
-# S3 method for relop
-rowrecs_to_blocks(
-  wideTable,
-  controlTable,
-  ...,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  columnsToCopy = NULL,
-  tmp_name_source = wrapr::mk_tmp_name_source("rrtbl"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wideTable

data.frame containing data to be mapped (in-memory data.frame).

controlTable

table specifying mapping (local data frame).

...

force later arguments to be by name.

checkNames

logical, if TRUE check names.

checkKeys

logical, if TRUE check columnsToCopy form row keys (not a requirement, unless you want to be able to invert the operation).

strict

logical, if TRUE check control table name forms.

controlTableKeys

character, which column names of the control table are considered to be keys.

columnsToCopy

character array of column names to copy.

tmp_name_source

a tempNameGenerator from cdata::mk_tmp_name_source()

temporary

logical, if TRUE use temporary tables

allow_rqdatatable

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

long table built by mapping wideTable to one row per group

-

Details

+
+
rowrecs_to_blocks(
+  wideTable,
+  controlTable,
+  ...,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  columnsToCopy = NULL,
+  tmp_name_source = wrapr::mk_tmp_name_source("rrtbl"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+# S3 method for default
+rowrecs_to_blocks(
+  wideTable,
+  controlTable,
+  ...,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  columnsToCopy = NULL,
+  tmp_name_source = wrapr::mk_tmp_name_source("rrtobd"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+# S3 method for relop
+rowrecs_to_blocks(
+  wideTable,
+  controlTable,
+  ...,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  columnsToCopy = NULL,
+  tmp_name_source = wrapr::mk_tmp_name_source("rrtbl"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+ +
+

Arguments

+
wideTable
+

data.frame containing data to be mapped (in-memory data.frame).

+ + +
controlTable
+

table specifying mapping (local data frame).

+ + +
...
+

force later arguments to be by name.

+ + +
checkNames
+

logical, if TRUE check names.

+ + +
checkKeys
+

logical, if TRUE check columnsToCopy form row keys (not a requirement, unless you want to be able to invert the operation).

+ + +
strict
+

logical, if TRUE check control table name forms.

+ + +
controlTableKeys
+

character, which column names of the control table are considered to be keys.

+ +
columnsToCopy
+

character array of column names to copy.

+ + +
tmp_name_source
+

a tempNameGenerator from wrapr::mk_tmp_name_source()

+ + +
temporary
+

logical, if TRUE use temporary tables

+ + +
allow_rqdatatable
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

long table built by mapping wideTable to one row per group

+
+
+

Details

The controlTable defines the names of each data element in the two notations: the notation of the tall table (which is row oriented) and the notation of the wide table (which is column oriented). @@ -257,90 +189,94 @@

Details To get behavior similar to tidyr::gather/spread one builds the control table by running an appropriate query over the data.

Some discussion and examples can be found here: -https://winvector.github.io/FluidData/FluidData.html and -here https://github.com/WinVector/cdata.

+https://winvector.github.io/FluidData/FluidData.html and +here https://github.com/WinVector/cdata.

rowrecs_to_blocks.default will change some factor columns to character, and there are issues with time columns with different time zones.

-

See also

- - - -

Examples

-
- # un-pivot example - d <- data.frame(AUC = 0.6, R2 = 0.2) - cT <- build_unpivot_control(nameForNewKeyColumn= 'meas', - nameForNewValueColumn= 'val', - columnsToTakeFrom= c('AUC', 'R2')) - rowrecs_to_blocks(d, cT) -
#> meas val -#> 1 AUC 0.6 -#> 2 R2 0.2
- - -d <- data.frame(AUC = 0.6, R2 = 0.2) -cT <- build_unpivot_control( - nameForNewKeyColumn= 'meas', - nameForNewValueColumn= 'val', - columnsToTakeFrom= c('AUC', 'R2')) - -ops <- rquery::local_td(d) %.>% - rowrecs_to_blocks(., cT) -cat(format(ops)) -
#> mk_td("d", c( -#> "AUC", -#> "R2")) %.>% -#> non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT b."meas", CASE WHEN CAST(b."meas" AS VARCHAR) = 'AUC' THEN a."AUC" WHEN CAST(b."meas" AS VARCHAR) = 'R2' THEN a."R2" ELSE NULL END AS "val" FROM "IN" a CROSS JOIN "rrtbl_23845669547197155044_0000000002" b )
-if(requireNamespace("rqdatatable", quietly = TRUE)) { - library("rqdatatable") - d %.>% - ops %.>% - print(.) -} -
#> meas val -#> 1 AUC 0.6 -#> 2 R2 0.2
-if(requireNamespace("RSQLite", quietly = TRUE)) { - db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - DBI::dbWriteTable(db, - 'd', - d, - overwrite = TRUE, - temporary = TRUE) - db %.>% - ops %.>% - print(.) - DBI::dbDisconnect(db) -} -
#> meas val -#> 1 AUC 0.6 -#> 2 R2 0.2
-
+
+ + +
+

Examples

+

+  # un-pivot example
+  d <- data.frame(AUC = 0.6, R2 = 0.2)
+  cT <- build_unpivot_control(nameForNewKeyColumn= 'meas',
+                              nameForNewValueColumn= 'val',
+                              columnsToTakeFrom= c('AUC', 'R2'))
+  rowrecs_to_blocks(d, cT)
+#>   meas val
+#> 1  AUC 0.6
+#> 2   R2 0.2
+
+
+
+d <- data.frame(AUC = 0.6, R2 = 0.2)
+cT <- build_unpivot_control(
+  nameForNewKeyColumn= 'meas',
+  nameForNewValueColumn= 'val',
+  columnsToTakeFrom= c('AUC', 'R2'))
+
+ops <- rquery::local_td(d) %.>%
+  rowrecs_to_blocks(., cT)
+cat(format(ops))
+#> mk_td("d", c(
+#>   "AUC",
+#>   "R2")) %.>%
+#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT b."meas", CASE  WHEN CAST(b."meas" AS VARCHAR) = 'AUC' THEN a."AUC"  WHEN CAST(b."meas" AS VARCHAR) = 'R2' THEN a."R2" ELSE NULL END AS "val" FROM "IN" a CROSS JOIN "rrtbl_06058172961944270978_0000000002" b )
+
+if(requireNamespace("rqdatatable", quietly = TRUE)) {
+  library("rqdatatable")
+  d %.>%
+    ops %.>%
+    print(.)
+}
+#>   meas val
+#> 1  AUC 0.6
+#> 2   R2 0.2
+
+if(requireNamespace("RSQLite", quietly = TRUE)) {
+  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  DBI::dbWriteTable(db,
+                    'd',
+                    d,
+                    overwrite = TRUE,
+                    temporary = TRUE)
+  db %.>%
+    ops %.>%
+    print(.)
+  DBI::dbDisconnect(db)
+}
+#>   meas val
+#> 1  AUC 0.6
+#> 2   R2 0.2
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/rowrecs_to_blocks_q.html b/docs/reference/rowrecs_to_blocks_q.html index 1f37217..e12d5ec 100644 --- a/docs/reference/rowrecs_to_blocks_q.html +++ b/docs/reference/rowrecs_to_blocks_q.html @@ -1,68 +1,13 @@ - - - - - - - -Map a set of columns to rows (query based, take name of table). — rowrecs_to_blocks_q • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map a set of columns to rows (query based, take name of table). — rowrecs_to_blocks_q • cdata + + - - - - -
-
- -
- -
+
@@ -149,111 +79,113 @@

Map a set of columns to rows (query based, take name of table).

and controlTable.

-
rowrecs_to_blocks_q(
-  wideTable,
-  controlTable,
-  my_db,
-  ...,
-  columnsToCopy = NULL,
-  tempNameGenerator = mk_tmp_name_source("mvtrq"),
-  strict = FALSE,
-  controlTableKeys = colnames(controlTable)[[1]],
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  showQuery = FALSE,
-  defaultValue = NULL,
-  temporary = FALSE,
-  resultName = NULL,
-  incoming_qualifiers = NULL,
-  outgoing_qualifiers = NULL,
-  temp_qualifiers = NULL,
-  executeQuery = TRUE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wideTable

name of table containing data to be mapped (db/Spark data)

controlTable

table specifying mapping (local data frame)

my_db

db handle

...

force later arguments to be by name.

columnsToCopy

character array of column names to copy

tempNameGenerator

a tempNameGenerator from cdata::mk_tmp_name_source()

strict

logical, if TRUE check control table name forms

controlTableKeys

character, which column names of the control table are considered to be keys.

checkNames

logical, if TRUE check names

checkKeys

logical, if TRUE check wideTable keys

showQuery

if TRUE print query

defaultValue

if not NULL literal to use for non-match values.

temporary

logical, if TRUE make result temporary.

resultName

character, name for result table.

incoming_qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

outgoing_qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

temp_qualifiers

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

executeQuery

logical, if TRUE execute the query and return result.

- -

Value

- -

long table built by mapping wideTable to one row per group (or query if executeQuery is FALSE)

-

Details

+
+
rowrecs_to_blocks_q(
+  wideTable,
+  controlTable,
+  my_db,
+  ...,
+  columnsToCopy = NULL,
+  tempNameGenerator = mk_tmp_name_source("mvtrq"),
+  strict = FALSE,
+  controlTableKeys = colnames(controlTable)[[1]],
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  showQuery = FALSE,
+  defaultValue = NULL,
+  temporary = FALSE,
+  resultName = NULL,
+  incoming_qualifiers = NULL,
+  outgoing_qualifiers = NULL,
+  temp_qualifiers = NULL,
+  executeQuery = TRUE
+)
+
+ +
+

Arguments

+
wideTable
+

name of table containing data to be mapped (db/Spark data)

+ + +
controlTable
+

table specifying mapping (local data frame)

+ + +
my_db
+

db handle

+ +
...
+

force later arguments to be by name.

+ + +
columnsToCopy
+

character array of column names to copy

+ + +
tempNameGenerator
+

a tempNameGenerator from cdata::mk_tmp_name_source()

+ + +
strict
+

logical, if TRUE check control table name forms

+ + +
controlTableKeys
+

character, which column names of the control table are considered to be keys.

+ + +
checkNames
+

logical, if TRUE check names

+ + +
checkKeys
+

logical, if TRUE check wideTable keys

+ + +
showQuery
+

if TRUE print query

+ + +
defaultValue
+

if not NULL literal to use for non-match values.

+ + +
temporary
+

logical, if TRUE make result temporary.

+ + +
resultName
+

character, name for result table.

+ + +
incoming_qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ + +
outgoing_qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ + +
temp_qualifiers
+

optional named ordered vector of strings carrying additional db hierarchy terms, such as schema.

+ + +
executeQuery
+

logical, if TRUE execute the query and return result.

+ +
+
+

Value

+ + +

long table built by mapping wideTable to one row per group (or query if executeQuery is FALSE)

+
+
+

Details

This is using the theory of "fluid data"n -(https://github.com/WinVector/cdata), which includes the +(https://github.com/WinVector/cdata), which includes the principle that each data cell has coordinates independent of the storage details and storage detail dependent coordinates (usually row-id, column-id, and group-id) can be re-derived at will (the @@ -270,63 +202,64 @@

Details To get behavior similar to tidyr::gather/spread one builds the control table by running an appropriate query over the data.

Some discussion and examples can be found here: -https://winvector.github.io/FluidData/FluidData.html and -here https://github.com/WinVector/cdata.

-

See also

- - - -

Examples

-
-if (requireNamespace("DBI", quietly = TRUE) && - requireNamespace("RSQLite", quietly = TRUE)) { - my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - - # un-pivot example - d <- data.frame(AUC = 0.6, R2 = 0.2) - rquery::rq_copy_to(my_db, - 'd', - d, - overwrite = TRUE, - temporary = TRUE) - cT <- build_unpivot_control(nameForNewKeyColumn= 'meas', - nameForNewValueColumn= 'val', - columnsToTakeFrom= c('AUC', 'R2')) - tab <- rowrecs_to_blocks_q('d', cT, my_db = my_db) - qlook(my_db, tab) - DBI::dbDisconnect(my_db) -} -
#> table `mvtrq_88012909446674514404_0000000001` SQLiteConnection -#> nrow: 2 -#> 'data.frame': 2 obs. of 2 variables: -#> $ meas: chr "AUC" "R2" -#> $ val : num 0.6 0.2
-
+https://winvector.github.io/FluidData/FluidData.html and +here https://github.com/WinVector/cdata.

+
+ + +
+

Examples

+

+if (requireNamespace("DBI", quietly = TRUE) &&
+  requireNamespace("RSQLite", quietly = TRUE)) {
+  my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+
+  # un-pivot example
+  d <- data.frame(AUC = 0.6, R2 = 0.2)
+  rquery::rq_copy_to(my_db,
+                    'd',
+                    d,
+                    overwrite = TRUE,
+                    temporary = TRUE)
+  cT <- build_unpivot_control(nameForNewKeyColumn= 'meas',
+                              nameForNewValueColumn= 'val',
+                              columnsToTakeFrom= c('AUC', 'R2'))
+  tab <- rowrecs_to_blocks_q('d', cT, my_db = my_db)
+  qlook(my_db, tab)
+  DBI::dbDisconnect(my_db)
+}
+#> table `mvtrq_58589036583724552768_0000000001` SQLiteConnection 
+#>  nrow: 2 
+#> 'data.frame':	2 obs. of  2 variables:
+#>  $ meas: chr  "AUC" "R2"
+#>  $ val : num  0.6 0.2
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/rowrecs_to_blocks_spec.html b/docs/reference/rowrecs_to_blocks_spec.html index 93e50a3..4f1bffc 100644 --- a/docs/reference/rowrecs_to_blocks_spec.html +++ b/docs/reference/rowrecs_to_blocks_spec.html @@ -1,68 +1,13 @@ - - - - - - - -Create a row records to block records transform specification. — rowrecs_to_blocks_spec • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a row records to block records transform specification. — rowrecs_to_blocks_spec • cdata - - + + - - -
-
- -
- -
+
@@ -149,151 +79,156 @@

Create a row records to block records transform specification.

extra row keys, and control table keys.

-
rowrecs_to_blocks_spec(
-  controlTable,
-  ...,
-  recordKeys = character(0),
-  controlTableKeys = colnames(controlTable)[[1]],
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  allow_rqdatatable = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
controlTable

an all character data frame or cdata pivot control.

...

not used, force later arguments to bind by name.

recordKeys

vector of columns identifying records.

controlTableKeys

vector of keying columns of the controlTable.

checkNames

passed to rowrecs_to_blocks.

checkKeys

passed to rowrecs_to_blocks.

strict

passed to rowrecs_to_blocks.

allow_rqdatatable

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

a record specification object

- -

Examples

-
-d <- wrapr::build_frame( - "id" , "AUC", "R2" | - 1 , 0.7 , 0.4 | - 2 , 0.8 , 0.5 ) - -transform <- rowrecs_to_blocks_spec( - wrapr::qchar_frame( - "measure", "value" | - "AUC" , AUC | - "R2" , R2 ), - recordKeys = "id") - -print(transform) -
#> { -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # becomes -#> -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
-d %.>% transform -
#> id measure value -#> 1 1 AUC 0.7 -#> 2 1 R2 0.4 -#> 3 2 AUC 0.8 -#> 4 2 R2 0.5
-inv_transform <- t(transform) -print(inv_transform) -
#> { -#> block_record <- wrapr::qchar_frame( -#> "id" , "measure", "value" | -#> . , "AUC" , AUC | -#> . , "R2" , R2 ) -#> block_keys <- c('id', 'measure') -#> -#> # becomes -#> -#> row_record <- wrapr::qchar_frame( -#> "id" , "AUC", "R2" | -#> . , AUC , R2 ) -#> row_keys <- c('id') -#> -#> # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE) -#> } -#>
-# identity (in structure) -d %.>% transform %.>% inv_transform -
#> id AUC R2 -#> 1 1 0.7 0.4 -#> 2 2 0.8 0.5
-# identity again (using .() "immediate" notation) -d %.>% transform %.>% .(t(transform)) -
#> id AUC R2 -#> 1 1 0.7 0.4 -#> 2 2 0.8 0.5
-
+
+
rowrecs_to_blocks_spec(
+  controlTable,
+  ...,
+  recordKeys = character(0),
+  controlTableKeys = colnames(controlTable)[[1]],
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  allow_rqdatatable = FALSE
+)
+
+ +
+

Arguments

+
controlTable
+

an all character data frame or cdata pivot control.

+ + +
...
+

not used, force later arguments to bind by name.

+ + +
recordKeys
+

vector of columns identifying records.

+ + +
controlTableKeys
+

vector of keying columns of the controlTable.

+ + +
checkNames
+

passed to rowrecs_to_blocks.

+ + +
checkKeys
+

passed to rowrecs_to_blocks.

+ + +
strict
+

passed to rowrecs_to_blocks.

+ + +
allow_rqdatatable
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

a record specification object

+
+ +
+

Examples

+

+d <- wrapr::build_frame(
+  "id"  , "AUC", "R2" |
+    1   , 0.7  , 0.4  |
+    2   , 0.8  , 0.5  )
+
+transform <- rowrecs_to_blocks_spec(
+  wrapr::qchar_frame(
+    "measure", "value" |
+    "AUC"    , AUC     |
+    "R2"     , R2      ),
+  recordKeys = "id")
+
+print(transform)
+#> {
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # becomes
+#> 
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+
+d %.>% transform
+#>   id measure value
+#> 1  1     AUC   0.7
+#> 2  1      R2   0.4
+#> 3  2     AUC   0.8
+#> 4  2      R2   0.5
+
+inv_transform <- t(transform)
+print(inv_transform)
+#> {
+#>  block_record <- wrapr::qchar_frame(
+#>    "id"  , "measure", "value" |
+#>      .   , "AUC"    , AUC     |
+#>      .   , "R2"     , R2      )
+#>  block_keys <- c('id', 'measure')
+#> 
+#>  # becomes
+#> 
+#>  row_record <- wrapr::qchar_frame(
+#>    "id"  , "AUC", "R2" |
+#>      .   , AUC  , R2   )
+#>  row_keys <- c('id')
+#> 
+#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
+#> }
+#> 
+
+# identity (in structure)
+d %.>% transform %.>% inv_transform
+#>   id AUC  R2
+#> 1  1 0.7 0.4
+#> 2  2 0.8 0.5
+
+# identity again (using .() "immediate" notation)
+d %.>% transform %.>% .(t(transform))
+#>   id AUC  R2
+#> 1  1 0.7 0.4
+#> 2  2 0.8 0.5
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/rows_are_uniquely_keyed.html b/docs/reference/rows_are_uniquely_keyed.html index faac929..681f3a1 100644 --- a/docs/reference/rows_are_uniquely_keyed.html +++ b/docs/reference/rows_are_uniquely_keyed.html @@ -1,67 +1,12 @@ - - - - - - - -Check if table rows are uniquely keyed by keyset. — rows_are_uniquely_keyed • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Check if table rows are uniquely keyed by keyset. — rows_are_uniquely_keyed • cdata - - + + - - -
-
- -
- -
+
@@ -147,54 +77,52 @@

Check if table rows are uniquely keyed by keyset.

Return TRUE if table rows are uniquely keyed by key_columns.

-
rows_are_uniquely_keyed(table_rep, key_columns, db)
- -

Arguments

- - - - - - - - - - - - - - -
table_rep

rquery op_tree

key_columns

character vector names of key columns

db

database handle

- -

Value

- -

TRUE if table rows are uniquely keyed by key columns

+
+
rows_are_uniquely_keyed(table_rep, key_columns, db)
+
+ +
+

Arguments

+
table_rep
+

rquery op_tree

+ + +
key_columns
+

character vector names of key columns

+ + +
db
+

database handle

+ +
+
+

Value

+ + +

TRUE if table rows are uniquely keyed by key columns

+
+
-
- +
- - + + diff --git a/docs/reference/unpivot_to_blocks.html b/docs/reference/unpivot_to_blocks.html index ba0d685..a50a085 100644 --- a/docs/reference/unpivot_to_blocks.html +++ b/docs/reference/unpivot_to_blocks.html @@ -1,69 +1,14 @@ - - - - - - - -Map a data records from row records to block records with one record row per columnsToTakeFrom value. — unpivot_to_blocks • cdata - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Map a data records from row records to block records with one record row per columnsToTakeFrom value. — unpivot_to_blocks • cdata - - - - - - - - - - + + - - - -
-
- -
- -
+
@@ -151,215 +81,220 @@

Map data records from row records to block records, with one record row per columnsToTakeFrom value in the result.

-
unpivot_to_blocks(
-  data,
-  nameForNewKeyColumn,
-  nameForNewValueColumn,
-  columnsToTakeFrom,
-  ...,
-  nameForNewClassColumn = NULL,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
-
-layout_to_blocks(
-  data,
-  nameForNewKeyColumn,
-  nameForNewValueColumn,
-  columnsToTakeFrom,
-  ...,
-  nameForNewClassColumn = NULL,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
-
-pivot_to_blocks(
-  data,
-  nameForNewKeyColumn,
-  nameForNewValueColumn,
-  columnsToTakeFrom,
-  ...,
-  nameForNewClassColumn = NULL,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
-
-# S3 method for default
-unpivot_to_blocks(
-  data,
-  nameForNewKeyColumn,
-  nameForNewValueColumn,
-  columnsToTakeFrom,
-  ...,
-  nameForNewClassColumn = NULL,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  allow_rqdatatable = FALSE
-)
-
-# S3 method for relop
-unpivot_to_blocks(
-  data,
-  nameForNewKeyColumn,
-  nameForNewValueColumn,
-  columnsToTakeFrom,
-  ...,
-  checkNames = TRUE,
-  checkKeys = FALSE,
-  strict = FALSE,
-  nameForNewClassColumn = NULL,
-  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
-  temporary = TRUE,
-  allow_rqdatatable = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
data

data.frame to work with.

nameForNewKeyColumn

character name of column to write new keys in.

nameForNewValueColumn

character name of column to write new values in.

columnsToTakeFrom

character array names of columns to take values from.

...

force later arguments to bind by name.

nameForNewClassColumn

optional name to land original cell classes to.

checkNames

logical, if TRUE check names.

checkKeys

logical, if TRUE check columnsToCopy form row keys (not a requirement, unless you want to be able to invert the operation).

strict

logical, if TRUE check control table name forms.

tmp_name_source

a tempNameGenerator from cdata::mk_tmp_name_source()

temporary

logical, if TRUE make result temporary.

allow_rqdatatable

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

- -

Value

- -

new data.frame with values moved to rows.

-

See also

- - - -

Examples

-
- d <- data.frame(model_name = "m1", AUC = 0.6, R2 = 0.2) - unpivot_to_blocks(d, - nameForNewKeyColumn= 'meas', - nameForNewValueColumn= 'val', - columnsToTakeFrom= c('AUC', 'R2')) %.>% - print(.) -
#> model_name meas val -#> 1 m1 AUC 0.6 -#> 2 m1 R2 0.2
- -d <- data.frame(AUC= 0.6, R2= 0.2) -ops <- rquery::local_td(d) %.>% - unpivot_to_blocks( - ., - nameForNewKeyColumn= 'meas', - nameForNewValueColumn= 'val', - columnsToTakeFrom= c('AUC', 'R2')) -cat(format(ops)) -
#> mk_td("d", c( -#> "AUC", -#> "R2")) %.>% -#> non_sql_node(., unpivot_to_blocks(., nameForNewKeyColumn="meas , nameForNewValueColumn="val"))
-if(requireNamespace("rqdatatable", quietly = TRUE)) { - library("rqdatatable") - d %.>% - ops %.>% - print(.) -} -
#> meas val -#> 1 AUC 0.6 -#> 2 R2 0.2
-if(requireNamespace("RSQLite", quietly = TRUE)) { - db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - DBI::dbWriteTable(db, - 'd', - d, - overwrite = TRUE, - temporary = TRUE) - db %.>% - ops %.>% - print(.) - DBI::dbDisconnect(db) -} -
#> meas val -#> 1 AUC 0.6 -#> 2 R2 0.2
-
+
+
unpivot_to_blocks(
+  data,
+  nameForNewKeyColumn,
+  nameForNewValueColumn,
+  columnsToTakeFrom,
+  ...,
+  nameForNewClassColumn = NULL,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+layout_to_blocks(
+  data,
+  nameForNewKeyColumn,
+  nameForNewValueColumn,
+  columnsToTakeFrom,
+  ...,
+  nameForNewClassColumn = NULL,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+pivot_to_blocks(
+  data,
+  nameForNewKeyColumn,
+  nameForNewValueColumn,
+  columnsToTakeFrom,
+  ...,
+  nameForNewClassColumn = NULL,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+# S3 method for default
+unpivot_to_blocks(
+  data,
+  nameForNewKeyColumn,
+  nameForNewValueColumn,
+  columnsToTakeFrom,
+  ...,
+  nameForNewClassColumn = NULL,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  allow_rqdatatable = FALSE
+)
+
+# S3 method for relop
+unpivot_to_blocks(
+  data,
+  nameForNewKeyColumn,
+  nameForNewValueColumn,
+  columnsToTakeFrom,
+  ...,
+  checkNames = TRUE,
+  checkKeys = FALSE,
+  strict = FALSE,
+  nameForNewClassColumn = NULL,
+  tmp_name_source = wrapr::mk_tmp_name_source("upb"),
+  temporary = TRUE,
+  allow_rqdatatable = FALSE
+)
+
+ +
+

Arguments

+
data
+

data.frame to work with.

+ + +
nameForNewKeyColumn
+

character name of column to write new keys in.

+ + +
nameForNewValueColumn
+

character name of column to write new values in.

+ + +
columnsToTakeFrom
+

character array names of columns to take values from.

+ + +
...
+

force later arguments to bind by name.

+ + +
nameForNewClassColumn
+

optional name to land original cell classes to.

+ + +
checkNames
+

logical, if TRUE check names.

+ + +
checkKeys
+

logical, if TRUE check that columnsToCopy form row keys (not a requirement, unless you want to be able to invert the operation).

+ + +
strict
+

logical, if TRUE check control table name forms.

+ + +
tmp_name_source
+

a tempNameGenerator from wrapr::mk_tmp_name_source()

+ + +
temporary
+

logical, if TRUE make result temporary.

+ + +
allow_rqdatatable
+

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

+ +
+
+

Value

+ + +

new data.frame with values moved to rows.

+
+ + +
+

Examples

+

+  d <- data.frame(model_name = "m1", AUC = 0.6, R2 = 0.2)
+  unpivot_to_blocks(d,
+                    nameForNewKeyColumn= 'meas',
+                    nameForNewValueColumn= 'val',
+                    columnsToTakeFrom= c('AUC', 'R2')) %.>%
+     print(.)
+#>   model_name meas val
+#> 1         m1  AUC 0.6
+#> 2         m1   R2 0.2
+
+
+d <- data.frame(AUC= 0.6, R2= 0.2)
+ops <- rquery::local_td(d) %.>%
+  unpivot_to_blocks(
+    .,
+    nameForNewKeyColumn= 'meas',
+    nameForNewValueColumn= 'val',
+    columnsToTakeFrom= c('AUC', 'R2'))
+cat(format(ops))
+#> mk_td("d", c(
+#>   "AUC",
+#>   "R2")) %.>%
+#>  non_sql_node(., unpivot_to_blocks(., nameForNewKeyColumn="meas , nameForNewValueColumn="val"))
+
+if(requireNamespace("rqdatatable", quietly = TRUE)) {
+  library("rqdatatable")
+  d %.>%
+    ops %.>%
+    print(.)
+}
+#>   meas val
+#> 1  AUC 0.6
+#> 2   R2 0.2
+
+if(requireNamespace("RSQLite", quietly = TRUE)) {
+  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  DBI::dbWriteTable(db,
+                    'd',
+                    d,
+                    overwrite = TRUE,
+                    temporary = TRUE)
+  db %.>%
+    ops %.>%
+    print(.)
+  DBI::dbDisconnect(db)
+}
+#>   meas val
+#> 1  AUC 0.6
+#> 2   R2 0.2
+
+
+
+
-
- +
- - + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index c1efe64..b9fa065 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -1,111 +1,150 @@ - https://winvector.github.io/cdata//index.html + https://winvector.github.io/cdata/404.html - https://winvector.github.io/cdata//reference/blocks_to_rowrecs.html + https://winvector.github.io/cdata/LICENSE-text.html - https://winvector.github.io/cdata//reference/blocks_to_rowrecs_q.html + https://winvector.github.io/cdata/articles/blocksrecs.html - https://winvector.github.io/cdata//reference/blocks_to_rowrecs_spec.html + https://winvector.github.io/cdata/articles/cdata.html - https://winvector.github.io/cdata//reference/build_pivot_control.html + https://winvector.github.io/cdata/articles/control_table_keys.html - https://winvector.github.io/cdata//reference/build_pivot_control_q.html + https://winvector.github.io/cdata/articles/design.html - https://winvector.github.io/cdata//reference/build_unpivot_control.html + https://winvector.github.io/cdata/articles/exercises.html - https://winvector.github.io/cdata//reference/cdata.html + https://winvector.github.io/cdata/articles/general_transform.html - https://winvector.github.io/cdata//reference/check_cols_form_unique_keys.html + https://winvector.github.io/cdata/articles/index.html - https://winvector.github.io/cdata//reference/convert_cdata_spec_to_yaml.html + https://winvector.github.io/cdata/articles/operators.html - https://winvector.github.io/cdata//reference/convert_records.html + https://winvector.github.io/cdata/authors.html - https://winvector.github.io/cdata//reference/convert_yaml_to_cdata_spec.html + https://winvector.github.io/cdata/index.html - https://winvector.github.io/cdata//reference/get_transform_details.html + https://winvector.github.io/cdata/news/index.html - https://winvector.github.io/cdata//reference/grapes-slash-slash-grapes.html + https://winvector.github.io/cdata/reference/blocks_to_rowrecs.html - 
https://winvector.github.io/cdata//reference/grapes-times-times-grapes.html + https://winvector.github.io/cdata/reference/blocks_to_rowrecs_q.html - https://winvector.github.io/cdata//reference/layout_by.html + https://winvector.github.io/cdata/reference/blocks_to_rowrecs_spec.html - https://winvector.github.io/cdata//reference/layout_by.blocks_to_rowrecs_spec.html + https://winvector.github.io/cdata/reference/build_pivot_control.html - https://winvector.github.io/cdata//reference/layout_by.cdata_general_transform_spec.html + https://winvector.github.io/cdata/reference/build_pivot_control_q.html - https://winvector.github.io/cdata//reference/layout_by.rowrecs_to_blocks_spec.html + https://winvector.github.io/cdata/reference/build_unpivot_control.html - https://winvector.github.io/cdata//reference/layout_specification.html + https://winvector.github.io/cdata/reference/cdata-package.html - https://winvector.github.io/cdata//reference/map_fields.html + https://winvector.github.io/cdata/reference/cdata.html - https://winvector.github.io/cdata//reference/map_fields_q.html + https://winvector.github.io/cdata/reference/checkColsFormUniqueKeys.html - https://winvector.github.io/cdata//reference/pivot_to_rowrecs.html + https://winvector.github.io/cdata/reference/check_cols_form_unique_keys.html - https://winvector.github.io/cdata//reference/qlook.html + https://winvector.github.io/cdata/reference/check_equiv_frames.html - https://winvector.github.io/cdata//reference/rowrecs_to_blocks.html + https://winvector.github.io/cdata/reference/convert_cdata_spec_to_yaml.html - https://winvector.github.io/cdata//reference/rowrecs_to_blocks_q.html + https://winvector.github.io/cdata/reference/convert_records.html - https://winvector.github.io/cdata//reference/rowrecs_to_blocks_spec.html + https://winvector.github.io/cdata/reference/convert_yaml_to_cdata_spec.html - https://winvector.github.io/cdata//reference/rows_are_uniquely_keyed.html + 
https://winvector.github.io/cdata/reference/get_transform_details.html - https://winvector.github.io/cdata//reference/unpivot_to_blocks.html + https://winvector.github.io/cdata/reference/grapes-pivot-grapes.html - https://winvector.github.io/cdata//articles/blocksrecs.html + https://winvector.github.io/cdata/reference/grapes-slash-slash-grapes.html - https://winvector.github.io/cdata//articles/cdata.html + https://winvector.github.io/cdata/reference/grapes-times-times-grapes.html - https://winvector.github.io/cdata//articles/control_table_keys.html + https://winvector.github.io/cdata/reference/index.html - https://winvector.github.io/cdata//articles/design.html + https://winvector.github.io/cdata/reference/layout_by.blocks_to_rowrecs_spec.html - https://winvector.github.io/cdata//articles/exercises.html + https://winvector.github.io/cdata/reference/layout_by.cdata_general_transform_spec.html - https://winvector.github.io/cdata//articles/general_transform.html + https://winvector.github.io/cdata/reference/layout_by.html - https://winvector.github.io/cdata//articles/operators.html + https://winvector.github.io/cdata/reference/layout_by.rowrecs_to_blocks_spec.html + + + https://winvector.github.io/cdata/reference/layout_specification.html + + + https://winvector.github.io/cdata/reference/map_fields.html + + + https://winvector.github.io/cdata/reference/map_fields_q.html + + + https://winvector.github.io/cdata/reference/new_record_spec.html + + + https://winvector.github.io/cdata/reference/pivot_to_rowrecs.html + + + https://winvector.github.io/cdata/reference/qlook.html + + + https://winvector.github.io/cdata/reference/reexports.html + + + https://winvector.github.io/cdata/reference/rowrecs_to_blocks.html + + + https://winvector.github.io/cdata/reference/rowrecs_to_blocks_q.html + + + https://winvector.github.io/cdata/reference/rowrecs_to_blocks_spec.html + + + https://winvector.github.io/cdata/reference/rows_are_uniquely_keyed.html + + + 
https://winvector.github.io/cdata/reference/run_cdata_tests.html + + + https://winvector.github.io/cdata/reference/unpivot_to_blocks.html diff --git a/extras/check_reverse_dependencies.md b/extras/check_reverse_dependencies.md index a3fba60..c394766 100644 --- a/extras/check_reverse_dependencies.md +++ b/extras/check_reverse_dependencies.md @@ -1,4 +1,4 @@ -check\_reverse\_dependencies +check_reverse_dependencies ================ ``` r @@ -8,13 +8,13 @@ package = "cdata" packageVersion(package) ``` - ## [1] '1.2.0' + ## [1] '1.2.1' ``` r date() ``` - ## [1] "Fri Jun 11 16:07:35 2021" + ## [1] "Sat Aug 19 16:22:57 2023" ``` r parallelCluster <- NULL @@ -33,7 +33,7 @@ setwd(td) print(td) ``` - ## [1] "/var/folders/7f/sdjycp_d08n8wwytsbgwqgsw0000gn/T//Rtmp6BAJph" + ## [1] "/var/folders/7f/sdjycp_d08n8wwytsbgwqgsw0000gn/T//RtmpZWTbbQ" ``` r options(repos = c(CRAN="https://cloud.r-project.org")) @@ -58,9 +58,8 @@ if(!is.null(parallelCluster)) { } ``` - ## ## Reverse depends check of cdata 1.2.0 - ## rmoo_0.1.6 started at 2021-06-11 16:07:37 success at 2021-06-11 16:08:14 (1/0/0) - ## WVPlots_1.3.2 started at 2021-06-11 16:08:14 success at 2021-06-11 16:09:08 (2/0/0) + ## ## Reverse depends check of cdata 1.2.1 + ## WVPlots_1.3.5 started at 2023-08-19 16:22:58 success at 2023-08-19 16:23:56 (1/0/0) ## [1] id title status ## <0 rows> (or 0-length row.names) @@ -69,9 +68,9 @@ if(!is.null(parallelCluster)) { summariseQueue(package=package, directory=td) ``` - ## Test of cdata 1.2.0 had 2 successes, 0 failures, and 0 skipped packages. - ## Ran from 2021-06-11 16:07:37 to 2021-06-11 16:09:08 for 1.517 mins - ## Average of 45.5 secs relative to 45.378 secs using 1 runners + ## Test of cdata 1.2.1 had 1 successes, 0 failures, and 0 skipped packages. 
+ ## Ran from 2023-08-19 16:22:58 to 2023-08-19 16:23:56 for 58 secs + ## Average of 58 secs relative to 57.286 secs using 1 runners ## ## Failed packages: ## diff --git a/man/cdata.Rd b/man/cdata-package.Rd similarity index 66% rename from man/cdata.Rd rename to man/cdata-package.Rd index 7ab5269..80eb619 100644 --- a/man/cdata.Rd +++ b/man/cdata-package.Rd @@ -1,8 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/package.R \docType{package} -\name{cdata} +\name{cdata-package} \alias{cdata} +\alias{cdata-package} \title{\code{cdata}: Fluid Data Transformations.} \description{ Supplies implementations of higher order "fluid data" transforms. These @@ -16,3 +17,26 @@ A theory of fluid data transforms can be found in the following articles: \url{https://winvector.github.io/FluidData/FluidDataReshapingWithCdata.html}, \url{https://github.com/WinVector/cdata} and \url{https://winvector.github.io/FluidData/FluidData.html}. } +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/WinVector/cdata/} + \item \url{https://winvector.github.io/cdata/} + \item Report bugs at \url{https://github.com/WinVector/cdata/issues} +} + +} +\author{ +\strong{Maintainer}: John Mount \email{jmount@win-vector.com} + +Authors: +\itemize{ + \item Nina Zumel \email{nzumel@win-vector.com} +} + +Other contributors: +\itemize{ + \item Win-Vector LLC [copyright holder] +} + +} diff --git a/vignettes/exercises.Rmd b/vignettes/exercises.Rmd index dbbeb85..5c99c72 100644 --- a/vignettes/exercises.Rmd +++ b/vignettes/exercises.Rmd @@ -52,7 +52,7 @@ We will work some examples with the hope that practice brings familiarity. We ha (From: [tidyr:demo/dadmom.R](https://github.com/tidyverse/tidyr/blob/0d633f79e85a87a686b2f43de20e2ae74f5c122c/demo/dadmom.R).) 
-From [https://stats.idre.ucla.edu/stata/modules/reshaping-data-wide-to-long/](https://stats.idre.ucla.edu/stata/modules/reshaping-data-wide-to-long/) we can get get a copy of the data and the question or "transform ask": +From [https://stats.oarc.ucla.edu/stata/modules/reshaping-data-wide-to-long/](https://stats.oarc.ucla.edu/stata/modules/reshaping-data-wide-to-long/) we can get a copy of the data and the question or "transform ask": ```{r} # convert from this format