diff --git a/API b/API
index 98900f5f0..cab8f302a 100644
--- a/API
+++ b/API
@@ -4,6 +4,7 @@
create_style_guide(initialize = default_style_guide_attributes, line_break = NULL, space = NULL, token = NULL, indention = NULL, use_raw_indention = FALSE, reindention = tidyverse_reindention())
default_style_guide_attributes(pd_flat)
+equals_style()
specify_math_token_spacing(zero = "'^'", one = c("'+'", "'-'", "'*'", "'/'"))
specify_reindention(regex_pattern = NULL, indention = 0, comments_only = TRUE)
style_dir(path = ".", ..., style = tidyverse_style, transformers = style(...), filetype = "R", recursive = TRUE, exclude_files = NULL, include_roxygen_examples = TRUE)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f695de09f..91ad6ec67 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -178,3 +178,16 @@ indicates for every row in a parse table whether it contains an `else` token.
The use of closures is discouraged. We prefer to prefill a template function
with `purrr::partial()`.
+
+## Testing
+
+We have a testing framework powered by `test_collection()`.
+Essentially, there is an \*-in.R file and a \*-out.R file. The \*-in.R file is the
+input that is transformed and - if it matches the *-out.R file, the test has
+passed. You can create an \*-in.R file, run `devtools::test(f = "[your file]")`
+and an \*-out.R file is generated. If the file matches your expectation,
+you can commit it. Note that files are overwritten and version control should be
+used to track failed tests.
+The files are placed in `tests/testthat` under the category they fit.
+Please have a look at the documentation for `test_collection()` and see other
+unit tests. Let me know if there is anything unclear about this.
diff --git a/DESCRIPTION b/DESCRIPTION
index 172229f3f..fc76b250d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,34 +11,34 @@ Authors@R:
family = "Walthert",
role = c("cre", "aut"),
email = "lorenz.walthert@icloud.com"))
-Description: Pretty-prints R code without changing the
- user's formatting intent.
+Description: Pretty-prints R code without changing the user's
+ formatting intent.
License: GPL-3
URL: https://github.com/r-lib/styler
BugReports: https://github.com/r-lib/styler/issues
Imports:
- backports,
+ backports (>= 1.1.0),
cli,
- magrittr,
+ fs,
+ magrittr (>= 1.0.1),
purrr (>= 0.2.3),
rematch2,
rlang (>= 0.1.1),
- rprojroot,
+ rprojroot (>= 1.1),
tibble (>= 1.4.2),
tools,
withr,
xfun
Suggests:
- data.tree,
+ data.tree (>= 0.1.6),
dplyr,
here,
knitr,
prettycode,
rmarkdown,
- rstudioapi,
+ rstudioapi (>= 0.7),
testthat
-VignetteBuilder:
- knitr
+VignetteBuilder: knitr
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE, roclets = c("rd", "namespace",
diff --git a/NAMESPACE b/NAMESPACE
index a57c38209..03ee7d175 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
S3method(print,vertical)
export(create_style_guide)
export(default_style_guide_attributes)
+export(equals_style)
export(specify_math_token_spacing)
export(specify_reindention)
export(style_dir)
diff --git a/R/communicate.R b/R/communicate.R
index 350f6c312..d61689f4c 100644
--- a/R/communicate.R
+++ b/R/communicate.R
@@ -27,9 +27,10 @@ communicate_summary <- function(changed, ruler_width) {
cli::cat_rule(width = max(40, ruler_width))
}
-stop_insufficient_r_version <- function() {
- stop(paste0(
- "Can't write tree with R version ", getRversion(),
- "since data.tree not available. Needs at least R version 3.2."
- ), call. = FALSE)
+#' @importFrom rlang is_installed
+assert_data.tree_installation <- function() {
+ if (!is_installed("data.tree")) {
+ stop("The package data.tree needs to be installed for this functionality.")
+ }
}
+
diff --git a/R/environments.R b/R/environments.R
old mode 100755
new mode 100644
index 3964ad824..85968b59d
--- a/R/environments.R
+++ b/R/environments.R
@@ -16,7 +16,7 @@
#' * version 1: Before fix mentioned in #419.
#' * version 2: After #419.
#'
-#'The following utilities are available:
+#' The following utilities are available:
#'
#' * `parser_version_set()` sets the parser version in the environment
#' `env_current`.
diff --git a/R/io.R b/R/io.R
index 404fd09dc..8c2571873 100644
--- a/R/io.R
+++ b/R/io.R
@@ -20,7 +20,7 @@ transform_utf8_one <- function(path, fun, write_back = write_back) {
xfun::write_utf8(new, path)
}
!identical(unclass(old), unclass(new))
- }, error = function(e) {
+ }, error = function(e) {
warning("When processing ", path, ": ", conditionMessage(e), call. = FALSE)
NA
})
diff --git a/R/nested-to-tree.R b/R/nested-to-tree.R
index 2aa3516d9..48b7db664 100644
--- a/R/nested-to-tree.R
+++ b/R/nested-to-tree.R
@@ -14,7 +14,7 @@ create_tree <- function(text, structure_only = FALSE) {
create_tree_from_pd_with_default_style_attributes <- function(pd, structure_only = FALSE) {
pd %>%
- create_node_from_nested_root(structure_only) %>%
+ create_node_from_nested_root(structure_only) %>%
as.data.frame()
}
@@ -29,7 +29,7 @@ create_tree_from_pd_with_default_style_attributes <- function(pd, structure_only
#' to check whether two structures are identical.
#' @return An object of class "Node" and "R6".
#' @examples
-#' if (getRversion() >= 3.2) {
+#' if (rlang::is_installed("data.tree")) {
#' code <- "a <- function(x) { if(x > 1) { 1+1 } else {x} }"
#' nested_pd <- styler:::compute_parse_data_nested(code)
#' initialized <- styler:::pre_visit(nested_pd, c(default_style_guide_attributes))
@@ -37,7 +37,7 @@ create_tree_from_pd_with_default_style_attributes <- function(pd, structure_only
#' }
#' @keywords internal
create_node_from_nested_root <- function(pd_nested, structure_only) {
- if (getRversion() < 3.2) stop_insufficient_r_version()
+ assert_data.tree_installation()
n <- data.tree::Node$new(ifelse(
structure_only, "Hierarchical structure",
"ROOT (token: short_text [lag_newlines/spaces] {pos_id})"
diff --git a/R/parse.R b/R/parse.R
index d527f4e43..2b8e1d3b1 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -174,7 +174,7 @@ ensure_valid_pd <- function(pd) {
non_terminals <- pd %>%
filter(terminal == FALSE)
valid_pd <- non_terminals$id %>%
- map_lgl(~.x %in% pd$parent) %>%
+ map_lgl(~ .x %in% pd$parent) %>%
all()
if (!valid_pd) {
stop(paste(
diff --git a/R/roxygen-examples-add-remove.R b/R/roxygen-examples-add-remove.R
index 910cc59f5..addd32b04 100644
--- a/R/roxygen-examples-add-remove.R
+++ b/R/roxygen-examples-add-remove.R
@@ -36,5 +36,5 @@ remove_roxygen_header <- function(text) {
#' @importFrom purrr map_chr
add_roxygen_mask <- function(text) {
- c(paste0("#' @examples"), map_chr(text, ~paste0("#' ", .x)))
+ c(paste0("#' @examples"), map_chr(text, ~ paste0("#' ", .x)))
}
diff --git a/R/rules-other.R b/R/rules-other.R
index e24b266e5..a4e750086 100644
--- a/R/rules-other.R
+++ b/R/rules-other.R
@@ -33,7 +33,7 @@ wrap_if_else_multi_line_in_curly <- function(pd, indent_by = 2) {
pd <- pd %>%
wrap_if_multiline_curly(indent_by,
space_after = ifelse(contains_else_expr(pd), 1, 0)
- ) %>%
+ ) %>%
wrap_else_multiline_curly(indent_by, space_after = 0)
}
pd
diff --git a/R/set-assert-args.R b/R/set-assert-args.R
index 526a7055c..df705da99 100644
--- a/R/set-assert-args.R
+++ b/R/set-assert-args.R
@@ -6,11 +6,10 @@
#' @param write_tree Whether or not to write tree.
#' @keywords internal
set_arg_write_tree <- function(write_tree) {
- sufficient_version <- getRversion() >= 3.2
if (is.na(write_tree)) {
- write_tree <- ifelse(sufficient_version, TRUE, FALSE)
- } else if (!sufficient_version && write_tree) {
- stop_insufficient_r_version()
+ write_tree <- ifelse(is_installed("data.tree"), TRUE, FALSE)
+ } else if (write_tree) {
+ assert_data.tree_installation()
}
write_tree
}
diff --git a/R/style-guides.R b/R/style-guides.R
index 104a1b02a..0833e2fe3 100644
--- a/R/style-guides.R
+++ b/R/style-guides.R
@@ -347,3 +347,33 @@ tidyverse_math_token_spacing <- function() {
one = c("'+'", "'-'", "'*'", "'/'")
)
}
+
+
+#' Equals assignment style
+#'
+#' Use equals assignment instead of arrow assignment.
+#'
+#' @inheritParams tidyverse_style
+
+#' @details
+#'
+#' This style guide is the same as [`tidyverse_style()`], except it uses
+#' equals (`=`) rather than arrow `<-` assignment.
+#'
+#' @family style_guides
+#' @examples
+#' style_text("x <- 1", style = equals_style)
+#' @export
+equals_style <- function() {
+ create_style_guide(
+ token = list(force_assignment_op_equals),
+ use_raw_indention = TRUE
+ )
+}
+
+force_assignment_op_equals <- function(pd) {
+ to_replace <- pd$token == "LEFT_ASSIGN"
+ pd$token[to_replace] <- "EQ_ASSIGN"
+ pd$text[to_replace] <- "="
+ pd
+}
diff --git a/R/transform-code.R b/R/transform-code.R
index c8fc39c77..f289bf779 100644
--- a/R/transform-code.R
+++ b/R/transform-code.R
@@ -56,12 +56,12 @@ separate_chunks <- function(lines, filetype) {
r_raw_chunks <- identify_raw_chunks(lines, filetype = filetype)
r_chunks <- map2(
- r_raw_chunks$starts, r_raw_chunks$ends, ~lines[seq2(.x + 1, .y - 1)]
+ r_raw_chunks$starts, r_raw_chunks$ends, ~ lines[seq2(.x + 1, .y - 1)]
)
text_chunks <- map2(
c(1, r_raw_chunks$ends), c(r_raw_chunks$starts, length(lines)),
- ~lines[seq2(.x, .y)]
+ ~ lines[seq2(.x, .y)]
)
lst(r_chunks, text_chunks)
}
@@ -124,7 +124,7 @@ get_engine_pattern <- function() {
#' @inheritParams separate_chunks
#' @keywords internal
get_knitr_pattern <- function(filetype) {
- if(filetype == "Rnw") {
+ if (filetype == "Rnw") {
knitr::all_patterns[["rnw"]]
} else if (filetype == "Rmd") {
knitr::all_patterns[["md"]]
diff --git a/R/ui.R b/R/ui.R
index c43895e84..fecf89cfd 100644
--- a/R/ui.R
+++ b/R/ui.R
@@ -59,7 +59,7 @@ NULL
#' @family stylers
#' @examples
#' \dontrun{
-#'
+#'
#' style_pkg(style = tidyverse_style, strict = TRUE)
#' style_pkg(
#' scope = "line_breaks",
diff --git a/R/zzz.R b/R/zzz.R
index 024f4c6bc..10b074fc4 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1,11 +1,10 @@
.onLoad <- function(libname, pkgname) {
backports::import(pkgname, "trimws")
- op <- options()
- op.styler <- list(
- styler.colored_print.vertical = TRUE
- )
- toset <- !(names(op.styler) %in% names(op))
- if(any(toset)) options(op.styler[toset])
- invisible()
+ op <- options()
+ op.styler <- list(
+ styler.colored_print.vertical = TRUE
+ )
+ toset <- !(names(op.styler) %in% names(op))
+ if (any(toset)) options(op.styler[toset])
+ invisible()
}
-
diff --git a/README.md b/README.md
index a545cd74e..39bbb8009 100644
--- a/README.md
+++ b/README.md
@@ -1,45 +1,18 @@
+styler
+======
-# styler
+[](https://travis-ci.org/krlmlr/styler) [](https://ci.appveyor.com/project/krlmlr/styler) [](http://www.repostatus.org/#wip)
-[](https://travis-ci.org/r-lib/styler)
-[](https://ci.appveyor.com/project/r-lib/styler)
-[](https://www.tidyverse.org/lifecycle/#stable)
-[](https://codecov.io/gh/r-lib/styler)
-[](https://cran.r-project.org/package=styler)
-
-The goal of styler is to provide non-invasive pretty-printing of R
-source code while adhering to the
-[tidyverse](http://style.tidyverse.org) formatting rules. styler can be
-customized to format code according to other style guides too.
-
-## Installation
-
-You can install the package from CRAN:
-
-``` r
-install.packages("styler")
-```
-
-Or get the development version from GitHub:
-
-``` r
-# install.packages("remotes")
-remotes::install_github("r-lib/styler")
-```
-
-## API
+The goal of styler is to provide non-invasive pretty-printing of R source code while adhering to the [tidyverse](https://github.com/tidyverse/style) formatting rules. Support for custom style guides is planned.
You can style a simple character vector of code with `style_text()`:
``` r
-library("styler")
-ugly_code <- "a=function( x){1+1} "
-style_text(ugly_code)
+ugly_code <- "a<-function( x){1+1} "
+style_text(ugly_code) %>%
+ cat(sep = "\n")
#> a <- function(x) {
#> 1 + 1
#> }
@@ -47,98 +20,11 @@ style_text(ugly_code)
There are a few variants of `style_text()`:
- - `style_file()` styles .R and/or .Rmd files.
- - `style_dir()` styles all .R and/or .Rmd files in a directory.
- - `style_pkg()` styles the source files of an R package.
- - RStudio Addins for styling the active file, styling the current
- package and styling the highlighted code
-region.
+- `style_file()` styles a single .R file.
+- `style_dir()` styles all .R files in a directory.
+- `style_pkg()` styles the source files of an R package.
+- An RStudio Addin that styles the active file .R file
-## Functionality of styler
-
-**scope**
-
-You can decide on the level of invasiveness with the scope argument. You
-can style:
-
- - just spaces.
- - spaces and indention.
- - spaces, indention and line breaks.
- - spaces, indention, line breaks and tokens.
-
-
-
-``` r
-ugly_code <- "a=function( x){1+1} "
-style_text(ugly_code, scope = "spaces")
-#> a = function(x) {1 + 1}
-```
-
-Note that compared to the default used above `scope = "tokens"`:
-
- - no line breaks were added.
- - `<-` was not replaced with `=`.
-
-While spaces still got styled (around `=` in `(x)`).
-
-**strict**
-
-If you wish to keep alignment as is, you can use `strict = FALSE`:
-
-``` r
-style_text(
- c(
- "first <- 4",
- "second <- 1+1"
- ),
- strict = FALSE
-)
-#> first <- 4
-#> second <- 1 + 1
-```
-
-This was just the tip of the iceberg. Learn more about customization
-with the tidyverse style guide in in this
-[vignette](http://styler.r-lib.org/articles/introducing_styler.html). If
-this is not flexible enough for you, you can implement your own style
-guide, as explained in the corresponding
-[vignette](http://styler.r-lib.org/articles/customizing_styler.html).
-
-## Adaption of styler
-
-styler functionality is made available through other packages, most
-notably
-
- - `usethis::use_tidy_style()` styles your project according to the
- tidyverse style guide.
- - `reprex::reprex(style = TRUE)` to prettify reprex code before
- printing. To permanently use `style = TRUE` without specifying it
- every time, you can add the following line to your `.Rprofile` (via
- `usethis::edit_r_profile()`): `options(reprex.styler = TRUE)`.
- - you can pretty-print your R code in RMarkdown reports without having
- styler modifying the source. This feature is implemented as a code
- chunk option in knitr. use `tidy = "styler"` in the header of a code
- chunks (e.g. ` ```{r name-of-the-chunk, tidy = "styler"}`), or
- `knitr::opts_chunk$set(tidy = "styler")` at the top of your
- RMarkdown script.
- - pretty-printing of [drake](https://github.com/ropensci/drake)
- workflow data frames with `drake::drake_plan_source()`.
-
-## Further resources
-
- - The official [web documentation](http://styler.r-lib.org/) of
- styler, containing various vignettes function documentation as well
- as a change-log.
- - [Blog
- post](https://lorenzwalthert.netlify.com/posts/customizing-styler-the-quick-way/)
- about how you can customize styler without being an expert.
- - A [tidyverse.org blog
- post](https://www.tidyverse.org/articles/2017/12/styler-1.0.0/)
- introducing the functionality of styler.
- - The wiki of [Google Summer of Code
- 2017](https://github.com/rstats-gsoc/gsoc2017/wiki/Noninvasive-source-code-formatting)
- or the [pkgdown](https://r-lib.github.io/styler/) page contain
- information related to the initial development phase during Google
- Summer of Code 2017.
+You can find more information on the wiki of [Google Summer of Code 2017](https://github.com/rstats-gsoc/gsoc2017/wiki/Noninvasive-source-code-formatting) or check out the [pkgdown](https://krlmlr.github.io/styler/) page.
diff --git a/derby.log b/derby.log
new file mode 100644
index 000000000..33784b4ea
--- /dev/null
+++ b/derby.log
@@ -0,0 +1,13 @@
+----------------------------------------------------------------
+Fri Jun 23 14:57:19 CEST 2017:
+Booting Derby version The Apache Software Foundation - Apache Derby - 10.10.1.1 - (1458268): instance a816c00e-015c-d507-3867-00002e8b9ff0
+on database directory memory:/home/muelleki/git/R/styler/databaseName=metastore_db with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@26ce3a2
+Loaded from file:/home/muelleki/.cache/spark/spark-1.6.2-bin-hadoop2.6/lib/spark-assembly-1.6.2-hadoop2.6.0.jar
+java.vendor=Oracle Corporation
+java.runtime.version=1.8.0_131-8u131-b11-0ubuntu1.17.04.1-b11
+user.dir=/home/muelleki/git/R/styler
+os.name=Linux
+os.arch=amd64
+os.version=4.10.0-24-generic
+derby.system.home=null
+Database Class Loader started - derby.database.classpath=''
diff --git a/docs/articles/data_structures.Rmd b/docs/articles/data_structures.Rmd
new file mode 100644
index 000000000..b0bd31edb
--- /dev/null
+++ b/docs/articles/data_structures.Rmd
@@ -0,0 +1,153 @@
+---
+title: "Data Structures"
+author: "Lorenz Walthert"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Vignette Title}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+This vignette illustrates how the core of `styler` currently^[at commit `e6ddee0f510d3c9e3e22ef68586068fa5c6bc140`] works, i.e. how
+rules are applied to a parse table and how limitations of this approach can be
+overcome with a refined approach.
+
+## Status quo - the flat approach
+
+Roughly speaking, a string containing code to be formatted is parsed with `parse`
+and the output is passed to `getParseData` in order to obtain a parse
+table with detailed information about every token. For a simple example string
+"`a <- function(x) { if(x > 1) { 1+1 } else {x} }`" to be formatted, the parse
+table on which `styler` performs the manipulations looks similar to the one
+presented below.
+
+```{r, message = FALSE}
+library("styler")
+library("dplyr")
+
+code <- "a <- function(x) { if(x > 1) { 1+1 } else {x} }"
+
+(parse_table <- styler:::compute_parse_data_flat_enhanced(code))
+```
+The column `spaces` was computed from the columns `col1` and `col2`, `newlines`
+was computed from `line1` and `line2` respectively.
+
+So far, styler can set the spaces around the operators correctly. In our example,
+that involves adding spaces around `+`, so in the `spaces` column, element nine
+and ten must be set to one. This means that a space is added after `1` and after `+`.
+To get the spacing right and cover the various cases, a set of functions has to
+be applied to the parse table subsequently (and in the right order),
+which is essentially done via `Reduce()`.
+After all modifications on the table are completed, `serialize_parse_data()`
+collapses the `text` column and adds the number of spaces and
+line breaks specified in `spaces` and `newlines` in between the elements of
+`text`. If we serialize our table and don't perform any modification, we
+obviously just get back what we started with.
+```{r}
+styler:::serialize_parse_data_flat(parse_table)
+```
+
+## Refining the flat approach - nesting the parse table
+
+Although the flat approach is good place to start, e.g. for fixing spaces
+between operators, it has its limitations. In particular, it treats each token
+the same way in the sense that it does not account for the context of the token,
+i.e. in which sub-expression it appears.
+To set the indention correctly, we need a hierarchical view on the parse data,
+since all tokens in a sub-expression have the same indention level. Hence,
+a natural approach would be to create a nested parse table instead of a flat
+parse table and then take a recursion over all elements in the table, so for
+each sub(-sub etc.)-expression, a separate parse table would be created and the
+modifications would be applied to this table before putting everything back
+together. A function to create a nested parse table already exists in `styler`.
+Let's have a look at the top level:
+
+```{r}
+(l1 <- styler:::compute_parse_data_nested(code)[-1])
+
+```
+
+The tibble contains the column `child`, which itself contains a tibble.
+If we "enter" the first child, we can see that the expression was split up
+further.
+
+```{r}
+l1$child[[1]] %>%
+ select(text, terminal, child, token)
+```
+
+And further...
+```{r}
+l1$child[[1]]$child[[3]]$child[[5]]
+```
+
+... and so on. Every child that is not a terminal contains another tibble where
+the sub-expression is split up further - until we are left with tibbles that
+only contain terminals.
+
+
+Recall the above example. `a <- function(x) { if(x > 1) { 1+1 } else {x} }`.
+In the last printed parse table, we can see that see that the whole if condition
+is a sub-expression of `code`, surrounded by two curly brackets. Hence,
+one would like to set the indention level for this sub-expression before
+doing anything with it in more detail. Later, when we progressed deeper into
+the nested table, we hit a similar pattern:
+
+```{r}
+l1$child[[1]]$child[[3]]$child[[5]]$child[[2]]$child[[5]]
+```
+Again, we have two curly brackets and an expression inside. We would like to
+set the indention level for the expression `1+1` in the same way as for the
+whole if condition.
+
+The simple example above makes it evident that a recursive approach to this
+problem would be the most natural.
+
+The code for a function that kind of sketches the idea and illustrates such a
+recursion is given below.
+
+It takes a nested parse table as input and then does the recursion over all
+children. If the child is a terminal, it returns the text, otherwise,
+it "enters" the child to find the terminals inside of the child and returns them.
+
+```{r}
+serialize <- function(x) {
+ out <- Map(
+ function(terminal, text, child) {
+ if (terminal)
+ text
+ else
+ serialize(child)
+ },
+ x$terminal, x$text, x$child
+ )
+ out
+}
+
+x <- styler:::compute_parse_data_nested(code)
+serialize(x) %>% unlist
+```
+
+How to exactly implement a similar recursion to not just return each text
+token separately, but
+the styled text as one string (or one string per line) is subject to future work,
+so would be the functions to be
+applied to a sub-expression parse table that create correct indention.
+Similar to `compute_parse_data_flat_enhanced`, the column `spaces` and `newlines`
+would be required to be computed by `compute_parse_data_nested` as well as a
+new column `indention`.
+
+
+## Final Remarks
+
+Although a flat structure would possibly also allow us to solve the problem of
+indention, it is a less elegant and flexible solution to the problem. It would
+involve looking for an opening curly bracket in the parse table, set the
+indention level for all subsequent rows in the parse table until the next
+opening or closing curly bracket is hit and then intending one level further or
+setting indention back to where it was at the beginning of the table.
+
+Note that the vignette just addressed the question of indention caused by
+curly brackets and has not dealt with other operators that would trigger
+indention, such as `(` or `+`.
diff --git a/docs/articles/data_structures.html b/docs/articles/data_structures.html
new file mode 100644
index 000000000..ee6d8b75b
--- /dev/null
+++ b/docs/articles/data_structures.html
@@ -0,0 +1,208 @@
+
+
+
This vignette illustrates how the core of styler
currently1 works, i.e. how rules are applied to a parse table and how limitations of this approach can be overcome with a refined approach.
Roughly speaking, a string containing code to be formatted is parsed with parse
and the output is passed to getParseData
in order to obtain a parse table with detailed information about every token. For a simple example string “a <- function(x) { if(x > 1) { 1+1 } else {x} }
” to be formatted, the parse table on which styler
performs the manipulations looks similar to the one presented below.
library("styler")
+library("dplyr")
+
+code <- "a <- function(x) { if(x > 1) { 1+1 } else {x} }"
+
+(parse_table <- styler:::compute_parse_data_flat_enhanced(code))
## # A tibble: 24 x 13
+## line1 col1 line2 col2 token text terminal short newlines
+## <int> <int> <int> <int> <chr> <chr> <lgl> <chr> <int>
+## 1 1 0 1 0 START NA <NA> 0
+## 2 1 1 1 1 SYMBOL a TRUE a 0
+## 3 1 3 1 4 LEFT_ASSIGN <- TRUE <- 0
+## 4 1 6 1 13 FUNCTION function TRUE funct 0
+## 5 1 14 1 14 '(' ( TRUE ( 0
+## 6 1 15 1 15 SYMBOL_FORMALS x TRUE x 0
+## 7 1 16 1 16 ')' ) TRUE ) 0
+## 8 1 18 1 18 '{' { TRUE { 0
+## 9 1 20 1 21 IF if TRUE if 0
+## 10 1 22 1 22 '(' ( TRUE ( 0
+## # ... with 14 more rows, and 4 more variables: lag_newlines <dbl>,
+## # spaces <int>, multi_line <lgl>, indent <dbl>
+The column spaces
was computed from the columns col1
and col2
, newlines
was computed from line1
and line2
respectively.
So far, styler can set the spaces around the operators correctly. In our example, that involves adding spaces around +
, so in the spaces
column, element nine and ten must be set to one. This means that a space is added after 1
and after +
. To get the spacing right and cover the various cases, a set of functions has to be applied to the parse table subsequently (and in the right order), which is essentially done via Reduce()
. After all modifications on the table are completed, serialize_parse_data()
collapses the text
column and adds the number of spaces and line breaks specified in spaces
and newlines
in between the elements of text
. If we serialize our table and don’t perform any modification, we obviously just get back what we started with.
styler:::serialize_parse_data_flat(parse_table)
## [1] "a <- function(x) { if(x > 1) { 1+1 } else {x} }"
+Although the flat approach is good place to start, e.g. for fixing spaces between operators, it has its limitations. In particular, it treats each token the same way in the sense that it does not account for the context of the token, i.e. in which sub-expression it appears. To set the indention correctly, we need a hierarchical view on the parse data, since all tokens in a sub-expression have the same indention level. Hence, a natural approach would be to create a nested parse table instead of a flat parse table and then take a recursion over all elements in the table, so for each sub(-sub etc.)-expression, a separate parse table would be created and the modifications would be applied to this table before putting everything back together. A function to create a nested parse table already exists in styler
. Let’s have a look at the top level:
(l1 <- styler:::compute_parse_data_nested(code)[-1])
## # A tibble: 1 x 13
+## col1 line2 col2 id parent token terminal text short token_before
+## <int> <int> <int> <int> <int> <chr> <lgl> <chr> <chr> <chr>
+## 1 1 1 47 49 0 expr FALSE <NA>
+## # ... with 3 more variables: token_after <chr>, internal <lgl>,
+## # child <list>
+The tibble contains the column child
, which itself contains a tibble. If we “enter” the first child, we can see that the expression was split up further.
l1$child[[1]] %>%
+ select(text, terminal, child, token)
## # A tibble: 3 x 4
+## text terminal child token
+## <chr> <lgl> <list> <chr>
+## 1 FALSE <tibble [1 x 14]> expr
+## 2 <- TRUE <NULL> LEFT_ASSIGN
+## 3 FALSE <tibble [5 x 14]> expr
+And further…
+l1$child[[1]]$child[[3]]$child[[5]]
## # A tibble: 3 x 14
+## line1 col1 line2 col2 id parent token terminal text short
+## <int> <int> <int> <int> <int> <int> <chr> <lgl> <chr> <chr>
+## 1 1 18 1 18 9 45 '{' TRUE { {
+## 2 1 20 1 45 42 45 expr FALSE
+## 3 1 47 1 47 40 45 '}' TRUE } }
+## # ... with 4 more variables: token_before <chr>, token_after <chr>,
+## # internal <lgl>, child <list>
+… and so on. Every child that is not a terminal contains another tibble where the sub-expression is split up further - until we are left with tibbles that only contain terminals.
+Recall the above example. a <- function(x) { if(x > 1) { 1+1 } else {x} }
. In the last printed parse table, we can see that see that the whole if condition is a sub-expression of code
, surrounded by two curly brackets. Hence, one would like to set the indention level for this sub-expression before doing anything with it in more detail. Later, when we progressed deeper into the nested table, we hit a similar pattern:
l1$child[[1]]$child[[3]]$child[[5]]$child[[2]]$child[[5]]
## # A tibble: 3 x 14
+## line1 col1 line2 col2 id parent token terminal text short
+## <int> <int> <int> <int> <int> <int> <chr> <lgl> <chr> <chr>
+## 1 1 30 1 30 20 30 '{' TRUE { {
+## 2 1 32 1 34 27 30 expr FALSE
+## 3 1 36 1 36 26 30 '}' TRUE } }
+## # ... with 4 more variables: token_before <chr>, token_after <chr>,
+## # internal <lgl>, child <list>
+Again, we have two curly brackets and an expression inside. We would like to set the indention level for the expression 1+1
in the same way as for the whole if condition.
The simple example above makes it evident that a recursive approach to this problem would be the most natural.
+The code for a function that kind of sketches the idea and illustrates such a recursion is given below.
+It takes a nested parse table as input and then does the recursion over all children. If the child is a terminal, it returns the text, otherwise, it “enters” the child to find the terminals inside of the child and returns them.
+serialize <- function(x) {
+ out <- Map(
+ function(terminal, text, child) {
+ if (terminal)
+ text
+ else
+ serialize(child)
+ },
+ x$terminal, x$text, x$child
+ )
+ out
+}
+
+x <- styler:::compute_parse_data_nested(code)
+serialize(x) %>% unlist
## [1] "a" "<-" "function" "(" "x" ")"
+## [7] "{" "if" "(" "x" ">" "1"
+## [13] ")" "{" "1" "+" "1" "}"
+## [19] "else" "{" "x" "}" "}"
+How to exactly implement a similar recursion to not just return each text token separately, but the styled text as one string (or one string per line) is subject to future work, so would be the functions to be applied to a sub-expression parse table that create correct indention. Similar to compute_parse_data_flat_enhanced
, the column spaces
and newlines
would be required to be computed by compute_parse_data_nested
as well as a new column indention
.
Although a flat structure would possibly also allow us to solve the problem of indention, it is a less elegant and flexible solution to the problem. It would involve looking for an opening curly bracket in the parse table, set the indention level for all subsequent rows in the parse table until the next opening or closing curly bracket is hit and then intending one level further or setting indention back to where it was at the beginning of the table.
+Note that the vignette just addressed the question of indention caused by curly brackets and has not dealt with other operators that would trigger indention, such as (
or +
.
at commit e6ddee0f510d3c9e3e22ef68586068fa5c6bc140
↩
library("dplyr")
+library("purrr")
+pkgload::load_all()
This vignette builds on the vignette “Data Structures” and discusses how to go forward with the nested structure of the parse data. In order to compute the white space information in a nested data structure, we need a recursion. We use a visitor approach to separate the algorithm (computing white space information) from the object (nested) data structure. The function create_filler()
can then be used to add white space information on every level of nesting within the nested parse data if applied in combination with the visitor. visitor()
takes a object to operate on and a list of functions. Each function is applied at the current level of nesting before the next level of nesting is entered.
pre_visit
## function(pd_nested, funs) {
+## if (is.null(pd_nested)) return()
+## pd_transformed <- visit_one(pd_nested, funs)
+##
+## pd_transformed$child <- map(pd_transformed$child, pre_visit, funs = funs)
+## pd_transformed
+## }
+## <environment: namespace:styler>
+visit_one
## function(pd_flat, funs) {
+## reduce(funs, function(x, fun) fun(x),
+## .init = pd_flat)
+## }
+## <environment: namespace:styler>
+This comes with two advantages.
+create_filler()
was adapted to also initialize indention and lag_newlines.
create_filler
## function(pd_flat) {
+##
+## pd_flat$line3 <- lead(pd_flat$line1, default = tail(pd_flat$line2, 1))
+## pd_flat$col3 <- lead(pd_flat$col1, default = tail(pd_flat$col2, 1) + 1L)
+## pd_flat$newlines <- pd_flat$line3 - pd_flat$line2
+## pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0)
+## pd_flat$col2_nl <- if_else(pd_flat$newlines > 0L, 0L, pd_flat$col2)
+## pd_flat$spaces <- pd_flat$col3 - pd_flat$col2_nl - 1L
+## pd_flat$multi_line <- ifelse(pd_flat$terminal, FALSE, NA)
+##
+## ret <- pd_flat[, !(names(pd_flat) %in% c("line3", "col3", "col2_nl"))]
+##
+##
+## if (!("indent" %in% names(ret))) {
+## ret$indent <- 0
+## }
+##
+## if (any(ret$spaces < 0L)) {
+## stop("Invalid parse data")
+## }
+##
+## ret
+## }
+## <environment: namespace:styler>
+code <- "a <- function(x) { if(x > 1) { 1+1 } else {x} }"
+pd_nested <- compute_parse_data_nested(code)
+pd_nested_enhanced <- pre_visit(pd_nested, c(create_filler))
+pd_nested_enhanced
## # A tibble: 1 x 19
+## line1 col1 line2 col2 id parent token terminal text short
+## <int> <int> <int> <int> <int> <int> <chr> <lgl> <chr> <chr>
+## 1 1 1 1 47 49 0 expr FALSE
+## # ... with 9 more variables: token_before <chr>, token_after <chr>,
+## # internal <lgl>, child <list>, newlines <int>, lag_newlines <dbl>,
+## # spaces <int>, multi_line <lgl>, indent <dbl>
+As a next step, we need to find a way to serialize the nested tibble, or in other words, to transform it to its character vector representation. As a starting point, consider the function serialize
that was introduced in the vignette “Data Structures”.
serialize <- function(x) {
+ out <- Map(
+ function(terminal, text, child) {
+ if (terminal)
+ text
+ else
+ serialize(child)
+ },
+ x$terminal, x$text, x$child
+ )
+ out
+}
+
+serialize(pd_nested) %>% unlist
## [1] "a" "<-" "function" "(" "x" ")"
+## [7] "{" "if" "(" "x" ">" "1"
+## [13] ")" "{" "1" "+" "1" "}"
+## [19] "else" "{" "x" "}" "}"
+serialize
can be combined with serialize_parse_data_flat
. The latter pastes together the column “text” of a flat parse table by taking into account space and line break information, splits the string by line break and returns it.
serialize_parse_data_flat
## function(pd_flat) {
+## pd_flat %>%
+## summarize_(
+## text_ws = ~paste0(
+## text, newlines_and_spaces(newlines, spaces),
+## collapse = "")) %>%
+## .[["text_ws"]] %>%
+## strsplit("\\n", fixed = TRUE) %>%
+## .[[1L]]
+## }
+## <environment: namespace:styler>
+However, things get a bit more complicated, mainly because line break and white space information is not only contained in the terminal tibbles of the nested parse data, but even before, as the following example shows.
+pd_nested_enhanced$child[[1]]
## # A tibble: 3 x 19
+## line1 col1 line2 col2 id parent token terminal text short
+## <int> <int> <int> <int> <int> <int> <chr> <lgl> <chr> <chr>
+## 1 1 1 1 1 3 49 expr FALSE
+## 2 1 3 1 4 2 49 LEFT_ASSIGN TRUE <- <-
+## 3 1 6 1 47 48 49 expr FALSE
+## # ... with 9 more variables: token_before <chr>, token_after <chr>,
+## # internal <lgl>, child <list>, newlines <int>, lag_newlines <dbl>,
+## # spaces <int>, multi_line <lgl>, indent <dbl>
+pd_nested_enhanced$child[[1]]$child[[1]]
## # A tibble: 1 x 19
+## line1 col1 line2 col2 id parent token terminal text short
+## <int> <int> <int> <int> <int> <int> <chr> <lgl> <chr> <chr>
+## 1 1 1 1 1 1 3 SYMBOL TRUE a a
+## # ... with 9 more variables: token_before <chr>, token_after <chr>,
+## # child <list>, internal <lgl>, newlines <int>, lag_newlines <dbl>,
+## # spaces <int>, multi_line <lgl>, indent <dbl>
+After “a” in code
, there is a space, but this information is not contained in the tibble where we find the terminal “a”. In general, we must add newlines and spaces values after we computed character vector representation of the expression. In our example: we know that there is a space after the non-terminal “a” by looking at pd_nested_enhanced$child[[1]]
. Therefore, we need to add this space to the very last terminal within pd_nested_enhanced$child[[1]]
before we collapse everything together.
serialize_parse_data_nested_helper
## function(pd_nested, pass_indent) {
+## out <- pmap(list(pd_nested$terminal, pd_nested$text, pd_nested$child,
+## pd_nested$spaces, pd_nested$lag_newlines, pd_nested$indent),
+## function(terminal, text, child, spaces, lag_newlines, indent) {
+## total_indent <- pass_indent + indent
+## preceding_linebreak <- if_else(lag_newlines > 0, 1, 0)
+## if (terminal) {
+## c(add_newlines(lag_newlines),
+## add_spaces(total_indent * preceding_linebreak),
+## text,
+## add_spaces(spaces))
+## } else {
+## c(add_newlines(lag_newlines),
+## add_spaces(total_indent * preceding_linebreak),
+## serialize_parse_data_nested_helper(child, total_indent),
+## add_spaces(spaces))
+## }
+## }
+## )
+## out
+## }
+## <environment: namespace:styler>
+serialize_parse_data_nested
## function(pd_nested) {
+## out <- c(add_newlines(start_on_line(pd_nested) - 1),
+## serialize_parse_data_nested_helper(pd_nested, pass_indent = 0)) %>%
+## unlist() %>%
+## paste0(collapse = "") %>%
+## strsplit("\\n", fixed = TRUE) %>%
+## .[[1L]]
+## out
+## }
+## <environment: namespace:styler>
+Before we are done, we need to add information regarding indention to the parse table. We can add indention after every line break that comes after a round bracket with indent_round()
. And then serialize it.
## # A tibble: 1 x 19
+## line1 col1 line2 col2 id parent token terminal text short
+## <int> <int> <int> <int> <int> <int> <chr> <lgl> <chr> <chr>
+## 1 1 1 1 47 49 0 expr FALSE
+## # ... with 9 more variables: token_before <chr>, token_after <chr>,
+## # internal <lgl>, child <list>, newlines <int>, lag_newlines <dbl>,
+## # spaces <int>, multi_line <lgl>, indent <dbl>
+We can see how indention works with a more complicated example
+indented <- c(
+ "call(",
+ " 1,",
+ " call2(",
+ " 2, 3,",
+ " call3(1, 2, 22),",
+ " 5",
+ " ),",
+ " 144",
+ ")"
+)
+
+not_indented <- trimws(indented)
+back_and_forth <- not_indented %>%
+ compute_parse_data_nested() %>%
+ pre_visit(c(create_filler,
+ purrr::partial(indent_round, indent_by = 2))) %>%
+ serialize_parse_data_nested()
+
+identical(indented, back_and_forth)
## [1] TRUE
+The goal of styler is to provide non-invasive pretty-printing of R source code while adhering to the tidyverse formatting rules. Support for custom style guides is planned.
+You can style a simple character vector of code with style_text()
:
ugly_code <- "a<-function( x){1+1} "
+style_text(ugly_code) %>%
+ cat(sep = "\n")
+#> a <- function(x) {
+#> 1 + 1
+#> }
There are a few variants of style_text()
:
style_file()
styles a single .R file.style_dir()
styles all .R files in a directory.style_pkg()
styles the source files of an R package.You can find more information on the wiki of Google Summer of Code 2017 or check out the pkgdown page.
+Depending on whether transformers
contains functions to modify the
+line break information, the column multi_line
is updated (after
+the line break information is modified) and
+the rest of the transformers is applied afterwards, or (if line break
+information is not to be modified), all transformers are applied in one
+step. The former requires two pre visits and one post visit, the latter
+only one pre visit.
apply_transformers(pd_nested, transformers)+ +
pd_nested | +A nested parse table. |
+
---|---|
transformers | +A list of named transformer functions |
+
Convert a vector to an ordered factor but stop if any of the values in
+x
does not match the predefined levels in levels.
character_to_ordered(x, levels, name = substitute(x))+ +
x | +A character vector. |
+
---|---|
levels | +A vector with levels. |
+
name | +The name of the character vector to be dispayed if the +construction of the factor fails. |
+
binds two parse tables together and arranges them so that the tokens are in +the correct order.
+ + +combine_children(child, internal_child)+ +
child | +A parse table or |
+
---|---|
internal_child | +A parse table or |
+
Essentially, this is a wrapper around dplyr::bind_rows()
, but
+returns NULL
if the result of dplyr::bind_rows()
is a data frame with
+zero rows.
Based on token
, find the rows in pd
that need to be indented.
compute_indent_indices(pd, token = "'('", indent_last = FALSE)+ +
pd | +A parse table. |
+
---|---|
token | +A character vector with tokens. |
+
indent_last | +Flag to indicate whether the last token in |
+
For example when token
is a parenthesis, the closing parenthesis does not
+need indention, but if token is something else, for example a plus (+), the
+last token in pd
needs indention.
The function obtains detailed parse information for text
via
+utils::getParseData()
and does some minimal pre-processing by calling
+enhance_parse_data()
.
compute_parse_data_flat_enhanced(text)+ +
text | +A character vector. |
+
---|
A pre-processed parse table.
+ +Roughly speaking, this is the inverse operation of
+serialize_parse_data_flat()
, which turns a parse table into a character
+vector, since compute_parse_data_flat_enhanced()
turns a character vector
+into a parse table.
Preprocessing includes
removing non-terminal entries.
removing columns id, parent and terminal.
adding a start token.
adding line-break and space information.
removing spaces in comments at the end of the line.
Parses text
to a flat parse table and subsequently changes its
+representation into a nested parse table with
+nest_parse_data()
.
compute_parse_data_nested(text)+ +
text | +A character vector to parse. |
+
---|
A nested parse table. Apart from the columns provided by
+utils::getParseData()
, a column "short" with the first five characters of
+"text" is added, the nested subtibbles are in column "child".
+TODO:
Implement enhance_parse_data_nested()
Walk tree defined by child
, compute whitespace information
Store indention depth in a separate column, unaffected by +inter-token space
Implement compute_parse_data_nested_with_ws() as +compute_parse_data_nested() + enhance_parse_data_nested()
Implement serialization of nested parse data
Use compute_parse_data_nested_with_ws() instead of +compute_parse_data_flat_enhanced()
Perform all transformations on hierarchical structure
Compute text for a sub-element
Compute indentation
Braces
Function calls
Function definitions
Remove includeText = TRUE
Multiple *-in.R files can have the same *-out.R file since to create the +*-out.R file, everything after the first dash is replaced by *-out.R.
+ + +construct_out(in_paths)+ +
in_paths | +A character vector that denotes paths to *-in.R files. |
+
---|
+#> [1] "path/to/file/first-out.R" "path/to/file/first-out.R"
Construct paths of a tree object given the paths of *-in.R files
+ + +construct_tree(in_paths, suffix = "_tree")+ +
in_paths | +Character vector of *-in.R files. |
+
---|---|
suffix | +Suffix for the tree object. |
+
This function computes difference (as column and line difference) between two +entries in the parse table and adds this information to the table.
+ + +create_filler(pd_flat)+ +
pd_flat | +A parse table. |
+
---|
A parse table with two three columns: lag_newlines, newlines and +spaces.
+ + +Uses create_filler()
in a recursion add space and line break information
+separately on every level of nesting.
create_filler_nested(pd_nested)+ +
pd_nested | +A nested parse table. |
+
---|
A nested parse table with two new columns: newlines and spaces.
+ +This function is convenient to display all nesting levels of a nested tibble +at once.
+ + +create_node_from_nested_root(pd_nested)+ +
pd_nested | +A nested tibble. |
+
---|
An object of class "Node" and "R6".
+ + ++library("magrittr")#> +#>#>+#> +#>code <- "a <- function(x) { if(x > 1) { 1+1 } else {x} }" +l1 <- styler:::compute_parse_data_nested(code) %>% + styler:::pre_visit(c(styler:::create_filler)) %>% + styler:::create_node_from_nested_root()#> Warning: replacing previous import ‘scales::viridis_pal’ by ‘viridis::viridis_pal’ when loading ‘DiagrammeR’
Modifies the parse table minimally by applying some pre-processing steps.
+ + +enhance_parse_data(parse_data)+ +
parse_data | +a parse table. |
+
---|
a pre-processed parse table.
+ +Preprocessing includes
removing non-terminal entries.
removing columns id, parent and terminal.
adding a start token.
adding line-break and space information.
removing spaces in comments at the end of the line.
Certain tokens are not placed optimally in the nested parse data with
+compute_parse_data_nested()
. For example, the token of arithmetic
+operations 1 + 1 + 1 should all be on the same level of nesting since
+the indention is the same for all but the first two terminals. Setting the
+indention correcly is easier to achieve if they are put on the same level
+of nesting.
flatten_operators(pd_nested)+ +
pd_nested | +A nested parse table to partially flatten. |
+
---|
Get the transformer functions for styling
+ + +get_transformers(flat = FALSE, ...)+ +
flat | +Whether the transformer functions for flat or nested styling +should be returned. |
+
---|---|
... | +Parameters passed to
|
+
A list of transformer functions that operate on flat parse tables.
+ + +Get the transformer functions for flat styling
+ + +get_transformers_flat(strict = TRUE, start_comments_with_one_space = FALSE)+ +
strict | +A logical value indicating whether a set of strict +or not so strict transformer functions should be returned. |
+
---|---|
start_comments_with_one_space | +Whether or not comments should start
+with only one space (see |
+
A list of transformer functions that operate on flat parse +tables.
+ +Other obtain transformers: get_transformers_nested
Similar to get_transformers_flat()
, but additionally, returns some
+functions needed due the fact that styling is done in a nested way.
get_transformers_nested(scope = "tokens", strict = TRUE, indent_by = 2, + start_comments_with_one_space = FALSE)+ +
scope | +The extent of manipulation. Can range from "none" (least +invasive) to "token" (most invasive). See 'Details'. This argument is a +vector of length one. |
+
---|---|
strict | +A logical value indicating whether a set of strict +or not so strict transformer functions should be returned. |
+
indent_by | +How many spaces of indention should be inserted after +operators such as '('. |
+
start_comments_with_one_space | +Whether or not comments should start
+with only one space (see |
+
The following options for scope
are available.
"none": Performs no transformation at all.
"spaces": Manipulates spacing between token on the same line.
"line_breaks": In addition to "spaces", this option also manipulates +line breaks.
"tokens": In addition to "line_breaks", this option also manipulates +tokens.
As it becomes clear from this description, more invasive operations can only +be performed if all less invasive operations are performed too.
+ +Other obtain transformers: get_transformers_flat
+ All functions+ + |
+ |
---|---|
+ + | +Add information about previous / next token to each terminal |
+
+ + | +Apply transformers to a parse table |
+
+ + | +Convert a character vector to an ordered factor |
+
+ + | +Combine child and internal child |
+
+ + | +Compute the indices that need indention |
+
+ + | +Parse and pre-process character vector |
+
+ + | +Obtain a nested parse table from a character vector |
+
+ + | +Construct *-out.R from a *-in.R |
+
+ + | +Construct paths of a tree object given the paths of *-in.R files |
+
+ + | +Enrich parse table with space and linebreak information |
+
+ + | +Convert a nested tibble into a node tree |
+
+ + | +Create node from nested parse data |
+
+ + | +Create a tree from text |
+
+ + | +Enhance the mapping of text to the token "SPECIAL" |
+
+ + | +Pre-processing parse data |
+
+ + | +Flatten some token in the nested parse table based on operators |
+
+ + | +Get the transformer functions for flat styling |
+
+ + | +Get the transformer functions for nested styling |
+
+ + | +Get the transformer functions for styling |
+
+ + | +lookup which new tokens were created from "SPECIAL" |
+
+ + | +A Closure to return transformer function |
+
+ + | +Closure to return transformer function |
+
+ + | +Closure to return a transformer function |
+
+ + | +Check whether indention is needed |
+
+ + | +Nest a flat parse table |
+
+ + | +concentrate newlines an spaces in a string |
+
+ + | +Parse, transform and serialize text |
+
+ + | +Prettify one R file |
+
+ + | +Repeat elements of a character vector |
+
+ + | +Serialize Flat Parse Data |
+
+ + | +Serialize a nested parse table |
+
+ + | +Serialize a nested parse table |
+
+ + | +Set the multi-line column |
+
+ + | +Set space between levels of nesting |
+
+ + | +Helper for setting spaces |
+
+ + | +Unindent a chlid if necessary |
+
+ + | +Start comments with a space |
+
+ + | +Get the start right |
+
+ + | +Strip EOL spaces |
+
+ + | +Style the active file |
+
+ + | +Prettify arbitrary R code |
+
+ + | +Style a file |
+
+ + | +Prettify R source code |
+
+ + | +Style a string |
+
+ + | +Non-invasive pretty printing of R code |
+
+ + | +Run a collection of tests |
+
+
|
+ Transforming test input with a transformer function |
+
+ + | +Check whether a parse table is a multi-line token |
+
+ + | +Obtain token table from text |
+
+ + | +Transform a file an check the result |
+
+ + | +Transform files with transformer functions |
+
+ + | +Unindent a child |
+
+
|
+ Update indention information of parse data |
+
+ + | +Verify parse data modifications |
+
+ + | +Transform a flat parse table with a list of transformers |
+
+ + | +Visit'em all |
+
This function takes a list of transformer functions as input and +returns a function that can be applied to character strings +that should be transformed.
+ + +make_transformer(transformers, flat)+ +
transformers | +A list of transformer functions that operate on flat +parse tables. |
+
---|---|
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
Other make transformers: make_transformer_flat
,
+ make_transformer_nested
Returns a closure that turns text
into a flat parse table and applies
+transformers
on it.
make_transformer_flat(transformers)+ +
transformers | +A list of transformer functions that operate on flat +parse tables. |
+
---|
Other make transformers: make_transformer_nested
,
+ make_transformer
Returns a closure that turns text
into a nested parse table and applies
+transformers
on it.
make_transformer_nested(transformers)+ +
transformers | +A list of transformer functions that operate on flat +parse tables. |
+
---|
Other make transformers: make_transformer_flat
,
+ make_transformer
Check whether indention is needed
+ + +needs_indention(pd, opening)+ +
pd | +A parse table. |
+
---|---|
opening | +the index of the opening parse table. Since always computed +before this function is called, it is included as an argument so it does +not have to be recomputed. |
+
returns TRUE
if indention is needed, FALSE
otherwise. Indention
+is needed if and only if:
+* the opening token is not NA
.
+* if there is a multi-line token before the first line break.
+
+TRUE
if indention is needed, FALSE
otherwise.
nest_parse_data
groups pd_flat
into a parse table with tokens that are
+a parent to other tokens (called internal) and such that are not (called
+child). Then, the token in child are joined to their parents in internal
+and all token information of the children is nested into a column "child".
+This is done recursively until we are only left with a nested tibble that
+contains one row: The nested parse table.
nest_parse_data(pd_flat)+ +
pd_flat | +A flat parse table including both terminals and non-terminals. |
+
---|
A nested parse table.
+ +concentrate newlines an spaces in a string
+ + +newlines_and_spaces(newlines, spaces)+ +
newlines | +Scalar indicating how many newlines ("\ n") should returned. |
+
---|---|
spaces | +Scalar indicating how many spaces should be appended to the +newlines. |
+
A string.
+ + +This is a helper function for style_dir.
+ + +prettify_any(transformers, recursive, flat)+ +
transformers | +A list with functions to be applied to the parsed data. |
+
---|---|
recursive | +A logical value indicating whether or not files in subdirectories +should be styled as well. |
+
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
This is a helper function for style_dir.
+ + +prettify_one(transformers, flat, path)+ +
transformers | +A list with functions to be applied to the parsed data. |
+
---|---|
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
path | +The path to a file that should be styled. |
+
times
times and collapse it — rep_char • stylertimes
times and collapse itRepeat elements of a character vector times
times and collapse it
rep_char(char, times)+ +
char | +A character vector. |
+
---|---|
times | +an integer giving the number of repetitions. |
+
A character vector.
+ + +Collapses a parse table into character vector representation.
+ + +serialize_parse_data_flat(pd_flat)+ +
pd_flat | +A parse table. |
+
---|
The function essentially collapses the column text of pd_flat
+while taking into account space and linebreak information from the columns
+newlines and spaces.
+Roughly speaking, this is the inverse operation of
+compute_parse_data_flat_enhanced()
, which turns a character vector into a
+parse table, since serialize_parse_data_flat()
turns a parse table back
+into a character vector.
Collapses a nested parse table into its character vector representation.
+ + +serialize_parse_data_nested(pd_nested)+ +
pd_nested | +A nested parse table with line break, spaces and indention +information. |
+
---|
A character string.
+ + +Helper function that recursively extracts terminals from a nested tibble.
+ + +serialize_parse_data_nested_helper(pd_nested, pass_indent)+ +
pd_nested | +A nested parse table. |
+
---|---|
pass_indent | +Level of indention of a token. |
+
A character vector with all terminal tokens in pd_nested
plus
+the appropriate amount of white spaces and line breaks are inserted between
+them.
With the nested approach, certain rules do not have an effect anymore because +of the nature of the nested structure. Setting spacing before curly +brackets in for / if / while statements and function declarations will be +such a case since a curly bracket is always at the first position in a +parse table, so spacing cannot be set after the previous token.
+ + +set_space_between_levels(pd_flat)+ +
pd_flat | +A flat parse table. |
+
---|
Helper for setting spaces
+ + +set_spaces(spaces_after_prefix, force_one)+ +
spaces_after_prefix | +An integer vector with the number of spaces +after the prefix. |
+
---|---|
force_one | +Whether spaces_after_prefix should be set to one in all +cases. |
+
An integer vector of length spaces_after_prefix, which is either
+one (if force_one = TRUE
) or space_after_prefix
with all values
+below one set to one.
check whether any of the children of pd
has token
on the same line as
+the closing token
of pd. If so, unindent that token.
set_unindention_child(pd, token = "')'", unindent_by)+ +
pd | +A parse table. |
+
---|---|
token | +The token the unindention should be based on. |
+
unindent_by | +By how many spaces one level of indention is reversed. |
+
Forces comments to start with a space, that is, after the regular expression +"^#+'*", at least one space must follow. Multiple spaces may be legit for +indention in some situations.
+ + +start_comments_with_space(pd, force_one = FALSE)+ +
pd | +A parse table. |
+
---|---|
force_one | +Wheter or not to force one space or allow multiple spaces +after the regex "^#+'*". |
+
Helper function fot RStudio Add-in.
+ + +style_active_file()
+
+
+ Performs various substitutions in all .R
files in a directory.
+Carefully examine the results after running this function!
style_dir(path = ".", flat = FALSE, recursive = TRUE, + transformers = get_transformers(flat = flat))+ +
path | +Path to a directory with files to transform. |
+
---|---|
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
recursive | +A logical value indicating whether or not files in subdirectories
+of |
+
transformers | +A list with functions to be applied to the parsed data. |
+
Other stylers: style_file
,
+ style_pkg
, style_text
Performs various substitutions in the .R
file specified.
+Carefully examine the results after running this function!
style_file(path, flat = FALSE, transformers = get_transformers(flat = flat))+ +
path | +A path to a file to style. |
+
---|---|
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
transformers | +A list with functions to be applied to the parsed data. |
+
Other stylers: style_dir
,
+ style_pkg
, style_text
Performs various substitutions in all .R
files in a package
+(code and tests).
+Carefully examine the results after running this function!
style_pkg(pkg = ".", flat = FALSE, transformers = get_transformers(flat = + flat))+ +
pkg | +Path to a (subdirectory of an) R package |
+
---|---|
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
transformers | +A list with functions to be applied to the parsed data. |
+
Other stylers: style_dir
,
+ style_file
, style_text
Performs various substitutions in all .R
files in a directory.
+Carefully examine the results after running this function!
style_src(path = ".", transformers = get_transformers(), recursive = TRUE)+ +
path | +Path to a directory with files to transform. |
+
---|---|
transformers | +A list of transformer functions to be applied to the
+files in |
+
recursive | +A logical value indicating whether or not files in subdirectories
+of |
+
Styles a character vector
+ + +style_text(text, flat = FALSE, transformers = get_transformers(flat = flat))+ +
text | +A character vector with text to style. |
+
---|---|
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
transformers | +A list with functions to be applied to the parsed data. |
+
Other stylers: style_dir
,
+ style_file
, style_pkg
Run transformations on all *-in.R files in a test directory and compare them +with their *-out.R counterpart.
+ + +test_collection(test, sub_test = NULL, write_back = TRUE, + write_tree = TRUE, transformer, ...)+ +
test | +The test to run. It corresponds to a folder name in +tests/testthat. |
+
---|---|
sub_test | +A regex pattern to further reduce the amount of test files
+to be tested in the test. |
+
write_back | +Whether the results of the transformation should be written +to the output file. |
+
write_tree | +Whether or not the tree structure of the test should be +computed and written to a file. |
+
transformer | +A function to apply to the content of |
+
... | +Parameters passed to transformer function. |
+
Each file name that matches test
and sub_test
and ends with
+"-in.R" is considered as an input to test. Its counterpart,
+the reference to compare it against is the *-out.R file. It is constructed
+by taking the substring of the *-in.R file before the
+first dash and adding -out.R. This allows for multiple in.R files to
+share one out.R file. You could have one_line-out.R as the reference to
+compare one_line-random-something-stuff-in.R and
+one_line-random-but-not-so-much-in.R.
This also implies that -out.R files cannot have more than one dash in +their name, i.e. just the one before out.R.
+ + +These functions can be used as inputs for test_collection()
and
+transform_and_check()
.
style_indent_round(text) + +style_empty(text) + +style_indent_curly(text) + +style_indent_curly_round(text) + +style_op(text)+ +
text | +A character vector to transform. |
+
---|
As inputs for test_collection()
, we can also use top-level functions such
+as style_text()
.
style_indent_round
: Transformations for indention based on round
+brackets.
style_empty
: Nest and unnest text
without applying any
+transformations but remove EOL spaces and indention due to the way the
+serialization is set up.
style_indent_curly
: Transformations for indention based on curly
+brackets only.
style_indent_curly_round
: Transformations for indention based on curly
+brackets and round brackets.
style_op
: Transformations for indention based on operators
A token is a multi-line expression if and only if:
+ + +token_is_multi_line(pd)+ +
pd | +A parse table. |
+
---|
it contains a line break.
it has at least one child that is a multi-line expression itself.
utils::getParseData()
is used to obtain a flat parse table from text
.
tokenize(text)+ +
text | +A character vector. |
+
---|
A flat parse table
+ + +Transform an file and check whether it is identical to a reference.
+ + +transform_and_check(in_item, out_item, in_name = in_item, + out_name = out_item, transformer, write_back, write_tree = FALSE, + out_tree = "_tree", ...)+ +
in_item | +An path to an file to transform. |
+
---|---|
out_item | +The path to a file that contains the expected result. |
+
in_name | +The label of the in_item, defaults to |
+
out_name | +The label of the out_item, defaults to |
+
transformer | +A function to apply to the content of |
+
write_back | +Whether the results of the transformation should be written +to the output file. |
+
write_tree | +Whether or not the tree structure of the test should be +computed and written to a file. |
+
out_tree | +Name of tree file if written out. |
+
... | +Parameters passed to transformer function. |
+
transform_files
applies transformations to file contents and writes back
+the result.
transform_files(files, transformers, flat)+ +
files | +A character vector with paths to the file that should be +transformed. |
+
---|---|
transformers | +A list of transformer functions that operate on flat +parse tables. |
+
flat | +Whether to do the styling with a flat approach or with a nested +approach. |
+
A logical value that indicates whether or not any file was changed is +returned invisibly. If files were changed, the user is informed to +carefully inspect the changes via a message sent to the console.
+ + +Update indention information of parse data
+ + +indent_round(pd, indent_by) + +indent_curly(pd, indent_by) + +indent_op(pd, indent_by, token = c(math_token, "SPECIAL-PIPE")) + +indent_assign(pd, indent_by, token = c("LEFT_ASSIGN", + "\n EQ_ASSIGN")) + +indent_without_paren(pd, indent_by = 2)+ +
pd | +A nested or flat parse table that is already enhanced with
+line break and space information via |
+
---|---|
indent_by | +How many spaces should be added after the token of interest. |
+
token | +The token the indention should be based on. |
+
indent_round
: Inserts indetion based on round brackets.
indent_assign
: Same as indent_op, but only indents one token
+after token
, not all remaining.
indent_without_paren
: Is used to indent if / while / for statements
+that do not have curly brackets.
These functions apply the update functions of the same name but without +suffix nested to each level of nesting of the nested parse table.
+ + +indent_round_nested(pd)+ +
pd | +A nested parse table that is already enhanced with +line break and space information via create_filler_nested. |
+
---|
Check whether serializing the parse data results in the same +number of lines as the initial data that should be styled.
+ + +verify_roundtrip(pd_flat, text)+ +
pd_flat | +A parse table. |
+
---|---|
text | +A character vector with the initial text to compare against. |
+
If the verification is successful, pd
is returned, with empty
+lines at the end of text
stripped.
+Otherwise, an error is thrown.
Apply a list of functions to each level in a nested parse table.
+pre_visit()
applies funs
before it preceeds to the children,
+(that is, starts from the outermost level of nesting progressing
+to the innermost level), post_visit()
preceeds to its children
+before applying the functions (meaning it first applies the functions
+to the innermost level of nesting first and then going outwards).
pre_visit(pd_nested, funs) + +post_visit(pd_nested, funs)+ +
pd_nested | +A nested parse table. |
+
---|---|
funs | +A list of transformer functions. |
+
Other visitors: visit_one
Uses purrr::reduce()
to apply each function of funs
sequentially to
+pd_flat
.
visit_one(pd_flat, funs)+ +
pd_flat | +A flat parse table. |
+
---|---|
funs | +A list of transformer functions. |
+
Other visitors: visit