r-lib · lorenzwalthert · Aug 7, 2017 · Jul 30, 2017 · Jul 30, 2017 · Jul 30, 2017
diff --git a/R/nested.R b/R/nested.R
@@ -101,18 +101,20 @@ add_terminal_token_before <- function(pd_flat) {
 #'
 #' @param spaces_after_prefix An integer vector with the number of spaces
 #'   after the prefix.
+#' @param text_length Integer vector giving the number of characters of
+#'   the text.
 #' @param force_one Whether spaces_after_prefix should be set to one in all
 #'   cases.
 #' @return An integer vector of length spaces_after_prefix, which is either
 #'   one (if `force_one = TRUE`) or `space_after_prefix` with all values
 #'   below one set to one.
-set_spaces <- function(spaces_after_prefix, force_one) {
+set_spaces <- function(spaces_after_prefix, text_length, force_one) {
   if (force_one) {
     n_of_spaces <- rep(1, length(spaces_after_prefix))
   } else {
     n_of_spaces <- pmax(spaces_after_prefix, 1L)
   }
-  n_of_spaces
+  ifelse(text_length > 0, n_of_spaces, 0)
 }
 
 #' Nest a flat parse table

diff --git a/R/parsed.R b/R/parsed.R
@@ -88,7 +88,7 @@ create_filler <- function(pd_flat) {
   pd_flat$line3 <- lead(pd_flat$line1, default = tail(pd_flat$line2, 1))
   pd_flat$col3 <- lead(pd_flat$col1, default = tail(pd_flat$col2, 1) + 1L)
   pd_flat$newlines <- pd_flat$line3 - pd_flat$line2
-  pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0)
+  pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0L)
   pd_flat$col2_nl <- if_else(pd_flat$newlines > 0L, 0L, pd_flat$col2)
   pd_flat$spaces <- pd_flat$col3 - pd_flat$col2_nl - 1L
   pd_flat$multi_line <- ifelse(pd_flat$terminal, FALSE, NA)

diff --git a/R/rules-other.R b/R/rules-other.R
@@ -7,10 +7,10 @@ add_brackets_in_pipe <- function(pd) {
              lag_newlines = rep(0, 2),
              terminal = rep(TRUE, 2),
              spaces = rep(0, 2),
-             line1 = pd$line2[has_no_brackets] + 1:2,
+             line1 = pd$line1[has_no_brackets],
              line2 = line1,
              col1 = pd$col1[has_no_brackets],
-             col2 = col1,
+             col2 = col1 + 1:2,
              indent = rep(0, 2),
              child = rep(list(NULL), 2)
          )

diff --git a/R/rules-spacing.R b/R/rules-spacing.R
@@ -136,9 +136,9 @@ set_space_between_levels <- function(pd_flat) {
 #' Start comments with a space
 #'
 #' Forces comments to start with a space, that is, after the regular expression
-#'   "^#+'*", at least one space must follow. Multiple spaces may be legit for
-#'   indention in some situations.
-#'
+#'   "^#+'*", at least one space must follow if the comment is *non-empty*, i.e.
+#'   there are not just spaces within the comment. Multiple spaces may be legit
+#'   for indention in some situations.
 #' @param pd A parse table.
 #' @param force_one Wheter or not to force one space or allow multiple spaces
 #'   after the regex "^#+'*".
@@ -156,7 +156,8 @@ start_comments_with_space <- function(pd, force_one = FALSE) {
                        regex = "^(#+'*)( *)(.*)$")
   comments$space_after_prefix <- nchar(comments$space_after_prefix)
   comments$space_after_prefix <- set_spaces(
-    comments$space_after_prefix,
+    spaces_after_prefix = comments$space_after_prefix,
+    text_length = nchar(trimws(comments$text, "right")),
     force_one
   )
 

diff --git a/R/serialize.R b/R/serialize.R
@@ -69,3 +69,22 @@ serialize_parse_data_flat <- function(pd_flat) {
     .[[1L]]
 }
 
+#' Serialize flattened parse data
+#'
+#' Collapses a flattened parse table into a character vector representation.
+#'   For every token, the result of `add_newlines()` on its `lag_newlines`,
+#'   the result of `add_spaces()` on its `lag_spaces` and its `text` are
+#'   pasted together into one string, which is then split at `"\n"`.
+#' @param flattened_pd A flattened parse table. The columns `line1`,
+#'   `lag_newlines`, `lag_spaces` and `text` are used.
+#' @return A character vector obtained by splitting the serialized text at
+#'   line breaks.
+serialize_parse_data_flattened <- function(flattened_pd) {
+  # Overwrite `lag_newlines` of the first token with `line1[1] - 1`, i.e. the
+  # number of lines that precede the line the first token starts on.
+  flattened_pd$lag_newlines[1] <- flattened_pd$line1[1] - 1
+  flattened_pd %>%
+    summarize_(
+      text_ws = ~paste0(
+        map(lag_newlines, add_newlines),
+        map(lag_spaces, add_spaces),
+        text,
+        collapse = "")) %>%
+    .[["text_ws"]] %>%
+    strsplit("\n", fixed = TRUE) %>%
+    .[[1L]]
+
+
+}
diff --git a/R/serialized_tests.R b/R/serialized_tests.R
@@ -190,11 +190,8 @@ style_indent_curly_round <- function(text) {
     NULL
   )
 
-  text %>%
-    compute_parse_data_nested() %>%
-    apply_transformers(transformers) %>%
-    serialize_parse_data_nested()
-
+  transformed_text <- parse_transform_serialize(text, transformers)
+  transformed_text
 }
 
 #' @describeIn test_transformer Transformations for indention based on operators

diff --git a/R/transform.R b/R/transform.R
@@ -17,7 +17,6 @@ transform_files <- function(files, transformers, flat) {
   }
   invisible(changed)
 }
-
 #' Closure to return a transformer function
 #'
 #' This function takes a list of transformer functions as input and
@@ -86,7 +85,10 @@ parse_transform_serialize <- function(text, transformers) {
   pd_nested <- compute_parse_data_nested(text)
   transformed_pd <- apply_transformers(pd_nested, transformers)
   # TODO verify_roundtrip
-  serialized_transformed_text <- serialize_parse_data_nested(transformed_pd)
+  flattened_pd <- post_visit(transformed_pd, list(extract_terminals)) %>%
+    enrich_terminals()
+
+  serialized_transformed_text <- serialize_parse_data_flattened(flattened_pd)
   serialized_transformed_text
 }
 
@@ -113,5 +115,12 @@ apply_transformers <- function(pd_nested, transformers) {
 
   transformed_all <- pre_visit(transformed_updated_multi_line,
                                c(transformers$space, transformers$token))
-  transformed_all
+
+  transformed_absolute_indent <- context_to_terminals(transformed_all,
+                                                      outer_lag_newlines = 0,
+                                                      outer_indent = 0,
+                                                      outer_spaces = 0)
+
+  transformed_absolute_indent
+
 }
diff --git a/R/visit.R b/R/visit.R
@@ -43,3 +43,127 @@ visit_one <- function(pd_flat, funs) {
   reduce(funs, function(x, fun) fun(x),
          .init = pd_flat)
 }
+
+
+#' Propagate context to terminals
+#'
+#' Implements a very specific pre-visiting scheme, namely to propagate
+#'   indention, spaces and lag_newlines from the outer tokens through the
+#'   inner tokens down to the terminals. This means that information regarding
+#'   indention, line breaks and spaces (which is relative in `pd_nested`) will
+#'   be converted into absolute.
+#' @inherit context_towards_terminals
+#' @seealso context_towards_terminals visitors
+context_to_terminals <- function(pd_nested,
+                                 outer_lag_newlines,
+                                 outer_indent,
+                                 outer_spaces) {
+
+  # A `NULL` parse table (no child to descend into) ends the recursion; a bare
+  # `return()` yields `NULL`.
+  if (is.null(pd_nested)) return()
+
+  # First apply the outer context to this level ...
+  pd_transformed <- context_towards_terminals(
+    pd_nested, outer_lag_newlines, outer_indent, outer_spaces
+  )
+
+  # ... then recurse into every child. The list elements are matched by
+  # position, so each child receives the already updated `lag_newlines`,
+  # `indent` and `spaces` of its own row as outer context.
+  pd_transformed$child <- pmap(list(pd_transformed$child,
+                                    pd_transformed$lag_newlines,
+                                    pd_transformed$indent,
+                                    pd_transformed$spaces),
+                               context_to_terminals)
+  pd_transformed
+}
+
+
+#' Update a parse table given outer context
+#'
+#' `outer_lag_newlines` are added to the first token in `pd_nested`,
+#'   `outer_indent` is added to all tokens in `pd_nested`, `outer_spaces` is
+#'   added to the last token in `pd_nested`. [context_to_terminals()] calls
+#'   this function repeatedly, which propagates the parse information to the
+#'   terminal tokens.
+#' @param pd_nested A nested parse table.
+#' @param outer_lag_newlines The lag_newlines to be propagated inwards.
+#' @param outer_indent The indention depth to be propagated inwards.
+#' @param outer_spaces The number of spaces to be propagated inwards.
+#' @return An updated parse table.
+#' @seealso context_to_terminals
+context_towards_terminals <- function(pd_nested,
+                                      outer_lag_newlines,
+                                      outer_indent,
+                                      outer_spaces) {
+  # Indention applies to every token of this level.
+  pd_nested$indent <- pd_nested$indent + outer_indent
+  # Line breaks before the outer token belong to the first token only.
+  pd_nested$lag_newlines[1] <- pd_nested$lag_newlines[1] + outer_lag_newlines
+  # Spaces after the outer token belong to the last token only.
+  pd_nested$spaces[nrow(pd_nested)] <-
+    pd_nested$spaces[nrow(pd_nested)] + outer_spaces
+  pd_nested
+}
+
+#' Extract terminal tokens
+#'
+#' Turns a nested parse table into a flat parse table and extracts *all*
+#' attributes. Rows that are terminals are kept as they are, every other row
+#' is replaced by its `child`.
+#' @param pd_nested A nested parse table.
+#' @return The combined rows as returned by `bind_rows()`, or `NULL` if
+#'   `pd_nested` is `NULL`.
+extract_terminals <- function(pd_nested) {
+  # Nothing to flatten: hand `NULL` straight back.
+  if (is.null(pd_nested)) return(NULL)
+  # One single-row table per token, so `ifelse()` can pick per row between
+  # the token itself and its child.
+  pd_split <- split(pd_nested, seq_len(nrow(pd_nested)))
+  bind_rows(ifelse(pd_nested$terminal, pd_split, pd_nested$child))
+}
+
+
+#' Enrich flattened parse table
+#'
+#' Enriches a flattened parse table with terminals only. In particular, it is
+#'   possible to compute the exact position a token will have (line and column)
+#'   when it will be serialized.
+#' @details Since we have only terminal tokens now, the line on which a token
+#'  starts will also be the line on which it ends. We call `line1` the line on
+#'  which the token starts. `line1` has the same meaning as `line1` that can be
+#'  found in a flat parse table (see [tokenize()]), just that the `line1`
+#'  created by `enrich_terminals()` is the updated version of the former
+#'  `line1`. The same applies for `col1` and `col2`.
+#' @inheritParams choose_indention
+#' @return The flattened parse table with the columns `lag_spaces`,
+#'   `newlines` and `nchar` added, `line1`, `col1` and `col2` recomputed and
+#'   `indent` removed (by [choose_indention()]).
+enrich_terminals <- function(flattened_pd, use_raw_indention = FALSE) {
+  # `spaces` shifted by one token; the first token gets `0L`.
+  flattened_pd$lag_spaces <- lag(flattened_pd$spaces, default = 0L)
+  flattened_pd <- choose_indention(flattened_pd, use_raw_indention)
+  # Line of each token: line of the first token plus all line breaks so far.
+  flattened_pd$line1 <-
+    cumsum(flattened_pd$lag_newlines) + flattened_pd$line1[1]
+
+  # `lag_newlines` of the following token; the last token gets `0L`.
+  flattened_pd$newlines <- lead(flattened_pd$lag_newlines, default = 0L)
+  flattened_pd$nchar <- nchar(flattened_pd$text, type = "width")
+  # Within each line, a token ends after the accumulated widths and preceding
+  # spaces of all tokens up to and including itself. `nchar` refers to the
+  # column created above here, not to the function.
+  flattened_pd <- flattened_pd %>%
+    group_by(line1) %>%
+    mutate(col2 = cumsum(nchar + lag_spaces)) %>%
+    ungroup()
+  flattened_pd$col1 <- flattened_pd$col2 - flattened_pd$nchar
+  flattened_pd
+}
+
+#' Choose the indention method for the tokens
+#'
+#' Either use the raw indention, which is just the spaces computed between
+#'   the first token on a new line and the token before it, or use the indention
+#'   computed according to the transformer used, which is stored in the column
+#'   `indent`.
+#'
+#'  All indention information will be combined with the space information for
+#'  the first token on a new line.
+#'  If `use_raw_indention` is set, information in the column `indent` will
+#'  be discarded anyway. If it is not set, the first token on a new line will
+#'  "inherit" the indention of the whole line.
+#'  The column `indent` will be removed since all information necessary is
+#'  contained in the spacing information of the first token on a new line and
+#'  the position of the tokens will not be changed anymore at this stage.
+#' @param flattened_pd A nested parse table that was turned into a flat parse
+#'   table using [extract_terminals()].
+#' @param use_raw_indention Boolean indicating whether or not the raw indention
+#'   should be used.
+#' @return `flattened_pd` without the column `indent` and, unless
+#'   `use_raw_indention` is set, with `lag_spaces` replaced by `indent` for
+#'   every token that has `lag_newlines > 0`.
+choose_indention <- function(flattened_pd, use_raw_indention) {
+  if (!use_raw_indention) {
+    # Tokens preceded by a line break take their leading spaces from `indent`;
+    # all other tokens keep their `lag_spaces`.
+    flattened_pd$lag_spaces <- ifelse(flattened_pd$lag_newlines > 0,
+                                      flattened_pd$indent,
+                                      flattened_pd$lag_spaces)
+  }
+  flattened_pd$indent <- NULL
+  flattened_pd
+}
+
+
diff --git a/man/choose_indention.Rd b/man/choose_indention.Rd
diff --git a/man/context_to_terminals.Rd b/man/context_to_terminals.Rd
diff --git a/man/context_towards_terminals.Rd b/man/context_towards_terminals.Rd
diff --git a/man/enrich_terminals.Rd b/man/enrich_terminals.Rd