From ef1ba00961b0b108f6275c7dec8437671300e616 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sun, 30 Jul 2017 21:17:26 +0200 Subject: [PATCH 01/17] Propagate context to terminals Uses a specialised visitor-like approach. --- R/visit.R | 56 ++++++++++++++++++++++++++++++++ man/context_to_terminals.Rd | 30 +++++++++++++++++ man/context_towards_terminals.Rd | 31 ++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 man/context_to_terminals.Rd create mode 100644 man/context_towards_terminals.Rd diff --git a/R/visit.R b/R/visit.R index 1d984d58d..e72417133 100644 --- a/R/visit.R +++ b/R/visit.R @@ -43,3 +43,59 @@ visit_one <- function(pd_flat, funs) { reduce(funs, function(x, fun) fun(x), .init = pd_flat) } + + +#' Propagate context to terminals +#' +#' Implements a very specific pre-visiting scheme, namely to propagate +#' indention, spaces and lag_newlines to inner token to terminals. This means +#' that information regarding indention, linebreaks and spaces (which is +#' relative in `pd_nested`) will be converted into absolute. +#' @inherit context_towards_terminals +#' @seealso context_towards_terminals visitors +context_to_terminals <- function(pd_nested, + passed_lag_newlines, + passed_indent, + passed_spaces) { + + if (is.null(pd_nested)) return() + + pd_transformed <- context_towards_terminals( + pd_nested, passed_lag_newlines, passed_indent, passed_spaces + ) + + pd_transformed$child <- pmap(list(pd_transformed$child, + pd_transformed$lag_newlines, + pd_transformed$indent, + pd_transformed$spaces), + context_to_terminals) + pd_transformed +} + + +#' Update the a parse table given outer context +#' +#' `passed_lag_newlines` are added to the first token in `pd`, +#' `passed_indent` is added to all tokens in `pd`, `passed_spaces` is added to +#' the last token in `pd`. [context_to_terminals()] calls this function +#' repeatedly, which means the propagation of the parse information to the +#' terminal tokens. +#' @param pd_nested A nested parse table. +#' @param passed_lag_newlines The lag_newlines to be propagated inwards. +#' @param passed_indent The indention depth to be propagated inwards. +#' @param passed_spaces The number of spaces to be propagated inwards. +#' @return An updated parse table. +#' @seealso context_to_terminals +context_towards_terminals <- function(pd_nested, + passed_lag_newlines, + passed_indent, + passed_spaces) { + pd_nested$indent <- pd_nested$indent + passed_indent + pd_nested$lag_newlines[1] <- pd_nested$lag_newlines[1] + passed_lag_newlines + pd_nested$spaces[nrow(pd_nested)] <- + pd_nested$spaces[nrow(pd_nested)] + passed_spaces + pd_nested +} + + + diff --git a/man/context_to_terminals.Rd b/man/context_to_terminals.Rd new file mode 100644 index 000000000..17d04f54b --- /dev/null +++ b/man/context_to_terminals.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visit.R +\name{context_to_terminals} +\alias{context_to_terminals} +\title{Propagate context to terminals} +\usage{ +context_to_terminals(pd_nested, passed_lag_newlines, passed_indent, + passed_spaces) +} +\arguments{ +\item{pd_nested}{A nested parse table.} + +\item{passed_lag_newlines}{The lag_newlines to be propagated inwards.} + +\item{passed_indent}{The indention depth to be propagated inwards.} + +\item{passed_spaces}{The number of spaces to be propagated inwards.} +} +\value{ +An updated parse table. +} +\description{ +Implements a very specific pre-visiting scheme, namely to propagate +indention, spaces and lag_newlines to inner token to terminals. This means +that information regarding indention, linebreaks and spaces (which is +relative in \code{pd_nested}) will be converted into absolute. +} +\seealso{ +context_towards_terminals visitors +} diff --git a/man/context_towards_terminals.Rd b/man/context_towards_terminals.Rd new file mode 100644 index 000000000..eeb041baf --- /dev/null +++ b/man/context_towards_terminals.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visit.R +\name{context_towards_terminals} +\alias{context_towards_terminals} +\title{Update the a parse table given outer context} +\usage{ +context_towards_terminals(pd_nested, passed_lag_newlines, passed_indent, + passed_spaces) +} +\arguments{ +\item{pd_nested}{A nested parse table.} + +\item{passed_lag_newlines}{The lag_newlines to be propagated inwards.} + +\item{passed_indent}{The indention depth to be propagated inwards.} + +\item{passed_spaces}{The number of spaces to be propagated inwards.} +} +\value{ +An updated parse table. +} +\description{ +\code{passed_lag_newlines} are added to the first token in \code{pd}, +\code{passed_indent} is added to all tokens in \code{pd}, \code{passed_spaces} is added to +the last token in \code{pd}. \code{\link[=context_to_terminals]{context_to_terminals()}} calls this function +repeatedly, which means the propagation of the parse information to the +terminal tokens. +} +\seealso{ +context_to_terminals +} From f48fb9dd342f74b38e9778f0d27740afc5bb0c38 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sun, 30 Jul 2017 21:19:55 +0200 Subject: [PATCH 02/17] Extract terminals from nested parse table. --- NAMESPACE | 3 ++ R/visit.R | 49 +++++++++++++++++++++++++++++++++ man/extract_terminals.Rd | 27 ++++++++++++++++++ man/extract_terminals_helper.Rd | 14 ++++++++++ 4 files changed, 93 insertions(+) create mode 100644 man/extract_terminals.Rd create mode 100644 man/extract_terminals_helper.Rd diff --git a/NAMESPACE b/NAMESPACE index 800d1094b..bfbdcbf5c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,4 +21,7 @@ importFrom(purrr,pmap) importFrom(purrr,pwalk) importFrom(purrr,reduce) importFrom(purrr,when) +importFrom(readr,col_integer) +importFrom(readr,cols) +importFrom(readr,type_convert) importFrom(utils,write.table) diff --git a/R/visit.R b/R/visit.R index e72417133..3576aec56 100644 --- a/R/visit.R +++ b/R/visit.R @@ -97,5 +97,54 @@ context_towards_terminals <- function(pd_nested, pd_nested } +#' Extract terminal tokens +#' +#' Turns a nested parse table into a flat parse table. In particular it extracts +#' terminal tokens and the following attributes: +#' +#' * lag_newlines +#' * indent +#' * token +#' * text +#' * spaces +#' * id +#' * parent +#' * line1 +#' @inheritParams extract_terminals_helper +#' @importFrom readr type_convert col_integer cols +extract_terminals <- function(pd_nested) { + flat_vec <- extract_terminals_helper(pd_nested) %>% + unlist() + nms <- list( + NULL, + c("lag_newlines", "indent", "token", "text", "spaces", "id", "parent", "line1") + ) + flat_tbl <- matrix(flat_vec, ncol = length(nms[[2]]), byrow = TRUE, dimnames = nms) %>% + as_tibble() %>% + type_convert( + col_types = cols( + lag_newlines = col_integer(), + indent = col_integer(), + spaces = col_integer() + ) + ) +} +#' Helper to extract terminals +#' +#' @param pd_nested A nested parse table. +extract_terminals_helper <- function(pd_nested) { + if (is.null(pd_nested)) return(pd) + pmap(list(pd_nested$terminal, pd_nested$token, pd_nested$text, + pd_nested$lag_newlines, pd_nested$spaces, pd_nested$indent, + pd_nested$id, pd_nested$parent, pd_nested$line1, pd_nested$child), + function(terminal, token, text, lag_newlines, spaces, indent, id, + parent, line1, child) { + if (terminal) { + c(lag_newlines, indent, token, text, spaces, id, parent, line1) + } else { + extract_terminals_helper(child) + } + }) +} diff --git a/man/extract_terminals.Rd b/man/extract_terminals.Rd new file mode 100644 index 000000000..ec123b346 --- /dev/null +++ b/man/extract_terminals.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visit.R +\name{extract_terminals} +\alias{extract_terminals} +\title{Extract terminal tokens} +\usage{ +extract_terminals(pd_nested) +} +\arguments{ +\item{pd_nested}{A nested parse table.} +} +\description{ +Turns a nested parse table into a flat parse table. In particular it extracts +terminal tokens and the following attributes: +} +\details{ +\itemize{ +\item lag_newlines +\item indent +\item token +\item text +\item spaces +\item id +\item parent +\item line1 +} +} diff --git a/man/extract_terminals_helper.Rd b/man/extract_terminals_helper.Rd new file mode 100644 index 000000000..76a215f09 --- /dev/null +++ b/man/extract_terminals_helper.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visit.R +\name{extract_terminals_helper} +\alias{extract_terminals_helper} +\title{Helper to extract terminals} +\usage{ +extract_terminals_helper(pd_nested) +} +\arguments{ +\item{pd_nested}{A nested parse table.} +} +\description{ +Helper to extract terminals +} From 75cab06cc8c48fb02667b2331886839ae448241f Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sun, 30 Jul 2017 21:21:25 +0200 Subject: [PATCH 03/17] enrich flattened parse table with line, col etc. --- R/visit.R | 49 +++++++++++++++++++++++++++++++++++++++++ man/choose_indention.Rd | 31 ++++++++++++++++++++++++++ man/enrich_terminals.Rd | 20 +++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 man/choose_indention.Rd create mode 100644 man/enrich_terminals.Rd diff --git a/R/visit.R b/R/visit.R index 3576aec56..4f9c4938f 100644 --- a/R/visit.R +++ b/R/visit.R @@ -148,3 +148,52 @@ extract_terminals_helper <- function(pd_nested) { }) } +#' Enrich flattened parse table +#' +#' Enriches a flattened parse table with terminals only. In particular, it is +#' possible to compute the line on which a token occurs and compute the exact +#' position a token will have on that line if it would be serialized. +#' @inheritParams choose_indention +enrich_terminals <- function(flattened_pd, use_raw_indention = FALSE) { + flattened_pd$lag_spaces <- lag(flattened_pd$spaces, default = 0) + flattened_pd <- choose_indention(flattened_pd, use_raw_indention) + flattened_pd$line <- cumsum(flattened_pd$lag_newlines) + 1 + flattened_pd$newlines <- lead(flattened_pd$lag_newlines, default = 0) + flattened_pd$nchar <- nchar(flattened_pd$text) + flattened_pd$nr <- seq_len(nrow(flattened_pd)) + flattened_pd %>% + group_by(line) %>% + mutate(col = cumsum(nchar + lag_spaces)) %>% + ungroup() +} + +#' Choose the indention method for the tokens +#' +#' Either use the raw indention, which is just the spaces computed between +#' the first token on a new line and the token before it, or use the indention +#' computed according to the transformer used, which is stored in the column +#' `indention`. +#' +#' All indention information will be combined with the space information for +#' the first token on a new line. +#' If `use_raw_indention` is set, information in the column `indention` will +#' be discarded anyways. If it is not set, the first token on a new line will +#' "inherit" the indention of the whole line. +#' The column `indention` will be removed since all information necessary is +#' containted in the spacing information of the first token on a new line and +#' the position of the tokens will not be changed anymore at this stage. +#' @param flattened_pd A nested parse table that was turned into a flat parse +#' table using [extract_terminals()]. +#' @param use_raw_indention Boolean indicating wheter or not the raw indention +#' should be used. +choose_indention <- function(flattened_pd, use_raw_indention) { + if (!use_raw_indention) { + flattened_pd$lag_spaces <- ifelse(flattened_pd$lag_newlines > 0, + flattened_pd$indent, + flattened_pd$lag_spaces) + } + flattened_pd$indention <- NULL + flattened_pd +} + + diff --git a/man/choose_indention.Rd b/man/choose_indention.Rd new file mode 100644 index 000000000..00b74f791 --- /dev/null +++ b/man/choose_indention.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visit.R +\name{choose_indention} +\alias{choose_indention} +\title{Choose the indention method for the tokens} +\usage{ +choose_indention(flattened_pd, use_raw_indention) +} +\arguments{ +\item{flattened_pd}{A nested parse table that was turned into a flat parse +table using \code{\link[=extract_terminals]{extract_terminals()}}.} + +\item{use_raw_indention}{Boolean indicating wheter or not the raw indention +should be used.} +} +\description{ +Either use the raw indention, which is just the spaces computed between +the first token on a new line and the token before it, or use the indention +computed according to the transformer used, which is stored in the column +\code{indention}. +} +\details{ +All indention information will be combined with the space information for +the first token on a new line. +If \code{use_raw_indention} is set, information in the column \code{indention} will +be discarded anyways. If it is not set, the first token on a new line will +"inherit" the indention of the whole line. +The column \code{indention} will be removed since all information necessary is +containted in the spacing information of the first token on a new line and +the position of the tokens will not be changed anymore at this stage. +} diff --git a/man/enrich_terminals.Rd b/man/enrich_terminals.Rd new file mode 100644 index 000000000..f91b87b51 --- /dev/null +++ b/man/enrich_terminals.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visit.R +\name{enrich_terminals} +\alias{enrich_terminals} +\title{Enrich flattened parse table} +\usage{ +enrich_terminals(flattened_pd, use_raw_indention = FALSE) +} +\arguments{ +\item{flattened_pd}{A nested parse table that was turned into a flat parse +table using \code{\link[=extract_terminals]{extract_terminals()}}.} + +\item{use_raw_indention}{Boolean indicating wheter or not the raw indention +should be used.} +} +\description{ +Enriches a flattened parse table with terminals only. In particular, it is +possible to compute the line on which a token occurs and compute the exact +position a token will have on that line if it would be serialized. +} From 36a87fbcdff1661985583b9eb00fd3f69f70c572 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sun, 30 Jul 2017 21:22:05 +0200 Subject: [PATCH 04/17] serialize flattened parse table --- R/serialize.R | 19 +++++++++++++++++++ man/serialize_parse_data_flattened.Rd | 14 ++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 man/serialize_parse_data_flattened.Rd diff --git a/R/serialize.R b/R/serialize.R index 6efe8d68f..46f98e626 100644 --- a/R/serialize.R +++ b/R/serialize.R @@ -69,3 +69,22 @@ serialize_parse_data_flat <- function(pd_flat) { .[[1L]] } +#' Serialize flattened parse data +#' +#' Collapses a flattened parse table into character vector representation. +#' @param flattened_pd A flattened parse table. +serialize_parse_data_flattened <- function(flattened_pd) { + flattened_pd$lag_newlines[1] <- flattened_pd$line1[1] - 1 + flattened_pd %>% + summarize_( + text_ws = ~paste0( + map(lag_newlines, add_newlines), + map(lag_spaces, add_spaces), + text, + collapse = "")) %>% + .[["text_ws"]] %>% + strsplit("\n", fixed = TRUE) %>% + .[[1L]] + + +} diff --git a/man/serialize_parse_data_flattened.Rd b/man/serialize_parse_data_flattened.Rd new file mode 100644 index 000000000..9c74ac706 --- /dev/null +++ b/man/serialize_parse_data_flattened.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/serialize.R +\name{serialize_parse_data_flattened} +\alias{serialize_parse_data_flattened} +\title{Serialize flattened parse data} +\usage{ +serialize_parse_data_flattened(flattened_pd) +} +\arguments{ +\item{flattened_pd}{A flattened parse table.} +} +\description{ +Collapses a flattened parse table into character vector representation. +} From 1d90bde483e5d909d7154eded450a1c543892ff2 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sun, 30 Jul 2017 22:17:54 +0200 Subject: [PATCH 05/17] putting it all together: actually call new functions --- R/serialized_tests.R | 7 ++----- R/transform.R | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/R/serialized_tests.R b/R/serialized_tests.R index a175e1fca..1456a9228 100644 --- a/R/serialized_tests.R +++ b/R/serialized_tests.R @@ -190,11 +190,8 @@ style_indent_curly_round <- function(text) { NULL ) - text %>% - compute_parse_data_nested() %>% - apply_transformers(transformers) %>% - serialize_parse_data_nested() - + transformed_text <- parse_transform_serialize(text, transformers) + transformed_text } #' @describeIn test_transformer Transformations for indention based on operators diff --git a/R/transform.R b/R/transform.R index 54e0de332..b925091c4 100644 --- a/R/transform.R +++ b/R/transform.R @@ -17,7 +17,6 @@ transform_files <- function(files, transformers, flat) { } invisible(changed) } - #' Closure to return a transformer function #' #' This function takes a list of transformer functions as input and @@ -86,7 +85,10 @@ parse_transform_serialize <- function(text, transformers) { pd_nested <- compute_parse_data_nested(text) transformed_pd <- apply_transformers(pd_nested, transformers) # TODO verify_roundtrip - serialized_transformed_text <- serialize_parse_data_nested(transformed_pd) + flattened_pd <- extract_terminals(transformed_pd) %>% + enrich_terminals() + + serialized_transformed_text <- serialize_parse_data_flattened(flattened_pd) serialized_transformed_text } @@ -113,5 +115,12 @@ apply_transformers <- function(pd_nested, transformers) { transformed_all <- pre_visit(transformed_updated_multi_line, c(transformers$space, transformers$token)) - transformed_all + + transformed_absolute_indent <- context_to_terminals(transformed_all, + passed_lag_newlines = 0, + passed_indent = 0, + passed_spaces = 0) + + transformed_absolute_indent + } From 70955276ad3af91cee80c93d53b2019546e51d97 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Thu, 3 Aug 2017 19:17:31 +0200 Subject: [PATCH 06/17] FIXME. Will be fixed in future commit --- .../testthat/line_breaks_and_other/curly-in.R | 1 + .../line_breaks_and_other/curly-in_tree | 217 +++++++++--------- .../line_breaks_and_other/curly-out.R | 3 +- 3 files changed, 112 insertions(+), 109 deletions(-) diff --git a/tests/testthat/line_breaks_and_other/curly-in.R b/tests/testthat/line_breaks_and_other/curly-in.R index 9780a787f..46f86379b 100644 --- a/tests/testthat/line_breaks_and_other/curly-in.R +++ b/tests/testthat/line_breaks_and_other/curly-in.R @@ -1,3 +1,4 @@ +# FIXME should have space after ). # { never on its own line if (y == 0) { diff --git a/tests/testthat/line_breaks_and_other/curly-in_tree b/tests/testthat/line_breaks_and_other/curly-in_tree index 7a31453b5..ff3fe7e44 100644 --- a/tests/testthat/line_breaks_and_other/curly-in_tree +++ b/tests/testthat/line_breaks_and_other/curly-in_tree @@ -1,113 +1,114 @@ ROOT (token: short_text [lag_newlines/spaces] {id}) - ¦--COMMENT: # { n [0/0] {1} - ¦--expr: [1/0] {39} - ¦ ¦--IF: if [0/1] {4} - ¦ ¦--'(': ( [0/0] {5} - ¦ ¦--expr: [0/0] {12} - ¦ ¦ ¦--expr: [0/1] {8} - ¦ ¦ ¦ °--SYMBOL: y [0/0] {6} - ¦ ¦ ¦--EQ: == [0/1] {7} - ¦ ¦ °--expr: [0/0] {10} - ¦ ¦ °--NUM_CONST: 0 [0/0] {9} - ¦ ¦--')': ) [0/0] {11} - ¦ ¦--expr: [1/1] {24} - ¦ ¦ ¦--'{': { [0/2] {15} - ¦ ¦ ¦--expr: [1/0] {18} - ¦ ¦ ¦ °--NUM_CONST: 1 [0/0] {17} - ¦ ¦ °--'}': } [1/0] {22} - ¦ ¦--ELSE: else [0/1] {25} - ¦ °--expr: [0/0] {36} - ¦ ¦--'{': { [0/2] {27} - ¦ ¦--expr: [1/0] {30} - ¦ ¦ °--NUM_CONST: 2 [0/0] {29} - ¦ °--'}': } [1/0] {34} - ¦--expr: [2/0] {73} - ¦ ¦--expr: [0/0] {46} - ¦ ¦ °--SYMBOL_FUNCTION_CALL: test_ [0/0] {44} - ¦ ¦--'(': ( [0/0] {45} + ¦--COMMENT: # FIX [0/0] {1} + ¦--COMMENT: # { n [1/0] {4} + ¦--expr: [1/0] {42} + ¦ ¦--IF: if [0/1] {7} + ¦ ¦--'(': ( [0/0] {8} + ¦ ¦--expr: [0/0] {15} + ¦ ¦ ¦--expr: [0/1] {11} + ¦ ¦ ¦ °--SYMBOL: y [0/0] {9} + ¦ ¦ ¦--EQ: == [0/1] {10} + ¦ ¦ °--expr: [0/0] {13} + ¦ ¦ °--NUM_CONST: 0 [0/0] {12} + ¦ ¦--')': ) [0/0] {14} + ¦ ¦--expr: [1/1] {27} + ¦ ¦ ¦--'{': { [0/2] {18} + ¦ ¦ ¦--expr: [1/0] {21} + ¦ ¦ ¦ °--NUM_CONST: 1 [0/0] {20} + ¦ ¦ °--'}': } [1/0] {25} + ¦ ¦--ELSE: else [0/1] {28} + ¦ °--expr: [0/0] {39} + ¦ ¦--'{': { [0/2] {30} + ¦ ¦--expr: [1/0] {33} + ¦ ¦ °--NUM_CONST: 2 [0/0] {32} + ¦ °--'}': } [1/0] {37} + ¦--expr: [2/0] {76} ¦ ¦--expr: [0/0] {49} - ¦ ¦ °--STR_CONST: "I am [0/0] {47} - ¦ ¦--',': , [0/10] {48} - ¦ ¦--expr: [1/0] {69} - ¦ ¦ ¦--'{': { [0/12] {53} - ¦ ¦ ¦--expr: [1/10] {63} - ¦ ¦ ¦ ¦--expr: [0/0] {57} - ¦ ¦ ¦ ¦ °--SYMBOL_FUNCTION_CALL: a_tes [0/0] {55} - ¦ ¦ ¦ ¦--'(': ( [0/0] {56} + ¦ ¦ °--SYMBOL_FUNCTION_CALL: test_ [0/0] {47} + ¦ ¦--'(': ( [0/0] {48} + ¦ ¦--expr: [0/0] {52} + ¦ ¦ °--STR_CONST: "I am [0/0] {50} + ¦ ¦--',': , [0/10] {51} + ¦ ¦--expr: [1/0] {72} + ¦ ¦ ¦--'{': { [0/12] {56} + ¦ ¦ ¦--expr: [1/10] {66} ¦ ¦ ¦ ¦--expr: [0/0] {60} - ¦ ¦ ¦ ¦ °--SYMBOL: x [0/0] {58} - ¦ ¦ ¦ °--')': ) [0/0] {59} - ¦ ¦ °--'}': } [1/0] {67} - ¦ °--')': ) [0/0] {70} - ¦--COMMENT: # A { [3/0] {81} - ¦--expr: [1/0] {105} - ¦ ¦--IF: if [0/1] {84} - ¦ ¦--'(': ( [0/0] {85} - ¦ ¦--expr: [0/0] {92} - ¦ ¦ ¦--expr: [0/1] {88} - ¦ ¦ ¦ °--SYMBOL: x [0/0] {86} - ¦ ¦ ¦--GT: > [0/1] {87} - ¦ ¦ °--expr: [0/0] {90} - ¦ ¦ °--NUM_CONST: 3 [0/0] {89} - ¦ ¦--')': ) [0/1] {91} - ¦ °--expr: [0/0] {102} - ¦ ¦--'{': { [0/1] {94} - ¦ ¦--expr: [0/0] {97} - ¦ ¦ °--STR_CONST: "x" [0/0] {95} - ¦ °--'}': } [1/0] {100} - ¦--COMMENT: # A } [2/0] {110} - ¦--expr: [1/0] {133} - ¦ ¦--IF: if [0/1] {113} - ¦ ¦--'(': ( [0/0] {114} - ¦ ¦--expr: [0/0] {121} - ¦ ¦ ¦--expr: [0/1] {117} - ¦ ¦ ¦ °--SYMBOL: x [0/0] {115} - ¦ ¦ ¦--GT: > [0/1] {116} - ¦ ¦ °--expr: [0/0] {119} - ¦ ¦ °--NUM_CONST: 3 [0/0] {118} - ¦ ¦--')': ) [0/1] {120} - ¦ °--expr: [0/0] {130} - ¦ ¦--'{': { [0/2] {123} - ¦ ¦--expr: [1/0] {127} - ¦ ¦ °--STR_CONST: "x" [0/0] {125} - ¦ °--'}': } [0/0] {126} - ¦--COMMENT: # ELS [2/0] {138} - ¦--expr: [1/0] {175} - ¦ ¦--IF: if [0/1] {141} - ¦ ¦--'(': ( [0/0] {142} - ¦ ¦--expr: [0/0] {149} - ¦ ¦ ¦--expr: [0/1] {144} - ¦ ¦ ¦ °--NUM_CONST: 1 [0/0] {143} - ¦ ¦ ¦--GT: > [0/1] {145} - ¦ ¦ °--expr: [0/0] {147} - ¦ ¦ °--NUM_CONST: 3 [0/0] {146} - ¦ ¦--')': ) [0/1] {148} - ¦ ¦--expr: [0/1] {160} - ¦ ¦ ¦--'{': { [0/2] {151} - ¦ ¦ ¦--expr: [1/0] {155} - ¦ ¦ ¦ °--STR_CONST: "x" [0/0] {153} - ¦ ¦ °--'}': } [1/0] {158} - ¦ ¦--ELSE: else [0/1] {161} - ¦ °--expr: [0/0] {172} - ¦ ¦--'{': { [0/2] {163} - ¦ ¦--expr: [1/0] {167} - ¦ ¦ °--STR_CONST: "y" [0/0] {165} - ¦ °--'}': } [1/0] {170} - °--expr: [2/0] {209} - ¦--expr: [0/0] {182} - ¦ °--SYMBOL_FUNCTION_CALL: test_ [0/0] {180} - ¦--'(': ( [0/0] {181} + ¦ ¦ ¦ ¦ °--SYMBOL_FUNCTION_CALL: a_tes [0/0] {58} + ¦ ¦ ¦ ¦--'(': ( [0/0] {59} + ¦ ¦ ¦ ¦--expr: [0/0] {63} + ¦ ¦ ¦ ¦ °--SYMBOL: x [0/0] {61} + ¦ ¦ ¦ °--')': ) [0/0] {62} + ¦ ¦ °--'}': } [1/0] {70} + ¦ °--')': ) [0/0] {73} + ¦--COMMENT: # A { [3/0] {84} + ¦--expr: [1/0] {108} + ¦ ¦--IF: if [0/1] {87} + ¦ ¦--'(': ( [0/0] {88} + ¦ ¦--expr: [0/0] {95} + ¦ ¦ ¦--expr: [0/1] {91} + ¦ ¦ ¦ °--SYMBOL: x [0/0] {89} + ¦ ¦ ¦--GT: > [0/1] {90} + ¦ ¦ °--expr: [0/0] {93} + ¦ ¦ °--NUM_CONST: 3 [0/0] {92} + ¦ ¦--')': ) [0/1] {94} + ¦ °--expr: [0/0] {105} + ¦ ¦--'{': { [0/1] {97} + ¦ ¦--expr: [0/0] {100} + ¦ ¦ °--STR_CONST: "x" [0/0] {98} + ¦ °--'}': } [1/0] {103} + ¦--COMMENT: # A } [2/0] {113} + ¦--expr: [1/0] {136} + ¦ ¦--IF: if [0/1] {116} + ¦ ¦--'(': ( [0/0] {117} + ¦ ¦--expr: [0/0] {124} + ¦ ¦ ¦--expr: [0/1] {120} + ¦ ¦ ¦ °--SYMBOL: x [0/0] {118} + ¦ ¦ ¦--GT: > [0/1] {119} + ¦ ¦ °--expr: [0/0] {122} + ¦ ¦ °--NUM_CONST: 3 [0/0] {121} + ¦ ¦--')': ) [0/1] {123} + ¦ °--expr: [0/0] {133} + ¦ ¦--'{': { [0/2] {126} + ¦ ¦--expr: [1/0] {130} + ¦ ¦ °--STR_CONST: "x" [0/0] {128} + ¦ °--'}': } [0/0] {129} + ¦--COMMENT: # ELS [2/0] {141} + ¦--expr: [1/0] {178} + ¦ ¦--IF: if [0/1] {144} + ¦ ¦--'(': ( [0/0] {145} + ¦ ¦--expr: [0/0] {152} + ¦ ¦ ¦--expr: [0/1] {147} + ¦ ¦ ¦ °--NUM_CONST: 1 [0/0] {146} + ¦ ¦ ¦--GT: > [0/1] {148} + ¦ ¦ °--expr: [0/0] {150} + ¦ ¦ °--NUM_CONST: 3 [0/0] {149} + ¦ ¦--')': ) [0/1] {151} + ¦ ¦--expr: [0/1] {163} + ¦ ¦ ¦--'{': { [0/2] {154} + ¦ ¦ ¦--expr: [1/0] {158} + ¦ ¦ ¦ °--STR_CONST: "x" [0/0] {156} + ¦ ¦ °--'}': } [1/0] {161} + ¦ ¦--ELSE: else [0/1] {164} + ¦ °--expr: [0/0] {175} + ¦ ¦--'{': { [0/2] {166} + ¦ ¦--expr: [1/0] {170} + ¦ ¦ °--STR_CONST: "y" [0/0] {168} + ¦ °--'}': } [1/0] {173} + °--expr: [2/0] {212} ¦--expr: [0/0] {185} - ¦ °--STR_CONST: "I am [0/0] {183} - ¦--',': , [0/1] {184} - ¦--expr: [0/0] {204} - ¦ ¦--'{': { [0/2] {188} - ¦ ¦--expr: [1/0] {198} - ¦ ¦ ¦--expr: [0/0] {192} - ¦ ¦ ¦ °--SYMBOL_FUNCTION_CALL: a_tes [0/0] {190} - ¦ ¦ ¦--'(': ( [0/0] {191} + ¦ °--SYMBOL_FUNCTION_CALL: test_ [0/0] {183} + ¦--'(': ( [0/0] {184} + ¦--expr: [0/0] {188} + ¦ °--STR_CONST: "I am [0/0] {186} + ¦--',': , [0/1] {187} + ¦--expr: [0/0] {207} + ¦ ¦--'{': { [0/2] {191} + ¦ ¦--expr: [1/0] {201} ¦ ¦ ¦--expr: [0/0] {195} - ¦ ¦ ¦ °--SYMBOL: x [0/0] {193} - ¦ ¦ °--')': ) [0/0] {194} - ¦ °--'}': } [1/0] {202} - °--')': ) [1/0] {206} + ¦ ¦ ¦ °--SYMBOL_FUNCTION_CALL: a_tes [0/0] {193} + ¦ ¦ ¦--'(': ( [0/0] {194} + ¦ ¦ ¦--expr: [0/0] {198} + ¦ ¦ ¦ °--SYMBOL: x [0/0] {196} + ¦ ¦ °--')': ) [0/0] {197} + ¦ °--'}': } [1/0] {205} + °--')': ) [1/0] {209} diff --git a/tests/testthat/line_breaks_and_other/curly-out.R b/tests/testthat/line_breaks_and_other/curly-out.R index dd3ff4d23..3aeac214e 100644 --- a/tests/testthat/line_breaks_and_other/curly-out.R +++ b/tests/testthat/line_breaks_and_other/curly-out.R @@ -1,5 +1,6 @@ +# FIXME should have space after ). # { never on its own line -if (y == 0) { +if (y == 0){ 1 } else { 2 From 069b81ae72ad88ce1154978e7d1ab557b6e8ea1a Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Tue, 1 Aug 2017 13:13:19 +0200 Subject: [PATCH 07/17] use line1 col1 / col2 instead of line and col. --- R/visit.R | 25 +++++++++++++++++-------- man/enrich_terminals.Rd | 12 ++++++++++-- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/R/visit.R b/R/visit.R index 4f9c4938f..5c797fbf6 100644 --- a/R/visit.R +++ b/R/visit.R @@ -151,20 +151,29 @@ extract_terminals_helper <- function(pd_nested) { #' Enrich flattened parse table #' #' Enriches a flattened parse table with terminals only. In particular, it is -#' possible to compute the line on which a token occurs and compute the exact -#' position a token will have on that line if it would be serialized. +#' possible to compute the exact position a token will have (line and column) +#' when it will be serialized. +#' @details Since we have only terminal tokens now, the line on which a token +#' starts we also be the line on which it ends. We call `line1` the line on +#' which the token starts. `line1` has the same meaning as `line1` that can be +#' found in a flat parse table (see [tokenize()]), just that the `line1` +#' created by `enrich_terminals()` is the updated version of the former +#' `line1`. The same applies for `col1` and `col2`. #' @inheritParams choose_indention enrich_terminals <- function(flattened_pd, use_raw_indention = FALSE) { flattened_pd$lag_spaces <- lag(flattened_pd$spaces, default = 0) flattened_pd <- choose_indention(flattened_pd, use_raw_indention) - flattened_pd$line <- cumsum(flattened_pd$lag_newlines) + 1 + flattened_pd$line1 <- + cumsum(flattened_pd$lag_newlines) + flattened_pd$line1[1] + flattened_pd$newlines <- lead(flattened_pd$lag_newlines, default = 0) flattened_pd$nchar <- nchar(flattened_pd$text) - flattened_pd$nr <- seq_len(nrow(flattened_pd)) - flattened_pd %>% - group_by(line) %>% - mutate(col = cumsum(nchar + lag_spaces)) %>% + flattened_pd <- flattened_pd %>% + group_by(line1) %>% + mutate(col2 = cumsum(nchar + lag_spaces)) %>% ungroup() + flattened_pd$col1 <- flattened_pd$col2 - flattened_pd$nchar + flattened_pd } #' Choose the indention method for the tokens @@ -192,7 +201,7 @@ choose_indention <- function(flattened_pd, use_raw_indention) { flattened_pd$indent, flattened_pd$lag_spaces) } - flattened_pd$indention <- NULL + flattened_pd$indent <- NULL flattened_pd } diff --git a/man/enrich_terminals.Rd b/man/enrich_terminals.Rd index f91b87b51..04832a76a 100644 --- a/man/enrich_terminals.Rd +++ b/man/enrich_terminals.Rd @@ -15,6 +15,14 @@ should be used.} } \description{ Enriches a flattened parse table with terminals only. In particular, it is -possible to compute the line on which a token occurs and compute the exact -position a token will have on that line if it would be serialized. +possible to compute the exact position a token will have (line and column) +when it will be serialized. +} +\details{ +Since we have only terminal tokens now, the line on which a token +starts we also be the line on which it ends. We call \code{line1} the line on +which the token starts. \code{line1} has the same meaning as \code{line1} that can be +found in a flat parse table (see \code{\link[=tokenize]{tokenize()}}), just that the \code{line1} +created by \code{enrich_terminals()} is the updated version of the former +\code{line1}. The same applies for \code{col1} and \code{col2}. } From 92d261bcde433e0231e8bf59c64170b5a877d8af Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Fri, 4 Aug 2017 08:51:12 +0200 Subject: [PATCH 08/17] argument renaming: outer_ instead of passed_ --- R/transform.R | 6 +++--- R/visit.R | 30 +++++++++++++++--------------- man/context_to_terminals.Rd | 9 ++++----- man/context_towards_terminals.Rd | 14 +++++++------- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/R/transform.R b/R/transform.R index b925091c4..100158b6f 100644 --- a/R/transform.R +++ b/R/transform.R @@ -117,9 +117,9 @@ apply_transformers <- function(pd_nested, transformers) { c(transformers$space, transformers$token)) transformed_absolute_indent <- context_to_terminals(transformed_all, - passed_lag_newlines = 0, - passed_indent = 0, - passed_spaces = 0) + outer_lag_newlines = 0, + outer_indent = 0, + outer_spaces = 0) transformed_absolute_indent diff --git a/R/visit.R b/R/visit.R index 5c797fbf6..5bded5e38 100644 --- a/R/visit.R +++ b/R/visit.R @@ -54,14 +54,14 @@ visit_one <- function(pd_flat, funs) { #' @inherit context_towards_terminals #' @seealso context_towards_terminals visitors context_to_terminals <- function(pd_nested, - passed_lag_newlines, - passed_indent, - passed_spaces) { + outer_lag_newlines, + outer_indent, + outer_spaces) { if (is.null(pd_nested)) return() pd_transformed <- context_towards_terminals( - pd_nested, passed_lag_newlines, passed_indent, passed_spaces + pd_nested, outer_lag_newlines, outer_indent, outer_spaces ) pd_transformed$child <- pmap(list(pd_transformed$child, @@ -75,25 +75,25 @@ context_to_terminals <- function(pd_nested, #' Update the a parse table given outer context #' -#' `passed_lag_newlines` are added to the first token in `pd`, -#' `passed_indent` is added to all tokens in `pd`, `passed_spaces` is added to +#' `outer_lag_newlines` are added to the first token in `pd`, +#' `outer_indent` is added to all tokens in `pd`, `outer_spaces` is added to #' the last token in `pd`. [context_to_terminals()] calls this function #' repeatedly, which means the propagation of the parse information to the #' terminal tokens. #' @param pd_nested A nested parse table. -#' @param passed_lag_newlines The lag_newlines to be propagated inwards. -#' @param passed_indent The indention depth to be propagated inwards. -#' @param passed_spaces The number of spaces to be propagated inwards. +#' @param outer_lag_newlines The lag_newlines to be propagated inwards. +#' @param outer_indent The indention depth to be propagated inwards. +#' @param outer_spaces The number of spaces to be propagated inwards. #' @return An updated parse table. #' @seealso context_to_terminals context_towards_terminals <- function(pd_nested, - passed_lag_newlines, - passed_indent, - passed_spaces) { - pd_nested$indent <- pd_nested$indent + passed_indent - pd_nested$lag_newlines[1] <- pd_nested$lag_newlines[1] + passed_lag_newlines + outer_lag_newlines, + outer_indent, + outer_spaces) { + pd_nested$indent <- pd_nested$indent + outer_indent + pd_nested$lag_newlines[1] <- pd_nested$lag_newlines[1] + outer_lag_newlines pd_nested$spaces[nrow(pd_nested)] <- - pd_nested$spaces[nrow(pd_nested)] + passed_spaces + pd_nested$spaces[nrow(pd_nested)] + outer_spaces pd_nested } diff --git a/man/context_to_terminals.Rd b/man/context_to_terminals.Rd index 17d04f54b..479943ab0 100644 --- a/man/context_to_terminals.Rd +++ b/man/context_to_terminals.Rd @@ -4,17 +4,16 @@ \alias{context_to_terminals} \title{Propagate context to terminals} \usage{ -context_to_terminals(pd_nested, passed_lag_newlines, passed_indent, - passed_spaces) +context_to_terminals(pd_nested, outer_lag_newlines, outer_indent, outer_spaces) } \arguments{ \item{pd_nested}{A nested parse table.} -\item{passed_lag_newlines}{The lag_newlines to be propagated inwards.} +\item{outer_lag_newlines}{The lag_newlines to be propagated inwards.} -\item{passed_indent}{The indention depth to be propagated inwards.} +\item{outer_indent}{The indention depth to be propagated inwards.} -\item{passed_spaces}{The number of spaces to be propagated inwards.} +\item{outer_spaces}{The number of spaces to be propagated inwards.} } \value{ An updated parse table. diff --git a/man/context_towards_terminals.Rd b/man/context_towards_terminals.Rd index eeb041baf..e55c7b2e7 100644 --- a/man/context_towards_terminals.Rd +++ b/man/context_towards_terminals.Rd @@ -4,24 +4,24 @@ \alias{context_towards_terminals} \title{Update the a parse table given outer context} \usage{ -context_towards_terminals(pd_nested, passed_lag_newlines, passed_indent, - passed_spaces) +context_towards_terminals(pd_nested, outer_lag_newlines, outer_indent, + outer_spaces) } \arguments{ \item{pd_nested}{A nested parse table.} -\item{passed_lag_newlines}{The lag_newlines to be propagated inwards.} +\item{outer_lag_newlines}{The lag_newlines to be propagated inwards.} -\item{passed_indent}{The indention depth to be propagated inwards.} +\item{outer_indent}{The indention depth to be propagated inwards.} -\item{passed_spaces}{The number of spaces to be propagated inwards.} +\item{outer_spaces}{The number of spaces to be propagated inwards.} } \value{ An updated parse table. } \description{ -\code{passed_lag_newlines} are added to the first token in \code{pd}, -\code{passed_indent} is added to all tokens in \code{pd}, \code{passed_spaces} is added to +\code{outer_lag_newlines} are added to the first token in \code{pd}, +\code{outer_indent} is added to all tokens in \code{pd}, \code{outer_spaces} is added to the last token in \code{pd}. \code{\link[=context_to_terminals]{context_to_terminals()}} calls this function repeatedly, which means the propagation of the parse information to the terminal tokens. From 02f691d9e50e9503369c6cd123f9d686f6997aca Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sat, 5 Aug 2017 12:01:14 +0200 Subject: [PATCH 09/17] fix bug in add_brackets_in pipe --- R/rules-other.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/rules-other.R b/R/rules-other.R index 7966d7929..418b433ad 100644 --- a/R/rules-other.R +++ b/R/rules-other.R @@ -7,10 +7,10 @@ add_brackets_in_pipe <- function(pd) { lag_newlines = rep(0, 2), terminal = rep(TRUE, 2), spaces = rep(0, 2), - line1 = pd$line2[has_no_brackets] + 1:2, + line1 = pd$line1[has_no_brackets], line2 = line1, col1 = pd$col1[has_no_brackets], - col2 = col1, + col2 = col1 + 1:2, indent = rep(0, 2), child = rep(list(NULL), 2) ) From 0fd8723adbd6de95405bc5f3bcae6d8d79d1ed39 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sun, 6 Aug 2017 21:13:36 +0200 Subject: [PATCH 10/17] Use visitor for terminal extraction Use visitor for terminal extraction instead of complicated and pmap / matrix construct. --- R/transform.R | 2 +- R/visit.R | 50 ++++----------------------------- man/extract_terminals.Rd | 16 ++--------- man/extract_terminals_helper.Rd | 14 --------- 4 files changed, 9 insertions(+), 73 deletions(-) delete mode 100644 man/extract_terminals_helper.Rd diff --git a/R/transform.R b/R/transform.R index 100158b6f..8c10bb70a 100644 --- a/R/transform.R +++ b/R/transform.R @@ -85,7 +85,7 @@ parse_transform_serialize <- function(text, transformers) { pd_nested <- compute_parse_data_nested(text) transformed_pd <- apply_transformers(pd_nested, transformers) # TODO verify_roundtrip - flattened_pd <- extract_terminals(transformed_pd) %>% + flattened_pd <- post_visit(transformed_pd, list(extract_terminals)) %>% enrich_terminals() serialized_transformed_text <- serialize_parse_data_flattened(flattened_pd) diff --git a/R/visit.R b/R/visit.R index 5bded5e38..182a78ec2 100644 --- a/R/visit.R +++ b/R/visit.R @@ -99,55 +99,17 @@ context_towards_terminals <- function(pd_nested, #' Extract terminal tokens #' -#' Turns a nested parse table into a flat parse table. In particular it extracts -#' terminal tokens and the following attributes: -#' -#' * lag_newlines -#' * indent -#' * token -#' * text -#' * spaces -#' * id -#' * parent -#' * line1 -#' @inheritParams extract_terminals_helper +#' Turns a nested parse table into a flat parse table and extracts *all* +#' attributes +#' @param pd_nested A nested parse table. #' @importFrom readr type_convert col_integer cols extract_terminals <- function(pd_nested) { - flat_vec <- extract_terminals_helper(pd_nested) %>% - unlist() - nms <- list( - NULL, - c("lag_newlines", "indent", "token", "text", "spaces", "id", "parent", "line1") - ) - flat_tbl <- matrix(flat_vec, ncol = length(nms[[2]]), byrow = TRUE, dimnames = nms) %>% - as_tibble() %>% - type_convert( - col_types = cols( - lag_newlines = col_integer(), - indent = col_integer(), - spaces = col_integer() - ) - ) -} - -#' Helper to extract terminals -#' -#' @param pd_nested A nested parse table. -extract_terminals_helper <- function(pd_nested) { if (is.null(pd_nested)) return(pd) - pmap(list(pd_nested$terminal, pd_nested$token, pd_nested$text, - pd_nested$lag_newlines, pd_nested$spaces, pd_nested$indent, - pd_nested$id, pd_nested$parent, pd_nested$line1, pd_nested$child), - function(terminal, token, text, lag_newlines, spaces, indent, id, - parent, line1, child) { - if (terminal) { - c(lag_newlines, indent, token, text, spaces, id, parent, line1) - } else { - extract_terminals_helper(child) - } - }) + pd_splitted <- split(pd_nested, seq_len(nrow(pd_nested))) + bind_rows(ifelse(pd_nested$terminal, pd_splitted, pd_nested$child)) } + #' Enrich flattened parse table #' #' Enriches a flattened parse table with terminals only. In particular, it is diff --git a/man/extract_terminals.Rd b/man/extract_terminals.Rd index ec123b346..2fc5d7e67 100644 --- a/man/extract_terminals.Rd +++ b/man/extract_terminals.Rd @@ -10,18 +10,6 @@ extract_terminals(pd_nested) \item{pd_nested}{A nested parse table.} } \description{ -Turns a nested parse table into a flat parse table. In particular it extracts -terminal tokens and the following attributes: -} -\details{ -\itemize{ -\item lag_newlines -\item indent -\item token -\item text -\item spaces -\item id -\item parent -\item line1 -} +Turns a nested parse table into a flat parse table and extracts \emph{all} +attributes } diff --git a/man/extract_terminals_helper.Rd b/man/extract_terminals_helper.Rd deleted file mode 100644 index 76a215f09..000000000 --- a/man/extract_terminals_helper.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visit.R -\name{extract_terminals_helper} -\alias{extract_terminals_helper} -\title{Helper to extract terminals} -\usage{ -extract_terminals_helper(pd_nested) -} -\arguments{ -\item{pd_nested}{A nested parse table.} -} -\description{ -Helper to extract terminals -} From 3b193f0bcbdbe30d680bcd563595f253b7d02dbb Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Sun, 6 Aug 2017 21:15:38 +0200 Subject: [PATCH 11/17] trim white space as last serialization step --- R/serialize.R | 15 ++++++++++----- man/serialize_parse_data_flat.Rd | 3 ++- man/serialize_parse_data_flattened.Rd | 3 ++- man/serialize_parse_data_nested.Rd | 3 ++- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/R/serialize.R b/R/serialize.R index 46f98e626..3d299f45c 100644 --- a/R/serialize.R +++ b/R/serialize.R @@ -31,7 +31,8 @@ serialize_parse_data_nested_helper <- function(pd_nested, pass_indent) { #' Serialize a nested parse table #' -#' Collapses a nested parse table into its character vector representation. +#' Collapses a nested parse table into its character vector representation and +#' removes trailing white spaces. #' @param pd_nested A nested parse table with line break, spaces and indention #' information. #' @return A character string. @@ -48,7 +49,8 @@ serialize_parse_data_nested <- function(pd_nested) { #' Serialize Flat Parse Data #' -#' Collapses a parse table into character vector representation. +#' Collapses a parse table into character vector representation and +#' removes trailing white spaces. #' @param pd_flat A parse table. #' @details #' The function essentially collapses the column text of `pd_flat` @@ -66,12 +68,14 @@ serialize_parse_data_flat <- function(pd_flat) { collapse = "")) %>% .[["text_ws"]] %>% strsplit("\n", fixed = TRUE) %>% - .[[1L]] + .[[1L]] %>% + trimws(which = "right") } #' Serialize flattened parse data #' -#' Collapses a flattened parse table into character vector representation. +#' Collapses a flattened parse table into character vector representation and +#' removes trailing white spaces. #' @param flattened_pd A flattened parse table. serialize_parse_data_flattened <- function(flattened_pd) { flattened_pd$lag_newlines[1] <- flattened_pd$line1[1] - 1 @@ -84,7 +88,8 @@ serialize_parse_data_flattened <- function(flattened_pd) { collapse = "")) %>% .[["text_ws"]] %>% strsplit("\n", fixed = TRUE) %>% - .[[1L]] + .[[1L]] %>% + trimws(which = "right") } diff --git a/man/serialize_parse_data_flat.Rd b/man/serialize_parse_data_flat.Rd index 6dfff5bc0..ea163f166 100644 --- a/man/serialize_parse_data_flat.Rd +++ b/man/serialize_parse_data_flat.Rd @@ -10,7 +10,8 @@ serialize_parse_data_flat(pd_flat) \item{pd_flat}{A parse table.} } \description{ -Collapses a parse table into character vector representation. +Collapses a parse table into character vector representation and +removes trailing white spaces. } \details{ The function essentially collapses the column text of \code{pd_flat} diff --git a/man/serialize_parse_data_flattened.Rd b/man/serialize_parse_data_flattened.Rd index 9c74ac706..06ba802cf 100644 --- a/man/serialize_parse_data_flattened.Rd +++ b/man/serialize_parse_data_flattened.Rd @@ -10,5 +10,6 @@ serialize_parse_data_flattened(flattened_pd) \item{flattened_pd}{A flattened parse table.} } \description{ -Collapses a flattened parse table into character vector representation. +Collapses a flattened parse table into character vector representation and +removes trailing white spaces. } diff --git a/man/serialize_parse_data_nested.Rd b/man/serialize_parse_data_nested.Rd index 4ad0f10e1..8fdb08e75 100644 --- a/man/serialize_parse_data_nested.Rd +++ b/man/serialize_parse_data_nested.Rd @@ -14,5 +14,6 @@ information.} A character string. } \description{ -Collapses a nested parse table into its character vector representation. +Collapses a nested parse table into its character vector representation and +removes trailing white spaces. } From 13dad41a6342603e42a2131e61b8224c8db9269b Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Mon, 7 Aug 2017 09:15:06 +0200 Subject: [PATCH 12/17] 0L instead of 0, type = "width" in nchar(). --- R/visit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/visit.R b/R/visit.R index 182a78ec2..dff8c6d55 100644 --- a/R/visit.R +++ b/R/visit.R @@ -128,8 +128,8 @@ enrich_terminals <- function(flattened_pd, use_raw_indention = FALSE) { flattened_pd$line1 <- cumsum(flattened_pd$lag_newlines) + flattened_pd$line1[1] - flattened_pd$newlines <- lead(flattened_pd$lag_newlines, default = 0) - flattened_pd$nchar <- nchar(flattened_pd$text) + flattened_pd$newlines <- lead(flattened_pd$lag_newlines, default = 0L) + flattened_pd$nchar <- nchar(flattened_pd$text, type = "width") flattened_pd <- flattened_pd %>% group_by(line1) %>% mutate(col2 = cumsum(nchar + lag_spaces)) %>% From bd235905816ed4d21cf5ada77ea41d4d0d961292 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Mon, 7 Aug 2017 09:47:28 +0200 Subject: [PATCH 13/17] 0L insted of 0. Now I searched through everything. --- R/parsed.R | 2 +- R/visit.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/parsed.R b/R/parsed.R index 935693495..b39e2ff66 100644 --- a/R/parsed.R +++ b/R/parsed.R @@ -88,7 +88,7 @@ create_filler <- function(pd_flat) { pd_flat$line3 <- lead(pd_flat$line1, default = tail(pd_flat$line2, 1)) pd_flat$col3 <- lead(pd_flat$col1, default = tail(pd_flat$col2, 1) + 1L) pd_flat$newlines <- pd_flat$line3 - pd_flat$line2 - pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0) + pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0L) pd_flat$col2_nl <- if_else(pd_flat$newlines > 0L, 0L, pd_flat$col2) pd_flat$spaces <- pd_flat$col3 - pd_flat$col2_nl - 1L pd_flat$multi_line <- ifelse(pd_flat$terminal, FALSE, NA) diff --git a/R/visit.R b/R/visit.R index dff8c6d55..390239713 100644 --- a/R/visit.R +++ b/R/visit.R @@ -123,7 +123,7 @@ extract_terminals <- function(pd_nested) { #' `line1`. The same applies for `col1` and `col2`. #' @inheritParams choose_indention enrich_terminals <- function(flattened_pd, use_raw_indention = FALSE) { - flattened_pd$lag_spaces <- lag(flattened_pd$spaces, default = 0) + flattened_pd$lag_spaces <- lag(flattened_pd$spaces, default = 0L) flattened_pd <- choose_indention(flattened_pd, use_raw_indention) flattened_pd$line1 <- cumsum(flattened_pd$lag_newlines) + flattened_pd$line1[1] From 4cc9382a723ca9bf9d0183ab69b55c915f055b5f Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Mon, 7 Aug 2017 09:58:04 +0200 Subject: [PATCH 14/17] remove readr import --- NAMESPACE | 3 --- R/visit.R | 1 - 2 files changed, 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index bfbdcbf5c..800d1094b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,7 +21,4 @@ importFrom(purrr,pmap) importFrom(purrr,pwalk) importFrom(purrr,reduce) importFrom(purrr,when) -importFrom(readr,col_integer) -importFrom(readr,cols) -importFrom(readr,type_convert) importFrom(utils,write.table) diff --git a/R/visit.R b/R/visit.R index 390239713..fb6fbfc9c 100644 --- a/R/visit.R +++ b/R/visit.R @@ -102,7 +102,6 @@ context_towards_terminals <- function(pd_nested, #' Turns a nested parse table into a flat parse table and extracts *all* #' attributes #' @param pd_nested A nested parse table. -#' @importFrom readr type_convert col_integer cols extract_terminals <- function(pd_nested) { if (is.null(pd_nested)) return(pd) pd_splitted <- split(pd_nested, seq_len(nrow(pd_nested))) From c60bf90018178e6d09de96fe470b9a3a23e392bd Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Mon, 7 Aug 2017 10:47:16 +0200 Subject: [PATCH 15/17] Revert "trim white space as last serialization step" This reverts commit 3b193f0bcbdbe30d680bcd563595f253b7d02dbb. --- R/serialize.R | 15 +++++---------- man/serialize_parse_data_flat.Rd | 3 +-- man/serialize_parse_data_flattened.Rd | 3 +-- man/serialize_parse_data_nested.Rd | 3 +-- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/R/serialize.R b/R/serialize.R index 3d299f45c..46f98e626 100644 --- a/R/serialize.R +++ b/R/serialize.R @@ -31,8 +31,7 @@ serialize_parse_data_nested_helper <- function(pd_nested, pass_indent) { #' Serialize a nested parse table #' -#' Collapses a nested parse table into its character vector representation and -#' removes trailing white spaces. +#' Collapses a nested parse table into its character vector representation. #' @param pd_nested A nested parse table with line break, spaces and indention #' information. #' @return A character string. @@ -49,8 +48,7 @@ serialize_parse_data_nested <- function(pd_nested) { #' Serialize Flat Parse Data #' -#' Collapses a parse table into character vector representation and -#' removes trailing white spaces. +#' Collapses a parse table into character vector representation. #' @param pd_flat A parse table. #' @details #' The function essentially collapses the column text of `pd_flat` @@ -68,14 +66,12 @@ serialize_parse_data_flat <- function(pd_flat) { collapse = "")) %>% .[["text_ws"]] %>% strsplit("\n", fixed = TRUE) %>% - .[[1L]] %>% - trimws(which = "right") + .[[1L]] } #' Serialize flattened parse data #' -#' Collapses a flattened parse table into character vector representation and -#' removes trailing white spaces. +#' Collapses a flattened parse table into character vector representation. #' @param flattened_pd A flattened parse table. serialize_parse_data_flattened <- function(flattened_pd) { flattened_pd$lag_newlines[1] <- flattened_pd$line1[1] - 1 @@ -88,8 +84,7 @@ serialize_parse_data_flattened <- function(flattened_pd) { collapse = "")) %>% .[["text_ws"]] %>% strsplit("\n", fixed = TRUE) %>% - .[[1L]] %>% - trimws(which = "right") + .[[1L]] } diff --git a/man/serialize_parse_data_flat.Rd b/man/serialize_parse_data_flat.Rd index ea163f166..6dfff5bc0 100644 --- a/man/serialize_parse_data_flat.Rd +++ b/man/serialize_parse_data_flat.Rd @@ -10,8 +10,7 @@ serialize_parse_data_flat(pd_flat) \item{pd_flat}{A parse table.} } \description{ -Collapses a parse table into character vector representation and -removes trailing white spaces. +Collapses a parse table into character vector representation. } \details{ The function essentially collapses the column text of \code{pd_flat} diff --git a/man/serialize_parse_data_flattened.Rd b/man/serialize_parse_data_flattened.Rd index 06ba802cf..9c74ac706 100644 --- a/man/serialize_parse_data_flattened.Rd +++ b/man/serialize_parse_data_flattened.Rd @@ -10,6 +10,5 @@ serialize_parse_data_flattened(flattened_pd) \item{flattened_pd}{A flattened parse table.} } \description{ -Collapses a flattened parse table into character vector representation and -removes trailing white spaces. +Collapses a flattened parse table into character vector representation. } diff --git a/man/serialize_parse_data_nested.Rd b/man/serialize_parse_data_nested.Rd index 8fdb08e75..4ad0f10e1 100644 --- a/man/serialize_parse_data_nested.Rd +++ b/man/serialize_parse_data_nested.Rd @@ -14,6 +14,5 @@ information.} A character string. } \description{ -Collapses a nested parse table into its character vector representation and -removes trailing white spaces. +Collapses a nested parse table into its character vector representation. } From 0cf43fc9b18a07a03b3acc393a4751f344a43990 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Mon, 7 Aug 2017 10:52:29 +0200 Subject: [PATCH 16/17] don't insert space after # / #' if only spaces follow --- R/nested.R | 6 ++++-- R/rules-spacing.R | 9 +++++---- man/set_spaces.Rd | 5 ++++- man/start_comments_with_space.Rd | 5 +++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/R/nested.R b/R/nested.R index 2f05750dc..97422c810 100644 --- a/R/nested.R +++ b/R/nested.R @@ -101,18 +101,20 @@ add_terminal_token_before <- function(pd_flat) { #' #' @param spaces_after_prefix An integer vector with the number of spaces #' after the prefix. +#' @param text_lenght Integer vector giving the number of characters of +#' the text. #' @param force_one Whether spaces_after_prefix should be set to one in all #' cases. #' @return An integer vector of length spaces_after_prefix, which is either #' one (if `force_one = TRUE`) or `space_after_prefix` with all values #' below one set to one. -set_spaces <- function(spaces_after_prefix, force_one) { +set_spaces <- function(spaces_after_prefix, text_length, force_one) { if (force_one) { n_of_spaces <- rep(1, length(spaces_after_prefix)) } else { n_of_spaces <- pmax(spaces_after_prefix, 1L) } - n_of_spaces + ifelse(text_length > 0, n_of_spaces, 0) } #' Nest a flat parse table diff --git a/R/rules-spacing.R b/R/rules-spacing.R index 5cc9493be..cea2518dd 100644 --- a/R/rules-spacing.R +++ b/R/rules-spacing.R @@ -136,9 +136,9 @@ set_space_between_levels <- function(pd_flat) { #' Start comments with a space #' #' Forces comments to start with a space, that is, after the regular expression -#' "^#+'*", at least one space must follow. Multiple spaces may be legit for -#' indention in some situations. -#' +#' "^#+'*", at least one space must follow if the comment is *non-empty*, i.e +#' there is not just spaces within the comment. Multiple spaces may be legit +#' for indention in some situations. #' @param pd A parse table. #' @param force_one Wheter or not to force one space or allow multiple spaces #' after the regex "^#+'*". @@ -156,7 +156,8 @@ start_comments_with_space <- function(pd, force_one = FALSE) { regex = "^(#+'*)( *)(.*)$") comments$space_after_prefix <- nchar(comments$space_after_prefix) comments$space_after_prefix <- set_spaces( - comments$space_after_prefix, + spaces_after_prefix = comments$space_after_prefix, + text_length = nchar(trimws(comments$text, "right")), force_one ) diff --git a/man/set_spaces.Rd b/man/set_spaces.Rd index fcd8fa47e..6ef623577 100644 --- a/man/set_spaces.Rd +++ b/man/set_spaces.Rd @@ -4,7 +4,7 @@ \alias{set_spaces} \title{Helper for setting spaces} \usage{ -set_spaces(spaces_after_prefix, force_one) +set_spaces(spaces_after_prefix, text_length, force_one) } \arguments{ \item{spaces_after_prefix}{An integer vector with the number of spaces @@ -12,6 +12,9 @@ after the prefix.} \item{force_one}{Whether spaces_after_prefix should be set to one in all cases.} + +\item{text_lenght}{Integer vector giving the number of characters of +the text.} } \value{ An integer vector of length spaces_after_prefix, which is either diff --git a/man/start_comments_with_space.Rd b/man/start_comments_with_space.Rd index a224c3ae5..36ad71944 100644 --- a/man/start_comments_with_space.Rd +++ b/man/start_comments_with_space.Rd @@ -14,6 +14,7 @@ after the regex "^#+'*".} } \description{ Forces comments to start with a space, that is, after the regular expression -"^#+'*", at least one space must follow. Multiple spaces may be legit for -indention in some situations. +"^#+'*", at least one space must follow if the comment is \emph{non-empty}, i.e +there is not just spaces within the comment. Multiple spaces may be legit +for indention in some situations. } From bcc22188d1536a555a2a0222a37c25fefa47f7e8 Mon Sep 17 00:00:00 2001 From: Lorenz Walthert Date: Mon, 7 Aug 2017 11:05:06 +0200 Subject: [PATCH 17/17] grammar --- R/nested.R | 2 +- R/visit.R | 4 ++-- man/set_spaces.Rd | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/nested.R b/R/nested.R index 97422c810..565787540 100644 --- a/R/nested.R +++ b/R/nested.R @@ -101,7 +101,7 @@ add_terminal_token_before <- function(pd_flat) { #' #' @param spaces_after_prefix An integer vector with the number of spaces #' after the prefix. -#' @param text_lenght Integer vector giving the number of characters of +#' @param text_length Integer vector giving the number of characters of #' the text. #' @param force_one Whether spaces_after_prefix should be set to one in all #' cases. diff --git a/R/visit.R b/R/visit.R index fb6fbfc9c..d6cf63a16 100644 --- a/R/visit.R +++ b/R/visit.R @@ -104,8 +104,8 @@ context_towards_terminals <- function(pd_nested, #' @param pd_nested A nested parse table. extract_terminals <- function(pd_nested) { if (is.null(pd_nested)) return(pd) - pd_splitted <- split(pd_nested, seq_len(nrow(pd_nested))) - bind_rows(ifelse(pd_nested$terminal, pd_splitted, pd_nested$child)) + pd_split <- split(pd_nested, seq_len(nrow(pd_nested))) + bind_rows(ifelse(pd_nested$terminal, pd_split, pd_nested$child)) } diff --git a/man/set_spaces.Rd b/man/set_spaces.Rd index 6ef623577..cb20dc3a9 100644 --- a/man/set_spaces.Rd +++ b/man/set_spaces.Rd @@ -10,11 +10,11 @@ set_spaces(spaces_after_prefix, text_length, force_one) \item{spaces_after_prefix}{An integer vector with the number of spaces after the prefix.} +\item{text_length}{Integer vector giving the number of characters of +the text.} + \item{force_one}{Whether spaces_after_prefix should be set to one in all cases.} - -\item{text_lenght}{Integer vector giving the number of characters of -the text.} } \value{ An integer vector of length spaces_after_prefix, which is either