Skip to content

Commit 5f57d9f

Browse files
Merge pull request #111 from lorenzwalthert/pr106_1_flattening_out
flattening out the parse table
2 parents 93e529b + bcc2218 commit 5f57d9f

19 files changed

+436
-129
lines changed

R/nested.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,18 +101,20 @@ add_terminal_token_before <- function(pd_flat) {
101101
#'
102102
#' @param spaces_after_prefix An integer vector with the number of spaces
103103
#' after the prefix.
104+
#' @param text_length Integer vector giving the number of characters of
105+
#' the text.
104106
#' @param force_one Whether spaces_after_prefix should be set to one in all
105107
#' cases.
106108
#' @return An integer vector of length spaces_after_prefix, which is either
107109
#' one (if `force_one = TRUE`) or `spaces_after_prefix` with all values
108110
#' below one set to one.
109-
set_spaces <- function(spaces_after_prefix, force_one) {
111+
set_spaces <- function(spaces_after_prefix, text_length, force_one) {
110112
if (force_one) {
111113
n_of_spaces <- rep(1, length(spaces_after_prefix))
112114
} else {
113115
n_of_spaces <- pmax(spaces_after_prefix, 1L)
114116
}
115-
n_of_spaces
117+
ifelse(text_length > 0, n_of_spaces, 0)
116118
}
117119

118120
#' Nest a flat parse table

R/parsed.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ create_filler <- function(pd_flat) {
8888
pd_flat$line3 <- lead(pd_flat$line1, default = tail(pd_flat$line2, 1))
8989
pd_flat$col3 <- lead(pd_flat$col1, default = tail(pd_flat$col2, 1) + 1L)
9090
pd_flat$newlines <- pd_flat$line3 - pd_flat$line2
91-
pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0)
91+
pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0L)
9292
pd_flat$col2_nl <- if_else(pd_flat$newlines > 0L, 0L, pd_flat$col2)
9393
pd_flat$spaces <- pd_flat$col3 - pd_flat$col2_nl - 1L
9494
pd_flat$multi_line <- ifelse(pd_flat$terminal, FALSE, NA)

R/rules-other.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ add_brackets_in_pipe <- function(pd) {
77
lag_newlines = rep(0, 2),
88
terminal = rep(TRUE, 2),
99
spaces = rep(0, 2),
10-
line1 = pd$line2[has_no_brackets] + 1:2,
10+
line1 = pd$line1[has_no_brackets],
1111
line2 = line1,
1212
col1 = pd$col1[has_no_brackets],
13-
col2 = col1,
13+
col2 = col1 + 1:2,
1414
indent = rep(0, 2),
1515
child = rep(list(NULL), 2)
1616
)

R/rules-spacing.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,9 @@ set_space_between_levels <- function(pd_flat) {
136136
#' Start comments with a space
137137
#'
138138
#' Forces comments to start with a space, that is, after the regular expression
139-
#' "^#+'*", at least one space must follow. Multiple spaces may be legit for
140-
#' indention in some situations.
141-
#'
139+
#' "^#+'*", at least one space must follow if the comment is *non-empty*, i.e.
140+
#' there are not just spaces within the comment. Multiple spaces may be legit
141+
#' for indention in some situations.
142142
#' @param pd A parse table.
143143
#' @param force_one Whether or not to force one space or allow multiple spaces
144144
#' after the regex "^#+'*".
@@ -156,7 +156,8 @@ start_comments_with_space <- function(pd, force_one = FALSE) {
156156
regex = "^(#+'*)( *)(.*)$")
157157
comments$space_after_prefix <- nchar(comments$space_after_prefix)
158158
comments$space_after_prefix <- set_spaces(
159-
comments$space_after_prefix,
159+
spaces_after_prefix = comments$space_after_prefix,
160+
text_length = nchar(trimws(comments$text, "right")),
160161
force_one
161162
)
162163

R/serialize.R

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,22 @@ serialize_parse_data_flat <- function(pd_flat) {
6969
.[[1L]]
7070
}
7171

72+
#' Serialize flattened parse data
73+
#'
74+
#' Collapses a flattened parse table into character vector representation.
75+
#' @param flattened_pd A flattened parse table.
76+
serialize_parse_data_flattened <- function(flattened_pd) {
77+
flattened_pd$lag_newlines[1] <- flattened_pd$line1[1] - 1
78+
flattened_pd %>%
79+
summarize_(
80+
text_ws = ~paste0(
81+
map(lag_newlines, add_newlines),
82+
map(lag_spaces, add_spaces),
83+
text,
84+
collapse = "")) %>%
85+
.[["text_ws"]] %>%
86+
strsplit("\n", fixed = TRUE) %>%
87+
.[[1L]]
88+
89+
90+
}

R/serialized_tests.R

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,11 +190,8 @@ style_indent_curly_round <- function(text) {
190190
NULL
191191
)
192192

193-
text %>%
194-
compute_parse_data_nested() %>%
195-
apply_transformers(transformers) %>%
196-
serialize_parse_data_nested()
197-
193+
transformed_text <- parse_transform_serialize(text, transformers)
194+
transformed_text
198195
}
199196

200197
#' @describeIn test_transformer Transformations for indention based on operators

R/transform.R

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ transform_files <- function(files, transformers, flat) {
1717
}
1818
invisible(changed)
1919
}
20-
2120
#' Closure to return a transformer function
2221
#'
2322
#' This function takes a list of transformer functions as input and
@@ -86,7 +85,10 @@ parse_transform_serialize <- function(text, transformers) {
8685
pd_nested <- compute_parse_data_nested(text)
8786
transformed_pd <- apply_transformers(pd_nested, transformers)
8887
# TODO verify_roundtrip
89-
serialized_transformed_text <- serialize_parse_data_nested(transformed_pd)
88+
flattened_pd <- post_visit(transformed_pd, list(extract_terminals)) %>%
89+
enrich_terminals()
90+
91+
serialized_transformed_text <- serialize_parse_data_flattened(flattened_pd)
9092
serialized_transformed_text
9193
}
9294

@@ -113,5 +115,12 @@ apply_transformers <- function(pd_nested, transformers) {
113115

114116
transformed_all <- pre_visit(transformed_updated_multi_line,
115117
c(transformers$space, transformers$token))
116-
transformed_all
118+
119+
transformed_absolute_indent <- context_to_terminals(transformed_all,
120+
outer_lag_newlines = 0,
121+
outer_indent = 0,
122+
outer_spaces = 0)
123+
124+
transformed_absolute_indent
125+
117126
}

R/visit.R

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,127 @@ visit_one <- function(pd_flat, funs) {
4343
reduce(funs, function(x, fun) fun(x),
4444
.init = pd_flat)
4545
}
46+
47+
48+
#' Propagate context to terminals
49+
#'
50+
#' Implements a very specific pre-visiting scheme, namely to propagate
51+
#' indention, spaces and lag_newlines via inner tokens to terminals. This means
52+
#' that information regarding indention, linebreaks and spaces (which is
53+
#' relative in `pd_nested`) will be converted into absolute.
54+
#' @inherit context_towards_terminals
55+
#' @seealso context_towards_terminals visitors
56+
context_to_terminals <- function(pd_nested,
57+
outer_lag_newlines,
58+
outer_indent,
59+
outer_spaces) {
60+
61+
if (is.null(pd_nested)) return()
62+
63+
pd_transformed <- context_towards_terminals(
64+
pd_nested, outer_lag_newlines, outer_indent, outer_spaces
65+
)
66+
67+
pd_transformed$child <- pmap(list(pd_transformed$child,
68+
pd_transformed$lag_newlines,
69+
pd_transformed$indent,
70+
pd_transformed$spaces),
71+
context_to_terminals)
72+
pd_transformed
73+
}
74+
75+
76+
#' Update a parse table given outer context
77+
#'
78+
#' `outer_lag_newlines` are added to the first token in `pd`,
79+
#' `outer_indent` is added to all tokens in `pd`, `outer_spaces` is added to
80+
#' the last token in `pd`. [context_to_terminals()] calls this function
81+
#' repeatedly, which means the propagation of the parse information to the
82+
#' terminal tokens.
83+
#' @param pd_nested A nested parse table.
84+
#' @param outer_lag_newlines The lag_newlines to be propagated inwards.
85+
#' @param outer_indent The indention depth to be propagated inwards.
86+
#' @param outer_spaces The number of spaces to be propagated inwards.
87+
#' @return An updated parse table.
88+
#' @seealso context_to_terminals
89+
context_towards_terminals <- function(pd_nested,
90+
outer_lag_newlines,
91+
outer_indent,
92+
outer_spaces) {
93+
pd_nested$indent <- pd_nested$indent + outer_indent
94+
pd_nested$lag_newlines[1] <- pd_nested$lag_newlines[1] + outer_lag_newlines
95+
pd_nested$spaces[nrow(pd_nested)] <-
96+
pd_nested$spaces[nrow(pd_nested)] + outer_spaces
97+
pd_nested
98+
}
99+
100+
#' Extract terminal tokens
101+
#'
102+
#' Turns a nested parse table into a flat parse table and extracts *all*
103+
#' attributes
104+
#' @param pd_nested A nested parse table.
105+
extract_terminals <- function(pd_nested) {
106+
if (is.null(pd_nested)) return(pd)
107+
pd_split <- split(pd_nested, seq_len(nrow(pd_nested)))
108+
bind_rows(ifelse(pd_nested$terminal, pd_split, pd_nested$child))
109+
}
110+
111+
112+
#' Enrich flattened parse table
113+
#'
114+
#' Enriches a flattened parse table with terminals only. In particular, it is
115+
#' possible to compute the exact position a token will have (line and column)
116+
#' when it will be serialized.
117+
#' @details Since we have only terminal tokens now, the line on which a token
118+
#' starts will also be the line on which it ends. We call `line1` the line on
119+
#' which the token starts. `line1` has the same meaning as `line1` that can be
120+
#' found in a flat parse table (see [tokenize()]), just that the `line1`
121+
#' created by `enrich_terminals()` is the updated version of the former
122+
#' `line1`. The same applies for `col1` and `col2`.
123+
#' @inheritParams choose_indention
124+
enrich_terminals <- function(flattened_pd, use_raw_indention = FALSE) {
125+
flattened_pd$lag_spaces <- lag(flattened_pd$spaces, default = 0L)
126+
flattened_pd <- choose_indention(flattened_pd, use_raw_indention)
127+
flattened_pd$line1 <-
128+
cumsum(flattened_pd$lag_newlines) + flattened_pd$line1[1]
129+
130+
flattened_pd$newlines <- lead(flattened_pd$lag_newlines, default = 0L)
131+
flattened_pd$nchar <- nchar(flattened_pd$text, type = "width")
132+
flattened_pd <- flattened_pd %>%
133+
group_by(line1) %>%
134+
mutate(col2 = cumsum(nchar + lag_spaces)) %>%
135+
ungroup()
136+
flattened_pd$col1 <- flattened_pd$col2 - flattened_pd$nchar
137+
flattened_pd
138+
}
139+
140+
#' Choose the indention method for the tokens
141+
#'
142+
#' Either use the raw indention, which is just the spaces computed between
143+
#' the first token on a new line and the token before it, or use the indention
144+
#' computed according to the transformer used, which is stored in the column
145+
#' `indent`.
146+
#'
147+
#' All indention information will be combined with the space information for
148+
#' the first token on a new line.
149+
#' If `use_raw_indention` is set, information in the column `indent` will
150+
#' be discarded anyway. If it is not set, the first token on a new line will
151+
#' "inherit" the indention of the whole line.
152+
#' The column `indent` will be removed since all information necessary is
153+
#' contained in the spacing information of the first token on a new line and
154+
#' the position of the tokens will not be changed anymore at this stage.
155+
#' @param flattened_pd A nested parse table that was turned into a flat parse
156+
#' table using [extract_terminals()].
157+
#' @param use_raw_indention Boolean indicating whether or not the raw indention
158+
#' should be used.
159+
choose_indention <- function(flattened_pd, use_raw_indention) {
160+
if (!use_raw_indention) {
161+
flattened_pd$lag_spaces <- ifelse(flattened_pd$lag_newlines > 0,
162+
flattened_pd$indent,
163+
flattened_pd$lag_spaces)
164+
}
165+
flattened_pd$indent <- NULL
166+
flattened_pd
167+
}
168+
169+

man/choose_indention.Rd

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/context_to_terminals.Rd

Lines changed: 29 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/context_towards_terminals.Rd

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/enrich_terminals.Rd

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)