diff --git a/R/nested.R b/R/nested.R index d9a06101b..e83769d59 100644 --- a/R/nested.R +++ b/R/nested.R @@ -4,9 +4,8 @@ #' representation into a nested parse table with #' [nest_parse_data()]. #' @param text A character vector to parse. -#' @return A nested parse table. Apart from the columns provided by -#' `utils::getParseData()`, a column "short" with the first five characters of -#' "text" is added, the nested subtibbles are in column "child". +#' @return A nested parse table. See [tokenize()] for details on the columns +#' of the parse table. compute_parse_data_nested <- function(text) { parse_data <- tokenize(text) %>% add_terminal_token_before() %>% @@ -23,14 +22,26 @@ compute_parse_data_nested <- function(text) { #' Obtain token table from text #' #' [utils::getParseData()] is used to obtain a flat parse table from `text`. +#' +#' Apart from the columns provided by `utils::getParseData()`, the following +#' columns are added: +#' +#' * A column "short" with the first five characters of "text". +#' * A column "pos_id" (positional id) which can be used for sorting +#' (because "id" cannot be used in general). Note that the nth value of this +#' column corresponds to n as long as no tokens are inserted. +#' * A column "child" that contains the nested subtibbles. +#' #' @param text A character vector. 
#' @return A flat parse table +#' @importFrom rlang seq2 tokenize <- function(text) { # avoid https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16041 parse(text = text, keep.source = TRUE) parsed <- parse(text = text, keep.source = TRUE) parse_data <- as_tibble(utils::getParseData(parsed, includeText = NA)) %>% enhance_mapping_special() + parse_data$pos_id <- seq2(1L, nrow(parse_data)) parse_data$short <- substr(parse_data$text, 1, 5) parse_data } @@ -67,7 +78,7 @@ NULL add_terminal_token_after <- function(pd_flat) { terminals <- pd_flat %>% filter(terminal) %>% - arrange(line1, col1) + arrange(pos_id) data_frame(id = terminals$id, token_after = lead(terminals$token, default = "")) %>% @@ -78,7 +89,7 @@ add_terminal_token_after <- function(pd_flat) { add_terminal_token_before <- function(pd_flat) { terminals <- pd_flat %>% filter(terminal) %>% - arrange(line1, col1) + arrange(pos_id) data_frame(id = terminals$id, token_before = lag(terminals$token, default = "")) %>% @@ -149,7 +160,7 @@ nest_parse_data <- function(pd_flat) { combine_children <- function(child, internal_child) { bound <- bind_rows(child, internal_child) if (nrow(bound) == 0) return(NULL) - bound[order(bound$line1, bound$col1), ] + bound[order(bound$pos_id), ] } diff --git a/R/relevel.R b/R/relevel.R index f51f4ba31..77460354b 100644 --- a/R/relevel.R +++ b/R/relevel.R @@ -62,6 +62,6 @@ bind_with_child <- function(pd_nested, pos) { pd_nested %>% slice(-pos) %>% bind_rows(pd_nested$child[[pos]]) %>% - arrange(line1, col1) + arrange(pos_id) } diff --git a/R/rules-line_break.R b/R/rules-line_break.R index 76c610f9e..d81ab017f 100644 --- a/R/rules-line_break.R +++ b/R/rules-line_break.R @@ -72,7 +72,7 @@ set_line_break_after_opening_if_call_is_multi_line <- except_text_before = NULL) { if (!is_function_call(pd)) return(pd) npd <- nrow(pd) - seq_x <- seq2(3, npd - 1) + seq_x <- seq2(3L, npd - 1L) is_multi_line <- any( (pd$lag_newlines[seq_x] > 0) | (pd$token[seq_x] == "COMMENT") @@ -94,7 +94,7 @@ 
set_line_break_after_opening_if_call_is_multi_line <- set_line_break_before_closing_call <- function(pd, except_token_before) { if (!is_function_call(pd)) return(pd) npd <- nrow(pd) - is_multi_line <- any(pd$lag_newlines[seq2(3, npd - 1)] > 0) + is_multi_line <- any(pd$lag_newlines[seq2(3L, npd - 1L)] > 0) if (!is_multi_line) { exception <- which(pd$token_before %in% except_token_before) pd$lag_newlines[setdiff(npd, exception)] <- 0L diff --git a/R/rules-other.R b/R/rules-other.R index 754849960..af9271d6c 100644 --- a/R/rules-other.R +++ b/R/rules-other.R @@ -14,6 +14,8 @@ add_brackets_in_pipe <- function(pd) { col1 = pd$col1[has_no_brackets] + c(0.3, 0.6), col2 = col1 + 1:2, indent = rep(0, 2), + id = rep(NA, 2), + pos_id = pd$pos_id[has_no_brackets] + c(0.3, 0.6), child = rep(list(NULL), 2) ) pd <- bind_rows(pd, new) diff --git a/R/rules-spacing.R b/R/rules-spacing.R index db7c5f195..099155c18 100644 --- a/R/rules-spacing.R +++ b/R/rules-spacing.R @@ -166,7 +166,6 @@ start_comments_with_space <- function(pd, force_one = FALSE) { ) %>% trimws("right") pd$short[comment_pos] <- substr(pd$text[comment_pos], 1, 5) - pd } diff --git a/R/styler.R b/R/styler.R index 881cb6dd0..040ef4c59 100644 --- a/R/styler.R +++ b/R/styler.R @@ -23,6 +23,7 @@ if (getRversion() >= "2.15.1") { "terminal", "text", "short", "spaces", "lag_spaces", "newlines", "lag_newlines", + "pos_id", NULL )) } diff --git a/R/unindent.R b/R/unindent.R index e5e59dd9f..8779cd609 100644 --- a/R/unindent.R +++ b/R/unindent.R @@ -27,7 +27,7 @@ set_unindention_child <- function(pd, token = "')'", unindent_by) { unindent_by = abs(pd$indent[closing] - pd$indent[closing-1])) bind_rows(candidates, non_candidates) %>% - arrange(line1, col1) + arrange(pos_id) } #' Unindent a child diff --git a/man/compute_parse_data_nested.Rd b/man/compute_parse_data_nested.Rd index 79e738b14..8c09f9bca 100644 --- a/man/compute_parse_data_nested.Rd +++ b/man/compute_parse_data_nested.Rd @@ -10,9 +10,8 @@ 
compute_parse_data_nested(text) \item{text}{A character vector to parse.} } \value{ -A nested parse table. Apart from the columns provided by -\code{utils::getParseData()}, a column "short" with the first five characters of -"text" is added, the nested subtibbles are in column "child". +A nested parse table. See \code{\link[=tokenize]{tokenize()}} for details on the columns +of the parse table. } \description{ Parses \code{text} to a flat parse table and subsequently changes its diff --git a/man/tokenize.Rd b/man/tokenize.Rd index 626a3b5fe..f6ae882e2 100644 --- a/man/tokenize.Rd +++ b/man/tokenize.Rd @@ -15,3 +15,14 @@ A flat parse table \description{ \code{\link[utils:getParseData]{utils::getParseData()}} is used to obtain a flat parse table from \code{text}. } +\details{ +Apart from the columns provided by \code{utils::getParseData()}, the following +columns are added: +\itemize{ +\item A column "short" with the first five characters of "text". +\item A column "pos_id" (positional id) which can be used for sorting +(because "id" cannot be used in general). Note that the nth value of this +column corresponds to n as long as no tokens are inserted. +\item A column "child" that contains the nested subtibbles. +} +}