Support Windows Newline Characters #396

NGaffney · 2018-06-15T04:35:20Z

Currently windows style (CRLF) line endings cause styler to throw an error e.g.

library(styler)                 
                                
style_text("a <- 0\nb <- 1")   
#> a <- 0
#> b <- 1
style_text("a <- 0\r\nb <- 1")
#> Error in parse(text = text, keep.source = TRUE): <text>:1:7: unexpected input
#> 1: a <- 0
#>           ^

This caused issues for me when attempting to use the format provider in atom which uses styler.

P.S. Thanks for a great package.

The text was updated successfully, but these errors were encountered:

lorenzwalthert · 2018-06-15T16:14:03Z

Thanks @NGaffney for reporting this. This smells like trouble -.- I tried to use textConnection but we loose information.

library(magrittr)

text <- "a <- 0\nb <- 1"
text_crlf <- "a <- 0\r\nb <- 1"

# as is
p <- parse(text = text, keep.source = TRUE)

p %>% str()
#> length 2 expression(a <- 0, b <- 1)
#>  - attr(*, "srcref")=List of 2
#>   ..$ : 'srcref' int [1:8] 1 1 1 6 1 6 1 1
#>   .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x0000000017a65e20> 
#>   ..$ : 'srcref' int [1:8] 2 1 2 6 1 6 2 2
#>   .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x0000000017a65e20> 
#>  - attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x0000000017a65e20> 
#>  - attr(*, "wholeSrcref")= 'srcref' int [1:8] 1 0 3 0 0 0 1 3
#>   ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x0000000017a65e20>
p %>% getParseData()
#>    line1 col1 line2 col2 id parent       token terminal text
#> 7      1    1     1    6  7      0        expr    FALSE     
#> 1      1    1     1    1  1      3      SYMBOL     TRUE    a
#> 3      1    1     1    1  3      7        expr    FALSE     
#> 2      1    3     1    4  2      7 LEFT_ASSIGN     TRUE   <-
#> 4      1    6     1    6  4      5   NUM_CONST     TRUE    0
#> 5      1    6     1    6  5      7        expr    FALSE     
#> 16     2    1     2    6 16      0        expr    FALSE     
#> 10     2    1     2    1 10     12      SYMBOL     TRUE    b
#> 12     2    1     2    1 12     16        expr    FALSE     
#> 11     2    3     2    4 11     16 LEFT_ASSIGN     TRUE   <-
#> 13     2    6     2    6 13     14   NUM_CONST     TRUE    1
#> 14     2    6     2    6 14     16        expr    FALSE

# with textConnection() we are loosing information we need downstream
p <- parse(textConnection(text), keep.source = TRUE)
p %>% str()
#>   expression(a <- 0, b <- 1)
p %>% getParseData()
#> NULL

parse(textConnection(text_crlf), keep.source = TRUE) %>% getParseText()
#> Error in getParseText(.): Argument "id" fehlt (ohne Standardwert)


# Writing to a temp file might help
p <- parse(text = text, keep.source = TRUE)

p %>% str()
#> length 2 expression(a <- 0, b <- 1)
#>  - attr(*, "srcref")=List of 2
#>   ..$ : 'srcref' int [1:8] 1 1 1 6 1 6 1 1
#>   .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x00000000191ae768> 
#>   ..$ : 'srcref' int [1:8] 2 1 2 6 1 6 2 2
#>   .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x00000000191ae768> 
#>  - attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x00000000191ae768> 
#>  - attr(*, "wholeSrcref")= 'srcref' int [1:8] 1 0 3 0 0 0 1 3
#>   ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x00000000191ae768>
p %>% getParseData()
#>    line1 col1 line2 col2 id parent       token terminal text
#> 7      1    1     1    6  7      0        expr    FALSE     
#> 1      1    1     1    1  1      3      SYMBOL     TRUE    a
#> 3      1    1     1    1  3      7        expr    FALSE     
#> 2      1    3     1    4  2      7 LEFT_ASSIGN     TRUE   <-
#> 4      1    6     1    6  4      5   NUM_CONST     TRUE    0
#> 5      1    6     1    6  5      7        expr    FALSE     
#> 16     2    1     2    6 16      0        expr    FALSE     
#> 10     2    1     2    1 10     12      SYMBOL     TRUE    b
#> 12     2    1     2    1 12     16        expr    FALSE     
#> 11     2    3     2    4 11     16 LEFT_ASSIGN     TRUE   <-
#> 13     2    6     2    6 13     14   NUM_CONST     TRUE    1
#> 14     2    6     2    6 14     16        expr    FALSE
tempfile <- tempfile()
enc::write_lines_enc(text_crlf, tempfile)

p <- parse(tempfile, keep.source = TRUE)
p %>% str()
#> length 2 expression(a <- 0, b <- 1)
#>  - attr(*, "srcref")=List of 2
#>   ..$ : 'srcref' int [1:8] 1 1 1 6 1 6 1 1
#>   .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x000000001ba6fe18> 
#>   ..$ : 'srcref' int [1:8] 2 1 2 6 1 6 2 2
#>   .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x000000001ba6fe18> 
#>  - attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x000000001ba6fe18> 
#>  - attr(*, "wholeSrcref")= 'srcref' int [1:8] 1 0 3 0 0 0 1 3
#>   ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x000000001ba6fe18>
p %>% getParseData()
#>    line1 col1 line2 col2 id parent       token terminal text
#> 7      1    1     1    6  7      0        expr    FALSE     
#> 1      1    1     1    1  1      3      SYMBOL     TRUE    a
#> 3      1    1     1    1  3      7        expr    FALSE     
#> 2      1    3     1    4  2      7 LEFT_ASSIGN     TRUE   <-
#> 4      1    6     1    6  4      5   NUM_CONST     TRUE    0
#> 5      1    6     1    6  5      7        expr    FALSE     
#> 16     2    1     2    6 16      0        expr    FALSE     
#> 10     2    1     2    1 10     12      SYMBOL     TRUE    b
#> 12     2    1     2    1 12     16        expr    FALSE     
#> 11     2    3     2    4 11     16 LEFT_ASSIGN     TRUE   <-
#> 13     2    6     2    6 13     14   NUM_CONST     TRUE    1
#> 14     2    6     2    6 14     16        expr    FALSE

Created on 2018-06-15 by the reprex package (v0.2.0).

The alternative with a temp file looks more promising but also a bit brittle, and a few unit tests fail because of encoding troubles. I opened krlmlr/enc#21, might be related. Is it possible to use unix style eol in atom? Because we already had some debate about a similar topic: Wheter or not to support other encodings than UTF-8 (#376). And we are currently not planning to support it.
cc: @krlmlr

NGaffney · 2018-06-16T04:39:13Z

Setting the default end of line to Unix style is simple in all the editors I'm familiar with (atom, vs code, RStudio), I did it straight away after discovering the issue.

The main complication is that these editors all default to the platform native form. How difficult is it to detect windows style line endings and fail with an informative error message?

lorenzwalthert · 2018-06-17T14:32:26Z

Ok, thanks for the clarification. I actually found that the approach implemented in the branch crlf seems to work on Unix and the encoding problems on windows seem related to krlmlr/enc#21 only, so we may well end up taking this approach anyways. May need to also consider slowdown caused by this extra step.
Can you reproduce krlmlr/enc#21 on your machine?

NGaffney · 2018-06-17T21:35:56Z

Yep I can reproduce, looks like our systems are very close.

tempfile <- tempfile()         
text <- "glück"                
library(enc)                   
write_lines_enc(text, tempfile)
read_lines_enc(tempfile)       
#> [1] ""

Session info

devtools::session_info()
#> Session info -------------------------------------------------------------
#>  setting  value                       
#>  version  R version 3.5.0 (2018-04-23)
#>  system   x86_64, mingw32             
#>  ui       RTerm                       
#>  language (EN)                        
#>  collate  English_Australia.1252      
#>  tz       Australia/Sydney            
#>  date     2018-06-18
#> Packages -----------------------------------------------------------------
#>  package   * version date       source        
#>  backports   1.1.2   2017-12-13 CRAN (R 3.5.0)
#>  base      * 3.5.0   2018-04-23 local         
#>  compiler    3.5.0   2018-04-23 local         
#>  datasets  * 3.5.0   2018-04-23 local         
#>  devtools    1.13.5  2018-02-18 CRAN (R 3.5.0)
#>  digest      0.6.15  2018-01-28 CRAN (R 3.5.0)
#>  enc       * 0.2.0   2018-03-03 CRAN (R 3.5.0)
#>  evaluate    0.10.1  2017-06-24 CRAN (R 3.5.0)
#>  graphics  * 3.5.0   2018-04-23 local         
#>  grDevices * 3.5.0   2018-04-23 local         
#>  htmltools   0.3.6   2017-04-28 CRAN (R 3.5.0)
#>  knitr       1.20    2018-02-20 CRAN (R 3.5.0)
#>  magrittr    1.5     2014-11-22 CRAN (R 3.5.0)
#>  memoise     1.1.0   2017-04-21 CRAN (R 3.5.0)
#>  methods   * 3.5.0   2018-04-23 local         
#>  Rcpp        0.12.17 2018-05-18 CRAN (R 3.5.0)
#>  rmarkdown   1.9     2018-03-01 CRAN (R 3.5.0)
#>  rprojroot   1.3-2   2018-01-03 CRAN (R 3.5.0)
#>  stats     * 3.5.0   2018-04-23 local         
#>  stringi     1.2.2   2018-05-02 CRAN (R 3.5.0)
#>  stringr     1.3.1   2018-05-10 CRAN (R 3.5.0)
#>  tools       3.5.0   2018-04-23 local         
#>  utils     * 3.5.0   2018-04-23 local         
#>  withr       2.1.2   2018-03-15 CRAN (R 3.5.0)
#>  yaml        2.1.19  2018-05-01 CRAN (R 3.5.0)

lorenzwalthert · 2018-06-17T23:00:12Z

As far as benchmarking goes, we have with current master a median runtime of 281s for styler::style_text("glück <- 1").

Unit: milliseconds
                                      expr      min       lq     mean   median       uq      max neval
 styler::style_text(rep("glück <- 1", 20)) 262.6423 281.4982 317.6855 289.8061 351.6167 549.9157    50

and with branch crlf at 1f93629 we have 290s.

Unit: milliseconds
                                      expr      min       lq    mean   median       uq      max neval
 styler::style_text(rep("glück <- 1", 20)) 276.0006 290.4277 336.255 305.6802 367.2087 606.0931    50

and at with first converting to UTF (see ) at 180c8ac we have 286s.

Unit: milliseconds
                                      expr      min       lq    mean   median       uq     max neval
 styler::style_text(rep("glück <- 1", 20)) 270.1516 286.4523 309.555 296.4907 320.2215 452.225    50

Obviously these are not comprehensive tests but it seems as there is obvious slowdown.

lorenzwalthert · 2018-06-20T23:09:06Z

The thing is that crlf does not work on windows because on windows, base::parse() cannot parse UTF-8. I think we should try to simply use writeLines() for writing the temp file (and hence use native encoding) and reading with readLines(). Alternative: use enc::to_utf8(), enc::write_lines_enc(), enc::read_lines_enc() plus enc::to_native(), before parsing. This is a lot of overhead.

Yet another way is to implement a warning only and limit ourselves to detect that the newline character is not \n and ask the user to switch to Unix EOLs. After formatting, I guess Unix EOLs are used anyways (yet we could theoretically also support other EOL styles).

Maybe just requiring Unix EOLs (as we ask for UTF-8) and throw an informative error message otherwise is the cleanest of all.

Since this is relevant for the atom IDE and I'd like users to have a pleasant first experience with styler (otherwise most of them won't ever use it again), I think this should be included in v1.0.2 (#397) .

@krlmlr what do you prefer?

lorenzwalthert · 2018-06-21T18:58:07Z

Ok, @NGaffney I implemented the warning in #400. Any change you can test that? No idea how that works with the IDE. I tried to wrap my head around atom but I could not find it out within 15 minutes because it seemed not as straightforward as I expected.

king-of-poppk · 2023-09-11T21:28:00Z

Would it make sense to revisit this issue? I was having a similar problem and here is the solution I found, without the need for a temporary file:

parse(
  textConnection(x),
  srcfile = srcfilecopy(
    "<text>",
    lines = trimws(
      stringr::str_split(x, stringr::boundary(type = "sentence"))[[1]],
      which = "right",
      whitespace = "\n"
    )
  )
)

lorenzwalthert · 2023-09-12T05:38:36Z

No sorry, I don’t think we plan to support this due to the overhead in time and complexity.

lorenzwalthert added Type: Bug Priority: High Complexity: Unassigned Status: Unassigned labels Jun 15, 2018

This was referenced Jun 20, 2018

Prepare release v1.0.2 #397

Closed

Throw informative error for Windows style EOL (mainly for atom users) #400

Merged

lorenzwalthert closed this as completed in #400 Jun 23, 2018

lorenzwalthert mentioned this issue Jun 27, 2018

Require latest styler release for more informative error message REditorSupport/languageserver#23

Merged

lorenzwalthert mentioned this issue Jul 7, 2018

Transformer categories and UTF-8 note #406

Merged

lorenzwalthert mentioned this issue Oct 13, 2024

Feature request: format R codes on save rstudio/rstudio#2563

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Support Windows Newline Characters #396

Support Windows Newline Characters #396

NGaffney commented Jun 15, 2018 •

edited

Loading

lorenzwalthert commented Jun 15, 2018

Uh oh!

NGaffney commented Jun 16, 2018

Uh oh!

lorenzwalthert commented Jun 17, 2018 •

edited

Loading

Uh oh!

NGaffney commented Jun 17, 2018

Uh oh!

lorenzwalthert commented Jun 17, 2018 •

edited

Loading

Uh oh!

lorenzwalthert commented Jun 20, 2018 •

edited

Loading

Uh oh!

lorenzwalthert commented Jun 21, 2018

Uh oh!

king-of-poppk commented Sep 11, 2023 •

edited

Loading

Uh oh!

lorenzwalthert commented Sep 12, 2023 •

edited

Loading

Uh oh!

Support Windows Newline Characters #396

Support Windows Newline Characters #396

Comments

NGaffney commented Jun 15, 2018 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

lorenzwalthert commented Jun 15, 2018

Uh oh!

NGaffney commented Jun 16, 2018

Uh oh!

lorenzwalthert commented Jun 17, 2018 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

NGaffney commented Jun 17, 2018

Uh oh!

lorenzwalthert commented Jun 17, 2018 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

lorenzwalthert commented Jun 20, 2018 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

lorenzwalthert commented Jun 21, 2018

Uh oh!

king-of-poppk commented Sep 11, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

lorenzwalthert commented Sep 12, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

NGaffney commented Jun 15, 2018 •

edited

Loading

lorenzwalthert commented Jun 17, 2018 •

edited

Loading

lorenzwalthert commented Jun 17, 2018 •

edited

Loading

lorenzwalthert commented Jun 20, 2018 •

edited

Loading

king-of-poppk commented Sep 11, 2023 •

edited

Loading

lorenzwalthert commented Sep 12, 2023 •

edited

Loading