-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsacct.R
More file actions
98 lines (93 loc) · 2.78 KB
/
sacct.R
File metadata and controls
98 lines (93 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
library(data.table)
library(namedCapture)
library(tidyr)
library(magrittr)
## Five example rows (Elapsed time and JobID strings) that serve as the
## seed data for the benchmark; presumably modeled on SLURM `sacct`
## output -- confirm against the data source.
## The first three JobIDs share one job/task prefix and differ only in
## their step suffix; the last two use bracketed task ranges.
## The surrounding parentheses make the assignment print its value.
(sacct.dt <- data.table(
  Elapsed=c(
    "07:04:42",
    "07:04:42",
    "07:04:49",
    "00:00:00",
    "00:00:00"),
  JobID=c(
    paste0("13937810_25", c("", ".batch", ".extern")),
    "14022192_[1-3]",
    "14022204_[4]"),
  stringsAsFactors=FALSE))
## Pattern building blocks written in namedCapture's list syntax.
## Per the namedCapture documentation: named elements become capture
## groups, nested lists become non-capturing groups, and a function
## placed after a pattern converts the captured text.

## One or more digits, converted to integer.
int.pattern <- list("[0-9]+", as.integer)

## A bracketed task range: "[task1]" or "[task1-taskN]".
range.pattern <- list(
  "[[]",
  task1=int.pattern,
  list("-", taskN=int.pattern), "?", # the "-taskN" part is optional
  "[]]")

## Underscore followed by either a single task number or a range.
task.pattern <- list(
  "_",
  list(
    task=int.pattern, # a single task ...
    "|",
    range.pattern))   # ... or a bracketed range

## One pattern per column of the subject data, keyed by column name.
namedCapture.patterns <- list(
  JobID=list(
    job=int.pattern,
    task.pattern,
    list("[.]", type=".*"), "?"), # optional ".type" suffix
  Elapsed=list(
    hours=int.pattern, ":",
    minutes=int.pattern, ":",
    seconds=int.pattern))
## Plain regex strings with positional capture groups, used by the
## tidyr::extract() benchmark. Each larger pattern embeds the smaller
## one via a sprintf() template.

## "[N]" or "[N-M]": captures N and, optionally, M.
tidyr.range.pattern <- "\\[([0-9]+)(?:-([0-9]+))?\\]"

## "_" followed by either a single number or a bracketed range.
tidyr.task.pattern <- sprintf("_(?:([0-9]+)|%s)", tidyr.range.pattern)

## Job number, task part, then an optional ".suffix".
tidyr.job.pattern <- sprintf("([0-9]+)%s(?:[.](.*))?", tidyr.task.pattern)
## Regex string for each column, generated from the namedCapture list
## patterns so the rematch2 benchmark uses the same patterns.
## vapply() (rather than sapply()) guarantees a named character vector
## with exactly one pattern string per column, and fails loudly
## otherwise.
rematch2.patterns <- vapply(namedCapture.patterns, function(L){
  namedCapture::variable_args_list(L)$pattern
}, character(1))
## Benchmark: for each subject size, time every extraction approach
## with microbenchmark, then save all raw timings to "sacct.rds".
##
## Note kept from the original author: for(){big <- cbind(big, small)}
## was observed to be more efficient than do.call(cbind, big.list),
## but the same is not true for rbind.

## The namedCapture expressions below set the global
## namedCapture.engine option, so remember the current value and
## restore it once the benchmark is done.
old.engine <- getOption("namedCapture.engine")
timing.dt.list <- list()
for(subject.size in 10^seq(2, 5, by=0.5)){
  ## Recycle every column of the example data up to subject.size
  ## elements (length.out spelled out; no partial argument matching).
  ## NOTE(review): the half-steps give non-integer sizes (e.g. 10^2.5),
  ## so the row count built by rep() cannot equal the recorded
  ## subject.size exactly -- confirm whether rounding is wanted.
  subject <- do.call(data.table, lapply(
    sacct.dt, rep, length.out=subject.size))
  ## Argument list for df_match_variable: the subject first, then one
  ## named pattern per column.
  namedCapture.args <- c(list(subject), namedCapture.patterns)
  timing <- microbenchmark::microbenchmark(
    "tidyr::extract(ICU)"={
      job.df <- tidyr::extract(
        subject, "JobID",
        c("job", "task", "task1", "taskN", "type"),
        tidyr.job.pattern,
        convert=TRUE)
      tidyr::extract(
        job.df, "Elapsed", c("hours", "minutes", "seconds"),
        "([0-9]+):([0-9]+):([0-9]+)",
        convert=TRUE)
    },
    "rematch2::bind_re_match(PCRE)"={
      ## An earlier, now disabled, variant looped over
      ## names(rematch2.patterns) with rematch2::bind_re_match_().
      subject %>%
        rematch2::bind_re_match(JobID, rematch2.patterns[["JobID"]]) %>%
        rematch2::bind_re_match(Elapsed, rematch2.patterns[["Elapsed"]])
    },
    "namedCapture::df_match_variable(PCRE)"={
      options(namedCapture.engine="PCRE")
      do.call(df_match_variable, namedCapture.args)
    },
    "namedCapture::df_match_variable(RE2)"={
      options(namedCapture.engine="RE2")
      do.call(df_match_variable, namedCapture.args)
    },
    times=5)
  ## Tag every timing row with the size it was measured at.
  timing.dt.list[[paste(subject.size)]] <- data.table(subject.size, timing)
}
## Put the engine option back to what it was before the benchmark
## (assigning NULL unsets it if it was not set).
options(namedCapture.engine=old.engine)
timing.dt <- do.call(rbind, timing.dt.list)
saveRDS(timing.dt, "sacct.rds")