Skip to content

Commit 2fef015

Browse files
committed
Add FIS-Broker datasets overview based on
offline version of dataset table (inst/extdata/fisbroker.htmlk) and code for table preparation in data-raw/fb_datasets.R Due to dynamic js session ids it is not very easy to get the data required for read_wfs(), i.e. the basename of "Rechneraddresse" as this is only reached after clicking an interactive link
1 parent 4a62e4a commit 2fef015

File tree

5 files changed

+205
-0
lines changed

5 files changed

+205
-0
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@
99
^index\.md$
1010
^README\.md$
1111
^vignettes/articles$
12+
^data-raw$

DESCRIPTION

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,6 @@ Remotes:
3636
Encoding: UTF-8
3737
Roxygen: list(markdown = TRUE)
3838
RoxygenNote: 7.2.1
39+
Depends:
40+
R (>= 2.10)
41+
LazyData: true

data-raw/fb_datasets.R

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
## code to prepare `DATASET` dataset goes here
2+
####"inst/extdata/fisbroker.html = "view-source:https://fbinter.stadt-berlin.de/fb/gisbroker.do;jsessionid=C0EDCE1624C9A8877CB10BFE1B3C74EE?cmd=navigationFrameResult"
3+
4+
get_jsessionid <- function(string) {
5+
jsessionid <- stringr::str_extract(string, "jsessionid=.*\\?.*")
6+
7+
jsessionid[!is.na(jsessionid )] %>%
8+
stringr::str_remove("\\?.*") %>%
9+
stringr::str_remove(".*jsessionid=")
10+
}
11+
12+
tabelle <- xml2::read_html("inst/extdata/fisbroker.html", encoding = "UTF-8") %>%
13+
rvest::html_element(css = "table.nav_tabelle")
14+
15+
tabelle %>%
16+
rvest::html_elements(css = "tr.kategorie")
17+
18+
x <- tabelle %>%
19+
rvest::html_elements(css = "tr")
20+
21+
22+
23+
is_category <- stringr::str_detect(x, "class=\"kategorie\"")
24+
25+
categories_df <- tibble::tibble(
26+
idx = which(is_category),
27+
category_id = seq_len(length(idx)),
28+
category_name = rvest::html_text(x[idx])
29+
)
30+
31+
is_dataset <- !is_category
32+
33+
datasets_df <- tibble::tibble(idx = which(is_dataset),
34+
dataset_id = seq_len(length(idx)),
35+
dataset_name_raw = rvest::html_text(x[idx]))
36+
37+
38+
datasets_text_list <- stats::setNames(lapply(datasets_df$idx, function(idx) {
39+
elements <- x[idx] %>%
40+
rvest::html_elements(css = "a.standard")
41+
42+
elements_text <- rvest::html_text(elements)
43+
elements_href <- rvest::html_attr(elements, "href") %>%
44+
stringr::str_replace(get_jsessionid(.),
45+
"<jsessionid>")
46+
47+
n_elements <- length(elements)
48+
49+
if(n_elements == 1) {
50+
tibble::tibble(dataset_name = elements_text[1],
51+
dataset_name_href = elements_href[1])
52+
} else if (n_elements > 1) {
53+
dataset_name <- rvest::html_text(elements[1])
54+
dataset_types <- rvest::html_text(elements[2:n_elements])
55+
56+
tibble::tibble(dataset_name = elements_text[1],
57+
dataset_name_href = elements_href[1],
58+
dataset_type = elements_text[2:n_elements],
59+
dataset_type_href = elements_href[2:n_elements])
60+
} else {
61+
stop("no element found")
62+
}
63+
64+
65+
}),datasets_df$idx)
66+
67+
fb_datasets <- dplyr::bind_rows(datasets_text_list, .id = "idx") %>%
68+
dplyr::mutate(idx = as.integer(.data$idx))
69+
70+
fb_datasets <- categories_df %>%
71+
dplyr::full_join(fb_datasets, by = "idx") %>%
72+
dplyr::arrange(.data$idx) %>%
73+
tidyr::fill(.data$category_id,
74+
.data$category_name) %>%
75+
dplyr::filter(!is.na(.data$dataset_name)) %>%
76+
dplyr::left_join(datasets_df[,c("idx", "dataset_id")], by = "idx") %>%
77+
dplyr::select(- .data$idx) %>%
78+
dplyr::relocate(.data$dataset_id, .before = .data$dataset_name)
79+
80+
usethis::use_data(fb_datasets, overwrite = TRUE)

data/fb_datasets.rda

22.9 KB
Binary file not shown.

inst/extdata/fisbroker.html

Lines changed: 121 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)