From ce10410a67ca50599d4c4d440c496368f9f24de2 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 1 Feb 2024 16:14:31 +0000 Subject: [PATCH 1/9] Add sv_version to training data ingest --- pipeline/00-ingest.R | 1 + 1 file changed, 1 insertion(+) diff --git a/pipeline/00-ingest.R b/pipeline/00-ingest.R index 6af6af59..851941f4 100644 --- a/pipeline/00-ingest.R +++ b/pipeline/00-ingest.R @@ -45,6 +45,7 @@ training_data <- dbGetQuery( sale.buyer_name AS meta_sale_buyer_name, sale.sv_is_outlier, sale.sv_outlier_type, + sale.sv_version, res.* FROM model.vw_card_res_input res INNER JOIN default.vw_pin_sale sale From 02f4d660989cdc3cf41f158f51d915f2732f509f Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 1 Feb 2024 19:09:22 +0000 Subject: [PATCH 2/9] Switch to correct column --- pipeline/00-ingest.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/00-ingest.R b/pipeline/00-ingest.R index 851941f4..551dffe5 100644 --- a/pipeline/00-ingest.R +++ b/pipeline/00-ingest.R @@ -45,7 +45,7 @@ training_data <- dbGetQuery( sale.buyer_name AS meta_sale_buyer_name, sale.sv_is_outlier, sale.sv_outlier_type, - sale.sv_version, + sale.sv_run_id, res.* FROM model.vw_card_res_input res INNER JOIN default.vw_pin_sale sale From 0abe909662788bf6710a5d25fcca7564940dc140 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 1 Feb 2024 21:55:51 +0000 Subject: [PATCH 3/9] Add run_id to assessment outputs --- pipeline/02-assess.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pipeline/02-assess.R b/pipeline/02-assess.R index 3c2c74d7..96480aa0 100644 --- a/pipeline/02-assess.R +++ b/pipeline/02-assess.R @@ -319,7 +319,7 @@ assessment_card_data_merged %>% select( meta_year, meta_pin, meta_class, meta_card_num, meta_card_pct_total_fmv, meta_complex_id, pred_card_initial_fmv, pred_card_final_fmv, - all_of(params$model$predictor$all), township_code + all_of(params$model$predictor$all), township_code, all_of(starts_with("sv_")) ) %>% mutate(meta_complex_id = as.numeric(meta_complex_id)) %>% ccao::vars_recode( @@ -362,15 +362,27 @@ sales_data_ratio_study <- sales_data %>% sales_data_two_most_recent <- sales_data %>% distinct( meta_pin, meta_year, - meta_sale_price, meta_sale_date, meta_sale_document_num + meta_sale_price, meta_sale_date, meta_sale_document_num, + sv_outlier_type, sv_run_id ) %>% + rename(meta_sale_outlier_type = sv_outlier_type, + meta_sale_sv_run_id = sv_run_id) %>% + mutate( + meta_sale_outlier_type = ifelse( + meta_sale_outlier_type == "Not outlier", NA, meta_sale_outlier_type + ) + ) %>% group_by(meta_pin) %>% slice_max(meta_sale_date, n = 2) %>% mutate(mr = paste0("sale_recent_", row_number())) %>% tidyr::pivot_wider( id_cols = meta_pin, names_from = mr, - values_from = c(meta_sale_date, meta_sale_price, meta_sale_document_num), + values_from = c(meta_sale_date, + meta_sale_price, + meta_sale_document_num, + meta_sale_outlier_type, + meta_sale_sv_run_id), names_glue = "{mr}_{gsub('meta_sale_', '', .value)}" ) %>% select(meta_pin, contains("1"), contains("2")) %>% From 4636f8a6c549f325293f0ea7b313a0dc9e595d3b Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 1 Feb 2024 21:58:16 +0000 Subject: [PATCH 4/9] Remove selection criteria --- pipeline/02-assess.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/02-assess.R b/pipeline/02-assess.R index 96480aa0..3435a391 100644 --- a/pipeline/02-assess.R +++ b/pipeline/02-assess.R @@ -319,7 +319,7 @@ assessment_card_data_merged %>% select( meta_year, meta_pin, meta_class, meta_card_num, meta_card_pct_total_fmv, meta_complex_id, pred_card_initial_fmv, pred_card_final_fmv, - all_of(params$model$predictor$all), township_code, all_of(starts_with("sv_")) + all_of(params$model$predictor$all), township_code ) %>% mutate(meta_complex_id = as.numeric(meta_complex_id)) %>% ccao::vars_recode( From f9d7e71d52310cfe6f578234a08098ca7f81647f Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 1 Feb 2024 22:21:42 +0000 Subject: [PATCH 5/9] Fix linting errors --- pipeline/02-assess.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pipeline/02-assess.R b/pipeline/02-assess.R index 3435a391..a170aab8 100644 --- a/pipeline/02-assess.R +++ b/pipeline/02-assess.R @@ -365,8 +365,10 @@ sales_data_two_most_recent <- sales_data %>% meta_sale_price, meta_sale_date, meta_sale_document_num, sv_outlier_type, sv_run_id ) %>% - rename(meta_sale_outlier_type = sv_outlier_type, - meta_sale_sv_run_id = sv_run_id) %>% + rename( + meta_sale_outlier_type = sv_outlier_type, + meta_sale_sv_run_id = sv_run_id + ) %>% mutate( meta_sale_outlier_type = ifelse( meta_sale_outlier_type == "Not outlier", NA, meta_sale_outlier_type @@ -378,11 +380,13 @@ sales_data_two_most_recent <- sales_data %>% tidyr::pivot_wider( id_cols = meta_pin, names_from = mr, - values_from = c(meta_sale_date, - meta_sale_price, - meta_sale_document_num, - meta_sale_outlier_type, - meta_sale_sv_run_id), + values_from = c( + meta_sale_date, + meta_sale_price, + meta_sale_document_num, + meta_sale_outlier_type, + meta_sale_sv_run_id + ), names_glue = "{mr}_{gsub('meta_sale_', '', .value)}" ) %>% select(meta_pin, contains("1"), contains("2")) %>% From 16908681fbb2aebd085f2de2ce035e719cadfc4b Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 1 Feb 2024 22:23:53 +0000 Subject: [PATCH 6/9] Fix linting errors --- pipeline/02-assess.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline/02-assess.R b/pipeline/02-assess.R index a170aab8..2c88e8e7 100644 --- a/pipeline/02-assess.R +++ b/pipeline/02-assess.R @@ -372,8 +372,8 @@ sales_data_two_most_recent <- sales_data %>% mutate( meta_sale_outlier_type = ifelse( meta_sale_outlier_type == "Not outlier", NA, meta_sale_outlier_type - ) - ) %>% + ) + ) %>% group_by(meta_pin) %>% slice_max(meta_sale_date, n = 2) %>% mutate(mr = paste0("sale_recent_", row_number())) %>% From e45ca4afed4480d66ab458d91bc3d1a639057df9 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 2 Feb 2024 16:04:08 +0000 Subject: [PATCH 7/9] Remove assess stage changes --- pipeline/02-assess.R | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/pipeline/02-assess.R b/pipeline/02-assess.R index 2c88e8e7..dab8c344 100644 --- a/pipeline/02-assess.R +++ b/pipeline/02-assess.R @@ -179,7 +179,7 @@ assessment_pin_data_w_land <- assessment_card_data_round %>% # Use the fixed late value first (unless it exceeds the land % cap) !is.na(land_rate_per_pin) & (land_rate_per_pin > pred_pin_final_fmv_round_no_prorate * - params$pv$land_pct_of_total_cap) ~ + params$pv$land_pct_of_total_cap) ~ pred_pin_final_fmv_round_no_prorate * params$pv$land_pct_of_total_cap, !is.na(land_rate_per_pin) ~ land_rate_per_pin, # nolint end @@ -362,17 +362,7 @@ sales_data_ratio_study <- sales_data %>% sales_data_two_most_recent <- sales_data %>% distinct( meta_pin, meta_year, - meta_sale_price, meta_sale_date, meta_sale_document_num, - sv_outlier_type, sv_run_id - ) %>% - rename( - meta_sale_outlier_type = sv_outlier_type, - meta_sale_sv_run_id = sv_run_id - ) %>% - mutate( - meta_sale_outlier_type = ifelse( - meta_sale_outlier_type == "Not outlier", NA, meta_sale_outlier_type - ) + meta_sale_price, meta_sale_date, meta_sale_document_num ) %>% group_by(meta_pin) %>% slice_max(meta_sale_date, n = 2) %>% @@ -380,13 +370,7 @@ sales_data_two_most_recent <- sales_data %>% tidyr::pivot_wider( id_cols = meta_pin, names_from = mr, - values_from = c( - meta_sale_date, - meta_sale_price, - meta_sale_document_num, - meta_sale_outlier_type, - meta_sale_sv_run_id - ), + values_from = c(meta_sale_date, meta_sale_price, meta_sale_document_num), names_glue = "{mr}_{gsub('meta_sale_', '', .value)}" ) %>% select(meta_pin, contains("1"), contains("2")) %>% @@ -515,7 +499,7 @@ assessment_pin_data_final <- assessment_pin_data_sale %>% mutate( flag_prior_near_to_pred_unchanged = prior_near_tot >= pred_pin_final_fmv_round - 100 & - prior_near_tot <= pred_pin_final_fmv_round + 100, # nolint + prior_near_tot <= pred_pin_final_fmv_round + 100, # nolint flag_pred_initial_to_final_changed = ccao::val_round_fmv( pred_pin_initial_fmv, breaks = params$pv$round_break, From 142c4518b3318d2367214f4871e7e3595bbe7bfc Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 2 Feb 2024 16:06:02 +0000 Subject: [PATCH 8/9] Fix spacing --- pipeline/02-assess.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline/02-assess.R b/pipeline/02-assess.R index dab8c344..940c5d1f 100644 --- a/pipeline/02-assess.R +++ b/pipeline/02-assess.R @@ -179,7 +179,7 @@ assessment_pin_data_w_land <- assessment_card_data_round %>% # Use the fixed late value first (unless it exceeds the land % cap) !is.na(land_rate_per_pin) & (land_rate_per_pin > pred_pin_final_fmv_round_no_prorate * - params$pv$land_pct_of_total_cap) ~ + params$pv$land_pct_of_total_cap) ~ pred_pin_final_fmv_round_no_prorate * params$pv$land_pct_of_total_cap, !is.na(land_rate_per_pin) ~ land_rate_per_pin, # nolint end @@ -499,7 +499,7 @@ assessment_pin_data_final <- assessment_pin_data_sale %>% mutate( flag_prior_near_to_pred_unchanged = prior_near_tot >= pred_pin_final_fmv_round - 100 & - prior_near_tot <= pred_pin_final_fmv_round + 100, # nolint + prior_near_tot <= pred_pin_final_fmv_round + 100, # nolint flag_pred_initial_to_final_changed = ccao::val_round_fmv( pred_pin_initial_fmv, breaks = params$pv$round_break, From c8e26c63ce71b6d25a5188e0e4cad59036c1656c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 2 Feb 2024 16:06:33 +0000 Subject: [PATCH 9/9] Fix spacing --- pipeline/02-assess.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/02-assess.R b/pipeline/02-assess.R index 940c5d1f..3c2c74d7 100644 --- a/pipeline/02-assess.R +++ b/pipeline/02-assess.R @@ -499,7 +499,7 @@ assessment_pin_data_final <- assessment_pin_data_sale %>% mutate( flag_prior_near_to_pred_unchanged = prior_near_tot >= pred_pin_final_fmv_round - 100 & - prior_near_tot <= pred_pin_final_fmv_round + 100, # nolint + prior_near_tot <= pred_pin_final_fmv_round + 100, # nolint flag_pred_initial_to_final_changed = ccao::val_round_fmv( pred_pin_initial_fmv, breaks = params$pv$round_break,