Making Text classification template similar to Image Classification [skip ci] #92

Merged 2 commits on Apr 19, 2021
107 changes: 55 additions & 52 deletions templates/text_classification/_sidebar.py
@@ -18,63 +18,66 @@ def get_configs() -> dict:
     config["eval_epoch_length"] = None
     default_none_options(config)
 
-    st.header("Transformer")
+    with st.beta_expander("Text Classification Template Configurations", expanded=True):
+        st.info("Names in the parenthesis are variable names used in the generated code.")
 
-    st.subheader("Model Options")
-    config["model"] = st.selectbox(
-        "Model name (from transformers) to setup model, tokenize and config to train (model)",
-        options=["bert-base-uncased"],
-    )
-    config["model_dir"] = st.text_input("Cache directory to download the pretrained model (model_dir)", value="./")
-    config["tokenizer_dir"] = st.text_input("Tokenizer cache directory (tokenizer_dir)", value="./tokenizer")
-    config["num_classes"] = st.number_input(
-        "Number of target classes. Default, 1 (binary classification) (num_classes)", min_value=0, value=1
-    )
-    config["max_length"] = st.number_input(
-        "Maximum number of tokens for the inputs to the transformer model (max_length)", min_value=1, value=256
-    )
-    config["dropout"] = st.number_input(
-        "Dropout probability (dropout)", min_value=0.0, max_value=1.0, value=0.3, format="%f"
-    )
-    config["n_fc"] = st.number_input(
-        "Number of neurons in the last fully connected layer (n_fc)", min_value=1, value=768
-    )
-    st.markdown("---")
+        st.subheader("Model Options")
+        config["model"] = st.selectbox(
+            "Model name (from transformers) to setup model, tokenize and config to train (model)",
+            options=["bert-base-uncased"],
+        )
+        config["model_dir"] = st.text_input("Cache directory to download the pretrained model (model_dir)", value="./")
+        config["tokenizer_dir"] = st.text_input("Tokenizer cache directory (tokenizer_dir)", value="./tokenizer")
+        config["num_classes"] = st.number_input(
+            "Number of target classes. Default, 1 (binary classification) (num_classes)", min_value=0, value=1
+        )
+        config["max_length"] = st.number_input(
+            "Maximum number of tokens for the inputs to the transformer model (max_length)", min_value=1, value=256
+        )
+        config["dropout"] = st.number_input(
+            "Dropout probability (dropout)", min_value=0.0, max_value=1.0, value=0.3, format="%f"
+        )
+        config["n_fc"] = st.number_input(
+            "Number of neurons in the last fully connected layer (n_fc)", min_value=1, value=768
+        )
+        st.markdown("---")
 
-    st.subheader("Dataset Options")
-    config["data_dir"] = st.text_input("Dataset cache directory (data_dir)", value="./")
-    st.markdown("---")
+        st.subheader("Dataset Options")
+        config["data_dir"] = st.text_input("Dataset cache directory (data_dir)", value="./")
+        st.markdown("---")
 
-    st.subheader("DataLoader Options")
-    config["batch_size"] = st.number_input("Total batch size (batch_size)", min_value=1, value=4)
-    config["num_workers"] = st.number_input("Number of workers in the data loader (num_workers)", min_value=1, value=2)
-    st.markdown("---")
+        st.subheader("DataLoader Options")
+        config["batch_size"] = st.number_input("Total batch size (batch_size)", min_value=1, value=4)
+        config["num_workers"] = st.number_input(
+            "Number of workers in the data loader (num_workers)", min_value=1, value=2
+        )
+        st.markdown("---")
 
-    st.subheader("Optimizer Options")
-    config["learning_rate"] = st.number_input(
-        "Peak of piecewise linear learning rate scheduler", min_value=0.0, value=5e-5, format="%e"
-    )
-    config["weight_decay"] = st.number_input("Weight decay", min_value=0.0, value=0.01, format="%f")
-    st.markdown("---")
+        st.subheader("Optimizer Options")
+        config["learning_rate"] = st.number_input(
+            "Peak of piecewise linear learning rate scheduler", min_value=0.0, value=5e-5, format="%e"
+        )
+        config["weight_decay"] = st.number_input("Weight decay", min_value=0.0, value=0.01, format="%f")
+        st.markdown("---")
 
-    st.subheader("Training Options")
-    config["max_epochs"] = st.number_input("Number of epochs to train the model", min_value=1, value=3)
-    config["num_warmup_epochs"] = st.number_input(
-        "Number of warm-up epochs before learning rate decay", min_value=0, value=0
-    )
-    config["validate_every"] = st.number_input(
-        "Run model's validation every validate_every epochs", min_value=0, value=1
-    )
-    config["checkpoint_every"] = st.number_input(
-        "Store training checkpoint every checkpoint_every iterations", min_value=0, value=1000
-    )
-    config["log_every_iters"] = st.number_input(
-        "Argument to log batch loss every log_every_iters iterations. 0 to disable it", min_value=0, value=15
-    )
-    st.markdown("---")
+        st.subheader("Training Options")
+        config["max_epochs"] = st.number_input("Number of epochs to train the model", min_value=1, value=3)
+        config["num_warmup_epochs"] = st.number_input(
+            "Number of warm-up epochs before learning rate decay", min_value=0, value=0
+        )
+        config["validate_every"] = st.number_input(
+            "Run model's validation every validate_every epochs", min_value=0, value=1
+        )
+        config["checkpoint_every"] = st.number_input(
+            "Store training checkpoint every checkpoint_every iterations", min_value=0, value=1000
+        )
+        config["log_every_iters"] = st.number_input(
+            "Argument to log batch loss every log_every_iters iterations. 0 to disable it", min_value=0, value=15
+        )
+        st.markdown("---")
 
-    distributed_options(config)
-    ignite_handlers_options(config)
-    ignite_loggers_options(config)
+        distributed_options(config)
+        ignite_handlers_options(config)
+        ignite_loggers_options(config)
 
     return config
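
For readers unfamiliar with the Streamlit pattern applied above, here is a minimal, self-contained sketch (not part of this PR) of how widgets declared inside an `st.beta_expander` context manager render as one collapsible group. The function name, labels, and default values below are illustrative only, and `st.beta_expander` assumes a 2021-era Streamlit release (later versions renamed it to `st.expander`).

```python
# Illustrative sketch only: grouping config widgets inside a single expander,
# mirroring the structure of the template change above.
import streamlit as st


def get_demo_configs() -> dict:
    config = {}
    # Widgets created inside the `with` block are rendered inside the expander.
    with st.beta_expander("Demo Template Configurations", expanded=True):
        st.info("Names in the parenthesis are variable names used in the generated code.")

        st.subheader("Model Options")
        config["model"] = st.selectbox("Model name (model)", options=["bert-base-uncased"])
        config["dropout"] = st.number_input(
            "Dropout probability (dropout)", min_value=0.0, max_value=1.0, value=0.3, format="%f"
        )
        st.markdown("---")

        st.subheader("Training Options")
        config["max_epochs"] = st.number_input("Number of epochs to train the model", min_value=1, value=3)
    return config
```

Saved as `app.py`, this would be launched with `streamlit run app.py`; on each rerun, every widget writes its current value into the returned config dict, which is how the sidebar feeds the code generator.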