Skip to content

feat(template): complete cifar10 classification #118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 17, 2021
Merged
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ dist-ssr
*.local
__pycache__
*.log
.vscode
.vscode
*.tar.gz
21 changes: 21 additions & 0 deletions scripts/check_copies.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,26 @@ def check_utils():
print(red, "Unmatched", file, reset)


def check_readme():
    """Verify that every template README embeds the shared common README.

    Reads the canonical snippet from template-common/README.md and checks
    that each template's README.md contains it verbatim, printing a colored
    "Matched" / "Unmatched" line per file.
    """
    red = "\033[31m"
    green = "\033[32m"
    reset = "\033[0m"

    # Canonical content every template README must include.
    # encoding specified explicitly, matching read_text("utf-8") below.
    with open("./src/templates/template-common/README.md", "r", encoding="utf-8") as f:
        common_readme = f.read()

    path = Path("./src/templates/")

    # rglob already searches recursively; a "**/" prefix is redundant.
    for file in path.rglob("README.md"):
        readme = file.read_text("utf-8")
        if common_readme in readme:
            print(green, "Matched", file, reset)
        else:
            print(red, "Unmatched", file, reset)


if __name__ == "__main__":
    # Run both copy checks: shared utils snippets, then the common README.
    check_utils()
    print()
    check_readme()
    print()
2 changes: 2 additions & 0 deletions src/components/CodeBlock.vue
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<script>
import { highlight, languages } from 'prismjs'
import 'prismjs/components/prism-json'
import 'prismjs/components/prism-yaml'
import 'prismjs/components/prism-python'
import 'prismjs/components/prism-markdown'
import 'prismjs/themes/prism-tomorrow.css'
Expand Down Expand Up @@ -162,6 +163,7 @@ div[class~='language-bash']::before {
content: 'sh';
}

div[class~='language-yml']::before,
div[class~='language-yaml']::before {
content: 'yaml';
}
Expand Down
5 changes: 4 additions & 1 deletion src/components/NavBar.vue
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,19 @@ import { ref } from 'vue'
export default {
components: { IconDiscord, IconDownload, IconGitHub, IconTwitter },
setup() {
let zip = new JSZip()
const showDownloadMsg = ref(false)
const currentCommit = __COMMIT__ /* from vite.config.js */

const downloadProject = () => {
const zip = new JSZip()
if (store.code && Object.keys(store.code).length) {
msg.color = '#ff0000'
if (!store.config.output_dir) {
msg.showMsg = true
msg.content = `Output directory is required. Please input in Loggers tab.`
} else if (!store.config.log_every_iters) {
msg.showMsg = true
msg.content = `Logging interval is required. Please input in Loggers tab.`
} else {
for (const filename in store.code) {
zip.file(filename, store.code[filename])
Expand Down
18 changes: 8 additions & 10 deletions src/components/PaneRight.vue
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<template>
<div v-if="tabs">
<div v-if="tabs()">
<div class="right-pane-tabs">
<div
v-for="tab in tabs"
v-for="tab in tabs()"
:key="tab"
class="right-pane-tab"
:class="{ active: currentTab === tab }"
Expand Down Expand Up @@ -38,22 +38,20 @@ export default {
components: { CodeBlock, Instruction },
setup() {
const currentTab = ref('README.md')
const tabs = computed(() => {
const tabs = () => {
if (store.config.template) {
const tabsArr = Object.keys(templates[store.config.template])
if (import.meta.env.DEV) {
tabsArr.push(__DEV_CONFIG_FILE__)
}
return tabsArr
return Object.keys(store.code)
}
})
}
// search more file types mapping on
// https://icones.js.org/collection/vscode-icons
const fileTypes = {
py: 'python',
md: 'markdown',
json: 'json',
txt: 'text'
txt: 'text',
yml: 'yaml',
yaml: 'yaml'
}

const getFileType = (tab) => {
Expand Down
5 changes: 5 additions & 0 deletions src/components/TabHandlers.vue
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
:saveKey="filename_prefix.name"
:type="filename_prefix.type"
/>
<FormInput
:label="save_every_iters.description"
:saveKey="save_every_iters.name"
:type="save_every_iters.type"
/>
<FormInput
:label="n_saved.description"
:saveKey="n_saved.name"
Expand Down
6 changes: 6 additions & 0 deletions src/components/TabLoggers.vue
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
:saveKey="output_dir.name"
required
/>
<FormInput
type="number"
:label="log_every_iters.description"
:saveKey="log_every_iters.name"
required
/>
<FormSelect
:label="logger.description"
:options="logger.options"
Expand Down
6 changes: 5 additions & 1 deletion src/components/TabTemplates.vue
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ export default {

const downloadTemplates = () => fetchTemplates(store.config.template)

return { templateLabel, templateOptions, downloadTemplates }
return {
templateLabel,
templateOptions,
downloadTemplates
}
}
}
</script>
25 changes: 18 additions & 7 deletions src/metadata/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"launch": {
"name": "launch",
"type": "radio",
"description": "Run the training with torch.distributed.launch"
"description": "Run the training with torch.distributed.launch (recommended)"
},
"spawn": {
"name": "spawn",
Expand All @@ -18,13 +18,13 @@
"nproc_per_node": {
"name": "nproc_per_node",
"type": "number",
"description": "Number of processes to launch on each node",
"description": "Number of processes to launch on each node (mandatory for single node, multi gpus distributed training)",
"min": 1
},
"nnodes": {
"name": "nnodes",
"type": "number",
"description": "Number of nodes to use for distributed training",
"description": "Number of nodes to use for distributed training (mandatory for multi nodes, multi gpus distributed training)",
"min": 1
},
"master_addr": {
Expand All @@ -43,7 +43,7 @@
"save_training": {
"name": "save_training",
"type": "checkbox",
"description": "Save the training state by every save_every_iters."
"description": "Save the training state (models, optimizers, trainers, ...) every save_every_iters iterations."
},
"save_evaluation": {
"name": "save_evaluation",
Expand All @@ -69,18 +69,24 @@
"name": "filename_prefix",
"type": "text",
"value": "checkpointing",
"description": "What prefix would you like to put in front of saved checkpoint file?"
"description": "What prefix would you like to put in front of the saved checkpoint files? (mandatory for saving training states)"
},
"save_every_iters": {
"name": "save_every_iters",
"type": "number",
"value": "checkpointing",
"description": "Iteration interval for saving training states (mandatory for saving training states)"
},
"n_saved": {
"name": "n_saved",
"type": "number",
"value": "checkpointing",
"description": "How many checkpoint file would you like to keep on disk?"
"description": "How many checkpoint files would you like to keep on disk? (mandatory for saving both training and evaluation)"
},
"limit_sec": {
"name": "limit_sec",
"type": "number",
"description": "How long do you want to run for the training and then terminate?"
"description": "How long do you want the training to run before terminating? (in seconds)"
}
},
"loggers": {
Expand All @@ -89,6 +95,11 @@
"type": "text",
"description": "Directory to save all outputs"
},
"log_every_iters": {
"name": "log_every_iters",
"type": "number",
"description": "Logging interval for training statistics"
},
"logger": {
"name": "logger",
"type": "array",
Expand Down
12 changes: 7 additions & 5 deletions src/store.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,17 @@ export function saveConfig(key, value) {
}

// render the code if there are fetched files for current selected template
export async function genCode() {
export function genCode() {
const currentFiles = files[store.config.template]
if (currentFiles && Object.keys(currentFiles).length) {
for (const file in currentFiles) {
store.code[file] = ejs.render(currentFiles[file], store.config)
store.code[file] = ejs
.render(currentFiles[file], store.config)
.replaceAll(/(\n\n\n\n)+/gi, '\n')
}
if (isDev) {
store.code[__DEV_CONFIG_FILE__] = JSON.stringify(store.config, null, 2)
}
}
if (isDev) {
store.code[__DEV_CONFIG_FILE__] = JSON.stringify(store.config, null, 2)
}
}

Expand Down
125 changes: 125 additions & 0 deletions src/templates/template-common/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#::: if (it.dist === 'launch') { :::#
#::: if (it.nproc_per_node) { :::#
#::: if (it.nnodes && it.master_addr && it.master_port) { :::#

### Multi Node, Multi GPU Training (`torch.distributed.launch`) (recommended)

- Execute on master node

```sh
python -m torch.distributed.launch \
--nproc_per_node #:::= it.nproc_per_node :::# \
--nnodes #:::= it.nnodes :::# \
--node_rank 0 \
--master_addr #:::= it.master_addr :::# \
--master_port #:::= it.master_port :::# \
--use_env main.py backend=nccl \
hydra.run.dir=. \
hydra.output_subdir=null \
hydra/job_logging=disabled \
hydra/hydra_logging=disabled
```

- Execute on worker nodes

```sh
python -m torch.distributed.launch \
--nproc_per_node #:::= it.nproc_per_node :::# \
--nnodes #:::= it.nnodes :::# \
--node_rank <node_rank> \
--master_addr #:::= it.master_addr :::# \
--master_port #:::= it.master_port :::# \
--use_env main.py backend=nccl \
hydra.run.dir=. \
hydra.output_subdir=null \
hydra/job_logging=disabled \
hydra/hydra_logging=disabled
```

#::: } else { :::#

### Multi GPU Training (`torch.distributed.launch`) (recommended)

```sh
python -m torch.distributed.launch \
--nproc_per_node #:::= it.nproc_per_node :::# \
--use_env main.py backend=nccl \
hydra.run.dir=. \
hydra.output_subdir=null \
hydra/job_logging=disabled \
hydra/hydra_logging=disabled
```

#::: } :::#
#::: } :::#
#::: } :::#

#::: if (it.dist === 'spawn') { :::#
#::: if (it.nproc_per_node) { :::#
#::: if (it.nnodes && it.master_addr && it.master_port) { :::#

### Multi Node, Multi GPU Training (`torch.multiprocessing.spawn`)

- Execute on master node

```sh
python main.py \
nproc_per_node=#:::= it.nproc_per_node :::# \
nnodes=#:::= it.nnodes :::# \
node_rank=0 \
master_addr=#:::= it.master_addr :::# \
master_port=#:::= it.master_port :::# \
backend=nccl \
hydra.run.dir=. \
hydra.output_subdir=null \
hydra/job_logging=disabled \
hydra/hydra_logging=disabled
```

- Execute on worker nodes

```sh
python main.py \
nproc_per_node=#:::= it.nproc_per_node :::# \
nnodes=#:::= it.nnodes :::# \
node_rank=<node_rank> \
master_addr=#:::= it.master_addr :::# \
master_port=#:::= it.master_port :::# \
backend=nccl \
hydra.run.dir=. \
hydra.output_subdir=null \
hydra/job_logging=disabled \
hydra/hydra_logging=disabled
```

#::: } else { :::#

### Multi GPU Training (`torch.multiprocessing.spawn`)

```sh
python main.py \
nproc_per_node=#:::= it.nproc_per_node :::# \
backend=nccl \
hydra.run.dir=. \
hydra.output_subdir=null \
hydra/job_logging=disabled \
hydra/hydra_logging=disabled
```

#::: } :::#
#::: } :::#
#::: } :::#

#::: if (!it.nproc_per_node) { :::#

### 1 GPU Training

```sh
python main.py \
hydra.run.dir=. \
hydra.output_subdir=null \
hydra/job_logging=disabled \
hydra/hydra_logging=disabled
```

#::: } :::#
Loading