diff --git a/README.md b/README.md index dceb902..0a085f6 100644 --- a/README.md +++ b/README.md @@ -72,4 +72,18 @@ Respond in a Markdown code block, conserving \n characters. Respond in a Markdown code block. Conserve \n characters (new-line characters). -i.e., break line before "They parse the English text..." \ No newline at end of file +i.e., break line before "They parse the English text..." + + +### Code Style + +Some guidelines for good code and commenting style. + +#### Commenting + +- Comments should not be redundant: if someone with a basic knowledge of the programming language can tell at a glance what the code does, there's no need to explain it. For example, the JavaScript `const fs = require('fs');` does not need to be explained. +- Don't use personal sign-offs or openings; the code should exist regardless of authors. For example `// maps to emoji`, not `//Colin: maps to emoji`. + +#### Further Reading + +Google has a [JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html). diff --git a/src/datasets/NCSLGR.json b/src/datasets/NCSLGR.json index aad9aad..f9ef6c0 100644 --- a/src/datasets/NCSLGR.json +++ b/src/datasets/NCSLGR.json @@ -2,7 +2,7 @@ "pub": { "name": "NCSLGR", "year": 2007, - "publication": "dataset:databases2007volumes", + "publication": "dataset:Neidle_2012_NCSLGR_ISLRN", "url": "https://www.bu.edu/asllrp/ncslgr.html" }, "loader": "ncslgr", @@ -15,7 +15,7 @@ "#items": null, "#samples": "1,875 sentences", "#signers": 4, - "license": "TODO", - "licenseUrl": null, + "license": "Research Attribution", + "licenseUrl": "https://www.bu.edu/asllrp/data-credits.html", "contact": "carol@bu.edu" } diff --git a/src/index.md b/src/index.md index 23500e5..63177ab 100644 --- a/src/index.md +++ b/src/index.md @@ -1094,7 +1094,7 @@ are collections of annotated single signs. They are synthesized [@dataset:ebling contain parallel sequences of signs and spoken language. 
Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively]. Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending]. -These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event. +These datasets are usually synthesized [@dataset:Neidle_2012_NCSLGR_ISLRN;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event. ###### Availability {-} diff --git a/src/references.bib b/src/references.bib index f01e75d..3241f1e 100644 --- a/src/references.bib +++ b/src/references.bib @@ -3549,3 +3549,19 @@ @inproceedings{dataset:ozdemir2020bosphorussign22k url = {https://aclanthology.org/2020.signlang-1.30}, year = {2020} } + +@inproceedings{Vogler2012ANW, + title={A new web interface to facilitate access to corpora: development of the ASLLRP data access interface}, + author={Christian Vogler and C. 
Neidle}, + year={2012}, + url={https://api.semanticscholar.org/CorpusID:58305327} +} + +@misc{dataset:Neidle_2012_NCSLGR_ISLRN, + type = {Languageresource}, + title = {National Center for Sign Language and Gesture Resources (NCSLGR) corpus. ISLRN 833-505-711-564-4}, + author = {Carol Neidle and Stan Sclaroff}, + year = {2012}, + publisher = {Boston University}, + url = {https://www.islrn.org/resources/833-505-711-564-4/} +}