From e2ce2ad62d09f9893897bad99aa83bf9cbfdd68f Mon Sep 17 00:00:00 2001
From: Colin Leong <--unset>
Date: Thu, 20 Jun 2024 17:39:49 -0400
Subject: [PATCH 1/4] CDL: updating NCSLGR (take 2)

---
 src/datasets/NCSLGR.json |  4 ++--
 src/index.md             |  2 +-
 src/references.bib       | 16 ++++++++++++++++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/datasets/NCSLGR.json b/src/datasets/NCSLGR.json
index aad9aad..a6cb675 100644
--- a/src/datasets/NCSLGR.json
+++ b/src/datasets/NCSLGR.json
@@ -15,7 +15,7 @@
   "#items": null,
   "#samples": "1,875 sentences",
   "#signers": 4,
-  "license": "TODO",
-  "licenseUrl": null,
+  "license": "Research Attribution",
+  "licenseUrl": "https://www.bu.edu/asllrp/data-credits.html",
   "contact": "carol@bu.edu"
 }
diff --git a/src/index.md b/src/index.md
index 51a543e..5ddb333 100644
--- a/src/index.md
+++ b/src/index.md
@@ -1046,7 +1046,7 @@ are collections of annotated single signs. They are synthesized [@dataset:ebling
 contain parallel sequences of signs and spoken language.
 Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively].
 Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending].
-These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
+These datasets are usually synthesized [@dataset:Neidle_2020_NCSLGR_ISLRN;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
 
 
 ###### Availability {-}
diff --git a/src/references.bib b/src/references.bib
index b5c3c6a..3f6c3a1 100644
--- a/src/references.bib
+++ b/src/references.bib
@@ -3457,3 +3457,19 @@ @inproceedings{dataset:dal2022lsa
  url = {https://doi.org/10.1007/978-3-031-22419-5_25},
  year = {2023}
 }
+
+@inproceedings{Vogler2012ANW,
+  title={A new web interface to facilitate access to corpora: development of the ASLLRP data access interface},
+  author={Christian Vogler and Carol Neidle},
+  year={2012},
+  url={https://api.semanticscholar.org/CorpusID:58305327}
+}
+
+@misc{dataset:Neidle_2020_NCSLGR_ISLRN,
+  type = {Languageresource},
+  title = {National Center for Sign Language and Gesture Resources (NCSLGR) corpus. ISLRN 833-505-711-564-4},
+  author = {Carol Neidle and Stan Sclaroff},
+  year = {2012},
+  publisher = {Boston University},
+  url = {https://www.islrn.org/resources/833-505-711-564-4/}
+}
\ No newline at end of file

From 51415a5c13db92efbd3753cec7a4cdad848809b9 Mon Sep 17 00:00:00 2001
From: Colin Leong <--unset>
Date: Thu, 20 Jun 2024 17:43:29 -0400
Subject: [PATCH 2/4] CDL: change ref in JSON

---
 src/datasets/NCSLGR.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/datasets/NCSLGR.json b/src/datasets/NCSLGR.json
index a6cb675..f9ef6c0 100644
--- a/src/datasets/NCSLGR.json
+++ b/src/datasets/NCSLGR.json
@@ -2,7 +2,7 @@
   "pub": {
     "name": "NCSLGR",
     "year": 2007,
-    "publication": "dataset:databases2007volumes",
+    "publication": "dataset:Neidle_2012_NCSLGR_ISLRN",
     "url": "https://www.bu.edu/asllrp/ncslgr.html"
   },
   "loader": "ncslgr",

From 7395b4af3cabfd8b21da0979602d0b0a30db305f Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 21 Jun 2024 09:48:59 -0400
Subject: [PATCH 3/4] Adding a few notes on code/comment style

---
 README.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cbea270..6dbabd3 100644
--- a/README.md
+++ b/README.md
@@ -72,4 +72,18 @@ Respond in a Markdown code block, conserving \n characters.
 
 Respond in a Markdown code block.
 Conserve \n characters (new-line characters).
-i.e., break line before "They parse the English text..."
\ No newline at end of file
+i.e., break line before "They parse the English text..."
+
+
+### Code Style
+
+Some guidelines for good code and commenting style.
+
+#### Commenting
+
+- Comments should not be redundant: if someone with a basic knowledge of the programming language can tell at a glance what the code does, there's no need to explain it. For example, the JavaScript `const fs = require('fs');` does not need to be explained.
+- Don't use personal sign-offs or openings; the code should stand on its own regardless of who wrote it. For example, write `// maps to emoji`, not `//Colin: maps to emoji`.
+
+#### Further Reading
+
+Google has a [JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html).

From 540b49c4c3cfc7021c70c58c813be4216d449822 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 21 Jun 2024 09:50:15 -0400
Subject: [PATCH 4/4] CDL: updating citation key for NCSLGR

---
 src/index.md       | 2 +-
 src/references.bib | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/index.md b/src/index.md
index 5ddb333..e6ddbb0 100644
--- a/src/index.md
+++ b/src/index.md
@@ -1046,7 +1046,7 @@ are collections of annotated single signs. They are synthesized [@dataset:ebling
 contain parallel sequences of signs and spoken language.
 Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively].
 Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending].
-These datasets are usually synthesized [@dataset:Neidle_2020_NCSLGR_ISLRN;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
+These datasets are usually synthesized [@dataset:Neidle_2012_NCSLGR_ISLRN;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
 
 
 ###### Availability {-}
diff --git a/src/references.bib b/src/references.bib
index 3f6c3a1..865b6df 100644
--- a/src/references.bib
+++ b/src/references.bib
@@ -3465,7 +3465,7 @@ @inproceedings{Vogler2012ANW
   url={https://api.semanticscholar.org/CorpusID:58305327}
 }
 
-@misc{dataset:Neidle_2020_NCSLGR_ISLRN,
+@misc{dataset:Neidle_2012_NCSLGR_ISLRN,
   type = {Languageresource},
   title = {National Center for Sign Language and Gesture Resources (NCSLGR) corpus. ISLRN 833-505-711-564-4},
   author = {Carol Neidle and Stan Sclaroff},