Skip to content

Commit 539b213

Browse files
Remove OpenNMT and Tensorflow, Update to Python 3.10 (#583)
* Removed OpenNMT and TensorFlow dependencies
  - Removed the memory_growth and eager_execution command-line arguments
  - Removed the WER score metric
* Updated Python to 3.10 and transformers to 4.46
1 parent 7ef209b commit 539b213

30 files changed

+3617
-6104
lines changed

.devcontainer/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
ARG PYTHON_VERSION=3.8
1+
ARG PYTHON_VERSION=3.10
22
ARG POETRY_VERSION=1.7.1
3-
FROM ubuntu:20.04
3+
FROM ubuntu:22.04
44
ARG PYTHON_VERSION
55
ARG POETRY_VERSION
66
WORKDIR /app
@@ -44,5 +44,5 @@ ENV SIL_NLP_CACHE_PROJECT_DIR=/root/.cache/silnlp/projects
4444
# Set environment variables
4545
ENV CLEARML_API_HOST="https://api.sil.hosted.allegro.ai"
4646
ENV SIL_NLP_DATA_PATH=/silnlp
47-
ENV EFLOMAL_PATH=/workspaces/silnlp/.venv/lib/python3.8/site-packages/eflomal/bin
47+
ENV EFLOMAL_PATH=/workspaces/silnlp/.venv/lib/python3.10/site-packages/eflomal/bin
4848
CMD ["bash"]

Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
ARG PYTHON_VERSION=3.8
1+
ARG PYTHON_VERSION=3.10
22
ARG POETRY_VERSION=1.7.1
33

4-
FROM python:$PYTHON_VERSION-slim as builder
4+
FROM python:$PYTHON_VERSION-slim AS builder
55
ARG POETRY_VERSION
66

77
ENV POETRY_HOME=/opt/poetry
@@ -22,9 +22,9 @@ RUN poetry export -E eflomal --without-hashes -f requirements.txt > requirements
2222
COPY . /src
2323
RUN poetry build
2424

25-
FROM ubuntu:20.04
25+
FROM ubuntu:22.04
2626

27-
ARG PYTHON_VERSION=3.8
27+
ARG PYTHON_VERSION=3.10
2828

2929
ENV PIP_DISABLE_PIP_VERSION_CHECK=on
3030
ENV TZ=America/New_York

poetry.lock

Lines changed: 3545 additions & 3097 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.black]
22
line-length = 120
3-
target-version = ['py38']
3+
target-version = ['py310']
44
include = '\.pyi?$'
55
exclude = '''
66
/(
@@ -48,7 +48,7 @@ silnlp-alignment-aggregate-results = "silnlp.alignment.aggregate_results:main"
4848
clowder = "clowder.clowder:main"
4949

5050
[tool.poetry.dependencies]
51-
python = ">=3.8.1,<3.9"
51+
python = ">=3.10,<3.11"
5252
pandas = "^1.0.4"
5353
sentencepiece = "^0.1.97"
5454
nltk = "^3.5"
@@ -57,13 +57,11 @@ seaborn = "0.11.2"
5757
morfessor = "^2.0.6"
5858
Morfessor-FlatCat = "^1.0.8"
5959
psutil = "^5.7.3"
60-
scikit-learn = "1.1.2"
61-
numpy = "^1.23.1"
62-
OpenNMT-tf = "^2.31.0"
60+
scikit-learn = "^1.1.2"
61+
numpy = "^1.26.0"
6362
lit-nlp = "0.4.1"
64-
tensorflow = "2.7.3"
6563
google-cloud-translate = "^3.0.2"
66-
scipy = "1.8"
64+
scipy = "^1.11.2"
6765
clearml = ">=1.4.1"
6866
XlsxWriter = "^3.2.0"
6967
python-Levenshtein = "^0.20.9"
@@ -72,18 +70,17 @@ tqdm = "^4.62.2"
7270
s3path = "0.3.4"
7371
sacrebleu = "^2.3.1"
7472
ctranslate2 = "^3.5.1"
75-
tensorflow-addons = "0.17.1"
7673
libclang = "14.0.6"
77-
sil-machine = {extras = ["thot"], version = "^1.1.0"}
74+
sil-machine = {extras = ["thot"], version = "^1.3.0"}
7875
datasets = "^2.7.1"
79-
torch = {version = "2.1.2", source = "torch"}
76+
torch = {version = "^2.4", source = "torch"}
8077
sacremoses = "^0.0.53"
8178
evaluate = "^0.3.0"
8279
python-docx = "^0.8.11"
8380
iso639-lang = "^2.1.0"
8481
eflomal = { version = "^2.0.0", optional = true }
85-
accelerate = "^0.23.0"
86-
transformers = "^4.36.2"
82+
accelerate = "^0.26.0"
83+
transformers = "^4.46"
8784
optimum = "^1.16.0"
8885
google = "^3.0.0"
8986
google-api-python-client = "^2.101.0"

scripts/bible_alignment/scratchpad_align_analyze.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
from pathlib import Path
55
import matplotlib.pyplot as plt
6+
import os
67

78
LOGGER = logging.getLogger("silnlp")
89

scripts/clear_clearml_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
task_name="clear_cache",
66
)
77
task.set_base_docker(
8-
docker_image="nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04",
8+
docker_image="nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu22.04",
99
docker_arguments="-v /home/clearml/.clearml/hf-cache:/root/.cache/huggingface",
1010
docker_setup_bash_script=[
1111
"apt install -y python3-venv",

scripts/scratchpad_s3.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
from silnlp.common.tf_utils import enable_memory_growth
21
from silnlp.nmt.experiment import SILExperiment
32
from silnlp.nmt.translate import TranslationTask
43

5-
enable_memory_growth()
6-
74
exp = SILExperiment(
85
name="BT-Swahili/en-swh-1",
96
make_stats=True, # limited by stats_max_size to process only Bibles

silnlp/common/metrics.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,12 @@
55
from typing import List, Optional
66

77
import psutil
8-
from opennmt.utils.wer import sentence_wer
98

109
from ..common.corpus import write_corpus
1110

1211
METEOR_FULLY_SUPPORTED_LANGS = {"en", "cz", "de", "es", "fr", "ar"}
1312

1413

15-
def compute_wer_score(hyps: List[str], refs: List[List[str]]) -> float:
16-
if len(hyps) == 0:
17-
return 100.0
18-
19-
try:
20-
wer_score = 0.0
21-
for hyp, ref in zip(hyps, refs[0]):
22-
wer_score += sentence_wer(ref.lower(), hyp.lower())
23-
result = wer_score / len(hyps)
24-
except UnicodeDecodeError:
25-
print("Unable to compute WER score")
26-
result = -1
27-
except ZeroDivisionError:
28-
print("Cannot divide by zero. Check for empty lines.")
29-
result = -1
30-
31-
return result * 100
32-
33-
3414
def compute_meteor_score(lang: str, hyps: List[str], refs: List[List[str]]) -> Optional[float]:
3515
if lang.lower() not in METEOR_FULLY_SUPPORTED_LANGS:
3616
return None

silnlp/common/normalizer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
Normalization tooling for cleaning up whitespace and punctuation in extract sentences
33
See normalize_extracts.py for context
44
"""
5-
import logging
6-
import regex
75

6+
import logging
87
from dataclasses import dataclass
98
from enum import Enum, IntEnum
109
from typing import Dict, List, Optional, Set, Tuple
1110

11+
import regex
12+
1213

1314
class PunctuationCategory(Enum):
1415
LEFT_CLINGING = "LEFT_CLINGING"
@@ -46,7 +47,7 @@ def shift_slice(slice: StringSlice, offset: int, new_outer: str) -> StringSlice:
4647
)
4748

4849

49-
def find_slices(reg: regex.Regex, text: str) -> List[StringSlice]:
50+
def find_slices(reg: regex.Pattern, text: str) -> List[StringSlice]:
5051
return [
5152
StringSlice(start_index=match.span()[0], end_index=match.span()[1], slice=match.group(), outer=text)
5253
for match in regex.finditer(reg, text)

silnlp/common/tf_utils.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

0 commit comments

Comments
 (0)