diff --git a/.circleci/config.yml b/.circleci/config.yml index c44a1bcdd2..5ce617105e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -240,17 +240,21 @@ jobs: resource_class: 2xlarge+ steps: - checkout + - run: + name: Generate cache key + # This will refresh the cache on Sundays; the nightly build should generate the new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly - restore_cache: keys: - - env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }} + - env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - run: name: Setup command: .circleci/unittest/scripts/setup_env.sh - save_cache: - key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }} + key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} paths: - conda @@ -258,9 +262,23 @@ jobs: - run: name: Install torchtext command: .circleci/unittest/scripts/install.sh + - restore_cache: + keys: + + - data-v1-{{ checksum ".circleci-weekly" }} + - run: name: Run tests + # Downloading embedding vectors takes a long time. + no_output_timeout: 30m command: .circleci/unittest/scripts/run_test.sh + - save_cache: + + key: data-v1-{{ checksum ".circleci-weekly" }} + + paths: + - .vector_cache + - .data - run: name: Post process command: .circleci/unittest/scripts/post_process.sh diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index c86dbbeca1..097a6d7b19 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -240,17 +240,21 @@ jobs: resource_class: 2xlarge+ steps: - checkout + - run: + name: Generate cache key + # This will refresh the cache on Sundays; the nightly build should generate the new cache. 
+ command: echo "$(date +"%Y-%U")" > .circleci-weekly - restore_cache: {% raw %} keys: - - env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }} + - env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} {% endraw %} - run: name: Setup command: .circleci/unittest/scripts/setup_env.sh - save_cache: {% raw %} - key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }} + key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} {% endraw %} paths: - conda @@ -258,9 +262,23 @@ jobs: - run: name: Install torchtext command: .circleci/unittest/scripts/install.sh + - restore_cache: + keys: + {% raw %} + - data-v1-{{ checksum ".circleci-weekly" }} + {% endraw %} - run: name: Run tests + # Downloading embedding vectors takes a long time. 
+ no_output_timeout: 30m command: .circleci/unittest/scripts/run_test.sh + - save_cache: + {% raw %} + key: data-v1-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - .vector_cache + - .data - run: name: Post process command: .circleci/unittest/scripts/post_process.sh diff --git a/.circleci/unittest/scripts/run_test.sh b/.circleci/unittest/scripts/run_test.sh index 19649530b5..1eea12cfb1 100755 --- a/.circleci/unittest/scripts/run_test.sh +++ b/.circleci/unittest/scripts/run_test.sh @@ -6,5 +6,5 @@ eval "$(./conda/bin/conda shell.bash hook)" conda activate ./env python -m torch.utils.collect_env -pytest --cov=torchtext --junitxml=test-results/junit.xml -v test +pytest --cov=torchtext --junitxml=test-results/junit.xml -v --durations 20 test flake8 torchtext test diff --git a/test/common/test_markers.py b/test/common/test_markers.py deleted file mode 100644 index 903f5c5450..0000000000 --- a/test/common/test_markers.py +++ /dev/null @@ -1,7 +0,0 @@ -import pytest -import os - -slow = pytest.mark.skipif( - os.getenv('RUN_SLOW', 'False') == 'False', - reason="This test is slow." 
-) diff --git a/test/conftest.py b/test/conftest.py deleted file mode 100644 index 3860ed6e34..0000000000 --- a/test/conftest.py +++ /dev/null @@ -1,3 +0,0 @@ -def pytest_addoption(parser): - parser.addoption("--runslow", action="store_true", - help="Run slow tests") diff --git a/test/data/test_builtin_datasets.py b/test/data/test_builtin_datasets.py index b2723949b2..b388dc61de 100644 --- a/test/data/test_builtin_datasets.py +++ b/test/data/test_builtin_datasets.py @@ -4,7 +4,6 @@ from torchtext.datasets import AG_NEWS import torch from torch.testing import assert_allclose -from ..common.test_markers import slow from ..common.torchtext_test_case import TorchtextTestCase @@ -16,10 +15,16 @@ def conditional_remove(f): class TestDataset(TorchtextTestCase): - @slow def test_wikitext2_legacy(self): from torchtext.datasets import WikiText2 # smoke test to ensure wikitext2 works properly + + # NOTE + # test_wikitext2 and test_wikitext2_legacy have some cache incompatibility. + # Keeping one's cache makes the other fail. So we need to clean up the cache dir + cachedir = os.path.join(self.project_root, ".data", "wikitext-2") + conditional_remove(cachedir) + ds = WikiText2 TEXT = data.Field(lower=True, batch_first=True) train, valid, test = ds.splits(TEXT) @@ -30,13 +35,20 @@ def test_wikitext2_legacy(self): train_iter, valid_iter, test_iter = ds.iters(batch_size=4, bptt_len=30) - # Delete the dataset after we're done to save disk space on CI - datafile = os.path.join(self.project_root, ".data", "wikitext-2") - conditional_remove(datafile) + conditional_remove(cachedir) def test_wikitext2(self): from torchtext.experimental.datasets import WikiText2 # smoke test to ensure wikitext2 works properly + + # NOTE + # test_wikitext2 and test_wikitext2_legacy have some cache incompatibility. + # Keeping one's cache makes the other fail. 
So we need to clean up the cache dir + cachedir = os.path.join(self.project_root, ".data", "wikitext-2") + conditional_remove(cachedir) + cachefile = os.path.join(self.project_root, ".data", "wikitext-2-v1.zip") + conditional_remove(cachefile) + train_dataset, test_dataset, valid_dataset = WikiText2() self.assertEqual(len(train_dataset), 2049990) self.assertEqual(len(test_dataset), 241859) @@ -46,13 +58,9 @@ def test_wikitext2(self): tokens_ids = [vocab[token] for token in 'the player characters rest'.split()] self.assertEqual(tokens_ids, [2, 286, 503, 700]) - # Delete the dataset after we're done to save disk space on CI - datafile = os.path.join(self.project_root, ".data", "wikitext-2") - conditional_remove(datafile) - datafile = os.path.join(self.project_root, ".data", "wikitext-2-v1.zip") - conditional_remove(datafile) + conditional_remove(cachedir) + conditional_remove(cachefile) - @slow def test_penntreebank_legacy(self): from torchtext.datasets import PennTreebank # smoke test to ensure penn treebank works properly @@ -66,10 +74,6 @@ def test_penntreebank_legacy(self): train_iter, valid_iter, test_iter = ds.iters(batch_size=4, bptt_len=30) - # Delete the dataset after we're done to save disk space on CI - datafile = os.path.join(self.project_root, ".data", "penn-treebank") - conditional_remove(datafile) - def test_penntreebank(self): from torchtext.experimental.datasets import PennTreebank # smoke test to ensure wikitext2 works properly @@ -82,14 +86,6 @@ def test_penntreebank(self): tokens_ids = [vocab[token] for token in 'the player characters rest'.split()] self.assertEqual(tokens_ids, [2, 2550, 3344, 1125]) - # Delete the dataset after we're done to save disk space on CI - datafile = os.path.join(self.project_root, ".data", 'ptb.train.txt') - conditional_remove(datafile) - datafile = os.path.join(self.project_root, ".data", 'ptb.test.txt') - conditional_remove(datafile) - datafile = os.path.join(self.project_root, ".data", 'ptb.valid.txt') - 
conditional_remove(datafile) - def test_text_classification(self): # smoke test to ensure ag_news dataset works properly @@ -104,13 +100,6 @@ def test_text_classification(self): assert_allclose(ag_news_test[-1][1][:10], torch.tensor([2351, 758, 96, 38581, 2351, 220, 5, 396, 3, 14786]).long()) - # Delete the dataset after we're done to save disk space on CI - datafile = os.path.join(self.project_root, ".data", "ag_news_csv") - conditional_remove(datafile) - datafile = os.path.join(self.project_root, ".data", "ag_news_csv.tar.gz") - conditional_remove(datafile) - - @slow def test_imdb(self): from torchtext.experimental.datasets import IMDB from torchtext.vocab import Vocab @@ -131,11 +120,3 @@ def test_imdb(self): old_vocab = train_dataset.get_vocab() new_vocab = Vocab(counter=old_vocab.freqs, max_size=2500) new_train_data, new_test_data = IMDB(vocab=new_vocab) - - # Delete the dataset after we're done to save disk space on CI - datafile = os.path.join(self.project_root, ".data", "imdb") - conditional_remove(datafile) - datafile = os.path.join(self.project_root, ".data", "aclImdb") - conditional_remove(datafile) - datafile = os.path.join(self.project_root, ".data", "aclImdb_v1.tar.gz") - conditional_remove(datafile) diff --git a/test/data/test_field.py b/test/data/test_field.py index 14fe043776..385412b26f 100644 --- a/test/data/test_field.py +++ b/test/data/test_field.py @@ -9,7 +9,6 @@ from torch.nn import init from ..common.torchtext_test_case import TorchtextTestCase, verify_numericalized_example -from ..common.test_markers import slow class TestField(TorchtextTestCase): @@ -866,7 +865,6 @@ def test_serialization(self): assert torch.all(torch.eq(original_numericalization, pickled_numericalization)) - @slow def test_build_vocab(self): nesting_field = data.Field(tokenize=list, init_token="", eos_token="") diff --git a/test/test_vocab.py b/test/test_vocab.py index aad82de57c..a083025bd7 100644 --- a/test/test_vocab.py +++ b/test/test_vocab.py @@ -10,7 +10,6 @@ from 
torchtext import vocab from torchtext.vocab import Vectors, FastText, GloVe, CharNGram -from .common.test_markers import slow from .common.torchtext_test_case import TorchtextTestCase @@ -93,7 +92,6 @@ def test_vocab_set_vectors(self): [0.3, 0.4]]) assert_allclose(v.vectors.numpy(), expected_vectors) - @slow def test_vocab_download_fasttext_vectors(self): c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2}) # Build a vocab and get vectors twice to test caching, then once more @@ -131,7 +129,6 @@ def test_vocab_download_fasttext_vectors(self): vec_file = os.path.join(self.project_root, ".vector_cache", "wiki.simple.vec") conditional_remove(vec_file) - @slow def test_vocab_extend(self): c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2}) # Build a vocab and get vectors twice to test caching. @@ -163,7 +160,6 @@ def test_vocab_extend(self): vec_file = os.path.join(self.project_root, ".vector_cache", "wiki.simple.vec") conditional_remove(vec_file) - @slow def test_vocab_download_custom_vectors(self): c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2}) # Build a vocab and get vectors twice to test caching. 
@@ -192,7 +188,6 @@ def test_vocab_download_custom_vectors(self): vec_file = os.path.join(self.project_root, ".vector_cache", "wiki.simple.vec") conditional_remove(vec_file) - @slow def test_vocab_vectors_custom_cache(self): c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2}) vector_cache = os.path.join('/tmp', 'vector_cache') @@ -225,7 +220,6 @@ def test_vocab_vectors_custom_cache(self): vec_file = os.path.join(vector_cache, "wiki.simple.vec") conditional_remove(vec_file) - @slow def test_vocab_download_glove_vectors(self): c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2}) @@ -268,7 +262,6 @@ def test_vocab_download_glove_vectors(self): conditional_remove(os.path.join(self.project_root, ".vector_cache", "glove.twitter.27B.{}d.txt".format(dim))) - @slow def test_vocab_download_charngram_vectors(self): c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2}) # Build a vocab and get vectors twice to test caching, then once more @@ -343,7 +336,6 @@ def test_serialization_backcompat(self): v_loaded = pickle.load(open(pickle_path, "rb")) assert v == v_loaded - @slow def test_vectors_get_vecs(self): vec = GloVe(name='twitter.27B', dim='25') self.assertEqual(vec.vectors.shape[0], len(vec))