Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 05cb992

Browse files
authored
[WIP] changing default root for datasets (#1361)
1 parent f7c2985 commit 05cb992

File tree

4 files changed, with 26 additions and 26 deletions.

.circleci/config.yml

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ commands:
4141
steps:
4242
- run:
4343
name: Generate CCI cache key
44-
command:
44+
command: |
4545
echo "$(date "+%D")" > .cachekey
4646
cat .circleci/cached_datasets_list.txt >> .cachekey
4747
- persist_to_workspace:
@@ -380,24 +380,24 @@ jobs:
380380
name: Generate cache
381381
no_output_timeout: 30m
382382
command: |
383-
if [ ! -f .data/cache_status_file.json ] ; then
383+
if [ ! -f /root/.torchtext/cache/cache_status_file.json ] ; then
384384
.circleci/unittest/linux/scripts/setup_env.sh
385385
.circleci/unittest/linux/scripts/install.sh
386386
.circleci/unittest/linux/scripts/generate_cache.sh
387387
fi
388-
cat .data/cache_status_file.json
388+
cat /root/.torchtext/cache/cache_status_file.json
389389
- save_cache:
390390

391391
key: v1-linux-dataset-{{ checksum ".cachekey" }}
392392

393393
paths:
394-
- .data
394+
- /root/.torchtext/cache
395395
- save_cache:
396396

397397
key: v1-linux-cache-index-{{ checksum ".cachekey" }}
398398

399399
paths:
400-
- .data/cache_status_file.json
400+
- /root/.torchtext/cache/cache_status_file.json
401401

402402
unittest_linux:
403403
<<: *binary_common
@@ -432,7 +432,7 @@ jobs:
432432

433433
paths:
434434
- .vector_cache
435-
- .data
435+
- /root/.torchtext/cache
436436
- run:
437437
name: Post process
438438
command: .circleci/unittest/linux/scripts/post_process.sh
@@ -457,24 +457,24 @@ jobs:
457457
name: Generate daily data Cache
458458
no_output_timeout: 30m
459459
command: |
460-
if [ ! -f .data/cache_status_file.json ] ; then
460+
if [ ! -f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then
461461
.circleci/unittest/windows/scripts/setup_env.sh
462462
.circleci/unittest/windows/scripts/install.sh
463463
.circleci/unittest/windows/scripts/generate_cache.sh
464464
fi
465-
cat .data/cache_status_file.json
465+
cat C:/Users/circleci/.torchtext/cache/cache_status_file.json
466466
- save_cache:
467467

468468
key: v1-windows-dataset-{{ checksum ".cachekey" }}
469469

470470
paths:
471-
- .data
471+
- C:/Users/circleci/.torchtext/cache
472472
- save_cache:
473473

474474
key: v1-windows-cache-index-{{ checksum ".cachekey" }}
475475

476476
paths:
477-
- .data/cache_status_file.json
477+
- C:/Users/circleci/.torchtext/cache/cache_status_file.json
478478

479479
unittest_windows:
480480
<<: *binary_common
@@ -509,7 +509,7 @@ jobs:
509509

510510
paths:
511511
- .vector_cache
512-
- .data
512+
- C:/Users/circleci/.torchtext/cache
513513
- run:
514514
name: Post process
515515
command: .circleci/unittest/windows/scripts/post_process.sh

.circleci/config.yml.in

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ commands:
4141
steps:
4242
- run:
4343
name: Generate CCI cache key
44-
command:
44+
command: |
4545
echo "$(date "+%D")" > .cachekey
4646
cat .circleci/cached_datasets_list.txt >> .cachekey
4747
- persist_to_workspace:
@@ -380,24 +380,24 @@ jobs:
380380
name: Generate cache
381381
no_output_timeout: 30m
382382
command: |
383-
if [ ! -f .data/cache_status_file.json ] ; then
383+
if [ ! -f /root/.torchtext/cache/cache_status_file.json ] ; then
384384
.circleci/unittest/linux/scripts/setup_env.sh
385385
.circleci/unittest/linux/scripts/install.sh
386386
.circleci/unittest/linux/scripts/generate_cache.sh
387387
fi
388-
cat .data/cache_status_file.json
388+
cat /root/.torchtext/cache/cache_status_file.json
389389
- save_cache:
390390
{% raw %}
391391
key: v1-linux-dataset-{{ checksum ".cachekey" }}
392392
{% endraw %}
393393
paths:
394-
- .data
394+
- /root/.torchtext/cache
395395
- save_cache:
396396
{% raw %}
397397
key: v1-linux-cache-index-{{ checksum ".cachekey" }}
398398
{% endraw %}
399399
paths:
400-
- .data/cache_status_file.json
400+
- /root/.torchtext/cache/cache_status_file.json
401401

402402
unittest_linux:
403403
<<: *binary_common
@@ -432,7 +432,7 @@ jobs:
432432
{% endraw %}
433433
paths:
434434
- .vector_cache
435-
- .data
435+
- /root/.torchtext/cache
436436
- run:
437437
name: Post process
438438
command: .circleci/unittest/linux/scripts/post_process.sh
@@ -457,24 +457,24 @@ jobs:
457457
name: Generate daily data Cache
458458
no_output_timeout: 30m
459459
command: |
460-
if [ ! -f .data/cache_status_file.json ] ; then
460+
if [ ! -f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then
461461
.circleci/unittest/windows/scripts/setup_env.sh
462462
.circleci/unittest/windows/scripts/install.sh
463463
.circleci/unittest/windows/scripts/generate_cache.sh
464464
fi
465-
cat .data/cache_status_file.json
465+
cat C:/Users/circleci/.torchtext/cache/cache_status_file.json
466466
- save_cache:
467467
{% raw %}
468468
key: v1-windows-dataset-{{ checksum ".cachekey" }}
469469
{% endraw %}
470470
paths:
471-
- .data
471+
- C:/Users/circleci/.torchtext/cache
472472
- save_cache:
473473
{% raw %}
474474
key: v1-windows-cache-index-{{ checksum ".cachekey" }}
475475
{% endraw %}
476476
paths:
477-
- .data/cache_status_file.json
477+
- C:/Users/circleci/.torchtext/cache/cache_status_file.json
478478

479479
unittest_windows:
480480
<<: *binary_common
@@ -509,7 +509,7 @@ jobs:
509509
{% endraw %}
510510
paths:
511511
- .vector_cache
512-
- .data
512+
- C:/Users/circleci/.torchtext/cache
513513
- run:
514514
name: Post process
515515
command: .circleci/unittest/windows/scripts/post_process.sh

test/common/cache_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,11 @@
33
import torchtext
44
from .parameterized_utils import load_params
55

6-
CACHE_STATUS_FILE = '.data/cache_status_file.json'
6+
CACHE_STATUS_FILE = os.path.join(os.path.expanduser('~/.torchtext/cache'), 'cache_status_file.json')
77

88

99
def check_cache_status():
10-
assert os.path.exists(CACHE_STATUS_FILE), "Cache status file does not exists"
10+
assert os.path.exists(CACHE_STATUS_FILE), "Cache status file [{}] does not exists".format(CACHE_STATUS_FILE)
1111
with open(CACHE_STATUS_FILE, 'r') as f:
1212
missing_datasets = []
1313
cache_status = json.load(f)

torchtext/data/datasets_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,7 @@ def _wrap_split_argument_with_fn(fn, splits):
213213
raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn))
214214

215215
@functools.wraps(fn)
216-
def new_fn(root='.data', split=splits, **kwargs):
216+
def new_fn(root=os.path.expanduser('~/.torchtext/cache'), split=splits, **kwargs):
217217
result = []
218218
for item in _check_default_set(split, splits, fn.__name__):
219219
result.append(fn(root, item, **kwargs))
@@ -250,7 +250,7 @@ def decorator(func):
250250
raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn))
251251

252252
@functools.wraps(func)
253-
def wrapper(root='.data', *args, **kwargs):
253+
def wrapper(root=os.path.expanduser('~/.torchtext/cache'), *args, **kwargs):
254254
new_root = os.path.join(root, dataset_name)
255255
if not os.path.exists(new_root):
256256
os.makedirs(new_root)

0 commit comments

Comments
 (0)