
Commit 9656037

replace print to logging in other modules (#207)
1 parent 1aaeb25 commit 9656037

File tree

6 files changed (+261 / -262 lines)


tensorlayer/files.py

Lines changed: 99 additions & 99 deletions
Large diffs are not rendered by default.

tensorlayer/nlp.py

Lines changed: 45 additions & 45 deletions
@@ -1,4 +1,3 @@
-#! /usr/bin/python
 # -*- coding: utf-8 -*-

 import collections
@@ -15,6 +14,7 @@
 import tensorflow as tf
 from six.moves import urllib, xrange
 from tensorflow.python.platform import gfile
+from . import _logging as logging

 # Iteration functions

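Note on the new import above: "from . import _logging as logging" routes messages through a package-local logging helper instead of calling print or the root logger directly. The contents of tensorlayer/_logging.py are not shown on this page, so the following is only a minimal sketch of what such a wrapper typically looks like, assuming it configures and re-exports the standard-library logging calls (the real module added for #207 may differ):

    # Hypothetical minimal tensorlayer/_logging.py -- an assumption for
    # illustration, not the module actually added by this commit.
    import logging

    # One package-level logger, so every module that does
    # "from . import _logging as logging" writes to the same handler.
    _logger = logging.getLogger("tensorlayer")
    _logger.setLevel(logging.INFO)

    if not _logger.handlers:
        _handler = logging.StreamHandler()
        _handler.setFormatter(logging.Formatter("[TL] %(message)s"))
        _logger.addHandler(_handler)

    # Re-export the functions the converted modules call.
    info = _logger.info
    warning = _logger.warning
    error = _logger.error
    debug = _logger.debug

A wrapper of this shape would also explain why the literal " [TL] " prefixes are dropped from the message strings in the hunks below: the formatter can add the prefix once, centrally.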
@@ -127,17 +127,17 @@ def sample(a=[], temperature=1.0):
 return np.argmax(np.random.multinomial(1, a, 1))
 except:
 # np.set_printoptions(threshold=np.nan)
-# print(a)
-# print(np.sum(a))
-# print(np.max(a))
-# print(np.min(a))
+# logging.info(a)
+# logging.info(np.sum(a))
+# logging.info(np.max(a))
+# logging.info(np.min(a))
 # exit()
 message = "For large vocabulary_size, choice a higher temperature\
 to avoid log error. Hint : use ``sample_top``. "

 warnings.warn(message, Warning)
-# print(a)
-# print(b)
+# logging.info(a)
+# logging.info(b)
 return np.argmax(np.random.multinomial(1, b, 1))

@@ -153,7 +153,7 @@ def sample_top(a=[], top_k=10):
 """
 idx = np.argpartition(a, -top_k)[-top_k:]
 probs = a[idx]
-# print("new", probs)
+# logging.info("new", probs)
 probs = probs / np.sum(probs)
 choice = np.random.choice(idx, p=probs)
 return choice
@@ -163,7 +163,7 @@ def sample_top(a=[], top_k=10):
 # idx = idx[:top_k]
 # # a = a[idx]
 # probs = a[idx]
-# print("prev", probs)
+# logging.info("prev", probs)
 # # probs = probs / np.sum(probs)
 # # choice = np.random.choice(idx, p=probs)
 # # return choice
@@ -253,8 +253,8 @@ def __init__(self, vocab_file, start_word="<S>", end_word="</S>", unk_word="<UNK

 vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])

-print(" [TL] Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word))
-print(" vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab))
+logging.info("Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word))
+logging.info(" vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab))
 # tf.logging.info(" vocabulary with %d words" % len(vocab))

 self.vocab = vocab # vocab[word] = id
@@ -265,10 +265,10 @@ def __init__(self, vocab_file, start_word="<S>", end_word="</S>", unk_word="<UNK
 self.end_id = vocab[end_word]
 self.unk_id = vocab[unk_word]
 self.pad_id = vocab[pad_word]
-print(" start_id: %d" % self.start_id)
-print(" end_id: %d" % self.end_id)
-print(" unk_id: %d" % self.unk_id)
-print(" pad_id: %d" % self.pad_id)
+logging.info(" start_id: %d" % self.start_id)
+logging.info(" end_id: %d" % self.end_id)
+logging.info(" unk_id: %d" % self.unk_id)
+logging.info(" pad_id: %d" % self.pad_id)

 def word_to_id(self, word):
 """Returns the integer word id of a word string."""
@@ -359,7 +359,7 @@ def create_vocab(sentences, word_counts_output_file, min_word_count=1):
 ...[['<S>', 'one', 'two', ',', 'three', '</S>'], ['<S>', 'four', 'five', 'five', '</S>']]

 >>> tl.nlp.create_vocab(processed_capts, word_counts_output_file='vocab.txt', min_word_count=1)
-... [TL] Creating vocabulary.
+... Creating vocabulary.
 ... Total words: 8
 ... Words in vocabulary: 8
 ... Wrote vocabulary file: vocab.txt
@@ -373,24 +373,24 @@ def create_vocab(sentences, word_counts_output_file, min_word_count=1):
 ... pad_id: 0
 """
 from collections import Counter
-print(" [TL] Creating vocabulary.")
+logging.info("Creating vocabulary.")
 counter = Counter()
 for c in sentences:
 counter.update(c)
-# print('c',c)
-print(" Total words: %d" % len(counter))
+# logging.info('c',c)
+logging.info(" Total words: %d" % len(counter))

 # Filter uncommon words and sort by descending count.
 word_counts = [x for x in counter.items() if x[1] >= min_word_count]
 word_counts.sort(key=lambda x: x[1], reverse=True)
 word_counts = [("<PAD>", 0)] + word_counts # 1st id should be reserved for padding
-# print(word_counts)
-print(" Words in vocabulary: %d" % len(word_counts))
+# logging.info(word_counts)
+logging.info(" Words in vocabulary: %d" % len(word_counts))

 # Write out the word counts file.
 with tf.gfile.FastGFile(word_counts_output_file, "w") as f:
 f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts]))
-print(" Wrote vocabulary file: %s" % word_counts_output_file)
+logging.info(" Wrote vocabulary file: %s" % word_counts_output_file)

 # Create the vocabulary dictionary.
 reverse_vocab = [x[0] for x in word_counts]
@@ -506,9 +506,9 @@ def read_analogies_file(eval_file='questions-words.txt', word2id={}):
 questions_skipped += 1
 else:
 questions.append(np.array(ids))
-print("Eval analogy file: ", eval_file)
-print("Questions: ", len(questions))
-print("Skipped: ", questions_skipped)
+logging.info("Eval analogy file: ", eval_file)
+logging.info("Questions: ", len(questions))
+logging.info("Skipped: ", questions_skipped)
 analogy_questions = np.array(questions, dtype=np.int32)
 return analogy_questions

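One detail worth flagging in the hunk above (an observation, not something this commit changes): the three multi-argument print calls were translated verbatim, so the second argument is now passed to logging.info as a %-format argument. Because the message strings contain no placeholder, the logging machinery hits a formatting error when the record is emitted (logging reports it to stderr rather than raising), and the value never appears in the output. A placeholder form avoids this; a small illustration using the standard library directly:

    import logging

    logging.basicConfig(level=logging.INFO)
    eval_file = "questions-words.txt"  # hypothetical value, for illustration only

    # Verbatim translation of the old print call: the extra argument becomes a
    # %-format argument, but the message has no placeholder, so formatting
    # fails inside the logging module and the line is not emitted as intended.
    logging.info("Eval analogy file: ", eval_file)

    # Placeholder form: the value is interpolated correctly, and formatting is
    # deferred until the record is actually handled.
    logging.info("Eval analogy file: %s", eval_file)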
@@ -541,13 +541,13 @@ def build_vocab(data):
 """
 # data = _read_words(filename)
 counter = collections.Counter(data)
-# print('counter', counter) # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1
+# logging.info('counter', counter) # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1
 count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
-# print('count_pairs',count_pairs) # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1)
+# logging.info('count_pairs',count_pairs) # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1)
 words, _ = list(zip(*count_pairs))
 word_to_id = dict(zip(words, range(len(words))))
-# print(words) # list of words
-# print(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746
+# logging.info(words) # list of words
+# logging.info(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746
 return word_to_id

@@ -627,8 +627,8 @@ def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key
 count[0][1] = unk_count
 reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
 if printable:
-print('Real vocabulary size %d' % len(collections.Counter(words).keys()))
-print('Limited vocabulary size {}'.format(vocabulary_size))
+logging.info('Real vocabulary size %d' % len(collections.Counter(words).keys()))
+logging.info('Limited vocabulary size {}'.format(vocabulary_size))
 assert len(collections.Counter(words).keys()) >= vocabulary_size, \
 "the limited vocabulary_size must be less than or equal to the read vocabulary_size"
 return data, count, dictionary, reverse_dictionary
@@ -670,10 +670,10 @@ def words_to_word_ids(data=[], word_to_id={}, unk_key='UNK'):
 - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
 """
 # if isinstance(data[0], six.string_types):
-# print(type(data[0]))
+# logging.info(type(data[0]))
 # # exit()
-# print(data[0])
-# print(word_to_id)
+# logging.info(data[0])
+# logging.info(word_to_id)
 # return [word_to_id[str(word)] for word in data]
 # else:

@@ -687,11 +687,11 @@ def words_to_word_ids(data=[], word_to_id={}, unk_key='UNK'):
 # return [word_to_id[word] for word in data] # this one

 # if isinstance(data[0], str):
-# # print('is a string object')
+# # logging.info('is a string object')
 # return [word_to_id[word] for word in data]
 # else:#if isinstance(s, bytes):
-# # print('is a unicode object')
-# # print(data[0])
+# # logging.info('is a unicode object')
+# # logging.info(data[0])
 # return [word_to_id[str(word)] f

@@ -749,7 +749,7 @@ def save_vocab(count=[], name='vocab.txt'):
 with open(os.path.join(pwd, name), "w") as f:
 for i in xrange(vocabulary_size):
 f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1]))
-print("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd))
+logging.info("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd))


 # Functions for translation
@@ -772,7 +772,7 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")):
 >>> with gfile.GFile(train_path + ".en", mode="rb") as f:
 >>> for line in f:
 >>> tokens = tl.nlp.basic_tokenizer(line)
->>> print(tokens)
+>>> logging.info(tokens)
 >>> exit()
 ... [b'Changing', b'Lives', b'|', b'Changing', b'Society', b'|', b'How',
 ... b'It', b'Works', b'|', b'Technology', b'Drives', b'Change', b'Home',
@@ -821,14 +821,14 @@ def create_vocabulary(vocabulary_path,
 - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
 """
 if not gfile.Exists(vocabulary_path):
-print("Creating vocabulary %s from data %s" % (vocabulary_path, data_path))
+logging.info("Creating vocabulary %s from data %s" % (vocabulary_path, data_path))
 vocab = {}
 with gfile.GFile(data_path, mode="rb") as f:
 counter = 0
 for line in f:
 counter += 1
 if counter % 100000 == 0:
-print(" processing line %d" % counter)
+logging.info(" processing line %d" % counter)
 tokens = tokenizer(line) if tokenizer else basic_tokenizer(line)
 for w in tokens:
 word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w
@@ -843,7 +843,7 @@ def create_vocabulary(vocabulary_path,
 for w in vocab_list:
 vocab_file.write(w + b"\n")
 else:
-print("Vocabulary %s from data %s exists" % (vocabulary_path, data_path))
+logging.info("Vocabulary %s from data %s exists" % (vocabulary_path, data_path))


 def initialize_vocabulary(vocabulary_path):
@@ -948,19 +948,19 @@ def data_to_token_ids(data_path, target_path, vocabulary_path, tokenizer=None, n
 - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
 """
 if not gfile.Exists(target_path):
-print("Tokenizing data in %s" % data_path)
+logging.info("Tokenizing data in %s" % data_path)
 vocab, _ = initialize_vocabulary(vocabulary_path)
 with gfile.GFile(data_path, mode="rb") as data_file:
 with gfile.GFile(target_path, mode="w") as tokens_file:
 counter = 0
 for line in data_file:
 counter += 1
 if counter % 100000 == 0:
-print(" tokenizing line %d" % counter)
+logging.info(" tokenizing line %d" % counter)
 token_ids = sentence_to_token_ids(line, vocab, tokenizer, normalize_digits, UNK_ID=UNK_ID, _DIGIT_RE=_DIGIT_RE)
 tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
 else:
-print("Target path %s exists" % target_path)
+logging.info("Target path %s exists" % target_path)


 def moses_multi_bleu(hypotheses, references, lowercase=False): # tl.nlp
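A short usage note on the change as a whole: the practical gain over print is that these INFO messages can now be silenced or redirected by the application instead of always going to stdout. Assuming the package logger is the standard-library kind and is named "tensorlayer" (an assumption; neither the logger name nor the handler setup is shown on this page), a downstream script could, for example, keep warnings but hide the INFO chatter:

    import logging

    # Hypothetical logger name -- adjust to whatever tensorlayer/_logging.py
    # actually registers.
    logging.getLogger("tensorlayer").setLevel(logging.WARNING)

With the old print-based messages, the equivalent required redirecting sys.stdout.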

0 commit comments