Skip to content

Commit b60509f

Browse files
lingvo-bot authored and copybara-github committed
Migrate from std::string to tensorflow::tstring.
Note that during the transition period, tstring is typedef'ed to std::string.
See: tensorflow/community#91
PiperOrigin-RevId: 282495664
1 parent 952888d commit b60509f

10 files changed

+40
-37
lines changed

lingvo/core/ops/beam_search_step_op_kernels.cc

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,7 @@ class BeamSearchStepOp : public OpKernel {
301301
input_data.size());
302302
}
303303
} else if (input.dtype() == DT_STRING) {
304-
output->flat<string>() = input.flat<string>();
304+
output->flat<tstring>() = input.flat<tstring>();
305305
}
306306
}
307307
return output;
@@ -573,7 +573,7 @@ class BeamSearchStepOp : public OpKernel {
573573
auto t_out_scores = out_scores->matrix<float>();
574574
auto t_out_hyps = out_hyps->matrix<int>();
575575
auto t_out_prev_hyps = out_prev_hyps->matrix<int>();
576-
auto t_out_done_hyps = out_done_hyps->matrix<string>();
576+
auto t_out_done_hyps = out_done_hyps->matrix<tstring>();
577577
auto t_out_atten_probs = out_atten_probs->tensor<float, 3>();
578578
auto t_all_done = all_done->scalar<bool>();
579579

@@ -700,7 +700,7 @@ class TopKTerminatedHypsOp : public OpKernel {
700700
k, /* unused epsilon id */ -1));
701701
// Each mutex is used to protect corresponding topk_vec.
702702
std::vector<mutex> mu_vec(num_beams);
703-
auto t_done_hyps = in_done_hyps.matrix<string>();
703+
auto t_done_hyps = in_done_hyps.matrix<tstring>();
704704
// The thread sharding is along hyps_size.
705705
Shard(kNumWorkers, workers, hyps_size, 1000 * num_steps,
706706
[&](int64 start, int64 limit) {
@@ -731,7 +731,7 @@ class TopKTerminatedHypsOp : public OpKernel {
731731
}
732732
});
733733

734-
auto t_topk_hyps = topk_hyps->matrix<string>();
734+
auto t_topk_hyps = topk_hyps->matrix<tstring>();
735735
for (int i = 0; i < num_beams; ++i) {
736736
auto ith_topk = topk_vec[i].Get();
737737
CHECK_LE(ith_topk.size(), k);
@@ -836,7 +836,7 @@ class UnpackHypOp : public OpKernel {
836836

837837
void Compute(OpKernelContext* ctx) override {
838838
const Tensor& in_hyps = ctx->input(0);
839-
const auto& t_in_hyps = in_hyps.flat<string>();
839+
const auto& t_in_hyps = in_hyps.flat<tstring>();
840840
const int batch_size = t_in_hyps.size();
841841
std::vector<Hypothesis> hyps(batch_size);
842842
for (int i = 0; i < batch_size; ++i) {
@@ -991,7 +991,7 @@ class HypsFromBeamSearchOuts : public OpKernel {
991991

992992
Tensor* out_hyps;
993993
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, hyps.shape(), &out_hyps));
994-
auto out_hyps_t = out_hyps->matrix<string>();
994+
auto out_hyps_t = out_hyps->matrix<tstring>();
995995

996996
// Use the same thread pool as topk operator.
997997
static thread::ThreadPool* workers =

lingvo/core/ops/best_step_op_kernels.cc

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,11 @@ class BestStepOp : public OpKernel {
5252
::std::unique_ptr<RecordReader> reader(new RecordReader(file.get()));
5353

5454
uint64 offset = 0;
55-
string raw_proto;
55+
tstring raw_proto;
5656
while (reader->ReadRecord(&offset, &raw_proto).ok()) {
5757
Event event;
58-
CHECK(::tensorflow::ParseProtoUnlimited(&event, raw_proto));
58+
CHECK(::tensorflow::ParseProtoUnlimited(&event, raw_proto.data(),
59+
raw_proto.size()));
5960
if (event.what_case() != Event::WhatCase::kSummary) {
6061
continue;
6162
}

lingvo/core/ops/generic_input_op_kernels.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ class GenericInputProcessor : public RecordProcessor {
122122
args[0] = Tensor(DT_INT32, {});
123123
args[0].scalar<int32>()() = record.source_id;
124124
args[1] = Tensor(DT_STRING, {});
125-
record.value.AppendTo(&args[1].scalar<string>()());
125+
args[1].scalar<tensorflow::tstring>()().append(record.value.ToString());
126126
*bucket_key = 1;
127127
sample->clear();
128128
Status status;
@@ -266,7 +266,7 @@ class GenericInputProcessor : public RecordProcessor {
266266
CASE(float);
267267
CASE(int32);
268268
CASE(int64);
269-
CASE(string);
269+
CASE(tstring);
270270
CASE(uint8);
271271
CASE(bfloat16);
272272
CASE(complex64);

lingvo/core/ops/record_batcher_test.cc

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ class TestRP : public RecordProcessor {
5252
const string val = string(record.value);
5353
*bucket_key = val.size();
5454
Tensor t(DT_STRING, {});
55-
record.value.AppendTo(&t.scalar<string>()());
55+
t.scalar<tstring>()().append(record.value.ToString());
5656
Tensor ids(DT_STRING, {1});
5757
auto lab = ids.flat<tensorflow::tstring>();
5858
lab(0) = absl::StrCat(record.source_id);
@@ -68,8 +68,8 @@ class TestRP : public RecordProcessor {
6868
Tensor t(DT_STRING, {n});
6969
Tensor source_ids(DT_STRING, {n});
7070
for (int i = 0; i < samples.size(); ++i) {
71-
t.flat<string>()(i) = samples[i][0].scalar<string>()();
72-
source_ids.flat<string>()(i) = samples[i][1].scalar<string>()();
71+
t.flat<tstring>()(i) = samples[i][0].scalar<tstring>()();
72+
source_ids.flat<tstring>()(i) = samples[i][1].scalar<tstring>()();
7373
}
7474
batch->clear();
7575
batch->push_back(std::move(t));
@@ -107,13 +107,13 @@ TEST(RecordBatcher, Basic) {
107107
ASSERT_LE(source_ids.dim_size(0), bopts.bucket_batch_limit[bucket_id]);
108108
int maxlen = 0;
109109
for (int j = 0; j < t.dim_size(0); ++j) {
110-
auto len = t.vec<string>()(j).size();
110+
auto len = t.vec<tstring>()(j).size();
111111
EXPECT_LE(len, bopts.bucket_upper_bound[bucket_id]);
112112
if (bucket_id != 0) {
113113
EXPECT_LT(bopts.bucket_upper_bound[bucket_id - 1], len);
114114
}
115115
maxlen = std::max<int>(maxlen, len);
116-
ASSERT_EQ(source_ids.vec<string>()(j), "0");
116+
ASSERT_EQ(source_ids.vec<tstring>()(j), "0");
117117
}
118118
VLOG(1) << bucket_id << " " << t.dim_size(0) << " " << maxlen;
119119
}
@@ -146,7 +146,7 @@ TEST(RecordBatcher, BasicMultiThread) {
146146
ASSERT_LE(t.dim_size(0), bopts.bucket_batch_limit[bucket_id]);
147147
int maxlen = 0;
148148
for (int j = 0; j < t.dim_size(0); ++j) {
149-
auto len = t.vec<string>()(j).size();
149+
auto len = t.vec<tstring>()(j).size();
150150
EXPECT_LE(len, bopts.bucket_upper_bound[bucket_id]);
151151
if (bucket_id != 0) {
152152
EXPECT_LT(bopts.bucket_upper_bound[bucket_id - 1], len);
@@ -195,7 +195,7 @@ TEST(RecordBatcher, LearnBuckets) {
195195
const Tensor& t = batch[0];
196196
int maxlen = 0;
197197
for (int j = 0; j < t.dim_size(0); ++j) {
198-
int len = t.vec<string>()(j).size();
198+
int len = t.vec<tstring>()(j).size();
199199
maxlen = std::max<int>(maxlen, len);
200200
}
201201
maxlens[bucket_id] += maxlen;
@@ -241,7 +241,7 @@ TEST(RecordBatcher, FullEpoch) {
241241
TF_CHECK_OK(batcher.GetNext(&bucket_id, &batch));
242242
const Tensor& t = batch[0];
243243
for (int j = 0; j < t.dim_size(0); ++j) {
244-
records.push_back(t.vec<string>()(j));
244+
records.push_back(t.vec<tstring>()(j));
245245
}
246246
}
247247
ASSERT_EQ(N, records.size());
@@ -279,7 +279,7 @@ TEST(RecordBatcher, CaptureYielderStatus) {
279279
const Tensor& t = batch[i];
280280
if (t.dtype() == DT_STRING) {
281281
for (int j = 0; j < t.dim_size(0); ++j) {
282-
records.push_back(t.vec<string>()(j));
282+
records.push_back(t.vec<tstring>()(j));
283283
}
284284
}
285285
}

lingvo/core/ops/record_yielder.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,7 @@ class TFRecordIterator : public RecordIterator {
250250
std::unique_ptr<RandomAccessFile> file_;
251251
io::SequentialRecordReader reader_;
252252
int64 num_ = 0;
253-
string record_;
253+
tstring record_;
254254

255255
io::RecordReaderOptions ReaderOptions(const string& compression_type) {
256256
auto opts =

lingvo/core/ops/rope.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@ class Rope : public std::string {
3030
void AppendTo(std::string* dst) const {
3131
dst->append(*this);
3232
}
33+
34+
std::string ToString() const { return *this; }
3335
};
3436

3537
} // namespace lingvo

lingvo/core/ops/simple_vocab.cc

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -161,13 +161,13 @@ class VocabTokenToIdOp : public OpKernel {
161161
Tensor* id;
162162
OP_REQUIRES_OK(ctx, ctx->allocate_output("id", token->shape(), &id));
163163
if (token->dims() == 0) {
164-
id->scalar<int32>()() = vocab_.TokenToId(token->scalar<string>()());
164+
id->scalar<int32>()() = vocab_.TokenToId(token->scalar<tstring>()());
165165
} else {
166166
OP_REQUIRES(
167167
ctx, token->dims() == 1,
168168
errors::InvalidArgument("Input must be a scalar or 1D tensor."));
169169
for (int i = 0; i < token->dim_size(0); i++) {
170-
id->vec<int32>()(i) = vocab_.TokenToId(token->vec<string>()(i));
170+
id->vec<int32>()(i) = vocab_.TokenToId(token->vec<tstring>()(i));
171171
}
172172
}
173173
}
@@ -196,13 +196,13 @@ class VocabIdToTokenOp : public OpKernel {
196196
Tensor* token;
197197
OP_REQUIRES_OK(ctx, ctx->allocate_output("token", id->shape(), &token));
198198
if (id->dims() == 0) {
199-
token->scalar<string>()() = vocab_.IdToToken(id->scalar<int32>()());
199+
token->scalar<tstring>()() = vocab_.IdToToken(id->scalar<int32>()());
200200
} else {
201201
OP_REQUIRES(
202202
ctx, id->dims() == 1,
203203
errors::InvalidArgument("Input must be a scalar or 1D tensor."));
204204
for (int i = 0; i < id->dim_size(0); i++) {
205-
token->vec<string>()(i) = vocab_.IdToToken(id->vec<int32>()(i));
205+
token->vec<tstring>()(i) = vocab_.IdToToken(id->vec<int32>()(i));
206206
}
207207
}
208208
}
@@ -232,13 +232,13 @@ class TokenInVocabOp : public OpKernel {
232232
OP_REQUIRES_OK(ctx,
233233
ctx->allocate_output("result", token->shape(), &result));
234234
if (token->dims() == 0) {
235-
result->scalar<bool>()() = vocab_.InVocab(token->scalar<string>()());
235+
result->scalar<bool>()() = vocab_.InVocab(token->scalar<tstring>()());
236236
} else {
237237
OP_REQUIRES(
238238
ctx, token->dims() == 1,
239239
errors::InvalidArgument("Input must be a scalar or 1D tensor."));
240240
for (int i = 0; i < token->dim_size(0); i++) {
241-
result->vec<bool>()(i) = vocab_.InVocab(token->vec<string>()(i));
241+
result->vec<bool>()(i) = vocab_.InVocab(token->vec<tstring>()(i));
242242
}
243243
}
244244
}

lingvo/core/ops/static_map_op.cc

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ void Iota(std::vector<T>* vec) {
3434
}
3535

3636
template <>
37-
void Iota<string>(std::vector<string>* vec) {
37+
void Iota<tstring>(std::vector<tstring>* vec) {
3838
// Do nothing.
3939
}
4040

@@ -96,22 +96,22 @@ class StaticMapOp : public OpKernel {
9696
};
9797

9898
REGISTER_KERNEL_BUILDER(Name("StaticMapStringInt").Device(DEVICE_CPU),
99-
StaticMapOp<string, int32>);
99+
StaticMapOp<tstring, int32>);
100100
REGISTER_KERNEL_BUILDER(Name("StaticMapIntString").Device(DEVICE_CPU),
101-
StaticMapOp<int32, string>);
101+
StaticMapOp<int32, tstring>);
102102

103103
#if GOOGLE_CUDA
104104
REGISTER_KERNEL_BUILDER(Name("StaticMapStringInt")
105105
.Device(DEVICE_GPU)
106106
.HostMemory("x")
107107
.HostMemory("y"),
108-
StaticMapOp<string, int32>);
108+
StaticMapOp<tstring, int32>);
109109

110110
REGISTER_KERNEL_BUILDER(Name("StaticMapIntString")
111111
.Device(DEVICE_GPU)
112112
.HostMemory("x")
113113
.HostMemory("y"),
114-
StaticMapOp<int32, string>);
114+
StaticMapOp<int32, tstring>);
115115
#endif
116116

117117
} // namespace

lingvo/core/ops/tokenizer_op_headers.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class LabelToTokenIdOp : public OpKernel {
4646
errors::InvalidArgument("labels must be a vector, but get ",
4747
labels.shape().DebugString()));
4848
const int batch = labels.NumElements();
49-
auto Tlabels = labels.flat<string>();
49+
auto Tlabels = labels.flat<tstring>();
5050
Tensor token_ids(DT_INT32, TensorShape({batch, maxlen_}));
5151
auto Ttoken_ids = token_ids.matrix<int32>();
5252
Ttoken_ids.setZero(); // Sanity
@@ -131,7 +131,7 @@ class IdToTokenOp : public OpKernel {
131131
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({batch}), &out));
132132
const auto& t_ids = ids.matrix<int32>();
133133
const auto& t_seq_lens = seq_lens.vec<int32>();
134-
auto t_out = out->template vec<string>();
134+
auto t_out = out->template vec<tstring>();
135135
for (int i = 0; i < batch; ++i) {
136136
const int len_i = std::max(0, t_seq_lens(i));
137137
std::vector<int32> ids_i(len_i);

lingvo/core/ops/tokenizer_ops_kernels.cc

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ class StrToVocabTokensOp : public OpKernel {
5656
void Compute(OpKernelContext* ctx) override {
5757
const Tensor* labels;
5858
OP_REQUIRES_OK(ctx, ctx->input("labels", &labels));
59-
const auto& t_label = labels->vec<string>();
59+
const auto& t_label = labels->vec<tstring>();
6060
const int32 b_size = labels->dim_size(0);
6161
Tensor token_ids(DT_INT32, TensorShape({b_size, maxlen_}));
6262
Tensor target_ids(DT_INT32, TensorShape({b_size, maxlen_}));
@@ -177,7 +177,7 @@ class NgramIdToTokenOp : public OpKernel {
177177
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({batch}), &out));
178178
const auto& t_ids = token_ids->matrix<int32>();
179179
const auto& t_seq_lens = seq_lengths->vec<int32>();
180-
auto t_out = out->template vec<string>();
180+
auto t_out = out->template vec<tstring>();
181181
for (int i = 0; i < batch; ++i) {
182182
const int len_i = std::max(0, t_seq_lens(i));
183183
std::vector<int32> ids_i(len_i);
@@ -235,7 +235,7 @@ class BpeIdsToWordsOp : public OpKernel {
235235
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({batch}), &out));
236236
const auto& t_ids = token_ids->matrix<int32>();
237237
const auto& t_seq_lens = seq_lengths->vec<int32>();
238-
auto t_out = out->template vec<string>();
238+
auto t_out = out->template vec<tstring>();
239239
for (int i = 0; i < batch; ++i) {
240240
const int len_i = std::max(0, t_seq_lens(i));
241241
std::vector<string> labels;
@@ -293,7 +293,7 @@ class BpeWordsToIdsOp : public OpKernel {
293293
void Compute(OpKernelContext* ctx) override {
294294
const Tensor* labels;
295295
OP_REQUIRES_OK(ctx, ctx->input("labels", &labels));
296-
const auto& t_label = labels->vec<string>();
296+
const auto& t_label = labels->vec<tstring>();
297297
const int32 b_size = labels->dim_size(0);
298298
Tensor* token_ids = nullptr;
299299
Tensor* target_ids = nullptr;

0 commit comments

Comments
 (0)