-
Notifications
You must be signed in to change notification settings - Fork 625
refactor(tdigest): convert the flags for reverse in TDigest::Rank to a compile-time argument #3268
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
4784442
45281c3
b421adb
1ced14d
70ec5b8
c42ed44
5d3a885
3bb2fe6
f1c559c
7fb6b2f
0ed408d
d5cce3f
843af3d
3c935d8
48c1f47
4bdb79b
b6074af
c2894a0
7487397
0aaa788
be9a404
8f229ce
e73eae4
69bf01a
e1ed70a
798a69d
1c3ae7d
4b37eed
6911242
c097f8c
f4e5488
a06db87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -33,6 +33,115 @@ | |||||
| #include "tdigest.h" | ||||||
|
|
||||||
| namespace redis { | ||||||
|
|
||||||
| // TODO: It should be replaced by a iteration of the rocksdb iterator | ||||||
|
Check warning on line 37 in src/types/redis_tdigest.h
|
||||||
| template <bool Reverse> | ||||||
| class DummyCentroids { | ||||||
| public: | ||||||
| DummyCentroids(const TDigestMetadata& meta_data, const std::vector<Centroid>& centroids) | ||||||
| : meta_data_(meta_data), centroids_(centroids) {} | ||||||
| class Iterator { | ||||||
| public: | ||||||
| using IterType = std::conditional_t<Reverse, std::vector<Centroid>::const_reverse_iterator, | ||||||
| std::vector<Centroid>::const_iterator>; | ||||||
|
|
||||||
| Iterator(IterType iter, const std::vector<Centroid>& centroids) : iter_(iter), centroids_(centroids) {} | ||||||
| std::unique_ptr<Iterator> Clone() const { | ||||||
| if constexpr (Reverse) { | ||||||
| if (iter_ != centroids_.crend()) { | ||||||
| return std::make_unique<Iterator>(std::next(centroids_.crbegin(), std::distance(centroids_.crbegin(), iter_)), | ||||||
| centroids_); | ||||||
| } | ||||||
| return std::make_unique<Iterator>(centroids_.crend(), centroids_); | ||||||
| } else { | ||||||
| if (iter_ != centroids_.cend()) { | ||||||
| return std::make_unique<Iterator>(std::next(centroids_.cbegin(), std::distance(centroids_.cbegin(), iter_)), | ||||||
| centroids_); | ||||||
| } | ||||||
| return std::make_unique<Iterator>(centroids_.cend(), centroids_); | ||||||
| } | ||||||
| } | ||||||
| bool Next() { | ||||||
| if (Valid()) { | ||||||
| std::advance(iter_, 1); | ||||||
| } | ||||||
| if constexpr (Reverse) { | ||||||
| return iter_ != centroids_.crend(); | ||||||
| } else { | ||||||
| return iter_ != centroids_.cend(); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| // The Prev function can only be called for item is not cend, | ||||||
| // because we must guarantee the iterator to be inside the valid range before iteration. | ||||||
| bool Prev() { | ||||||
| if constexpr (Reverse) { | ||||||
| if (Valid() && iter_ != centroids_.crbegin()) { | ||||||
| std::advance(iter_, -1); | ||||||
| } | ||||||
| } else { | ||||||
| if (Valid() && iter_ != centroids_.cbegin()) { | ||||||
| std::advance(iter_, -1); | ||||||
| } | ||||||
| } | ||||||
| return Valid(); | ||||||
| } | ||||||
| bool Valid() const { | ||||||
| if constexpr (Reverse) { | ||||||
| return iter_ != centroids_.crend(); | ||||||
| } else { | ||||||
| return iter_ != centroids_.cend(); | ||||||
| } | ||||||
| } | ||||||
| StatusOr<Centroid> GetCentroid() const { | ||||||
| if constexpr (Reverse) { | ||||||
| if (iter_ == centroids_.crend()) { | ||||||
| return {::Status::NotOK, "invalid iterator during decoding tdigest centroid"}; | ||||||
| } | ||||||
| } else { | ||||||
| if (iter_ == centroids_.cend()) { | ||||||
| return {::Status::NotOK, "invalid iterator during decoding tdigest centroid"}; | ||||||
| } | ||||||
| } | ||||||
| return *iter_; | ||||||
| } | ||||||
|
|
||||||
| private: | ||||||
| IterType iter_; | ||||||
| const std::vector<Centroid>& centroids_; | ||||||
| }; | ||||||
|
|
||||||
| std::unique_ptr<Iterator> Begin() const { | ||||||
| if constexpr (Reverse) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Maybe here could also use the util.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As the Begin and End functions are outside the Iterator class, I made getCbeginIter and getCendIter static. Is this a good solution?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the getbegin and getend are both stateless, static is ok. |
||||||
| return std::make_unique<Iterator>(centroids_.crbegin(), centroids_); | ||||||
| } else { | ||||||
| return std::make_unique<Iterator>(centroids_.cbegin(), centroids_); | ||||||
| } | ||||||
| } | ||||||
| std::unique_ptr<Iterator> End() const { | ||||||
| if (centroids_.empty()) { | ||||||
| if constexpr (Reverse) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above. |
||||||
| return std::make_unique<Iterator>(centroids_.crend(), centroids_); | ||||||
| } else { | ||||||
| return std::make_unique<Iterator>(centroids_.cend(), centroids_); | ||||||
| } | ||||||
| } | ||||||
| if constexpr (Reverse) { | ||||||
| return std::make_unique<Iterator>(std::prev(centroids_.crend()), centroids_); | ||||||
| } else { | ||||||
| return std::make_unique<Iterator>(std::prev(centroids_.cend()), centroids_); | ||||||
| } | ||||||
| } | ||||||
| double TotalWeight() const { return static_cast<double>(meta_data_.total_weight); } | ||||||
| double Min() const { return meta_data_.minimum; } | ||||||
| double Max() const { return meta_data_.maximum; } | ||||||
| uint64_t Size() const { return meta_data_.merged_nodes; } | ||||||
|
|
||||||
| private: | ||||||
| const TDigestMetadata& meta_data_; | ||||||
| const std::vector<Centroid>& centroids_; | ||||||
| }; | ||||||
|
|
||||||
| inline constexpr uint32_t kTDigestMaxCompression = 1000; // limit the compression to 1k | ||||||
|
|
||||||
| struct CentroidWithKey { | ||||||
|
|
@@ -77,7 +186,8 @@ | |||||
|
|
||||||
| rocksdb::Status Merge(engine::Context& ctx, const Slice& dest_digest, const std::vector<std::string>& source_digests, | ||||||
| const TDigestMergeOptions& options); | ||||||
| rocksdb::Status Rank(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs, bool reverse, | ||||||
| template <bool Reverse> | ||||||
| rocksdb::Status Rank(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs, | ||||||
| std::vector<int>& result); | ||||||
| rocksdb::Status GetMetaData(engine::Context& context, const Slice& digest_name, TDigestMetadata* metadata); | ||||||
|
|
||||||
|
|
@@ -132,4 +242,39 @@ | |||||
| Centroid* centroid) const; | ||||||
| }; | ||||||
|
|
||||||
| template <bool Reverse> | ||||||
| rocksdb::Status TDigest::Rank(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs, | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think maybe we can just expose two non-template function TDigest::Rank and TDigest::RevRank so that we can put the definition of this function into the .cc file instead of header?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not necesary in this PR. |
||||||
| std::vector<int>& result) { | ||||||
| auto ns_key = AppendNamespacePrefix(digest_name); | ||||||
| TDigestMetadata metadata; | ||||||
| { | ||||||
| LockGuard guard(storage_->GetLockManager(), ns_key); | ||||||
|
|
||||||
| if (auto status = getMetaDataByNsKey(ctx, ns_key, &metadata); !status.ok()) { | ||||||
| return status; | ||||||
| } | ||||||
|
|
||||||
| if (metadata.total_observations == 0) { | ||||||
| result.resize(inputs.size(), -2); | ||||||
| return rocksdb::Status::OK(); | ||||||
| } | ||||||
|
|
||||||
| if (auto status = mergeNodes(ctx, ns_key, &metadata); !status.ok()) { | ||||||
| return status; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| std::vector<Centroid> centroids; | ||||||
| if (auto status = dumpCentroids(ctx, ns_key, metadata, ¢roids); !status.ok()) { | ||||||
| return status; | ||||||
| } | ||||||
|
|
||||||
| auto dump_centroids = DummyCentroids<Reverse>(metadata, centroids); | ||||||
| auto status = TDigestRank<Reverse>(dump_centroids, inputs, result); | ||||||
| if (!status) { | ||||||
|
Check warning on line 274 in src/types/redis_tdigest.h
|
||||||
|
donghao526 marked this conversation as resolved.
Outdated
|
||||||
| return rocksdb::Status::InvalidArgument(status.Msg()); | ||||||
| } | ||||||
| return rocksdb::Status::OK(); | ||||||
| } | ||||||
|
|
||||||
| } // namespace redis | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -171,8 +171,8 @@ struct DoubleComparator { | |
| bool operator()(const double& a, const double& b) const { return DoubleCompare(a, b) == -1; } | ||
| }; | ||
|
|
||
| template <typename TD, bool Reverse> | ||
| inline Status TDigestRankImpl(TD&& td, const std::vector<double>& inputs, std::vector<int>& result) { | ||
| template <bool Reverse, typename TD> | ||
| inline Status TDigestRank(TD&& td, const std::vector<double>& inputs, std::vector<int>& result) { | ||
| std::map<double, size_t, DoubleComparator> value_to_index; | ||
| for (size_t i = 0; i < inputs.size(); ++i) { | ||
| value_to_index[inputs[i]] = i; | ||
|
|
@@ -211,7 +211,7 @@ inline Status TDigestRankImpl(TD&& td, const std::vector<double>& inputs, std::v | |
| } | ||
| } | ||
|
|
||
| auto iter = td.Begin(Reverse); | ||
| auto iter = td.Begin(); | ||
| double cumulative_weight = 0; | ||
| while (iter->Valid() && !is_end()) { | ||
| auto centroid = GET_OR_RET(iter->GetCentroid()); | ||
|
|
@@ -266,13 +266,4 @@ inline Status TDigestRankImpl(TD&& td, const std::vector<double>& inputs, std::v | |
| } | ||
| } | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| template <typename TD> | ||
| inline Status TDigestRank(TD&& td, const std::vector<double>& inputs, bool reverse, std::vector<int>& result) { | ||
| if (reverse) { | ||
| return TDigestRankImpl<TD, true>(std::forward<TD>(td), inputs, result); | ||
| } else { | ||
| return TDigestRankImpl<TD, false>(std::forward<TD>(td), inputs, result); | ||
| } | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add an endline for this file. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we could add a private method like below.
Then we could unify the
centroids.cebgin()andcentroids.cend().This could help us reduce the
constexpr ifin every iterator getter.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good suggestion, I will refactor it.
Thanks!😊