-
Notifications
You must be signed in to change notification settings - Fork 625
refactor(tdigest): convert the flags for reverse in TDigest::Rank to a compile-time argument #3268
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
4784442
45281c3
b421adb
1ced14d
70ec5b8
c42ed44
5d3a885
3bb2fe6
f1c559c
7fb6b2f
0ed408d
d5cce3f
843af3d
3c935d8
48c1f47
4bdb79b
b6074af
c2894a0
7487397
0aaa788
be9a404
8f229ce
e73eae4
69bf01a
e1ed70a
798a69d
1c3ae7d
4b37eed
6911242
c097f8c
f4e5488
a06db87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,101 @@ | |
| #include "tdigest.h" | ||
|
|
||
| namespace redis { | ||
|
|
||
| // TODO: It should be replaced by a iteration of the rocksdb iterator | ||
| template <bool Reverse> | ||
| class DummyCentroids { | ||
| public: | ||
| DummyCentroids(const TDigestMetadata& meta_data, const std::vector<Centroid>& centroids) | ||
| : meta_data_(meta_data), centroids_(centroids) {} | ||
| class Iterator { | ||
| public: | ||
| using IterType = std::conditional_t<Reverse, std::vector<Centroid>::const_reverse_iterator, | ||
| std::vector<Centroid>::const_iterator>; | ||
| Iterator(IterType iter, const std::vector<Centroid>& centroids) : iter_(iter), centroids_(centroids) {} | ||
| std::unique_ptr<Iterator> Clone() const { | ||
| if (iter_ != getCendIter(centroids_)) { | ||
| return std::make_unique<Iterator>( | ||
| std::next(getCbeginIter(centroids_), std::distance(getCbeginIter(centroids_), iter_)), centroids_); | ||
| } | ||
| return std::make_unique<Iterator>(getCendIter(centroids_), centroids_); | ||
| } | ||
| bool Next() { | ||
| if (Valid()) { | ||
| std::advance(iter_, 1); | ||
| } | ||
| return iter_ != getCendIter(centroids_); | ||
| } | ||
|
|
||
| // The Prev function can only be called for item is not cend, | ||
| // because we must guarantee the iterator to be inside the valid range before iteration. | ||
| bool Prev() { | ||
| if (Valid() && iter_ != getCendIter(centroids_)) { | ||
| std::advance(iter_, -1); | ||
| } | ||
| return Valid(); | ||
| } | ||
| bool Valid() const { return iter_ != getCendIter(centroids_); } | ||
| StatusOr<Centroid> GetCentroid() const { | ||
| if (iter_ == getCendIter(centroids_)) { | ||
| return {::Status::NotOK, "invalid iterator during decoding tdigest centroid"}; | ||
| } | ||
| return *iter_; | ||
| } | ||
|
|
||
| private: | ||
| IterType iter_; | ||
| const std::vector<Centroid>& centroids_; | ||
| template <typename Container> | ||
| decltype(auto) getCbeginIter(const Container& centroids) const { | ||
| if constexpr (Reverse) { | ||
| return centroids.crbegin(); | ||
| } else { | ||
| return centroids.cbegin(); | ||
| } | ||
| } | ||
|
|
||
| template <typename Container> | ||
| decltype(auto) getCendIter(const Container& centroids) const { | ||
| if constexpr (Reverse) { | ||
| return centroids.crend(); | ||
| } else { | ||
| return centroids.cend(); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| std::unique_ptr<Iterator> Begin() const { | ||
| if constexpr (Reverse) { | ||
| return std::make_unique<Iterator>(centroids_.crbegin(), centroids_); | ||
| } else { | ||
| return std::make_unique<Iterator>(centroids_.cbegin(), centroids_); | ||
| } | ||
| } | ||
| std::unique_ptr<Iterator> End() const { | ||
| if (centroids_.empty()) { | ||
| if constexpr (Reverse) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above. |
||
| return std::make_unique<Iterator>(centroids_.crend(), centroids_); | ||
| } else { | ||
| return std::make_unique<Iterator>(centroids_.cend(), centroids_); | ||
| } | ||
| } | ||
| if constexpr (Reverse) { | ||
| return std::make_unique<Iterator>(std::prev(centroids_.crend()), centroids_); | ||
| } else { | ||
| return std::make_unique<Iterator>(std::prev(centroids_.cend()), centroids_); | ||
| } | ||
| } | ||
| double TotalWeight() const { return static_cast<double>(meta_data_.total_weight); } | ||
| double Min() const { return meta_data_.minimum; } | ||
| double Max() const { return meta_data_.maximum; } | ||
| uint64_t Size() const { return meta_data_.merged_nodes; } | ||
|
|
||
| private: | ||
| const TDigestMetadata& meta_data_; | ||
| const std::vector<Centroid>& centroids_; | ||
| }; | ||
|
|
||
| inline constexpr uint32_t kTDigestMaxCompression = 1000; // limit the compression to 1k | ||
|
|
||
| struct CentroidWithKey { | ||
|
|
@@ -77,7 +172,8 @@ class TDigest : public SubKeyScanner { | |
|
|
||
| rocksdb::Status Merge(engine::Context& ctx, const Slice& dest_digest, const std::vector<std::string>& source_digests, | ||
| const TDigestMergeOptions& options); | ||
| rocksdb::Status Rank(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs, bool reverse, | ||
| template <bool Reverse> | ||
| rocksdb::Status Rank(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs, | ||
| std::vector<int>& result); | ||
| rocksdb::Status GetMetaData(engine::Context& context, const Slice& digest_name, TDigestMetadata* metadata); | ||
|
|
||
|
|
@@ -132,4 +228,38 @@ class TDigest : public SubKeyScanner { | |
| Centroid* centroid) const; | ||
| }; | ||
|
|
||
| template <bool Reverse> | ||
| rocksdb::Status TDigest::Rank(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think maybe we can just expose two non-template function TDigest::Rank and TDigest::RevRank so that we can put the definition of this function into the .cc file instead of header?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not necesary in this PR. |
||
| std::vector<int>& result) { | ||
| auto ns_key = AppendNamespacePrefix(digest_name); | ||
| TDigestMetadata metadata; | ||
| { | ||
| LockGuard guard(storage_->GetLockManager(), ns_key); | ||
|
|
||
| if (auto status = getMetaDataByNsKey(ctx, ns_key, &metadata); !status.ok()) { | ||
| return status; | ||
| } | ||
|
|
||
| if (metadata.total_observations == 0) { | ||
| result.resize(inputs.size(), -2); | ||
| return rocksdb::Status::OK(); | ||
| } | ||
|
|
||
| if (auto status = mergeNodes(ctx, ns_key, &metadata); !status.ok()) { | ||
| return status; | ||
| } | ||
| } | ||
|
|
||
| std::vector<Centroid> centroids; | ||
| if (auto status = dumpCentroids(ctx, ns_key, metadata, ¢roids); !status.ok()) { | ||
| return status; | ||
| } | ||
|
|
||
| auto dump_centroids = DummyCentroids<Reverse>(metadata, centroids); | ||
| if (auto status = TDigestRank<Reverse>(dump_centroids, inputs, result); !status) { | ||
| return rocksdb::Status::InvalidArgument(status.Msg()); | ||
| } | ||
| return rocksdb::Status::OK(); | ||
| } | ||
|
|
||
| } // namespace redis | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe here could also use the util.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As the Begin and End functions are outside the Iterator class, I made getCbeginIter and getCendIter static. Is this a good solution?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the getbegin and getend are both stateless, static is ok.
maybe they could be a free inline function protected by a
detailprivate namespace if you prefer this way.