Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions mooncake-store/include/master_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -484,12 +484,13 @@ class MasterService {
const UUID& client_id_,
const std::chrono::system_clock::time_point put_start_time_,
size_t value_length, std::vector<Replica>&& reps,
bool enable_soft_pin)
bool enable_soft_pin, bool enable_hard_pin = false)
: client_id(client_id_),
put_start_time(put_start_time_),
size(value_length),
lease_timeout(),
soft_pin_timeout(std::nullopt),
hard_pinned(enable_hard_pin),
replicas_(std::move(reps)) {
MasterMetricManager::instance().inc_key_count(1);
if (enable_soft_pin) {
Expand All @@ -516,6 +517,8 @@ class MasterService {
mutable std::optional<std::chrono::system_clock::time_point>
soft_pin_timeout GUARDED_BY(lock); // optional soft pin, only
// set for vip objects
mutable bool hard_pinned GUARDED_BY(lock){false}; // hard pin:
// never evicted

void AddReplicas(std::vector<Replica>&& replicas) {
replicas_.insert(replicas_.end(),
Expand Down Expand Up @@ -684,6 +687,12 @@ class MasterService {
return soft_pin_timeout && now < *soft_pin_timeout;
}

// Returns true when this object is hard-pinned, i.e. the eviction
// policy must never reclaim it (explicit Remove still works).
bool IsHardPinned() const {
    SpinLocker guard(&lock);
    return hard_pinned;
}

Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hard_pinned is set only at creation time and never mutated elsewhere (no assignments beyond the constructor). Keeping it GUARDED_BY(lock) forces IsHardPinned() to take the spinlock on every eviction scan. Consider making hard_pinned immutable (e.g., const bool) and not guarded, so eviction and serialization can read it without lock overhead and without thread-safety annotation violations.

Suggested change
SpinLocker locker(&lock);
return hard_pinned;
}
return hard_pinned;
}

Copilot uses AI. Check for mistakes.
// Check if the metadata is valid
// Valid means it has at least one valid replica and size is greater
// than 0
Expand Down Expand Up @@ -897,15 +906,17 @@ class MasterService {
}

void Create(const UUID& client_id, uint64_t total_length,
std::vector<Replica> replicas, bool enable_soft_pin) {
std::vector<Replica> replicas, bool enable_soft_pin,
bool enable_hard_pin = false) {
if (Exists()) {
throw std::logic_error("Already exists");
}
const auto now = std::chrono::system_clock::now();
auto result = shard_guard_->metadata.emplace(
std::piecewise_construct, std::forward_as_tuple(key_),
std::forward_as_tuple(client_id, now, total_length,
std::move(replicas), enable_soft_pin));
std::move(replicas), enable_soft_pin,
enable_hard_pin));
it_ = result.first;
}

Expand Down
2 changes: 2 additions & 0 deletions mooncake-store/include/replica.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ inline std::ostream& operator<<(std::ostream& os,
struct ReplicateConfig {
size_t replica_num{1};
bool with_soft_pin{false};
bool with_hard_pin{false}; // Hard pin: object cannot be evicted
std::vector<std::string>
preferred_segments{}; // Preferred segments for allocation
std::string preferred_segment{}; // Deprecated: Single preferred segment
Expand All @@ -94,6 +95,7 @@ struct ReplicateConfig {
const ReplicateConfig& config) noexcept {
os << "ReplicateConfig: { replica_num: " << config.replica_num
<< ", with_soft_pin: " << config.with_soft_pin
<< ", with_hard_pin: " << config.with_hard_pin
<< ", preferred_segments: [";
for (size_t i = 0; i < config.preferred_segments.size(); ++i) {
os << config.preferred_segments[i];
Expand Down
44 changes: 32 additions & 12 deletions mooncake-store/src/master_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ auto MasterService::PutStart(const UUID& client_id, const std::string& key,
shard->metadata.emplace(
std::piecewise_construct, std::forward_as_tuple(key),
std::forward_as_tuple(client_id, now, total_length, std::move(replicas),
config.with_soft_pin));
config.with_soft_pin, config.with_hard_pin));
// Also insert the metadata into processing set for monitoring.
shard->processing_keys.insert(key);

Expand Down Expand Up @@ -2886,6 +2886,10 @@ void MasterService::BatchEvict(double evict_ratio_target,
candidates; // can be removed
for (auto it = shard->metadata.begin(); it != shard->metadata.end();
it++) {
// Hard-pinned objects are never evicted
if (it->second.IsHardPinned()) {
continue;
}
// Skip objects that are not expired or have incomplete replicas
if (!it->second.IsLeaseExpired(now) ||
!can_evict_replicas(it->second)) {
Expand Down Expand Up @@ -2920,7 +2924,8 @@ void MasterService::BatchEvict(double evict_ratio_target,
while (it != shard->metadata.end()) {
// Skip objects that are not allowed to be evicted in the first
// pass
if (!it->second.IsLeaseExpired(now) ||
if (it->second.IsHardPinned() ||
!it->second.IsLeaseExpired(now) ||
it->second.IsSoftPinned(now) ||
!can_evict_replicas(it->second)) {
++it;
Expand Down Expand Up @@ -2983,7 +2988,8 @@ void MasterService::BatchEvict(double evict_ratio_target,
(start_idx + i) % kNumShards);
auto it = shard->metadata.begin();
while (it != shard->metadata.end() && target_evict_num > 0) {
if (it->second.lease_timeout <= target_timeout &&
if (!it->second.IsHardPinned() &&
it->second.lease_timeout <= target_timeout &&
!it->second.IsSoftPinned(now) &&
can_evict_replicas(it->second)) {
// Evict this object
Expand Down Expand Up @@ -3025,9 +3031,9 @@ void MasterService::BatchEvict(double evict_ratio_target,

auto it = shard->metadata.begin();
while (it != shard->metadata.end() && target_evict_num > 0) {
// Skip objects that are not expired or have incomplete
// replicas
if (!it->second.IsLeaseExpired(now) ||
// Skip hard-pinned or not-yet-expired objects
if (it->second.IsHardPinned() ||
!it->second.IsLeaseExpired(now) ||
!can_evict_replicas(it->second)) {
++it;
continue;
Expand Down Expand Up @@ -3500,10 +3506,12 @@ MasterService::MetadataSerializer::SerializeMetadata(
MsgpackPacker& packer) const {
// Pack ObjectMetadata using array structure for efficiency
// Format: [client_id, put_start_time, size, lease_timeout,
// has_soft_pin_timeout, soft_pin_timeout, replicas_count, replicas...]
// has_soft_pin_timeout, soft_pin_timeout, replicas_count, replicas...,
// hard_pinned]

size_t array_size = 7; // size, lease_timeout, has_soft_pin_timeout,
// soft_pin_timeout, replicas_count
size_t array_size = 8; // client_id, put_start_time, size, lease_timeout,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we avoid hardcoding here? Use size_t array_size = sizeof(struct xxx) + sizeof(struct xxx)

// has_soft_pin_timeout, soft_pin_timeout,
// replicas_count + hard_pinned
Comment on lines +3513 to +3515
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The comment for array_size calculation could be made clearer. Currently, it lists replicas_count + hard_pinned, which might be misinterpreted as hard_pinned being added to replicas_count. It would be more precise to list hard_pinned as a separate fixed field, making it clear that 8 is the base count of fixed fields before adding the variable replicas_count.

Suggested change
size_t array_size = 8; // client_id, put_start_time, size, lease_timeout,
// has_soft_pin_timeout, soft_pin_timeout,
// replicas_count + hard_pinned
size_t array_size = 8; // client_id, put_start_time, size, lease_timeout,
// has_soft_pin_timeout, soft_pin_timeout,
// replicas_count, hard_pinned

array_size += metadata.CountReplicas(); // One element per replica
packer.pack_array(array_size);

Expand Down Expand Up @@ -3552,6 +3560,9 @@ MasterService::MetadataSerializer::SerializeMetadata(
}
}

// Serialize hard_pinned flag (appended for backward compatibility)
packer.pack(metadata.hard_pinned);
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SerializeMetadata() packs metadata.hard_pinned directly, but hard_pinned is annotated GUARDED_BY(lock). This introduces a new unguarded access to a guarded field. Either read it via IsHardPinned() / SpinLocker (ideally snapshot the guarded fields once under the lock and then pack), or make hard_pinned immutable (e.g., const bool) so it doesn’t need the per-object lock.

Suggested change
packer.pack(metadata.hard_pinned);
packer.pack(metadata.IsHardPinned());

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider this comment

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use IsHardPinned instead?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the comment, and switched to IsHardPinned() as you suggested.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — also caught another direct access in DeserializeShard (L3495) and fixed that too.


return {};
}

Expand All @@ -3567,6 +3578,7 @@ MasterService::MetadataSerializer::DeserializeMetadata(

// Need at least 7 elements: client_id, put_start_time, size, lease_timeout,
// has_soft_pin_timeout, soft_pin_timeout, replicas_count
// (8th element = hard_pinned is optional for backward compat)
if (obj.via.array.size < 7) {
return tl::unexpected(SerializationError(
ErrorCode::DESERIALIZE_FAIL,
Expand Down Expand Up @@ -3599,8 +3611,10 @@ MasterService::MetadataSerializer::DeserializeMetadata(
// Deserialize replicas count
uint32_t replicas_count = array[index++].as<uint32_t>();

// Check if array size matches replicas_count
if (obj.via.array.size != 7 + replicas_count) {
// Array size: 7 + replicas_count (old format) or 8 + replicas_count (new
// format with hard_pinned)
if (obj.via.array.size != 7 + replicas_count &&
obj.via.array.size != 8 + replicas_count) {
return tl::unexpected(SerializationError(
ErrorCode::DESERIALIZE_FAIL,
"deserialize ObjectMetadata array size mismatch"));
Expand All @@ -3619,13 +3633,19 @@ MasterService::MetadataSerializer::DeserializeMetadata(
replicas.emplace_back(std::move(*result.value()));
}

// Deserialize hard_pinned (if present, otherwise default to false)
bool is_hard_pinned = false;
if (index < obj.via.array.size) {
is_hard_pinned = array[index++].as<bool>();
}

// Create ObjectMetadata instance
bool enable_soft_pin = has_soft_pin_timeout;
auto metadata = std::make_unique<ObjectMetadata>(
client_id,
std::chrono::system_clock::time_point(
std::chrono::milliseconds(put_start_time_timestamp)),
size, std::move(replicas), enable_soft_pin);
size, std::move(replicas), enable_soft_pin, is_hard_pinned);
metadata->lease_timeout = std::chrono::system_clock::time_point(
Comment on lines 3642 to 3649
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DeserializeMetadata() correctly parses the optional hard_pinned flag and passes it into the temporary ObjectMetadata instance, but the restore path in DeserializeShard() reconstructs shard metadata by emplacing a new ObjectMetadata from metadata_ptr without forwarding hard_pinned, so hard-pinned objects will come back as not hard pinned after restoring a snapshot. Please ensure the shard-level reconstruction preserves hard_pinned (either pass it into the constructor or set the field under lock after emplace).

Copilot uses AI. Check for mistakes.
std::chrono::milliseconds(lease_timestamp));

Expand Down
164 changes: 164 additions & 0 deletions mooncake-store/tests/master_service_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4272,6 +4272,170 @@ TEST_F(MasterServiceTest, ForceRemoveAllLeasedObjects) {
ASSERT_FALSE(exist_result.value());
}
}
TEST_F(MasterServiceTest, HardPinObjectNotEvicted) {
    // Hard-pinned objects must survive eviction under memory pressure,
    // even after their lease expires and non-pinned objects are evicted.
    const uint64_t kv_lease_ttl = 200;
    auto service_config = MasterServiceConfig::builder()
                              .set_default_kv_lease_ttl(kv_lease_ttl)
                              .build();
    std::unique_ptr<MasterService> service_(new MasterService(service_config));
    const UUID client_id = generate_uuid();

    constexpr size_t buffer = 0x300000000;
    constexpr size_t segment_size = 1024 * 1024 * 16;
    constexpr size_t value_size = 1024 * 1024;
    [[maybe_unused]] const auto context =
        PrepareSimpleSegment(*service_, "test_segment", buffer, segment_size);

    // Put a hard-pinned object.
    {
        ReplicateConfig config;
        config.replica_num = 1;
        config.with_hard_pin = true;
        auto result =
            service_->PutStart(client_id, "pinned_model", value_size, config);
        ASSERT_TRUE(result.has_value());
        ASSERT_TRUE(
            service_->PutEnd(client_id, "pinned_model", ReplicaType::MEMORY)
                .has_value());
    }

    // Fill the remaining space with normal objects to create memory
    // pressure; record which puts actually succeeded so we can later
    // verify that eviction really happened.
    std::vector<std::string> filler_keys;
    for (int i = 0; i < 20; i++) {
        std::string key = "filler_" + std::to_string(i);
        ReplicateConfig config;
        config.replica_num = 1;
        auto result = service_->PutStart(client_id, key, value_size, config);
        if (result.has_value() &&
            service_->PutEnd(client_id, key, ReplicaType::MEMORY)
                .has_value()) {
            filler_keys.push_back(key);
        }
    }
    ASSERT_FALSE(filler_keys.empty());

    // Wait for leases to expire and eviction to kick in.
    std::this_thread::sleep_for(std::chrono::milliseconds(kv_lease_ttl + 500));

    // The hard-pinned object must still be there.
    auto get_result = service_->GetReplicaList("pinned_model");
    ASSERT_TRUE(get_result.has_value())
        << "Hard-pinned object was evicted, but it should never be";

    // Eviction must actually have occurred: with the segment full and
    // all leases expired, at least one non-pinned filler should be gone.
    size_t surviving_fillers = 0;
    for (const auto& key : filler_keys) {
        auto exist = service_->ExistKey(key);
        if (exist.has_value() && exist.value()) {
            surviving_fillers++;
        }
    }
    ASSERT_LT(surviving_fillers, filler_keys.size())
        << "Expected at least one non-pinned object to be evicted";

    // Explicit Remove should still work on hard-pinned objects.
    auto remove_result = service_->Remove("pinned_model");
    ASSERT_TRUE(remove_result.has_value());
    auto exist_result = service_->ExistKey("pinned_model");
    ASSERT_TRUE(exist_result.has_value());
    ASSERT_FALSE(exist_result.value());

    service_->RemoveAll();
}

TEST_F(MasterServiceTest, HardPinWithSoftPinEvictionOrder) {
    // Verify eviction priority: non-pinned objects are evicted first,
    // soft-pinned objects may be evicted when allowed, and hard-pinned
    // objects are never evicted even under extreme memory pressure.
    const uint64_t kv_lease_ttl = 200;
    const uint64_t kv_soft_pin_ttl = 10000;
    const bool allow_evict_soft_pinned_objects = true;
    auto service_config = MasterServiceConfig::builder()
                              .set_default_kv_lease_ttl(kv_lease_ttl)
                              .set_default_kv_soft_pin_ttl(kv_soft_pin_ttl)
                              .set_allow_evict_soft_pinned_objects(
                                  allow_evict_soft_pinned_objects)
                              .set_eviction_ratio(0.5)
                              .build();
    std::unique_ptr<MasterService> service_(new MasterService(service_config));
    const UUID client_id = generate_uuid();

    constexpr size_t buffer = 0x300000000;
    constexpr size_t segment_size = 1024 * 1024 * 16;
    constexpr size_t value_size = 1024 * 1024;
    [[maybe_unused]] const auto context =
        PrepareSimpleSegment(*service_, "test_segment", buffer, segment_size);

    // Put a hard-pinned object.
    {
        ReplicateConfig config;
        config.replica_num = 1;
        config.with_hard_pin = true;
        ASSERT_TRUE(
            service_->PutStart(client_id, "hard_pinned", value_size, config)
                .has_value());
        ASSERT_TRUE(
            service_->PutEnd(client_id, "hard_pinned", ReplicaType::MEMORY)
                .has_value());
    }

    // Put a soft-pinned object.
    {
        ReplicateConfig config;
        config.replica_num = 1;
        config.with_soft_pin = true;
        ASSERT_TRUE(
            service_->PutStart(client_id, "soft_pinned", value_size, config)
                .has_value());
        ASSERT_TRUE(
            service_->PutEnd(client_id, "soft_pinned", ReplicaType::MEMORY)
                .has_value());
    }

    // Fill the rest of the segment, recording which puts succeeded so
    // we can later confirm that some non-pinned objects got evicted.
    std::vector<std::string> normal_keys;
    for (int i = 0; i < 20; i++) {
        std::string key = "normal_" + std::to_string(i);
        ReplicateConfig config;
        config.replica_num = 1;
        auto result = service_->PutStart(client_id, key, value_size, config);
        if (result.has_value() &&
            service_->PutEnd(client_id, key, ReplicaType::MEMORY)
                .has_value()) {
            normal_keys.push_back(key);
        }
    }
    ASSERT_FALSE(normal_keys.empty());

    // Let leases expire, trigger eviction.
    std::this_thread::sleep_for(std::chrono::milliseconds(kv_lease_ttl + 500));

    // Hard-pinned always survives.
    ASSERT_TRUE(service_->GetReplicaList("hard_pinned").has_value())
        << "Hard-pinned object was evicted";

    // Under a 0.5 eviction ratio with all leases expired, non-pinned
    // objects should be the first to go: at least one must be gone.
    size_t surviving_normals = 0;
    for (const auto& key : normal_keys) {
        auto exist = service_->ExistKey(key);
        if (exist.has_value() && exist.value()) {
            surviving_normals++;
        }
    }
    ASSERT_LT(surviving_normals, normal_keys.size())
        << "Expected some non-pinned objects to be evicted first";

    // Let the lease granted by GetReplicaList expire before cleanup.
    std::this_thread::sleep_for(std::chrono::milliseconds(kv_lease_ttl));
    service_->RemoveAll();
}

TEST_F(MasterServiceTest, HardPinDefaultIsFalse) {
    // An object created without with_hard_pin behaves as a regular
    // (non-hard-pinned) object, while with_hard_pin=true still works;
    // both must be retrievable right after creation.
    auto service_config =
        MasterServiceConfig::builder().set_default_kv_lease_ttl(5000).build();
    std::unique_ptr<MasterService> service_(new MasterService(service_config));
    const UUID client_id = generate_uuid();

    constexpr size_t buffer = 0x300000000;
    constexpr size_t segment_size = 1024 * 1024 * 16;
    [[maybe_unused]] const auto context =
        PrepareSimpleSegment(*service_, "test_segment", buffer, segment_size);

    // Default config: hard pin not requested.
    ReplicateConfig plain_config;
    plain_config.replica_num = 1;
    ASSERT_TRUE(service_->PutStart(client_id, "normal_key", 1024, plain_config)
                    .has_value());
    ASSERT_TRUE(service_->PutEnd(client_id, "normal_key", ReplicaType::MEMORY)
                    .has_value());

    // Explicitly hard-pinned object.
    ReplicateConfig pinned_config;
    pinned_config.replica_num = 1;
    pinned_config.with_hard_pin = true;
    ASSERT_TRUE(service_->PutStart(client_id, "hp_key", 1024, pinned_config)
                    .has_value());
    ASSERT_TRUE(service_->PutEnd(client_id, "hp_key", ReplicaType::MEMORY)
                    .has_value());

    // Both objects should exist.
    ASSERT_TRUE(service_->GetReplicaList("normal_key").has_value());
    ASSERT_TRUE(service_->GetReplicaList("hp_key").has_value());

    service_->RemoveAll();
}

} // namespace mooncake::test

int main(int argc, char** argv) {
Expand Down
Loading