diff --git a/.github/workflows/asan.yml b/.github/workflows/asan.yml new file mode 100644 index 00000000..f688f24d --- /dev/null +++ b/.github/workflows/asan.yml @@ -0,0 +1,95 @@ +name: ASAN (AddressSanitizer & LeakSanitizer) + +# Memory error and leak detection using AddressSanitizer and LeakSanitizer +# This workflow builds memtier_benchmark with sanitizers enabled and runs +# the full test suite to detect memory leaks and address errors. + +on: [push, pull_request] + +jobs: + test-with-sanitizers: + runs-on: ubuntu-latest + name: Memory leak detection (ASAN/LSAN) + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get -qq update + sudo apt-get install -y \ + build-essential \ + autoconf \ + automake \ + pkg-config \ + libevent-dev \ + zlib1g-dev \ + libssl-dev + + - name: Build with sanitizers + run: | + autoreconf -ivf + ./configure --enable-sanitizers + make -j + + - name: Verify ASAN is enabled + run: | + ldd ./memtier_benchmark | grep asan + echo "✓ AddressSanitizer is linked" + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + architecture: x64 + + - name: Install Python test dependencies + run: pip install -r ./tests/test_requirements.txt + + - name: Install Redis + run: | + curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg + echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list + sudo apt-get -qq update + sudo apt-get install redis + sudo service redis-server stop + + - name: Increase connection limit + run: | + sudo sysctl -w net.ipv4.tcp_fin_timeout=10 + sudo sysctl -w net.ipv4.tcp_tw_reuse=1 + ulimit -n 40960 + + - name: Generate TLS test certificates + run: ./tests/gen-test-certs.sh + + - name: Test OSS TCP with ASAN + timeout-minutes: 10 + run: | + 
ASAN_OPTIONS=detect_leaks=1 ./tests/run_tests.sh + + - name: Test OSS TCP TLS with ASAN + timeout-minutes: 10 + run: | + ASAN_OPTIONS=detect_leaks=1 TLS=1 ./tests/run_tests.sh + + - name: Test OSS TCP TLS v1.2 with ASAN + timeout-minutes: 10 + run: | + ASAN_OPTIONS=detect_leaks=1 TLS_PROTOCOLS='TLSv1.2' TLS=1 ./tests/run_tests.sh + + - name: Test OSS TCP TLS v1.3 with ASAN + timeout-minutes: 10 + run: | + ASAN_OPTIONS=detect_leaks=1 TLS_PROTOCOLS='TLSv1.3' TLS=1 ./tests/run_tests.sh + + - name: Test OSS-CLUSTER TCP with ASAN + timeout-minutes: 10 + run: | + ASAN_OPTIONS=detect_leaks=1 OSS_STANDALONE=0 OSS_CLUSTER=1 ./tests/run_tests.sh + + - name: Test OSS-CLUSTER TCP TLS with ASAN + timeout-minutes: 10 + run: | + ASAN_OPTIONS=detect_leaks=1 OSS_STANDALONE=0 OSS_CLUSTER=1 TLS=1 ./tests/run_tests.sh + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9713cf74..8078f6ad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,11 @@ name: CI +# CI testing includes: +# - Debian versions: 11 (bullseye), 12 (bookworm), 13 (trixie), sid (unstable) +# - Various smoke test images (configurable via repository variables) +# - TLS and no-TLS builds +# - Code coverage and static analysis + on: [push, pull_request] jobs: @@ -30,12 +36,62 @@ jobs: run: | apt-get -qq update -y apt-get install -y \ - build-essential autoconf automake libpcre3-dev libevent-dev \ + build-essential autoconf automake libevent-dev \ pkg-config zlib1g-dev libssl-dev libboost-all-dev cmake flex - name: Build run: autoreconf -ivf && ./configure && make -j + - name: Verify version, libevent, openssl + run: | + ./memtier_benchmark --version + ldd ./memtier_benchmark | grep libevent + ldd ./memtier_benchmark | grep ssl + + test-debian-versions: + runs-on: ubuntu-latest + continue-on-error: true + env: + DEBIAN_FRONTEND: noninteractive + strategy: + matrix: + debian_version: + - "debian:bullseye" # Debian 11 (oldstable) + - "debian:bookworm" # Debian 12 (stable) + - 
"debian:trixie" # Debian 13 (testing) + - "debian:sid" # Debian unstable + container: ${{ matrix.debian_version }} + name: Test ${{ matrix.debian_version }} + steps: + - name: Install git and basic tools + run: | + apt-get update -qq + apt-get install -y git ca-certificates + + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + apt-get update -qq + apt-get install -y \ + build-essential \ + autoconf \ + automake \ + pkg-config \ + libevent-dev \ + zlib1g-dev \ + libssl-dev + + - name: Build + run: autoreconf -ivf && ./configure && make -j + + - name: Verify version, libevent, openssl + run: | + ./memtier_benchmark --version + ldd ./memtier_benchmark | grep libevent + ldd ./memtier_benchmark | grep ssl + build-notls: runs-on: ubuntu-latest steps: @@ -43,11 +99,16 @@ jobs: - name: Install dependencies run: | sudo apt-get -qq update - sudo apt-get install lcov autoconf automake pkg-config libevent-dev libpcre3-dev + sudo apt-get install lcov autoconf automake pkg-config libevent-dev - - name: Build + - name: Build without TLS run: autoreconf -ivf && ./configure --disable-tls && make -j + + - name: Verify version, libevent + run: | + ./memtier_benchmark --version + ldd ./memtier_benchmark | grep libevent + build-ubuntu-latest: runs-on: ubuntu-latest steps: @@ -55,11 +116,16 @@ jobs: - name: Install dependencies run: | sudo apt-get -qq update - sudo apt-get install lcov autoconf automake pkg-config libevent-dev libpcre3-dev + sudo apt-get install lcov autoconf automake pkg-config libevent-dev - - name: Build + - name: Build without TLS run: autoreconf -ivf && ./configure --disable-tls && make -j + + - name: Verify version, libevent + run: | + ./memtier_benchmark --version + ldd ./memtier_benchmark | grep libevent + build-ubuntu: strategy: matrix: @@ -70,7 +136,7 @@ jobs: - name: Install dependencies run: | sudo apt-get -qq update - sudo apt-get install lcov autoconf automake pkg-config libevent-dev libpcre3-dev 
libssl-dev + sudo apt-get install lcov autoconf automake pkg-config libevent-dev libssl-dev - name: Build # for coverage reports we need to use Ubuntu 22.04 or lower @@ -159,29 +225,31 @@ jobs: run: brew install autoconf automake libtool libevent openssl@${{ matrix.openssl }} - name: Build run: autoreconf -ivf && PKG_CONFIG_PATH=`brew --prefix openssl@${{ matrix.openssl }}`/lib/pkgconfig ./configure && make + - name: Verify version, libevent, openssl + run: | + ./memtier_benchmark --version + otool -L ./memtier_benchmark | grep libevent + otool -L ./memtier_benchmark | grep ssl - # According to https://github.com/actions/runner-images/blob/macos-14-arm64/20241119.509/images/macos/macos-14-arm64-Readme.md - # [macOS] OpenSSL 1.1 will be removed and OpenSSL 3 will be the default for all macOS images from November 4, 2024 - # so use macos-13 which does not have the deprecation notice - # macos-13 details: https://github.com/actions/runner-images/blob/main/images/macos/macos-13-Readme.md + # macos-13 has been retired as of December 2025, so use macos-14 + # macos-14 details: https://github.com/actions/runner-images/blob/main/images/macos/macos-14-Readme.md build-macos-openssl-1-1: - strategy: - matrix: - platform: [macos-13] - runs-on: ${{ matrix.platform }} + runs-on: macos-14 steps: - uses: actions/checkout@v4 - name: Install dependencies run: brew install autoconf automake libtool libevent openssl@1.1 - name: Build run: autoreconf -ivf && PKG_CONFIG_PATH=`brew --prefix openssl@1.1`/lib/pkgconfig ./configure && make + - name: Verify version, libevent, openssl + run: | + ./memtier_benchmark --version + otool -L ./memtier_benchmark | grep libevent + otool -L ./memtier_benchmark | grep ssl build-macos-openssl-1-0-2: - strategy: - matrix: - platform: [macos-13] - runs-on: ${{ matrix.platform }} + runs-on: macos-14 steps: - uses: actions/checkout@v4 - name: Install dependencies @@ -190,3 +258,8 @@ jobs: run: brew 
install rbenv/tap/openssl@1.0 - name: Build run: autoreconf -ivf && PKG_CONFIG_PATH=`brew --prefix openssl@1.0`/lib/pkgconfig ./configure && make + - name: Verify version, libevent, openssl + run: | + ./memtier_benchmark --version + otool -L ./memtier_benchmark | grep libevent + otool -L ./memtier_benchmark | grep ssl diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 458a2725..8d2abf83 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ on: default: "ubuntu-22.04" type: string permissions: - contents: read + contents: write # Required for creating releases and uploading assets actions: read jobs: build-source-package: @@ -62,7 +62,7 @@ jobs: run: | sudo apt-get update && \ sudo apt-get install \ - build-essential autoconf automake libpcre3-dev libevent-dev \ + build-essential autoconf automake libevent-dev \ pkg-config zlib1g-dev libssl-dev libboost-all-dev cmake flex \ debhelper dput - name: Create changelog diff --git a/.github/workflows/tsan.yml b/.github/workflows/tsan.yml new file mode 100644 index 00000000..cf8d40d7 --- /dev/null +++ b/.github/workflows/tsan.yml @@ -0,0 +1,90 @@ +name: TSAN (ThreadSanitizer) + +# Data race detection using ThreadSanitizer +# This workflow builds memtier_benchmark with TSAN enabled and runs +# tests to detect data races and threading issues. +# +# NOTE: TSAN currently detects known data races in the codebase. +# This workflow is informational and will not fail the build. 
+# See: https://github.com/google/sanitizers/issues/1716 for TSAN/ASLR issues + +on: [push, pull_request] + +jobs: + test-with-thread-sanitizer: + runs-on: ubuntu-latest + name: Data race detection (TSAN) + continue-on-error: true # Don't fail build on known races + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get -qq update + sudo apt-get install -y \ + build-essential \ + autoconf \ + automake \ + pkg-config \ + libevent-dev \ + zlib1g-dev \ + libssl-dev + + - name: Build with Thread Sanitizer + run: | + autoreconf -ivf + ./configure --enable-thread-sanitizer + make -j + + - name: Verify TSAN is enabled + run: | + ldd ./memtier_benchmark | grep tsan + echo "✓ ThreadSanitizer is linked" + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + architecture: x64 + + - name: Install Python test dependencies + run: pip install -r ./tests/test_requirements.txt + + - name: Install Redis + run: | + curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg + echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list + sudo apt-get -qq update + sudo apt-get install redis + sudo service redis-server stop + + - name: Increase connection limit + run: | + sudo sysctl -w net.ipv4.tcp_fin_timeout=10 + sudo sysctl -w net.ipv4.tcp_tw_reuse=1 + ulimit -n 40960 + + - name: Generate TLS test certificates + run: ./tests/gen-test-certs.sh + + - name: Test OSS TCP with TSAN + timeout-minutes: 15 + run: | + # Use setarch to disable ASLR (workaround for TSAN on kernel 6.6+) + # Use suppression file to ignore known benign races + export TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" + setarch `uname -m` -R bash -c './tests/run_tests.sh' + + - name: Test OSS TCP TLS with TSAN + timeout-minutes: 
15 + run: | + export TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" + setarch `uname -m` -R bash -c 'TLS=1 ./tests/run_tests.sh' + + - name: Test OSS-CLUSTER TCP with TSAN + timeout-minutes: 15 + run: | + export TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" + setarch `uname -m` -R bash -c 'OSS_STANDALONE=0 OSS_CLUSTER=1 ./tests/run_tests.sh' + diff --git a/.github/workflows/ubsan.yml b/.github/workflows/ubsan.yml new file mode 100644 index 00000000..0795c55b --- /dev/null +++ b/.github/workflows/ubsan.yml @@ -0,0 +1,90 @@ +name: UBSan (UndefinedBehaviorSanitizer) + +# Undefined behavior detection using UndefinedBehaviorSanitizer +# This workflow builds memtier_benchmark with UBSan enabled and runs +# the full test suite to detect undefined behavior issues. + +on: [push, pull_request] + +jobs: + test-with-ubsan: + runs-on: ubuntu-latest + name: Undefined behavior detection (UBSan) + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get -qq update + sudo apt-get install -y \ + build-essential \ + autoconf \ + automake \ + pkg-config \ + libevent-dev \ + zlib1g-dev \ + libssl-dev + + - name: Build with UBSan + run: | + autoreconf -ivf + ./configure --enable-ubsan + make -j + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + architecture: x64 + + - name: Install Python test dependencies + run: pip install -r ./tests/test_requirements.txt + + - name: Install Redis + run: | + curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg + echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list + sudo apt-get -qq update + sudo apt-get install redis + sudo service redis-server stop + + - name: Increase connection limit + run: | + sudo sysctl -w net.ipv4.tcp_fin_timeout=10 + sudo sysctl 
-w net.ipv4.tcp_tw_reuse=1 + ulimit -n 40960 + + - name: Generate TLS test certificates + run: ./tests/gen-test-certs.sh + + - name: Test OSS TCP with UBSan + timeout-minutes: 10 + run: | + UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 ./tests/run_tests.sh + + - name: Test OSS TCP TLS with UBSan + timeout-minutes: 10 + run: | + UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 TLS=1 ./tests/run_tests.sh + + - name: Test OSS TCP TLS v1.2 with UBSan + timeout-minutes: 10 + run: | + UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 TLS_PROTOCOLS='TLSv1.2' TLS=1 ./tests/run_tests.sh + + - name: Test OSS TCP TLS v1.3 with UBSan + timeout-minutes: 10 + run: | + UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 TLS_PROTOCOLS='TLSv1.3' TLS=1 ./tests/run_tests.sh + + - name: Test OSS-CLUSTER TCP with UBSan + timeout-minutes: 10 + run: | + UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 OSS_STANDALONE=0 OSS_CLUSTER=1 ./tests/run_tests.sh + + - name: Test OSS-CLUSTER TCP TLS with UBSan + timeout-minutes: 10 + run: | + UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 OSS_STANDALONE=0 OSS_CLUSTER=1 TLS=1 ./tests/run_tests.sh + diff --git a/.gitignore b/.gitignore index 27ec75a2..b4c29ef7 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ config.log config.status libtool stamp-h1 +version.h *.DS_Store .vscode/* .idea/* diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md new file mode 100644 index 00000000..75f1c155 --- /dev/null +++ b/DEVELOPMENT.md @@ -0,0 +1,448 @@ +# Development Guide + +This document provides information for developers working on memtier_benchmark. + +## Building from Source + +### Prerequisites + +The following libraries are required for building: + +* libevent 2.0.10 or newer. +* OpenSSL (unless TLS support is disabled by `./configure --disable-tls`). 
+ +The following tools are required: +* autoconf +* automake +* pkg-config +* GNU make +* GCC C++ compiler + +### CentOS/Red Hat Linux 7 or newer + +Use the following to install prerequisites: +``` +$ sudo yum install autoconf automake make gcc-c++ \ + zlib-devel libmemcached-devel libevent-devel openssl-devel +``` + +### Ubuntu/Debian + +Use the following to install prerequisites: + +``` +$ sudo apt-get install build-essential autoconf automake \ + libevent-dev pkg-config zlib1g-dev libssl-dev +``` + +### macOS + +To build natively on macOS, use Homebrew to install the required dependencies: + +``` +$ brew install autoconf automake libtool libevent pkg-config openssl@3.0 +``` + +When running `./configure`, if it fails to find libssl it may be necessary to +tweak the `PKG_CONFIG_PATH` environment variable: + +``` +PKG_CONFIG_PATH=`brew --prefix openssl@3.0`/lib/pkgconfig ./configure +``` + +### Building and Installing + +After downloading the source tree, use standard autoconf/automake commands: + +``` +$ autoreconf -ivf +$ ./configure +$ make +$ sudo make install +``` + +**Note**: Debug symbols (`-g`) are included by default in all builds for crash analysis and debugging. The default build flags are `-O2 -g -Wall`, which provides: +- Full optimizations (`-O2`) for production performance +- Debug symbols (`-g`) for meaningful stack traces and core dump analysis +- All warnings enabled (`-Wall`) + +For development/debugging builds without optimizations: + +``` +$ ./configure CXXFLAGS="-g -O0 -Wall" +``` + +This disables optimizations (`-O0`), making it easier to step through code in a debugger, but should not be used for performance testing or production. + +## Testing + +The project includes a basic set of integration tests. 
+ +### Integration Tests + +Integration tests are based on [RLTest](https://github.com/RedisLabsModules/RLTest), and specific setup parameters can be provided +to configure tests and topologies (OSS standalone and OSS cluster). By default the tests will be run for all common commands, and with OSS standalone setup. + +To run all integration tests in a Python virtualenv, follow these steps: + + $ mkdir -p .env + $ virtualenv .env + $ source .env/bin/activate + $ pip install -r tests/test_requirements.txt + $ ./tests/run_tests.sh + +To understand what test options are available simply run: + + $ ./tests/run_tests.sh --help + +### Memory Leak Detection with Sanitizers + +memtier_benchmark supports building with AddressSanitizer (ASAN) and LeakSanitizer (LSAN) to detect memory errors and leaks during testing. + +To build with sanitizers enabled: + + $ ./configure --enable-sanitizers + $ make + +To run tests with leak detection: + + $ ASAN_OPTIONS=detect_leaks=1 ./tests/run_tests.sh + +If memory leaks or errors are detected, tests will fail with detailed error messages showing the location of the issue. + +To verify ASAN is enabled: + + $ ldd ./memtier_benchmark | grep asan + +### Undefined Behavior Detection with UBSan + +memtier_benchmark supports building with UndefinedBehaviorSanitizer (UBSan) to detect undefined behavior such as integer overflows, null pointer dereferences, and alignment issues. + +To build with UBSan enabled: + + $ ./configure --enable-ubsan + $ make + +To run tests with undefined behavior detection: + + $ UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 ./tests/run_tests.sh + +UBSan can be combined with ASAN for comprehensive testing: + + $ ./configure --enable-sanitizers --enable-ubsan + $ make + +**Note:** UBSan can be used together with ASAN/LSAN, but not with ThreadSanitizer (TSAN). 
+ +### Data Race Detection with Thread Sanitizer + +memtier_benchmark supports building with ThreadSanitizer (TSAN) to detect data races and threading issues. + +To build with Thread Sanitizer enabled: + + $ ./configure --enable-thread-sanitizer + $ make + +To run tests with race detection (requires disabling ASLR on kernel 6.6+): + + $ TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" setarch `uname -m` -R ./tests/run_tests.sh + +To verify TSAN is enabled: + + $ ldd ./memtier_benchmark | grep tsan + +**Note:** TSAN and ASAN are mutually exclusive and cannot be used together. A suppression file (`tsan_suppressions.txt`) is provided to ignore known benign data races that do not affect correctness. + +## Crash Handling and Debugging + +memtier_benchmark includes built-in crash handling that automatically prints a detailed bug report when the program crashes due to signals like SIGSEGV, SIGBUS, SIGFPE, SIGILL, or SIGABRT. + +### Crash Report Features + +When a crash occurs, memtier_benchmark will automatically: + +1. 
**Print a detailed bug report** including: + - Timestamp and process ID + - Signal type and code + - Fault address + - Stack trace for all threads: + - Detailed stack trace for the crashing thread showing the call chain + - Thread IDs for all worker threads (stack traces for non-crashing threads not available on most platforms) + - System information: + - `os`: Operating system name, kernel version, and architecture + - `memtier_version`: Version number from configure.ac + - `memtier_git_sha1`: Git commit SHA (8 characters) at build time + - `memtier_git_dirty`: Whether working directory had uncommitted changes (0=clean, 1=dirty) + - `arch_bits`: CPU architecture (32 or 64 bit) + - `gcc_version`: GCC compiler version used to build + - `libevent_version`: libevent library version + - `openssl_version`: OpenSSL library version (if TLS enabled) + - Client connection information (if benchmark is running): + - For each thread, client, and connection: + - Connection address and port (remote endpoint) + - Local port (client-side port number) + - Connection state (disconnected, connecting, connected) + - Number of pending requests + - Last command type sent on this connection (GET, SET, etc.) + - Instructions for enabling core dumps + +2. **Attempt to enable core dumps** by setting `RLIMIT_CORE` to unlimited at startup + - This allows the OS to generate a core dump file for post-mortem debugging + - Note: Core dump generation also depends on system configuration (e.g., `/proc/sys/kernel/core_pattern`) + +3. 
**Re-raise the signal** after printing the bug report to allow the OS to generate a core dump + +### Example Crash Report + +``` +=== MEMTIER_BENCHMARK BUG REPORT START: Cut & paste starting from here === +[12345] 16 Dec 2025 11:46:18 # memtier_benchmark crashed by signal: 11 +[12345] 16 Dec 2025 11:46:18 # Crashed running signal +[12345] 16 Dec 2025 11:46:18 # Signal code: 1 +[12345] 16 Dec 2025 11:46:18 # Fault address: (nil) + +[12345] 16 Dec 2025 11:46:18 # --- STACK TRACE (all threads) +[12345] 16 Dec 2025 11:46:18 # Thread 130098597599040 (current/crashing thread): +[12345] 16 Dec 2025 11:46:18 # /lib/x86_64-linux-gnu/libc.so.6(+0x45330) [0x...] +[12345] 16 Dec 2025 11:46:18 # /lib/x86_64-linux-gnu/libc.so.6(clock_nanosleep+0xbf) [0x...] +[12345] 16 Dec 2025 11:46:18 # ./memtier_benchmark(+0x18c6) [0x...] +... +[12345] 16 Dec 2025 11:46:18 # Thread 130098583828160 (worker thread 0): +[12345] 16 Dec 2025 11:46:18 # (Note: Stack trace for non-crashing threads not available on this platform) +[12345] 16 Dec 2025 11:46:18 # Thread 130098575435456 (worker thread 1): +[12345] 16 Dec 2025 11:46:18 # (Note: Stack trace for non-crashing threads not available on this platform) + +[12345] 16 Dec 2025 11:46:18 # --- INFO OUTPUT +[12345] 16 Dec 2025 11:46:18 # os:Linux 6.14.0-37-generic x86_64 +[12345] 16 Dec 2025 11:46:18 # memtier_version:2.2.1 +[12345] 16 Dec 2025 11:46:18 # memtier_git_sha1:8985eb5a +[12345] 16 Dec 2025 11:46:18 # memtier_git_dirty:1 +[12345] 16 Dec 2025 11:46:18 # arch_bits:64 +[12345] 16 Dec 2025 11:46:18 # gcc_version:13.3.0 +[12345] 16 Dec 2025 11:46:18 # libevent_version:2.1.12-stable +[12345] 16 Dec 2025 11:46:18 # openssl_version:OpenSSL 3.0.13 30 Jan 2024 + +[12345] 16 Dec 2025 11:46:18 # --- CLIENT LIST OUTPUT +[12345] 16 Dec 2025 11:46:18 # thread=0 client=0 conn=0 addr=127.0.0.1:6379 local_port=37934 state=connected pending=10 last_cmd=GET +[12345] 16 Dec 2025 11:46:18 # thread=0 client=1 conn=0 addr=127.0.0.1:6379 local_port=37948 
state=connected pending=7 last_cmd=GET +[12345] 16 Dec 2025 11:46:18 # thread=1 client=0 conn=0 addr=127.0.0.1:6379 local_port=37964 state=connected pending=10 last_cmd=GET +[12345] 16 Dec 2025 11:46:18 # thread=1 client=1 conn=0 addr=127.0.0.1:6379 local_port=37978 state=connected pending=0 last_cmd=none +[12345] 16 Dec 2025 11:46:18 # For more information, please check the core dump if available. +[12345] 16 Dec 2025 11:46:18 # To enable core dumps: ulimit -c unlimited +[12345] 16 Dec 2025 11:46:18 # Core pattern: /proc/sys/kernel/core_pattern + +=== MEMTIER_BENCHMARK BUG REPORT END. Make sure to include from START to END. === + + Please report this bug by opening an issue on github.com/RedisLabs/memtier_benchmark +``` + +### Testing the Crash Handler + +You can test the crash handler functionality by sending a signal to a running memtier_benchmark process: + +```bash +# Start memtier_benchmark in one terminal +$ ./memtier_benchmark --server localhost --port 6379 --test-time 60 + +# In another terminal, send SEGV signal to trigger crash handler +$ kill -SEGV `pgrep memtier` +``` + +This will trigger the crash handler and display the bug report. This is useful for: +- Verifying crash handling works on your system +- Testing in CI/CD pipelines +- Ensuring core dumps are properly configured + +Test scripts are also available: + +```bash +# Integration tests with active connections - using RLTest framework +$ TEST=test_crash_handler_integration.py ./tests/run_tests.sh +``` + +The integration tests include: + +1. **test_crash_handler_with_active_connections**: Tests crash handler when main thread crashes + - Starts a benchmark with 2 threads and 2 clients + - Waits 5 seconds for connections to be established + - Sends SEGV signal to the main process + - Verifies crash report includes CLIENT LIST OUTPUT with connection states + +2. 
**test_crash_handler_worker_thread**: Tests crash handler when a worker thread crashes + - Starts a benchmark with 2 threads and 2 clients + - Waits 5 seconds for connections to be established + - Enumerates worker threads using `/proc//task/` + - Sends SEGV signal to a worker thread (not the main thread) + - Verifies crash report correctly identifies the crashing worker thread + - Verifies stack trace shows the worker thread's call stack + +Both tests follow the standard RLTest pattern used by other memtier_benchmark tests and integrate seamlessly with the existing test infrastructure. + +### Enabling Core Dumps + +memtier_benchmark automatically attempts to enable core dumps at startup. However, system settings may prevent core dump generation. + +To manually enable core dumps: + +```bash +# Enable unlimited core dump size +$ ulimit -c unlimited + +# Run memtier_benchmark +$ ./memtier_benchmark [options] +``` + +To check where core dumps are written: + +```bash +$ cat /proc/sys/kernel/core_pattern +``` + +Common patterns: +- `core` - Writes to current directory as `core` or `core.` +- `|/usr/share/apport/apport %p %s %c %d %P` - Ubuntu/Debian using Apport +- `|/usr/lib/systemd/systemd-coredump %P %u %g %s %t %c %h %e` - systemd-coredump + +### Testing Core Dump Generation + +To verify that core dumps are being generated: + +```bash +# 1. Enable core dumps +$ ulimit -c unlimited + +# 2. Check current limit +$ ulimit -c +unlimited + +# 3. Start memtier_benchmark and trigger a test crash +$ ./memtier_benchmark --server localhost --port 6379 --test-time 60 & +$ sleep 2 +$ kill -SEGV `pgrep memtier` + +# 4. Check for core dump in current directory +$ ls -lh core* 2>/dev/null + +# 5. 
If using systemd-coredump, check with coredumpctl or journalctl +$ coredumpctl list memtier # List all memtier crashes +$ journalctl -xe | grep -i memtier # Check system journal for crash info +$ coredumpctl info # Show details of most recent crash +$ coredumpctl gdb # Open most recent core dump in gdb directly +$ coredumpctl dump -o core.dump # Extract core file to current directory + +# 6. If using Apport (Ubuntu/Debian), check /var/crash +$ ls -lh /var/crash/*memtier* +``` + +**If no core dump appears** (common on Ubuntu with Apport): + +The message "Segmentation fault (core dumped)" may appear even when no core file is created. This happens when Apport intercepts the crash but doesn't save it (often for non-packaged binaries). + +To get actual core files for debugging: + +```bash +# Temporarily disable Apport and configure direct core dumps +$ sudo systemctl stop apport +$ sudo sysctl -w kernel.core_pattern=core.%p + +# Enable core dumps in your shell +$ ulimit -c unlimited + +# Trigger test crash +$ ./memtier_benchmark --server localhost --port 6379 --test-time 60 & +$ sleep 2 +$ kill -SEGV `pgrep memtier` + +# Core file should now appear +$ ls -lh core.* + +# Analyze with gdb +$ gdb ./memtier_benchmark core.83396 +(gdb) bt +(gdb) frame 0 +(gdb) info locals + +# Re-enable Apport when done +$ sudo sysctl -w kernel.core_pattern='|/usr/share/apport/apport -p%p -s%s -c%c -d%d -P%P -u%u -g%g -F%F -- %E' +$ sudo systemctl start apport +``` + +**Note**: The crash handler's stack trace is often sufficient for debugging without needing the core dump. Core dumps are mainly useful for examining variable values and memory state at the time of the crash. + +### Analyzing Core Dumps + +If a core dump is generated, you can analyze it using gdb. 
+ +**With systemd-coredump** (easiest method): + +```bash +# List all core dumps +$ coredumpctl list + +# Show details of most recent crash +$ coredumpctl info + +# Open most recent core dump directly in gdb +$ coredumpctl gdb + +# Or extract the core file +$ coredumpctl dump -o core.dump +$ gdb ./memtier_benchmark core.dump +``` + +**With direct core files**: + +```bash +# Load the core dump +$ gdb ./memtier_benchmark core.12345 + +# In gdb, examine the backtrace +(gdb) bt + +# Examine specific frames +(gdb) frame 0 +(gdb) info locals + +# Print variables +(gdb) print variable_name + +# Quit gdb +(gdb) quit +``` + +**Example session**: + +```bash +$ ./memtier_benchmark --server localhost --port 6379 --test-time 60 & +$ sleep 2 +$ kill -SEGV `pgrep memtier` +[crash report appears] +Segmentation fault (core dumped) + +$ coredumpctl list +TIME PID UID GID SIG COREFILE EXE +Tue 2025-12-16 12:08:17 WET 83396 1000 1000 SIGSEGV present /home/fco/.../memtier_benchmark + +$ coredumpctl gdb +(gdb) bt +#0 0x00007483d6a9eb2c in __pthread_kill_implementation +#1 0x00007483d6a4527e in __GI_raise +#2 0x00005c8f4b42a5d2 in crash_handler +... +(gdb) quit +``` + + + +Because debug symbols (`-g`) are included in the default build flags, the binary contains full debugging information, making stack traces more readable and allowing detailed inspection with gdb. + +### Reporting Crashes + +If you encounter a crash, please report it by opening an issue on [GitHub](https://github.com/RedisLabs/memtier_benchmark/issues) and include: + +1. The complete bug report output (from START to END markers) +2. The command line used to run memtier_benchmark +3. System information (OS, architecture, library versions) +4. Steps to reproduce the crash (if known) +5. 
Core dump or gdb backtrace (if available) + diff --git a/Dockerfile b/Dockerfile index 38fb935e..ee168614 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ RUN apt-get update RUN \ DEBIAN_FRONTEND=noninteractive \ apt-get install -y \ - build-essential autoconf automake libpcre3-dev libevent-dev \ + build-essential autoconf automake libevent-dev \ pkg-config zlib1g-dev libssl-dev libboost-all-dev cmake flex COPY . /memtier_benchmark WORKDIR /memtier_benchmark diff --git a/Dockerfile.alpine b/Dockerfile.alpine index 150e70fc..7cfc8de6 100644 --- a/Dockerfile.alpine +++ b/Dockerfile.alpine @@ -2,7 +2,7 @@ FROM alpine:latest as builder RUN \ apk add \ make g++ autoconf automake libtool pkgconfig \ - pcre-dev libevent-dev zlib-dev openssl-dev + libevent-dev zlib-dev openssl-dev COPY . /src WORKDIR /src RUN autoreconf -ivf && ./configure && make && make install @@ -11,6 +11,6 @@ FROM alpine:latest LABEL Description="memtier_benchmark" COPY --from=builder /usr/local/bin/memtier_benchmark /usr/local/bin/memtier_benchmark RUN \ - apk add libstdc++ pcre libevent zlib openssl + apk add libstdc++ libevent zlib openssl ENTRYPOINT ["memtier_benchmark"] diff --git a/JSON_handler.cpp b/JSON_handler.cpp index 030afaa6..22095684 100644 --- a/JSON_handler.cpp +++ b/JSON_handler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/JSON_handler.h b/JSON_handler.h index 88733158..7fad2bd2 100644 --- a/JSON_handler.h +++ b/JSON_handler.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/Makefile.am b/Makefile.am index c4d60839..9696a60f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,5 @@ # Makefile for memtier_benchmark -# Copyright (C) 2011-2017 Redis Labs Ltd. +# Copyright (C) 2011-2026 Redis Labs Ltd. 
# This file is part of memtier_benchmark. @@ -18,7 +18,14 @@ AUTOMAKE_OPTIONS = foreign 1.9 subdir-objects CODE_COVERAGE_IGNORE_PATTERN='*/deps/hdr_histogram/*' -EXTRA_DIST = README.md COPYING +EXTRA_DIST = README.md COPYING version.sh + +# Generate version.h before building +BUILT_SOURCES = version.h +CLEANFILES = version.h + +version.h: + $(SHELL) $(srcdir)/version.sh bin_PROGRAMS = memtier_benchmark completionsdir=$(BASH_COMPLETION_DIR) diff --git a/README.md b/README.md index aa4b34bc..0f506eec 100644 --- a/README.md +++ b/README.md @@ -55,87 +55,7 @@ brew install memtier_benchmark ### Installing from source -#### Prerequisites - -The following libraries are required for building: - -* libevent 2.0.10 or newer. -* libpcre 8.x. -* OpenSSL (unless TLS support is disabled by `./configure --disable-tls`). - -The following tools are required -* autoconf -* automake -* pkg-config -* GNU make -* GCC C++ compiler - -#### CentOS/Red Hat Linux 7 or newer - -Use the following to install prerequisites: -``` -$ sudo yum install autoconf automake make gcc-c++ \ - pcre-devel zlib-devel libmemcached-devel libevent-devel openssl-devel -``` - -#### Ubuntu/Debian - -Use the following to install prerequisites: - -``` -$ sudo apt-get install build-essential autoconf automake libpcre3-dev \ - libevent-dev pkg-config zlib1g-dev libssl-dev -``` - -#### macOS - -To build natively on macOS, use Homebrew to install the required dependencies: - -``` -$ brew install autoconf automake libtool libevent pkg-config openssl@3.0 -``` - -When running `./configure`, if it fails to find libssl it may be necessary to -tweak the `PKG_CONFIG_PATH` environment variable: - -``` -PKG_CONFIG_PATH=`brew --prefix openssl@3.0`/lib/pkgconfig ./configure -``` - -#### Building and installing - -After downloading the source tree, use standard autoconf/automake commands: - -``` -$ autoreconf -ivf -$ ./configure -$ make -$ sudo make install -``` - -#### Testing - -The project includes a basic set of integration 
tests. - - -**Integration tests** - - -Integration tests are based on [RLTest](https://github.com/RedisLabsModules/RLTest), and specific setup parameters can be provided -to configure tests and topologies (OSS standalone and OSS cluster). By default the tests will be ran for all common commands, and with OSS standalone setup. - - -To run all integration tests in a Python virtualenv, follow these steps: - - $ mkdir -p .env - $ virtualenv .env - $ source .env/bin/activate - $ pip install -r tests/test_requirements.txt - $ ./tests/run_tests.sh - -To understand what test options are available simply run: - - $ ./tests/run_tests.sh --help +For detailed instructions on building from source, running tests, and using sanitizers for development, see [DEVELOPMENT.md](DEVELOPMENT.md). ## Using Docker @@ -175,6 +95,17 @@ $ memtier_benchmark --help for command line options. +## Crash Reporting + +memtier_benchmark includes built-in crash handling that automatically generates detailed bug reports when the program crashes. If you encounter a crash, the tool will print a comprehensive report including: + +* Stack traces for all threads +* System and build information +* Active client connection states +* Instructions for generating core dumps + +For more information on crash handling, debugging, and how to report crashes, see the [Crash Handling and Debugging](DEVELOPMENT.md#crash-handling-and-debugging) section in DEVELOPMENT.md. + ### Cluster mode #### Connections diff --git a/client.cpp b/client.cpp index a088d478..abbdcd06 100755 --- a/client.cpp +++ b/client.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. 
* @@ -46,10 +46,12 @@ #include #include +#include #include #include "client.h" #include "cluster_client.h" +#include "config_types.h" bool client::setup_client(benchmark_config *config, abstract_protocol *protocol, object_generator *objgen) @@ -72,6 +74,8 @@ bool client::setup_client(benchmark_config *config, abstract_protocol *protocol, else if (config->distinct_client_seed) m_obj_gen->set_random_seed(config->next_client_idx); + m_obj_gen->fill_value_buffer(); + // Setup first arbitrary command if (config->arbitrary_commands->is_defined()) advance_arbitrary_command_index(); @@ -284,7 +288,46 @@ bool client::create_arbitrary_request(unsigned int command_index, struct timeval get_key_response res = get_key_for_conn(command_index, conn_id, &key_index); /* If key not available for this connection, we have a bug of sending partial request */ assert(res == available_for_conn); - cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, m_obj_gen->get_key(), m_obj_gen->get_key_len()); + + //when we have static data mixed with the key placeholder + if (arg->has_key_affixes) { + // Pre-calculate total length to avoid reallocations + const char* key = m_obj_gen->get_key(); + unsigned int key_len = m_obj_gen->get_key_len(); + size_t prefix_len = arg->data_prefix.length(); + size_t suffix_len = arg->data_suffix.length(); + size_t total_len = prefix_len + key_len + suffix_len; + + // Optimization: use stack buffer for small keys to avoid heap allocation + if (total_len < KEY_BUFFER_STACK_SIZE) { + char stack_buffer[KEY_BUFFER_STACK_SIZE]; + char* pos = stack_buffer; + + // Manual copy for better performance + if (prefix_len > 0) { + memcpy(pos, arg->data_prefix.data(), prefix_len); + pos += prefix_len; + } + memcpy(pos, key, key_len); + pos += key_len; + if (suffix_len > 0) { + memcpy(pos, arg->data_suffix.data(), suffix_len); + } + + cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, stack_buffer, total_len); + } else { + // Fallback to string for large 
keys + std::string combined_key; + combined_key.reserve(total_len); + combined_key.append(arg->data_prefix); + combined_key.append(key, key_len); + combined_key.append(arg->data_suffix); + + cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, combined_key.c_str(), combined_key.length()); + } + } else{ + cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, m_obj_gen->get_key(), m_obj_gen->get_key_len()); + } } else if (arg->type == data_type) { unsigned int value_len; const char *value = m_obj_gen->get_value(0, &value_len); @@ -646,6 +689,32 @@ void client_group::run(void) event_base_dispatch(m_base); } +void client_group::interrupt(void) +{ + // Mark all clients as interrupted + set_all_clients_interrupted(); + // Break the event loop to stop processing + event_base_loopbreak(m_base); + // Set end time for all clients as close as possible to the loop break + finalize_all_clients(); +} + +void client_group::finalize_all_clients(void) +{ + for (std::vector::iterator i = m_clients.begin(); i != m_clients.end(); i++) { + client* c = *i; + c->set_end_time(); + } +} + +void client_group::set_all_clients_interrupted(void) +{ + for (std::vector::iterator i = m_clients.begin(); i != m_clients.end(); i++) { + client* c = *i; + c->get_stats()->set_interrupted(true); + } +} + unsigned long int client_group::get_total_bytes(void) { unsigned long int total_bytes = 0; diff --git a/client.h b/client.h index 6f599a42..18f2727a 100755 --- a/client.h +++ b/client.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. 
* @@ -49,6 +49,9 @@ class object_generator; #define SET_CMD_IDX 0 #define GET_CMD_IDX 2 +// Stack buffer size for key operations to avoid heap allocation +#define KEY_BUFFER_STACK_SIZE 512 + enum get_key_response { not_available, available_for_conn, available_for_other_conn }; class client : public connections_manager { @@ -130,6 +133,9 @@ class client : public connections_manager { return m_config->arbitrary_commands->at(command_index); } + /* Get connections for crash reporting */ + std::vector& get_connections(void) { return m_connections; } + /* Set the arbitrary command index to the next to be executed */ void advance_arbitrary_command_index() { while(true) { @@ -210,13 +216,17 @@ class client_group { int create_clients(int count); int prepare(void); void run(void); + void interrupt(void); + void finalize_all_clients(void); + void set_all_clients_interrupted(void); void write_client_stats(const char *prefix); struct event_base *get_event_base(void) { return m_base; } benchmark_config *get_config(void) { return m_config; } abstract_protocol* get_protocol(void) { return m_protocol; } - object_generator* get_obj_gen(void) { return m_obj_gen; } + object_generator* get_obj_gen(void) { return m_obj_gen; } + std::vector& get_clients(void) { return m_clients; } unsigned long int get_total_bytes(void); unsigned long int get_total_ops(void); diff --git a/cluster_client.cpp b/cluster_client.cpp index 10065bce..56ac7581 100644 --- a/cluster_client.cpp +++ b/cluster_client.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/cluster_client.h b/cluster_client.h index c792f67b..93659a03 100644 --- a/cluster_client.h +++ b/cluster_client.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. 
* diff --git a/config_types.cpp b/config_types.cpp index 7525253b..2dcef1ff 100644 --- a/config_types.cpp +++ b/config_types.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/config_types.h b/config_types.h index 323d6a7c..b8ad8621 100644 --- a/config_types.h +++ b/config_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -27,6 +27,7 @@ #include #include +#include struct config_range { int min; @@ -100,7 +101,7 @@ struct server_addr { struct addrinfo *m_server_addr; struct addrinfo *m_used_addr; int m_resolution; - int m_last_error; + std::atomic m_last_error; // Atomic to prevent data race between resolve() and get_connect_info() }; #define KEY_PLACEHOLDER "__key__" @@ -114,9 +115,14 @@ enum command_arg_type { }; struct command_arg { - command_arg(const char* arg, unsigned int arg_len) : type(undefined_type), data(arg, arg_len) {;} + command_arg(const char* arg, unsigned int arg_len) : type(undefined_type), data(arg, arg_len), has_key_affixes(false) {;} command_arg_type type; std::string data; + // the prefix and suffix strings are used for mixed key placeholder storing of substrings + std::string data_prefix; + std::string data_suffix; + // optimization flag to avoid runtime checks + bool has_key_affixes; }; struct arbitrary_command { diff --git a/configure.ac b/configure.ac index 4f743e99..4c5a09e5 100755 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl configure.ac for memtier_benchmark -dnl Copyright (C) 2011-2024 Redis Labs Ltd. +dnl Copyright (C) 2011-2026 Redis Labs Ltd. dnl This file is part of memtier_benchmark. @@ -16,7 +16,7 @@ dnl You should have received a copy of the GNU General Public License dnl along with this program. If not, see . 
AC_PREREQ(2.59) -AC_INIT(memtier_benchmark,2.2.0,oss@redis.com) +AC_INIT(memtier_benchmark,255.255.255,oss@redis.com) AC_CONFIG_SRCDIR([memtier_benchmark.cpp]) AC_CONFIG_HEADER([config.h]) AM_INIT_AUTOMAKE @@ -38,7 +38,7 @@ AC_HEADER_DIRENT AC_CHECK_HEADERS([stdlib.h string.h sys/time.h getopt.h limits.h malloc.h stdlib.h unistd.h utime.h assert.h sys/socket.h sys/types.h]) AC_CHECK_HEADERS([fcntl.h netinet/tcp.h]) AC_CHECK_HEADERS([pthread.h]) -AC_CHECK_HEADERS([pcre.h zlib.h]) +AC_CHECK_HEADERS([zlib.h]) AC_CHECK_HEADERS([event2/event.h]) # Checks for typedefs, structures, and compiler characteristics. @@ -69,10 +69,43 @@ AS_IF([test "x$enable_tls" != "xno"], [ AC_SUBST(LIBCRYPTO_CFLAGS) AC_SUBST(LIBCRYPTO_LIBS)) ], []) +# Sanitizers support (ASAN/LSAN) is optional. +AC_ARG_ENABLE([sanitizers], + [AS_HELP_STRING([--enable-sanitizers], + [Enable AddressSanitizer and LeakSanitizer for memory error detection])]) +AS_IF([test "x$enable_sanitizers" = "xyes"], [ + AC_MSG_NOTICE([Enabling AddressSanitizer and LeakSanitizer]) + CXXFLAGS="$CXXFLAGS -fsanitize=address -fsanitize=leak -fno-omit-frame-pointer -O1" + LDFLAGS="$LDFLAGS -fsanitize=address -fsanitize=leak" + ], []) + +# UndefinedBehaviorSanitizer (UBSan) is optional and can be combined with ASAN. +AC_ARG_ENABLE([ubsan], + [AS_HELP_STRING([--enable-ubsan], + [Enable UndefinedBehaviorSanitizer for undefined behavior detection])]) +AS_IF([test "x$enable_ubsan" = "xyes"], [ + AC_MSG_NOTICE([Enabling UndefinedBehaviorSanitizer]) + CXXFLAGS="$CXXFLAGS -fsanitize=undefined -fno-omit-frame-pointer" + LDFLAGS="$LDFLAGS -fsanitize=undefined" + ], []) + +# Thread Sanitizer (TSAN) is optional and mutually exclusive with ASAN. 
+AC_ARG_ENABLE([thread-sanitizer], + [AS_HELP_STRING([--enable-thread-sanitizer], + [Enable ThreadSanitizer for data race detection])]) +AS_IF([test "x$enable_thread_sanitizer" = "xyes"], [ + AS_IF([test "x$enable_sanitizers" = "xyes"], [ + AC_MSG_ERROR([--enable-thread-sanitizer and --enable-sanitizers are mutually exclusive]) + ]) + AC_MSG_NOTICE([Enabling ThreadSanitizer]) + CXXFLAGS="$CXXFLAGS -fsanitize=thread -fno-omit-frame-pointer -O1" + LDFLAGS="$LDFLAGS -fsanitize=thread" + ], []) + # clock_gettime requires -lrt on old glibc only. AC_SEARCH_LIBS([clock_gettime], [rt], , AC_MSG_ERROR([rt is required libevent.])) -AC_CHECK_LIB([pcre], [pcre_compile], , AC_MSG_ERROR([pcre is required; try installing libpcre3-dev.])) + AC_CHECK_LIB([z], [deflateInit_], , AC_MSG_ERROR([zlib is required; try installing zlib1g-dev.])) AC_CHECK_LIB([pthread], [pthread_create], , AC_MSG_ERROR([pthread is required.])) AC_CHECK_LIB([socket], [gai_strerror]) diff --git a/connections_manager.h b/connections_manager.h index 1b7c5a32..3c9600db 100644 --- a/connections_manager.h +++ b/connections_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/debian/control b/debian/control index 77e9771f..06c63e8d 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: memtier-benchmark Section: admin Priority: optional Maintainer: Redis Ltd. 
-Build-Depends: debhelper-compat (= 10), dh-autoreconf, bash-completion, pkg-config, libpcre3-dev, libevent-dev, libssl-dev, zlib1g-dev +Build-Depends: debhelper-compat (= 10), dh-autoreconf, bash-completion, pkg-config, libevent-dev, libssl-dev, zlib1g-dev Standards-Version: 4.4.1 Homepage: https://github.com/RedisLabs/memtier_benchmark diff --git a/debian/copyright b/debian/copyright index 5ebead8b..32d0e2fc 100644 --- a/debian/copyright +++ b/debian/copyright @@ -4,7 +4,7 @@ Upstream-Contact: Redis Ltd. Source: https://github.com/RedisLabs/memtier_benchmark Files: * -Copyright: 2011-2020 Redis Ltd. +Copyright: 2011-2026 Redis Ltd. License: GPL-2+ with OpenSSL Exception Files: deps/hdr_histogram/* diff --git a/file_io.cpp b/file_io.cpp index 4990fb47..f9f3b915 100644 --- a/file_io.cpp +++ b/file_io.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/file_io.h b/file_io.h index 3c4cbd98..ae5201aa 100644 --- a/file_io.h +++ b/file_io.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/item.cpp b/item.cpp index 67c97719..84d6740a 100644 --- a/item.cpp +++ b/item.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/item.h b/item.h index 4e932296..2e0b088e 100644 --- a/item.h +++ b/item.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/memtier_benchmark.cpp b/memtier_benchmark.cpp index 09823548..a13625b1 100755 --- a/memtier_benchmark.cpp +++ b/memtier_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2019 Redis Labs Ltd. 
+ * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -16,15 +16,23 @@ * along with memtier_benchmark. If not, see . */ +// Define _XOPEN_SOURCE before including system headers for ucontext.h on macOS +#ifndef _XOPEN_SOURCE +#define _XOPEN_SOURCE 600 +#endif + #ifdef HAVE_CONFIG_H #include "config.h" #endif +#include "version.h" + #include #include #include #include #include +#include // For strcasecmp() on POSIX systems #include #include #include @@ -32,6 +40,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #ifdef USE_TLS #include @@ -56,6 +71,7 @@ #include #include +#include #include "client.h" #include "JSON_handler.h" @@ -64,6 +80,131 @@ static int log_level = 0; + +// Global flag for signal handling +static volatile sig_atomic_t g_interrupted = 0; + +// Forward declarations +struct cg_thread; +static void print_client_list(FILE* fp, int pid, const char* timestr); +static void print_all_threads_stack_trace(FILE* fp, int pid, const char* timestr); + +// Global pointer to threads for crash handler access +static std::vector* g_threads = NULL; + +// Signal handler for Ctrl+C +static void sigint_handler(int signum) +{ + (void)signum; // unused parameter + g_interrupted = 1; +} + +// Crash handler - prints stack trace and other debugging information +static void crash_handler(int sig, siginfo_t *info, void *secret) +{ + (void)secret; // unused parameter + struct tm *tm; + time_t now; + char timestr[64]; + + // Get current time + now = time(NULL); + tm = localtime(&now); + strftime(timestr, sizeof(timestr), "%d %b %Y %H:%M:%S", tm); + + // Print crash header + fprintf(stderr, "\n\n=== MEMTIER_BENCHMARK BUG REPORT START: Cut & paste starting from here ===\n"); + fprintf(stderr, "[%d] %s # memtier_benchmark crashed by signal: %d\n", getpid(), timestr, sig); + + // Print signal information + const char *signal_name = "UNKNOWN"; + switch(sig) { + case SIGSEGV: signal_name = 
"SIGSEGV"; break; + case SIGBUS: signal_name = "SIGBUS"; break; + case SIGFPE: signal_name = "SIGFPE"; break; + case SIGILL: signal_name = "SIGILL"; break; + case SIGABRT: signal_name = "SIGABRT"; break; + } + fprintf(stderr, "[%d] %s # Crashed running signal <%s>\n", getpid(), timestr, signal_name); + + if (info) { + fprintf(stderr, "[%d] %s # Signal code: %d\n", getpid(), timestr, info->si_code); + fprintf(stderr, "[%d] %s # Fault address: %p\n", getpid(), timestr, info->si_addr); + } + + // Print stack trace for all threads + print_all_threads_stack_trace(stderr, getpid(), timestr); + + // Print system information + fprintf(stderr, "\n[%d] %s # --- INFO OUTPUT\n", getpid(), timestr); + + struct utsname name; + if (uname(&name) == 0) { + fprintf(stderr, "[%d] %s # os:%s %s %s\n", getpid(), timestr, name.sysname, name.release, name.machine); + } + + fprintf(stderr, "[%d] %s # memtier_version:%s\n", getpid(), timestr, PACKAGE_VERSION); + fprintf(stderr, "[%d] %s # memtier_git_sha1:%s\n", getpid(), timestr, MEMTIER_GIT_SHA1); + fprintf(stderr, "[%d] %s # memtier_git_dirty:%s\n", getpid(), timestr, MEMTIER_GIT_DIRTY); + +#if defined(__x86_64__) || defined(_M_X64) + fprintf(stderr, "[%d] %s # arch_bits:64\n", getpid(), timestr); +#elif defined(__i386__) || defined(_M_IX86) + fprintf(stderr, "[%d] %s # arch_bits:32\n", getpid(), timestr); +#elif defined(__aarch64__) + fprintf(stderr, "[%d] %s # arch_bits:64\n", getpid(), timestr); +#elif defined(__arm__) + fprintf(stderr, "[%d] %s # arch_bits:32\n", getpid(), timestr); +#else + fprintf(stderr, "[%d] %s # arch_bits:unknown\n", getpid(), timestr); +#endif + +#ifdef __GNUC__ + fprintf(stderr, "[%d] %s # gcc_version:%d.%d.%d\n", getpid(), timestr, + __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); +#endif + + fprintf(stderr, "[%d] %s # libevent_version:%s\n", getpid(), timestr, event_get_version()); + +#ifdef USE_TLS + fprintf(stderr, "[%d] %s # openssl_version:%s\n", getpid(), timestr, OPENSSL_VERSION_TEXT); +#endif + + // 
Print client connection information + print_client_list(stderr, getpid(), timestr); + + fprintf(stderr, "[%d] %s # For more information, please check the core dump if available.\n", getpid(), timestr); + fprintf(stderr, "[%d] %s # To enable core dumps: ulimit -c unlimited\n", getpid(), timestr); + fprintf(stderr, "[%d] %s # Core pattern: /proc/sys/kernel/core_pattern\n", getpid(), timestr); + + fprintf(stderr, "\n=== MEMTIER_BENCHMARK BUG REPORT END. Make sure to include from START to END. ===\n\n"); + fprintf(stderr, " Please report this bug by opening an issue on github.com/RedisLabs/memtier_benchmark\n\n"); + + // Remove the handler and re-raise the signal to generate core dump + struct sigaction act; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND; + act.sa_handler = SIG_DFL; + sigaction(sig, &act, NULL); + raise(sig); +} + +// Setup crash handlers +static void setup_crash_handlers(void) +{ + struct sigaction act; + + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO; + act.sa_sigaction = crash_handler; + + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + sigaction(SIGABRT, &act, NULL); +} + void benchmark_log_file_line(int level, const char *filename, unsigned int line, const char *fmt, ...) { if (level > log_level) @@ -450,8 +591,8 @@ static int generate_random_seed() if (f) { size_t ignore = fread(&R, sizeof(R), 1, f); + (void)ignore; // Suppress unused variable warning fclose(f); - ignore++;//ignore warning } return (int)time(NULL)^getpid()^R; @@ -641,11 +782,36 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf return -1; break; case 'v': - puts(PACKAGE_STRING); - puts("Copyright (C) 2011-2024 Redis Ltd."); - puts("This is free software. 
You may redistribute copies of it under the terms of"); - puts("the GNU General Public License ."); - puts("There is NO WARRANTY, to the extent permitted by law."); + { + // Print version information similar to Redis format + // First line: memtier_benchmark v=... sha=... bits=... libevent=... openssl=... + printf("memtier_benchmark v=%s sha=%s:%s", PACKAGE_VERSION, MEMTIER_GIT_SHA1, MEMTIER_GIT_DIRTY); + + // Print architecture bits +#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) + printf(" bits=64"); +#elif defined(__i386__) || defined(_M_IX86) || defined(__arm__) + printf(" bits=32"); +#else + printf(" bits=unknown"); +#endif + + // Print libevent version + printf(" libevent=%s", event_get_version()); + + // Print OpenSSL version if TLS is enabled +#ifdef USE_TLS + printf(" openssl=%s", OPENSSL_VERSION_TEXT); +#endif + + printf("\n"); + + // Copyright and license info + printf("Copyright (C) 2011-2026 Redis Ltd.\n"); + printf("This is free software. You may redistribute copies of it under the terms of\n"); + printf("the GNU General Public License .\n"); + printf("There is NO WARRANTY, to the extent permitted by law.\n"); + } exit(0); case 's': case 'h': @@ -1222,7 +1388,7 @@ struct cg_thread { client_group* m_cg; abstract_protocol* m_protocol; pthread_t m_thread; - bool m_finished; + std::atomic m_finished; // Atomic to prevent data race between worker thread write and main thread read cg_thread(unsigned int id, benchmark_config* config, object_generator* obj_gen) : m_thread_id(id), m_config(config), m_obj_gen(obj_gen), m_cg(NULL), m_protocol(NULL), m_finished(false) @@ -1289,12 +1455,98 @@ void size_to_str(unsigned long int size, char *buf, int buf_len) } } +// Print client list for crash handler +static void print_client_list(FILE* fp, int pid, const char* timestr) +{ + if (g_threads != NULL) { + fprintf(fp, "\n[%d] %s # --- CLIENT LIST OUTPUT\n", pid, timestr); + + for (size_t t = 0; t < g_threads->size(); t++) { + cg_thread* thread = 
(*g_threads)[t]; + if (thread && thread->m_cg) { + std::vector& clients = thread->m_cg->get_clients(); + + for (size_t c = 0; c < clients.size(); c++) { + client* cl = clients[c]; + if (cl) { + std::vector& connections = cl->get_connections(); + + for (size_t conn_idx = 0; conn_idx < connections.size(); conn_idx++) { + shard_connection* conn = connections[conn_idx]; + if (conn) { + const char* state_str = "unknown"; + switch (conn->get_connection_state()) { + case conn_disconnected: state_str = "disconnected"; break; + case conn_in_progress: state_str = "connecting"; break; + case conn_connected: state_str = "connected"; break; + } + + int local_port = conn->get_local_port(); + const char* last_cmd = conn->get_last_request_type(); + + fprintf(fp, "[%d] %s # thread=%zu client=%zu conn=%zu addr=%s:%s local_port=%d state=%s pending=%d last_cmd=%s\n", + pid, timestr, t, c, conn_idx, + conn->get_address() ? conn->get_address() : "unknown", + conn->get_port() ? conn->get_port() : "unknown", + local_port, + state_str, + conn->get_pending_resp(), + last_cmd); + } + } + } + } + } + } + } +} + +// Helper function to print stack trace for all threads +static void print_all_threads_stack_trace(FILE* fp, int pid, const char* timestr) +{ + fprintf(fp, "\n[%d] %s # --- STACK TRACE (all threads)\n", pid, timestr); + + // Get the current (crashing) thread ID + pthread_t current_thread = pthread_self(); + + // Print main/crashing thread first + fprintf(fp, "[%d] %s # Thread %lu (current/crashing thread):\n", pid, timestr, (unsigned long)current_thread); + + void *trace[100]; + int trace_size = backtrace(trace, 100); + char **messages = backtrace_symbols(trace, trace_size); + for (int i = 1; i < trace_size; i++) { + fprintf(fp, "[%d] %s # %s\n", pid, timestr, messages[i]); + } + free(messages); + + // Now print stack traces for worker threads if available + if (g_threads != NULL) { + for (size_t t = 0; t < g_threads->size(); t++) { + cg_thread* thread = (*g_threads)[t]; + if (thread 
&& thread->m_thread) { + pthread_t tid = thread->m_thread; + + // Skip if this is the current thread (already printed) + if (pthread_equal(tid, current_thread)) { + continue; + } + + fprintf(fp, "[%d] %s # Thread %lu (worker thread %zu):\n", pid, timestr, (unsigned long)tid, t); + fprintf(fp, "[%d] %s # (Note: Stack trace for non-crashing threads not available on this platform)\n", pid, timestr); + } + } + } +} + run_stats run_benchmark(int run_id, benchmark_config* cfg, object_generator* obj_gen) { fprintf(stderr, "[RUN #%u] Preparing benchmark client...\n", run_id); // prepare threads data std::vector threads; + g_threads = &threads; // Set global pointer for crash handler + for (unsigned int i = 0; i < cfg->threads; i++) { cg_thread* t = new cg_thread(i, cfg, obj_gen); assert(t != NULL); @@ -1320,11 +1572,33 @@ run_stats run_benchmark(int run_id, benchmark_config* cfg, object_generator* obj unsigned long int cur_bytes_sec = 0; // provide some feedback... + // NOTE: Reading stats from worker threads without synchronization is a benign race. + // These stats are only for progress display and are approximate. Final results are + // collected after pthread_join() when all threads have finished (race-free). 
unsigned int active_threads = 0; do { active_threads = 0; sleep(1); + // Check for Ctrl+C interrupt + if (g_interrupted) { + // Calculate elapsed time before interrupting + unsigned long int elapsed_duration = 0; + unsigned int thread_counter = 0; + for (std::vector::iterator i = threads.begin(); i != threads.end(); i++) { + thread_counter++; + float factor = ((float)(thread_counter - 1) / thread_counter); + elapsed_duration = factor * elapsed_duration + (float)(*i)->m_cg->get_duration_usec() / thread_counter; + } + fprintf(stderr, "\n[RUN #%u] Interrupted by user (Ctrl+C) after %.1f secs, stopping threads...\n", + run_id, (float)elapsed_duration / 1000000); + // Interrupt all threads (marks clients as interrupted, breaks event loops, and finalizes stats) + for (std::vector::iterator i = threads.begin(); i != threads.end(); i++) { + (*i)->m_cg->interrupt(); + } + break; + } + unsigned long int total_ops = 0; unsigned long int total_bytes = 0; unsigned long int duration = 0; @@ -1410,6 +1684,8 @@ run_stats run_benchmark(int run_id, benchmark_config* cfg, object_generator* obj delete t; } + g_threads = NULL; // Clear global pointer + return stats; } @@ -1492,6 +1768,21 @@ static void cleanup_openssl(void) int main(int argc, char *argv[]) { + // Install signal handler for Ctrl+C + signal(SIGINT, sigint_handler); + + // Install crash handlers for debugging + setup_crash_handlers(); + + // Enable core dumps + struct rlimit core_limit; + core_limit.rlim_cur = RLIM_INFINITY; + core_limit.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_CORE, &core_limit) != 0) { + fprintf(stderr, "warning: failed to set core dump limit: %s\n", strerror(errno)); + fprintf(stderr, "warning: core dumps may not be generated on crash\n"); + } + benchmark_config cfg = benchmark_config(); cfg.arbitrary_commands = new arbitrary_command_list(); @@ -1956,6 +2247,16 @@ int main(int argc, char *argv[]) delete cfg.arbitrary_commands; } + // Clean up dynamically allocated strings from URI parsing + if 
(cfg.uri) { + if (cfg.server) { + free((void*)cfg.server); + } + if (cfg.authenticate) { + free((void*)cfg.authenticate); + } + } + #ifdef USE_TLS if(cfg.tls) { if (cfg.openssl_ctx) { diff --git a/memtier_benchmark.h b/memtier_benchmark.h index 6eb57ffa..eb289057 100644 --- a/memtier_benchmark.h +++ b/memtier_benchmark.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/obj_gen.cpp b/obj_gen.cpp index f56d95d0..2319a312 100644 --- a/obj_gen.cpp +++ b/obj_gen.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -159,7 +159,6 @@ object_generator::object_generator(size_t n_key_iterators/*= OBJECT_GENERATOR_KE m_key_zipf_Hmax(0), m_key_zipf_s(0), m_value_buffer(NULL), - m_random_fd(-1), m_value_buffer_size(0), m_value_buffer_mutation_pos(0) { @@ -189,7 +188,6 @@ object_generator::object_generator(const object_generator& copy) : m_key_zipf_Hmax(copy.m_key_zipf_Hmax), m_key_zipf_s(copy.m_key_zipf_s), m_value_buffer(NULL), - m_random_fd(-1), m_value_buffer_size(0), m_value_buffer_mutation_pos(0) { @@ -197,7 +195,7 @@ object_generator::object_generator(const object_generator& copy) : m_data_size.size_list != NULL) { m_data_size.size_list = new config_weight_list(*m_data_size.size_list); } - alloc_value_buffer(copy.m_value_buffer); + alloc_value_buffer(); m_next_key.resize(copy.m_next_key.size(), 0); } @@ -210,10 +208,6 @@ object_generator::~object_generator() m_data_size.size_list != NULL) { delete m_data_size.size_list; } - if (m_random_fd != -1) { - close(m_random_fd); - m_random_fd = -1; - } } object_generator* object_generator::clone(void) @@ -226,65 +220,19 @@ void object_generator::set_random_seed(int seed) m_random.set_seed(seed); } -void object_generator::alloc_value_buffer(void) +void object_generator::fill_value_buffer() { - unsigned int size = 
0; - - if (m_value_buffer != NULL) - free(m_value_buffer), m_value_buffer = NULL; + if (m_value_buffer == NULL) return; - if (m_data_size_type == data_size_fixed) - size = m_data_size.size_fixed; - else if (m_data_size_type == data_size_range) - size = m_data_size.size_range.size_max; - else if (m_data_size_type == data_size_weighted) { - size = m_data_size.size_list->largest(); - } - - m_value_buffer_size = size; - if (size > 0) { - m_value_buffer = (char*) malloc(size); - assert(m_value_buffer != NULL); - if (!m_random_data) { - memset(m_value_buffer, 'x', size); - } else { - if (m_random_fd == -1) { - m_random_fd = open("/dev/urandom", O_RDONLY); - assert(m_random_fd != -1); - } - - char buf1[64] = { 0 }; - char buf2[64] = { 0 }; - unsigned int buf1_idx = sizeof(buf1); - unsigned int buf2_idx = sizeof(buf2); - char *d = m_value_buffer; - int ret; - int iter = 0; - while (d - m_value_buffer < size) { - if (buf1_idx == sizeof(buf1)) { - buf1_idx = 0; - buf2_idx++; - - if (buf2_idx >= sizeof(buf2)) { - if (iter % 20 == 0) { - ret = read(m_random_fd, buf1, sizeof(buf1)); - assert(ret > -1); - ret = read(m_random_fd, buf2, sizeof(buf2)); - assert(ret > -1); - } - buf2_idx = 0; - iter++; - } - } - *d = buf1[buf1_idx] ^ buf2[buf2_idx] ^ iter; - d++; - buf1_idx++; - } - } + if (!m_random_data) { + memset(m_value_buffer, 'x', m_value_buffer_size); + } else { + for(unsigned int i=0; i < m_value_buffer_size; i++) + m_value_buffer[i] = m_random.get_random(); } } -void object_generator::alloc_value_buffer(const char* copy_from) +void object_generator::alloc_value_buffer(void) { unsigned int size = 0; @@ -302,7 +250,6 @@ void object_generator::alloc_value_buffer(const char* copy_from) if (size > 0) { m_value_buffer = (char*) malloc(size); assert(m_value_buffer != NULL); - memcpy(m_value_buffer, copy_from, size); } } @@ -517,8 +464,10 @@ const char* object_generator::get_value(unsigned long long key_index, unsigned i // modify object content in case of random data if 
(m_random_data) { m_value_buffer[m_value_buffer_mutation_pos++]++; - if (m_value_buffer_mutation_pos >= m_value_buffer_size) + if (m_value_buffer_mutation_pos >= m_value_buffer_size) { m_value_buffer_mutation_pos = 0; + fill_value_buffer(); // generate completely new random data + } } *len = new_size; diff --git a/obj_gen.h b/obj_gen.h index 7b59b7df..f5a4d8d5 100644 --- a/obj_gen.h +++ b/obj_gen.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -99,13 +99,11 @@ class object_generator { const char *m_key; int m_key_len; char *m_value_buffer; - int m_random_fd; gaussian_noise m_random; unsigned int m_value_buffer_size; unsigned int m_value_buffer_mutation_pos; void alloc_value_buffer(void); - void alloc_value_buffer(const char* copy_from); void random_init(void); public: object_generator(size_t n_key_iterators = OBJECT_GENERATOR_KEY_ITERATORS); @@ -128,6 +126,7 @@ class object_generator { void set_key_distribution(double key_stddev, double key_median); void set_key_zipf_distribution(double key_exp); void set_random_seed(int seed); + void fill_value_buffer(); unsigned long long get_key_index(int iter); void generate_key(unsigned long long key_index); const char * get_key() { return m_key; } diff --git a/protocol.cpp b/protocol.cpp index 5bb14dcd..cb6e3c7a 100644 --- a/protocol.cpp +++ b/protocol.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. 
* @@ -713,8 +713,9 @@ bool redis_protocol::format_arbitrary_command(arbitrary_command &cmd) { // check arg type if (current_arg->data.find(KEY_PLACEHOLDER) != std::string::npos) { if (current_arg->data.length() != strlen(KEY_PLACEHOLDER)) { - benchmark_error_log("error: key placeholder can't combined with other data\n"); - return false; + current_arg->has_key_affixes = true; + current_arg->data_prefix = current_arg->data.substr(0, current_arg->data.find(KEY_PLACEHOLDER)); + current_arg->data_suffix = current_arg->data.substr(current_arg->data.find(KEY_PLACEHOLDER) + strlen(KEY_PLACEHOLDER)); } cmd.keys_count++; current_arg->type = key_type; diff --git a/protocol.h b/protocol.h index 05b53bf6..71aa1533 100644 --- a/protocol.h +++ b/protocol.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/run_stats.cpp b/run_stats.cpp index a5e38a08..cbb0216e 100644 --- a/run_stats.cpp +++ b/run_stats.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -112,6 +112,7 @@ inline timeval timeval_factorial_average(timeval a, timeval b, unsigned int weig run_stats::run_stats(benchmark_config *config) : m_config(config), + m_interrupted(false), m_totals(), m_cur_stats(0) { @@ -792,6 +793,11 @@ void run_stats::merge(const run_stats& other, int iteration) m_start_time = timeval_factorial_average( m_start_time, other.m_start_time, iteration ); m_end_time = timeval_factorial_average( m_end_time, other.m_end_time, iteration ); + // If any run was interrupted, mark the merged result as interrupted + if (other.m_interrupted) { + m_interrupted = true; + } + // aggregate the one_second_stats vectors. 
this is not efficient // but it's not really important (small numbers, not realtime) for (std::list::const_iterator other_i = other.m_stats.begin(); @@ -1221,6 +1227,7 @@ void run_stats::print_json(json_handler *jsonhandler, arbitrary_command_list& co jsonhandler->write_obj("Finish time","%lld", end_time_ms); jsonhandler->write_obj("Total duration","%lld", end_time_ms-start_time_ms); jsonhandler->write_obj("Time unit","\"%s\"","MILLISECONDS"); + jsonhandler->write_obj("Interrupted","\"%s\"", m_interrupted ? "true" : "false"); jsonhandler->close_nesting(); } std::vector timestamps = get_one_sec_cmd_stats_timestamp(); diff --git a/run_stats.h b/run_stats.h index 14796ea2..13f32e94 100644 --- a/run_stats.h +++ b/run_stats.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -93,6 +93,7 @@ class run_stats { struct timeval m_start_time; struct timeval m_end_time; + bool m_interrupted; totals m_totals; @@ -122,6 +123,8 @@ class run_stats { void setup_arbitrary_commands(size_t n_arbitrary_commands); void set_start_time(struct timeval* start_time); void set_end_time(struct timeval* end_time); + void set_interrupted(bool interrupted) { m_interrupted = interrupted; } + bool get_interrupted() const { return m_interrupted; } void update_get_op(struct timeval* ts, unsigned int bytes_rx, unsigned int bytes_tx, unsigned int latency, unsigned int hits, unsigned int misses); void update_set_op(struct timeval* ts, unsigned int bytes_rx, unsigned int bytes_tx, unsigned int latency); diff --git a/run_stats_types.cpp b/run_stats_types.cpp index 7ae7b0e8..fcf22ec8 100644 --- a/run_stats_types.cpp +++ b/run_stats_types.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. 
* diff --git a/run_stats_types.h b/run_stats_types.h index 2b6e9820..1a227753 100644 --- a/run_stats_types.h +++ b/run_stats_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * diff --git a/shard_connection.cpp b/shard_connection.cpp index e8733082..20cc97c7 100644 --- a/shard_connection.cpp +++ b/shard_connection.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -350,6 +350,60 @@ const char* shard_connection::get_readable_id() { return m_readable_id.c_str(); } +int shard_connection::get_local_port() { + if (!m_bev) { + return -1; + } + + int fd = bufferevent_getfd(m_bev); + if (fd < 0) { + return -1; + } + + struct sockaddr_storage local_addr; + socklen_t addr_len = sizeof(local_addr); + + if (getsockname(fd, (struct sockaddr*)&local_addr, &addr_len) != 0) { + return -1; + } + + if (local_addr.ss_family == AF_INET) { + struct sockaddr_in* addr_in = (struct sockaddr_in*)&local_addr; + return ntohs(addr_in->sin_port); + } else if (local_addr.ss_family == AF_INET6) { + struct sockaddr_in6* addr_in6 = (struct sockaddr_in6*)&local_addr; + return ntohs(addr_in6->sin6_port); + } + + return -1; +} + +const char* shard_connection::get_last_request_type() { + if (!m_pipeline || m_pipeline->empty()) { + return "none"; + } + + // Get the last request in the pipeline (the one at the back) + // Note: We can't directly access the back of a std::queue, so we need to check the front + // which represents the oldest pending request + request* req = m_pipeline->front(); + if (!req) { + return "unknown"; + } + + switch (req->m_type) { + case rt_set: return "SET"; + case rt_get: return "GET"; + case rt_wait: return "WAIT"; + case rt_arbitrary: return "ARBITRARY"; + case rt_auth: return "AUTH"; + case rt_select_db: return "SELECT"; + case rt_cluster_slots: return 
"CLUSTER_SLOTS"; + case rt_hello: return "HELLO"; + default: return "unknown"; + } +} + request* shard_connection::pop_req() { request* req = m_pipeline->front(); m_pipeline->pop(); diff --git a/shard_connection.h b/shard_connection.h index 12fbaae3..cb4db734 100644 --- a/shard_connection.h +++ b/shard_connection.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 Redis Labs Ltd. + * Copyright (C) 2011-2026 Redis Labs Ltd. * * This file is part of memtier_benchmark. * @@ -132,6 +132,19 @@ class shard_connection { return m_connection_state; } + int get_pending_resp() { + return m_pending_resp; + } + + // Get local port for crash reporting + int get_local_port(); + + // Get last command type for crash reporting + const char* get_last_request_type(); + + void handle_reconnect_timer_event(); + void handle_connection_timeout_event(); + private: void setup_event(int sockfd); int setup_socket(struct connect_info* addr); diff --git a/tests/mb.py b/tests/mb.py new file mode 100644 index 00000000..fdbfb058 --- /dev/null +++ b/tests/mb.py @@ -0,0 +1,88 @@ +""" +Simple replacement for mbdirector package. +Contains only the Benchmark and RunConfig classes needed for tests. 
+""" +import os +import subprocess +import logging + + +class RunConfig(object): + """Configuration for a benchmark run.""" + next_id = 1 + + def __init__(self, base_results_dir, name, config, benchmark_config): + self.id = RunConfig.next_id + RunConfig.next_id += 1 + + self.redis_process_port = config.get('redis_process_port', 6379) + + mbconfig = config.get('memtier_benchmark', {}) + mbconfig.update(benchmark_config) + self.mb_binary = mbconfig.get('binary', 'memtier_benchmark') + self.mb_threads = mbconfig.get('threads') + self.mb_clients = mbconfig.get('clients') + self.mb_pipeline = mbconfig.get('pipeline') + self.mb_requests = mbconfig.get('requests') + self.mb_test_time = mbconfig.get('test_time') + self.explicit_connect_args = bool( + mbconfig.get('explicit_connect_args')) + + self.results_dir = os.path.join(base_results_dir, + '{:04}_{}'.format(self.id, name)) + + def __repr__(self): + return ''.format(self.id) + + +class Benchmark(object): + """Benchmark runner for memtier_benchmark.""" + + def __init__(self, config, **kwargs): + self.config = config + self.binary = self.config.mb_binary + self.name = kwargs['name'] + + # Configure + self.args = [self.binary] + if not self.config.explicit_connect_args: + self.args += ['--server', '127.0.0.1', + '--port', str(self.config.redis_process_port) + ] + self.args += ['--out-file', os.path.join(config.results_dir, + 'mb.stdout'), + '--json-out-file', os.path.join(config.results_dir, + 'mb.json')] + + if self.config.mb_threads is not None: + self.args += ['--threads', str(self.config.mb_threads)] + if self.config.mb_clients is not None: + self.args += ['--clients', str(self.config.mb_clients)] + if self.config.mb_pipeline is not None: + self.args += ['--pipeline', str(self.config.mb_pipeline)] + if self.config.mb_requests is not None: + self.args += ['--requests', str(self.config.mb_requests)] + if self.config.mb_test_time is not None: + self.args += ['--test-time', str(self.config.mb_test_time)] + + self.args += 
kwargs['args'] + + @classmethod + def from_json(cls, config, json): + return cls(config, **json) + + def write_file(self, name, data): + with open(os.path.join(self.config.results_dir, name), 'wb') as outfile: + outfile.write(data) + + def run(self): + logging.debug(' Command: %s', ' '.join(self.args)) + process = subprocess.Popen( + stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + executable=self.binary, args=self.args) + _stdout, _stderr = process.communicate() + if _stderr: + logging.debug(' >>> stderr <<<\n%s\n', _stderr) + self.write_file('mb.stderr', _stderr) + return process.wait() == 0 + diff --git a/tests/test_crash_handler_integration.py b/tests/test_crash_handler_integration.py new file mode 100755 index 00000000..a86d8fd5 --- /dev/null +++ b/tests/test_crash_handler_integration.py @@ -0,0 +1,293 @@ +import tempfile +import time +import signal +import subprocess +import os +from include import * +from mb import Benchmark, RunConfig + + +def test_crash_handler_with_active_connections(env): + """ + Test that crash handler prints CLIENT LIST OUTPUT when crashing with active connections. + This test starts a benchmark, waits 5 seconds, sends SEGV signal, and verifies the crash report. 
+ """ + # Setup benchmark configuration + benchmark_specs = { + "name": env.testName, + "args": ['--pipeline=10'] + } + addTLSArgs(benchmark_specs, env) + + # Use test_time instead of requests (they are mutually exclusive) + config = get_default_memtier_config(threads=2, clients=2, requests=None, test_time=60) + master_nodes_list = env.getMasterNodesList() + + add_required_env_arguments(benchmark_specs, config, env, master_nodes_list) + + # Create a temporary directory + test_dir = tempfile.mkdtemp() + + config = RunConfig(test_dir, env.testName, config, {}) + ensure_clean_benchmark_folder(config.results_dir) + + benchmark = Benchmark.from_json(config, benchmark_specs) + + # Build the command that will be run + print(f"Starting memtier_benchmark: {' '.join(benchmark.args)}") + + # Create log file for crash output + log_file = os.path.join(config.results_dir, "crash_output.log") + + # Start memtier_benchmark process in background + with open(log_file, 'w') as f: + proc = subprocess.Popen(benchmark.args, stdout=f, stderr=subprocess.STDOUT) + + try: + # Wait 5 seconds for connections to be established and benchmark to be running + print("Waiting 5 seconds for benchmark to start... and then sending SEGV signal to PID") + time.sleep(5) + + # Check if process is still running + if proc.poll() is not None: + with open(log_file, 'r') as f: + log_content = f.read() + print(f"ERROR: Process exited early. 
Log:\n{log_content}") + env.assertTrue(False, message="memtier_benchmark exited before we could send SEGV signal") + + # Send SEGV signal to trigger crash handler + print(f"Sending SEGV signal to PID {proc.pid}") + os.kill(proc.pid, signal.SIGSEGV) + + # Wait for process to crash + proc.wait(timeout=10) + + except subprocess.TimeoutExpired: + print("ERROR: Process did not exit after SEGV signal") + proc.kill() + proc.wait() + env.assertTrue(False, message="Process did not exit after SEGV signal") + + # Read the crash log + with open(log_file, 'r') as f: + crash_output = f.read() + + print(f"\n{'='*80}") + print("Crash output:") + print('='*80) + print(crash_output) + print('='*80) + + # Verify crash report contains expected sections + errors = [] + + if "=== MEMTIER_BENCHMARK BUG REPORT START" not in crash_output: + errors.append("Missing bug report start marker") + if "memtier_benchmark crashed by signal" not in crash_output: + errors.append("Missing crash signal message") + if "STACK TRACE" not in crash_output: + errors.append("Missing stack trace section") + if "INFO OUTPUT" not in crash_output: + errors.append("Missing info output section") + if "CLIENT LIST OUTPUT" not in crash_output: + errors.append("Missing client list output section") + if "=== MEMTIER_BENCHMARK BUG REPORT END" not in crash_output: + errors.append("Missing bug report end marker") + + # Verify CLIENT LIST OUTPUT contains expected information + # Should have 2 threads with 2 clients each = 4 total client entries + client_lines = [line for line in crash_output.split('\n') if 'thread=' in line and 'client=' in line] + if len(client_lines) < 4: + errors.append(f"Expected at least 4 client connection entries, found {len(client_lines)}") + + # Verify each client line has required fields + for line in client_lines: + if 'thread=' not in line: + errors.append(f"Missing thread field in: {line}") + if 'client=' not in line: + errors.append(f"Missing client field in: {line}") + if 'conn=' not in line: 
+ errors.append(f"Missing conn field in: {line}") + if 'addr=' not in line: + errors.append(f"Missing addr field in: {line}") + if 'state=' not in line: + errors.append(f"Missing state field in: {line}") + if 'pending=' not in line: + errors.append(f"Missing pending field in: {line}") + + # Verify at least some connections are in connected state + connected_lines = [line for line in client_lines if 'state=connected' in line] + if len(connected_lines) == 0: + errors.append("Expected at least some connections to be in 'connected' state") + + if errors: + print("\n❌ ERRORS:") + for error in errors: + print(f" - {error}") + env.assertTrue(False, message="Crash handler test failed: " + "; ".join(errors)) + + print(f"\n✅ SUCCESS!") + print(f"✓ Crash handler test passed! Found {len(client_lines)} client connections in crash report") + print(f"✓ {len(connected_lines)} connections were in 'connected' state") + + # Cleanup + try: + os.remove(log_file) + except: + pass + + # For RLTest compatibility - mark test as passed + env.assertTrue(True) + + +def test_crash_handler_worker_thread(env): + """ + Test that crash handler works when a worker thread crashes (not the main thread). + This test starts a benchmark, waits 5 seconds, sends SEGV signal to a worker thread, + and verifies the crash report shows the correct crashing thread. 
+ """ + # Setup benchmark configuration + benchmark_specs = { + "name": env.testName, + "args": ['--pipeline=10'] + } + addTLSArgs(benchmark_specs, env) + + # Use test_time instead of requests (they are mutually exclusive) + config = get_default_memtier_config(threads=2, clients=2, requests=None, test_time=60) + master_nodes_list = env.getMasterNodesList() + + add_required_env_arguments(benchmark_specs, config, env, master_nodes_list) + + # Create a temporary directory + test_dir = tempfile.mkdtemp() + + config = RunConfig(test_dir, env.testName, config, {}) + ensure_clean_benchmark_folder(config.results_dir) + + benchmark = Benchmark.from_json(config, benchmark_specs) + + # Build the command that will be run + print(f"Starting memtier_benchmark: {' '.join(benchmark.args)}") + + # Create log file for crash output + log_file = os.path.join(config.results_dir, "crash_output.log") + + # Start memtier_benchmark process in background + with open(log_file, 'w') as f: + proc = subprocess.Popen(benchmark.args, stdout=f, stderr=subprocess.STDOUT) + + try: + # Wait 5 seconds for connections to be established and benchmark to be running + print("Waiting 5 seconds for benchmark to start...") + time.sleep(5) + + # Check if process is still running + if proc.poll() is not None: + with open(log_file, 'r') as f: + log_content = f.read() + print(f"ERROR: Process exited early. 
Log:\n{log_content}") + env.assertTrue(False, message="memtier_benchmark exited before we could send SEGV signal") + + # Find worker threads by enumerating /proc//task/ + # Retry a few times in case threads are still being created + task_dir = f"/proc/{proc.pid}/task" + thread_ids = [] + max_retries = 10 + for retry in range(max_retries): + thread_ids = [] + try: + for tid in os.listdir(task_dir): + tid_int = int(tid) + # Skip the main thread (same as process ID) + if tid_int != proc.pid: + thread_ids.append(tid_int) + except (OSError, ValueError) as e: + print(f"ERROR: Could not enumerate threads: {e}") + proc.kill() + proc.wait() + env.assertTrue(False, message=f"Could not enumerate threads: {e}") + + if len(thread_ids) > 0: + print(f"Found {len(thread_ids)} worker threads: {thread_ids}") + break + + # Wait a bit and retry + print(f"No worker threads found yet, retry {retry+1}/{max_retries}...") + time.sleep(0.5) + + if len(thread_ids) == 0: + print("ERROR: No worker threads found after retries") + proc.kill() + proc.wait() + env.assertTrue(False, message="No worker threads found after retries") + + # Send SEGV signal to the first worker thread + target_tid = thread_ids[0] + print(f"Sending SEGV signal to worker thread TID {target_tid} (main PID: {proc.pid})") + os.kill(target_tid, signal.SIGSEGV) + + # Wait for process to crash + proc.wait(timeout=10) + + except subprocess.TimeoutExpired: + print("ERROR: Process did not exit after SEGV signal") + proc.kill() + proc.wait() + env.assertTrue(False, message="Process did not exit after SEGV signal") + + # Read the crash log + with open(log_file, 'r') as f: + crash_output = f.read() + + print(f"\n{'='*80}") + print("Crash output:") + print('='*80) + print(crash_output) + print('='*80) + + # Verify crash report contains expected sections + errors = [] + + if "=== MEMTIER_BENCHMARK BUG REPORT START" not in crash_output: + errors.append("Missing bug report start marker") + if "memtier_benchmark crashed by signal" not in 
crash_output: + errors.append("Missing crash signal message") + if "STACK TRACE" not in crash_output: + errors.append("Missing stack trace section") + if "current/crashing thread" not in crash_output: + errors.append("Missing current/crashing thread marker in stack trace") + if "worker thread" not in crash_output: + errors.append("Missing worker thread information in stack trace") + if "INFO OUTPUT" not in crash_output: + errors.append("Missing info output section") + if "CLIENT LIST OUTPUT" not in crash_output: + errors.append("Missing client list output section") + if "=== MEMTIER_BENCHMARK BUG REPORT END" not in crash_output: + errors.append("Missing bug report end marker") + + # Verify CLIENT LIST OUTPUT contains expected information + client_lines = [line for line in crash_output.split('\n') if 'thread=' in line and 'client=' in line] + if len(client_lines) < 4: + errors.append(f"Expected at least 4 client connection entries, found {len(client_lines)}") + + if errors: + print("\n❌ ERRORS:") + for error in errors: + print(f" - {error}") + env.assertTrue(False, message="Worker thread crash handler test failed: " + "; ".join(errors)) + + print(f"\n✅ SUCCESS!") + print(f"✓ Worker thread crash handler test passed!") + print(f"✓ Crash report correctly identified crashing worker thread") + print(f"✓ Found {len(client_lines)} client connections in crash report") + + # Cleanup + try: + os.remove(log_file) + except: + pass + + # For RLTest compatibility - mark test as passed + env.assertTrue(True) + diff --git a/tests/test_requirements.txt b/tests/test_requirements.txt index 8ca9e1a3..5a9469b1 100644 --- a/tests/test_requirements.txt +++ b/tests/test_requirements.txt @@ -1,3 +1,2 @@ redis>=3.0.0 -rltest==0.6.0 -git+https://github.com/RedisLabs/mbdirector.git@master +rltest>=0.7.17 diff --git a/tests/tests_oss_simple_flow.py b/tests/tests_oss_simple_flow.py index 2c059f78..922913de 100644 --- a/tests/tests_oss_simple_flow.py +++ 
b/tests/tests_oss_simple_flow.py @@ -1,8 +1,11 @@ import tempfile import json +import time +import signal +import subprocess +import os from include import * -from mbdirector.benchmark import Benchmark -from mbdirector.runner import RunConfig +from mb import Benchmark, RunConfig def test_preload_and_set_get(env): @@ -612,6 +615,76 @@ def test_default_arbitrary_command_hset_multi_data_placeholders(env): assert_minimum_memtier_outcomes(config, env, memtier_ok, overall_expected_request_count, overall_request_count) + +def test_key_placeholder(env): + env.skipOnCluster() + run_count = 1 + benchmark_specs = {"name": env.testName, "args": ['--command=HSET __key__ f __data__']} + addTLSArgs(benchmark_specs, env) + config = get_default_memtier_config() + master_nodes_list = env.getMasterNodesList() + overall_expected_request_count = get_expected_request_count(config) * run_count + + add_required_env_arguments(benchmark_specs, config, env, master_nodes_list) + + # Create a temporary directory + test_dir = tempfile.mkdtemp() + + config = RunConfig(test_dir, env.testName, config, {}) + ensure_clean_benchmark_folder(config.results_dir) + + benchmark = Benchmark.from_json(config, benchmark_specs) + + # benchmark.run() returns True if the return code of memtier_benchmark was 0 + memtier_ok = benchmark.run() + debugPrintMemtierOnError(config, env) + + master_nodes_connections = env.getOSSMasterNodesConnectionList() + merged_command_stats = {'cmdstat_hset': {'calls': 0}} + overall_request_count = agg_info_commandstats(master_nodes_connections, merged_command_stats) + assert_minimum_memtier_outcomes(config, env, memtier_ok, overall_expected_request_count, + overall_request_count) + + +# key placeholder combined with other data +def test_key_placeholder_togetherwithdata(env): + env.skipOnCluster() + run_count = 1 + benchmark_specs = {"name": env.testName, "args": ['--command=SET \"prefix:__key__:suffix\" \"__data__\"']} + addTLSArgs(benchmark_specs, env) + config = 
get_default_memtier_config(threads=4, clients=1,requests=50) + master_nodes_list = env.getMasterNodesList() + overall_expected_request_count = get_expected_request_count(config) * run_count + + add_required_env_arguments(benchmark_specs, config, env, master_nodes_list) + + # Create a temporary directory + test_dir = tempfile.mkdtemp() + + config = RunConfig(test_dir, env.testName, config, {}) + ensure_clean_benchmark_folder(config.results_dir) + + benchmark = Benchmark.from_json(config, benchmark_specs) + + # benchmark.run() returns True if the return code of memtier_benchmark was 0 + memtier_ok = benchmark.run() + debugPrintMemtierOnError(config, env) + + master_nodes_connections = env.getOSSMasterNodesConnectionList() + merged_command_stats = {'cmdstat_set': {'calls': 0}} + overall_request_count = agg_info_commandstats(master_nodes_connections, merged_command_stats) + assert_minimum_memtier_outcomes(config, env, memtier_ok, overall_expected_request_count, + overall_request_count) + + # Ensure all keys have the correct prefix and suffix + for conn in master_nodes_connections: + for key in conn.scan_iter("*"): + decoded_key = key.decode().split(":") + env.assertEqual(decoded_key[0], "prefix") + env.assertEqual(decoded_key[1].split("-")[0], "memtier") + env.assertEqual(decoded_key[2], "suffix") + + def test_default_set_get_rate_limited(env): env.skipOnCluster() master_nodes_list = env.getMasterNodesList() @@ -908,3 +981,88 @@ def test_uri_invalid_database(env): # benchmark.run() should return False for invalid database number memtier_ok = benchmark.run() env.assertFalse(memtier_ok) + + +def test_interrupt_signal_handling(env): + """Test that Ctrl+C (SIGINT) properly stops the benchmark and outputs correct statistics""" + # Use a large number of requests so the test doesn't finish before we interrupt it + benchmark_specs = {"name": env.testName, "args": ['--requests=1000000', '--hide-histogram']} + addTLSArgs(benchmark_specs, env) + config = 
get_default_memtier_config(threads=4, clients=50, requests=1000000) + master_nodes_list = env.getMasterNodesList() + + add_required_env_arguments(benchmark_specs, config, env, master_nodes_list) + + # Create a temporary directory + test_dir = tempfile.mkdtemp() + config = RunConfig(test_dir, env.testName, config, {}) + ensure_clean_benchmark_folder(config.results_dir) + + benchmark = Benchmark.from_json(config, benchmark_specs) + + # Start the benchmark process manually so we can send SIGINT + import logging + logging.debug(' Command: %s', ' '.join(benchmark.args)) + + stderr_file = open(os.path.join(config.results_dir, 'mb.stderr'), 'wb') + process = subprocess.Popen( + stdin=None, stdout=subprocess.PIPE, stderr=stderr_file, + executable=benchmark.binary, args=benchmark.args) + + # Wait 3 seconds then send SIGINT + time.sleep(3) + process.send_signal(signal.SIGINT) + + # Wait for process to finish + _stdout, _ = process.communicate() + stderr_file.close() + + # Write stdout to file + benchmark.write_file('mb.stdout', _stdout) + + # Read stderr to check for interrupt message + with open(os.path.join(config.results_dir, 'mb.stderr'), 'r') as stderr: + stderr_content = stderr.read() + # Check that the interrupt message is present and shows elapsed time + env.assertTrue("Interrupted by user (Ctrl+C) after" in stderr_content) + env.assertTrue("secs, stopping threads..." 
in stderr_content) + + # Check JSON output + json_filename = '{0}/mb.json'.format(config.results_dir) + env.assertTrue(os.path.isfile(json_filename)) + + with open(json_filename) as results_json: + results_dict = json.load(results_json) + + # Check that Runtime section exists and has Interrupted flag + env.assertTrue("ALL STATS" in results_dict) + env.assertTrue("Runtime" in results_dict["ALL STATS"]) + runtime = results_dict["ALL STATS"]["Runtime"] + + # Verify interrupted flag is set to "true" + env.assertTrue("Interrupted" in runtime) + env.assertEqual(runtime["Interrupted"], "true") + + # Verify duration is reasonable (should be around 3 seconds, give or take) + env.assertTrue("Total duration" in runtime) + duration_ms = runtime["Total duration"] + env.assertTrue(duration_ms >= 2000) # At least 2 seconds + env.assertTrue(duration_ms <= 5000) # At most 5 seconds + + # Verify that throughput metrics are NOT zero + totals_metrics = results_dict["ALL STATS"]["Totals"] + + # Check ops/sec is not zero + env.assertTrue("Ops/sec" in totals_metrics) + total_ops_sec = totals_metrics["Ops/sec"] + env.assertTrue(total_ops_sec > 0) + + # Check latency metrics are not zero + env.assertTrue("Latency" in totals_metrics) + total_latency = totals_metrics["Latency"] + env.assertTrue(total_latency > 0) + + # Check that we actually processed some operations + env.assertTrue("Count" in totals_metrics) + total_count = totals_metrics["Count"] + env.assertTrue(total_count > 0) diff --git a/tests/tests_oss_zipfian_distribution.py b/tests/tests_oss_zipfian_distribution.py index 9ac603cb..32816da1 100644 --- a/tests/tests_oss_zipfian_distribution.py +++ b/tests/tests_oss_zipfian_distribution.py @@ -13,8 +13,7 @@ agg_info_commandstats, assert_minimum_memtier_outcomes ) -from mbdirector.benchmark import Benchmark -from mbdirector.runner import RunConfig +from mb import Benchmark, RunConfig def correlation_coeficient(x: list[float], y: list[float]) -> float: diff --git 
a/tests/zipfian_benchmark_runner.py b/tests/zipfian_benchmark_runner.py
index e0ae0b71..f0398712 100644
--- a/tests/zipfian_benchmark_runner.py
+++ b/tests/zipfian_benchmark_runner.py
@@ -12,8 +12,7 @@
     assert_minimum_memtier_outcomes,
     get_expected_request_count,
 )
-from mbdirector.benchmark import Benchmark
-from mbdirector.runner import RunConfig
+from mb import Benchmark, RunConfig


class MonitorThread(threading.Thread):
diff --git a/tsan_suppressions.txt b/tsan_suppressions.txt
new file mode 100644
index 00000000..3094d9ac
--- /dev/null
+++ b/tsan_suppressions.txt
@@ -0,0 +1,22 @@
# ThreadSanitizer suppressions for memtier_benchmark
#
# This file contains suppressions for known benign data races that do not
# affect correctness. These races are intentionally left unfixed for performance.

# Benign race on stats counters during progress display
# Worker threads update stats while main thread reads them for progress updates.
# This is benign because:
# - Progress stats are approximate and for display only
# - Final results are collected after pthread_join (race-free)
# - Adding synchronization would hurt performance
race:run_stats::set_end_time
race:run_stats::get_duration_usec
race:run_stats::get_total_ops
race:run_stats::get_total_bytes
race:run_stats::get_total_latency
race:totals::update_op

# OpenSSL internal races (false positives in libcrypto)
# These are known benign races within OpenSSL library itself
race:libcrypto.so*

diff --git a/version.sh b/version.sh
new file mode 100755
index 00000000..32396294
--- /dev/null
+++ b/version.sh
@@ -0,0 +1,39 @@
#!/bin/sh
# Generate version information from git.
#
# Writes version.h defining MEMTIER_GIT_SHA1 (short commit hash) and
# MEMTIER_GIT_DIRTY ("1" when the working tree has uncommitted changes).
# Falls back to "00000000"/"0" when git is unavailable or this is not a
# git checkout.

GIT_SHA1="00000000"
GIT_DIRTY="0"

# Check if we're in a git repository.
# FIX: the repo test must be grouped. The original condition
#   command -v git ... && [ -d .git ] || git rev-parse ...
# parses as (A && B) || C, so `git rev-parse` was invoked even when git
# itself is not installed. Braces restore the intended A && (B || C).
if command -v git >/dev/null 2>&1 && { [ -d .git ] || git rev-parse --git-dir >/dev/null 2>&1; }; then
    # Get the short SHA1 (placeholder kept on any failure)
    GIT_SHA1=$(git rev-parse --short=8 HEAD 2>/dev/null || echo "00000000")

    # Check if working directory is dirty; test the exit status directly
    # instead of inspecting $? afterwards (ShellCheck SC2181).
    if git diff-index --quiet HEAD -- 2>/dev/null; then
        GIT_DIRTY="0"
    else
        GIT_DIRTY="1"
    fi
fi

# Generate the header file
cat > version.h.tmp << EOF
/* This file is automatically generated by version.sh */
#ifndef MEMTIER_VERSION_H
#define MEMTIER_VERSION_H

#define MEMTIER_GIT_SHA1 "$GIT_SHA1"
#define MEMTIER_GIT_DIRTY "$GIT_DIRTY"

#endif /* MEMTIER_VERSION_H */
EOF

# Only update version.h if it changed (to avoid unnecessary recompilation)
if [ ! -f version.h ] || ! cmp -s version.h.tmp version.h; then
    mv version.h.tmp version.h
else
    rm -f version.h.tmp
fi