Skip to content

[zstd][cli] Add performance counters support to bench mode #4354

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions programs/benchzstd.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
#include <stdio.h> /* fprintf, fopen */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset, strerror */
#include "counters.h"
#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
#include "../lib/common/mem.h"
#include "benchfn.h"
#include "timefn.h" /* UTIL_time_t */

#ifndef ZSTD_STATIC_LINKING_ONLY
# define ZSTD_STATIC_LINKING_ONLY
#endif
Expand Down Expand Up @@ -541,6 +543,9 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
"Warning : time measurements may be incorrect in multithreading mode... \n")
}

/* FIXME(cavalcanti): only include this for Linux (!Android)@x86 */
BMK_linuxPerfCounters_t counters;

/* Bench */
{
U64 const crcOrig = (adv->mode == BMK_decodeOnly)
Expand Down Expand Up @@ -599,6 +604,12 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
displayName,
(unsigned)srcSize);

/* FIXME(cavalcanti): only include this for Linux (!Android)@x86 */
if (adv->cpuCounters) {
BMK_countersInit(&counters);
BMK_eventStart(&counters);
}

while (!(compressionCompleted && decompressionCompleted)) {
if (!compressionCompleted) {
BMK_runOutcome_t const cOutcome =
Expand Down Expand Up @@ -680,6 +691,13 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
markNb = (markNb + 1) % NB_MARKS;
} /* while (!(compressionCompleted && decompressionCompleted)) */

/* FIXME(cavalcanti): only include this for Linux (!Android)@x86 */
if (adv->cpuCounters) {
BMK_eventStop(&counters);
BMK_countersClose(&counters);
fprintf(stdout, "###### Perf cycles: %llu\n", counters.cycles);
}

/* CRC Checking */
{ const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
Expand Down Expand Up @@ -763,6 +781,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(

benchResult.cMem =
(1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);

return BMK_benchOutcome_setValidResult(benchResult);
}

Expand Down
1 change: 1 addition & 0 deletions programs/benchzstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ typedef struct {
int ldmHashRateLog;
ZSTD_ParamSwitch_e literalCompressionMode;
int useRowMatchFinder; /* use row-based matchfinder if possible */
int cpuCounters;
} BMK_advancedParams_t;

/* returns default parameters used by nonAdvanced functions */
Expand Down
97 changes: 97 additions & 0 deletions programs/counters.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

/****************************************************************************
* Performance counters
*
****************************************************************************/
#ifndef BENCH_ZSTD_COUNTERS
#define BENCH_ZSTD_COUNTERS
/* FIXME(cavalcanti): only include this for Linux (!Android)@x86 */
#include <inttypes.h>
#include <x86intrin.h>
#include <stdio.h>
#define _GNU_SOURCE
#include <asm/unistd.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <inttypes.h>
#include <sys/types.h>

/* BMK_linuxPerfCounters_t :
 * State for one Linux perf event used by the benchmark. */
typedef struct {
/* attribute block handed to the perf_event_open syscall */
struct perf_event_attr events;
/* descriptor stored from the syscall result; -1 means the open failed */
int fd;
/* running total of the values read back from fd on each stop */
long long cycles;
} BMK_linuxPerfCounters_t;

/* BMK_countersOpen() :
 * Issues the raw perf_event_open syscall with the attributes already stored
 * in counters->events and keeps the resulting descriptor in counters->fd.
 * The fixed arguments (pid 0, cpu -1, no group, no flags) ask for the calling
 * thread on any CPU, per perf_event_open(2).
 * @return : 0 when a descriptor was obtained, -1 otherwise. */
static int BMK_countersOpen(BMK_linuxPerfCounters_t* counters)
{
    pid_t const targetPid = 0;
    int const targetCpu = -1;
    int const groupFd = -1;
    unsigned long const openFlags = 0;

    counters->fd = syscall(__NR_perf_event_open, &counters->events,
                           targetPid, targetCpu, groupFd, openFlags);

    return (counters->fd == -1) ? -1 : 0;
}

/* BMK_countersInit() :
 * Prepares @counters to count user-space CPU cycles and opens the event.
 * The event is created disabled; kernel and hypervisor activity are excluded.
 * The accumulated cycle total is reset to 0.
 * @return : result of BMK_countersOpen() (0 on success, -1 on failure). */
static int BMK_countersInit(BMK_linuxPerfCounters_t* counters)
{
    /* Clear the attribute block itself, rather than relying on `events`
     * being the first member of the wrapper struct. */
    memset(&counters->events, 0, sizeof(counters->events));
    counters->events.type = PERF_TYPE_HARDWARE;
    counters->events.size = sizeof(counters->events);
    /* TODO(cavalcanti): Add more performance counters:
     * PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES,
     * PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES.
     */
    counters->events.config = PERF_COUNT_HW_CPU_CYCLES;
    counters->events.disabled = 1;        /* started later, by an explicit enable */
    counters->events.exclude_kernel = 1;
    counters->events.exclude_hv = 1;

    counters->cycles = 0;

    return BMK_countersOpen(counters);
}

/* BMK_eventStart() :
 * Resets the event's count, then enables it.
 * When counters->fd is -1 nothing is attempted and 0 is returned.
 * @return : -1 if the reset ioctl fails, otherwise the result of the
 *           enable ioctl. */
static int BMK_eventStart(BMK_linuxPerfCounters_t* counters)
{
    if (counters->fd == -1) return 0;

    if (ioctl(counters->fd, PERF_EVENT_IOC_RESET, 0) == -1) return -1;

    return ioctl(counters->fd, PERF_EVENT_IOC_ENABLE, 0);
}

/* BMK_eventStop() :
 * Disables the event, reads its current value and adds it to
 * counters->cycles.
 * @return : 0 on success; -1 if the descriptor is invalid, if the disable
 *           ioctl fails, or if a full counter value could not be read.
 *           counters->cycles is updated whenever the read itself succeeds. */
static int BMK_eventStop(BMK_linuxPerfCounters_t* counters)
{
    long long count = 0;
    int disableRes;

    if (counters->fd == -1) return -1;

    disableRes = ioctl(counters->fd, PERF_EVENT_IOC_DISABLE, 0);

    /* A short read would leave `count` partially filled: treat it as failure. */
    if (read(counters->fd, &count, sizeof(count)) != (ssize_t)sizeof(count)) return -1;
    counters->cycles += count;

    return (disableRes == -1) ? -1 : 0;
}

/* BMK_countersClose() :
 * Closes the event descriptor, if one is open, and marks it as closed so a
 * later start/stop/close does not act on a stale descriptor.
 * @return : 0 if there was nothing to close, otherwise the result of
 *           close() (0 on success, -1 on error). */
static int BMK_countersClose(BMK_linuxPerfCounters_t* counters)
{
    int res;

    if (counters->fd == -1) return 0;

    res = close(counters->fd);
    counters->fd = -1;

    return res;
}

#endif
12 changes: 12 additions & 0 deletions programs/zstdcli.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ static void usageAdvanced(const char* programName)
DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
DISPLAYOUT(" -i# Set the minimum evaluation to time # seconds. [Default: 3]\n");
DISPLAYOUT(" -y# Collect CPU counters.\n");
DISPLAYOUT(" --split=# Split input into independent chunks of size #. [Default: No chunking]\n");
DISPLAYOUT(" -S Output one benchmark result per input file. [Default: Consolidated result]\n");
DISPLAYOUT(" -D dictionary Benchmark using dictionary \n");
Expand Down Expand Up @@ -882,6 +883,7 @@ int main(int argCount, const char* argv[])
cLevelLast = MINCLEVEL - 1, /* for benchmark range */
setThreads_non1 = 0;
unsigned nbWorkers = init_nbWorkers();
unsigned cpuCounters = 0; /* whether we want to harvest CPU counters during benchmark */
ZSTD_ParamSwitch_e mmapDict = ZSTD_ps_auto;
ZSTD_ParamSwitch_e useRowMatchFinder = ZSTD_ps_auto;
FIO_compressionType_t cType = FIO_zstdCompression;
Expand Down Expand Up @@ -1316,6 +1318,15 @@ int main(int argCount, const char* argv[])
compressibility = (double)readU32FromChar(&argument) / 100;
break;

/* Harvest performance counters */
case 'y':
argument++;
cpuCounters = 1;
/* Collecting performance counters requires single threaded mode for now */
nbWorkers = 0;
singleThread = 1;
break;

/* unknown command */
default :
{ char shortArgument[3] = {'-', 0, 0};
Expand Down Expand Up @@ -1423,6 +1434,7 @@ int main(int argCount, const char* argv[])
benchParams.ldmMinMatch = (int)g_ldmMinMatch;
benchParams.ldmHashLog = (int)g_ldmHashLog;
benchParams.useRowMatchFinder = (int)useRowMatchFinder;
benchParams.cpuCounters = (int)cpuCounters;
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;
}
Expand Down
Loading