Skip to content

Commit 720e98b

Browse files
Jiri Olsaacmel
Jiri Olsa
authored and committed
perf tools: Add perf data cache feature
Store CPU cache details in perf data. They are stored as the new HEADER_CACHE feature and are displayed in the header info with the -I option:

  $ perf report --header-only -I
  ...
  # CPU cache info:
  #  L1 Data          32K [0-1]
  #  L1 Instruction   32K [0-1]
  #  L1 Data          32K [2-3]
  #  L1 Instruction   32K [2-3]
  #  L2 Unified      256K [0-1]
  #  L2 Unified      256K [2-3]
  #  L3 Unified     4096K [0-3]
  ...

All distinct caches are stored/displayed.

Signed-off-by: Jiri Olsa <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
[ Fixed leak on process_caches(), s/cache_level/cpu_cache_level/g ]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent dd629cc commit 720e98b

File tree

4 files changed

+299
-0
lines changed

4 files changed

+299
-0
lines changed

tools/perf/util/env.c

+13
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ struct perf_env perf_env;
66

77
void perf_env__exit(struct perf_env *env)
88
{
9+
int i;
10+
911
zfree(&env->hostname);
1012
zfree(&env->os_release);
1113
zfree(&env->version);
@@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env)
1921
zfree(&env->numa_nodes);
2022
zfree(&env->pmu_mappings);
2123
zfree(&env->cpu);
24+
25+
for (i = 0; i < env->caches_cnt; i++)
26+
cpu_cache_level__free(&env->caches[i]);
27+
zfree(&env->caches);
2228
}
2329

2430
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
@@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
7581
env->nr_cpus_avail = nr_cpus;
7682
return 0;
7783
}
84+
85+
void cpu_cache_level__free(struct cpu_cache_level *cache)
86+
{
87+
free(cache->type);
88+
free(cache->map);
89+
free(cache->size);
90+
}

tools/perf/util/env.h

+15
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,23 @@
11
#ifndef __PERF_ENV_H
22
#define __PERF_ENV_H
33

4+
#include <linux/types.h>
5+
46
/*
 * One topology entry: the socket and core identifiers.  struct perf_env
 * keeps a pointer to these in its 'cpu' member.
 */
struct cpu_topology_map {
	int	socket_id;
	int	core_id;
};
810

11+
struct cpu_cache_level {
12+
u32 level;
13+
u32 line_size;
14+
u32 sets;
15+
u32 ways;
16+
char *type;
17+
char *size;
18+
char *map;
19+
};
20+
921
struct perf_env {
1022
char *hostname;
1123
char *os_release;
@@ -31,6 +43,8 @@ struct perf_env {
3143
char *numa_nodes;
3244
char *pmu_mappings;
3345
struct cpu_topology_map *cpu;
46+
struct cpu_cache_level *caches;
47+
int caches_cnt;
3448
};
3549

3650
extern struct perf_env perf_env;
@@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
4155

4256
int perf_env__read_cpu_topology_map(struct perf_env *env);
4357

58+
void cpu_cache_level__free(struct cpu_cache_level *cache);
4459
#endif /* __PERF_ENV_H */

tools/perf/util/header.c

+270
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include "strbuf.h"
2424
#include "build-id.h"
2525
#include "data.h"
26+
#include <api/fs/fs.h>
27+
#include "asm/bug.h"
2628

2729
/*
2830
* magic2 = "PERFILE2"
@@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h,
868870
return err;
869871
}
870872

873+
static int cpu_cache_level__sort(const void *a, const void *b)
874+
{
875+
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
876+
struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
877+
878+
return cache_a->level - cache_b->level;
879+
}
880+
881+
static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
882+
{
883+
if (a->level != b->level)
884+
return false;
885+
886+
if (a->line_size != b->line_size)
887+
return false;
888+
889+
if (a->sets != b->sets)
890+
return false;
891+
892+
if (a->ways != b->ways)
893+
return false;
894+
895+
if (strcmp(a->type, b->type))
896+
return false;
897+
898+
if (strcmp(a->size, b->size))
899+
return false;
900+
901+
if (strcmp(a->map, b->map))
902+
return false;
903+
904+
return true;
905+
}
906+
907+
/*
 * Fill @cache from the sysfs directory
 * devices/system/cpu/cpu<cpu>/cache/index<level>/.
 *
 * Returns:
 *    1  the index directory does not exist (stat() failed); @cache untouched
 *   -1  one of the attributes could not be read; any string allocated so
 *       far has been freed and its pointer reset to NULL
 *    0  success; type, size and map are allocated strings that the caller
 *       releases with cpu_cache_level__free()
 */
static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
{
	char path[PATH_MAX], file[PATH_MAX];
	struct stat st;
	size_t len;

	/* cpu is unsigned, so print it with %u. */
	scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%u/cache/index%d/", cpu, level);
	scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);

	/* stat() needs the full path, hence the mount point prefix here only. */
	if (stat(file, &st))
		return 1;

	scnprintf(file, PATH_MAX, "%s/level", path);
	if (sysfs__read_int(file, (int *) &cache->level))
		return -1;

	scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
	if (sysfs__read_int(file, (int *) &cache->line_size))
		return -1;

	scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
	if (sysfs__read_int(file, (int *) &cache->sets))
		return -1;

	scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
	if (sysfs__read_int(file, (int *) &cache->ways))
		return -1;

	scnprintf(file, PATH_MAX, "%s/type", path);
	if (sysfs__read_str(file, &cache->type, &len))
		return -1;

	cache->type[len] = 0;
	cache->type = rtrim(cache->type);

	scnprintf(file, PATH_MAX, "%s/size", path);
	if (sysfs__read_str(file, &cache->size, &len))
		goto out_free_type;

	cache->size[len] = 0;
	cache->size = rtrim(cache->size);

	scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
	if (sysfs__read_str(file, &cache->map, &len))
		goto out_free_size;	/* map was never allocated: free size and type */

	cache->map[len] = 0;
	cache->map = rtrim(cache->map);
	return 0;

out_free_size:
	free(cache->size);
	cache->size = NULL;
out_free_type:
	free(cache->type);
	cache->type = NULL;
	return -1;
}
962+
963+
static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
964+
{
965+
fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
966+
}
967+
968+
/*
 * Collect the distinct caches of all configured CPUs into @caches.
 *
 * For every CPU the cache index directories are read in order until one is
 * missing; a description is stored only if no equal one is stored already.
 *
 * @caches: output array with room for @size entries (@size must not be 0,
 *          the capacity check runs after an entry has been stored)
 * @size:   capacity of @caches
 * @cntp:   receives the number of entries stored; it is written on every
 *          return path, including errors, so the caller can always free
 *          exactly the entries that were stored
 *
 * Returns 0 on success (also when collection stops early because the array
 * is full), a negative value on failure.
 */
static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
{
	u32 i, cnt = 0;
	long ncpus;
	u32 nr, cpu;
	u16 level;
	int ret = 0;

	*cntp = 0;

	ncpus = sysconf(_SC_NPROCESSORS_CONF);
	if (ncpus < 0)
		return -1;

	nr = (u32)(ncpus & UINT_MAX);

	for (cpu = 0; cpu < nr; cpu++) {
		/* At most 10 cache index directories are probed per CPU. */
		for (level = 0; level < 10; level++) {
			struct cpu_cache_level c;
			int err;

			err = cpu_cache_level__read(&c, cpu, level);
			if (err < 0) {
				/* Report what is stored so the caller frees it. */
				ret = err;
				goto out;
			}

			/* No such index directory: done with this CPU. */
			if (err == 1)
				break;

			for (i = 0; i < cnt; i++) {
				if (cpu_cache_level__cmp(&c, &caches[i]))
					break;
			}

			if (i == cnt)
				caches[cnt++] = c;	/* new cache, array takes the strings */
			else
				cpu_cache_level__free(&c);	/* duplicate */

			if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
				goto out;
		}
	}
out:
	*cntp = cnt;
	return ret;
}
1011+
1012+
#define MAX_CACHES 2000
1013+
1014+
static int write_cache(int fd, struct perf_header *h __maybe_unused,
1015+
struct perf_evlist *evlist __maybe_unused)
1016+
{
1017+
struct cpu_cache_level caches[MAX_CACHES];
1018+
u32 cnt = 0, i, version = 1;
1019+
int ret;
1020+
1021+
ret = build_caches(caches, MAX_CACHES, &cnt);
1022+
if (ret)
1023+
goto out;
1024+
1025+
qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
1026+
1027+
ret = do_write(fd, &version, sizeof(u32));
1028+
if (ret < 0)
1029+
goto out;
1030+
1031+
ret = do_write(fd, &cnt, sizeof(u32));
1032+
if (ret < 0)
1033+
goto out;
1034+
1035+
for (i = 0; i < cnt; i++) {
1036+
struct cpu_cache_level *c = &caches[i];
1037+
1038+
#define _W(v) \
1039+
ret = do_write(fd, &c->v, sizeof(u32)); \
1040+
if (ret < 0) \
1041+
goto out;
1042+
1043+
_W(level)
1044+
_W(line_size)
1045+
_W(sets)
1046+
_W(ways)
1047+
#undef _W
1048+
1049+
#define _W(v) \
1050+
ret = do_write_string(fd, (const char *) c->v); \
1051+
if (ret < 0) \
1052+
goto out;
1053+
1054+
_W(type)
1055+
_W(size)
1056+
_W(map)
1057+
#undef _W
1058+
}
1059+
1060+
out:
1061+
for (i = 0; i < cnt; i++)
1062+
cpu_cache_level__free(&caches[i]);
1063+
return ret;
1064+
}
1065+
8711066
static int write_stat(int fd __maybe_unused,
8721067
struct perf_header *h __maybe_unused,
8731068
struct perf_evlist *evlist __maybe_unused)
@@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused,
11721367
fprintf(fp, "# contains stat data\n");
11731368
}
11741369

1370+
/*
 * Print the cache descriptions held in ph->env to @fp: a
 * "# CPU cache info:" heading followed by one "# "-prefixed line per cache.
 */
static void print_cache(struct perf_header *ph __maybe_unused,
			int fd __maybe_unused, FILE *fp __maybe_unused)
{
	int idx;

	fprintf(fp, "# CPU cache info:\n");

	for (idx = 0; idx < ph->env.caches_cnt; ++idx) {
		struct cpu_cache_level *cache = &ph->env.caches[idx];

		fprintf(fp, "# ");
		cpu_cache_level__fprintf(fp, cache);
	}
}
1381+
11751382
static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
11761383
FILE *fp)
11771384
{
@@ -1920,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section,
19202127
return err;
19212128
}
19222129

2130+
/*
 * Parse the HEADER_CACHE feature from @fd into ph->env.
 *
 * Expected layout: u32 version (only 1 is accepted), u32 count, then per
 * cache four u32 values (level, line_size, sets, ways) and three strings
 * (type, size, map).  The integers are byte swapped when ph->needs_swap is
 * set.
 *
 * On success ph->env.caches / ph->env.caches_cnt take ownership of the
 * parsed array and 0 is returned.  On any failure -1 is returned, ph->env is
 * left untouched and everything allocated here is freed, including the
 * strings of entries that were parsed completely or only partially.
 */
static int process_cache(struct perf_file_section *section __maybe_unused,
			 struct perf_header *ph __maybe_unused, int fd __maybe_unused,
			 void *data __maybe_unused)
{
	struct cpu_cache_level *caches;
	u32 cnt, i, version;

	if (readn(fd, &version, sizeof(version)) != sizeof(version))
		return -1;

	if (ph->needs_swap)
		version = bswap_32(version);

	if (version != 1)
		return -1;

	if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt))
		return -1;

	if (ph->needs_swap)
		cnt = bswap_32(cnt);

	/*
	 * cnt comes from the file: let calloc() check the multiplication.
	 * The zeroed array also makes the error path below safe, since
	 * unread entries hold NULL strings.
	 */
	caches = calloc(cnt, sizeof(*caches));
	if (!caches)
		return -1;

	for (i = 0; i < cnt; i++) {
		/* Read straight into the array so nothing is lost on error. */
		struct cpu_cache_level *c = &caches[i];

#define _R(v)							\
		if (readn(fd, &c->v, sizeof(u32)) != sizeof(u32))	\
			goto out_free_caches;				\
		if (ph->needs_swap)					\
			c->v = bswap_32(c->v);

		_R(level)
		_R(line_size)
		_R(sets)
		_R(ways)
#undef _R

#define _R(v)							\
		c->v = do_read_string(fd, ph);			\
		if (!c->v)						\
			goto out_free_caches;

		_R(type)
		_R(size)
		_R(map)
#undef _R
	}

	ph->env.caches = caches;
	ph->env.caches_cnt = cnt;
	return 0;

out_free_caches:
	for (i = 0; i < cnt; i++) {
		free(caches[i].type);
		free(caches[i].size);
		free(caches[i].map);
	}
	free(caches);
	return -1;
}
2191+
19232192
struct feature_ops {
19242193
int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
19252194
void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1962,6 +2231,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
19622231
FEAT_OPP(HEADER_GROUP_DESC, group_desc),
19632232
FEAT_OPP(HEADER_AUXTRACE, auxtrace),
19642233
FEAT_OPA(HEADER_STAT, stat),
2234+
FEAT_OPF(HEADER_CACHE, cache),
19652235
};
19662236

19672237
struct header_print_data {

tools/perf/util/header.h

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ enum {
3232
HEADER_GROUP_DESC,
3333
HEADER_AUXTRACE,
3434
HEADER_STAT,
35+
HEADER_CACHE,
3536
HEADER_LAST_FEATURE,
3637
HEADER_FEAT_BITS = 256,
3738
};

0 commit comments

Comments
 (0)