Skip to content

Commit 9014cd4

Browse files
Kaiwei Tu authored and facebook-github-bot committed
Add a new ai benchmark suite in DCPerf (#139)
Summary: Pull Request resolved: #139

This diff introduces a benchmark suite named `ai` into DCPerf. The `ai` benchmark suite currently includes two embedding jobs, which install `fbgemm_gpu` and run the corresponding benchmark with representative parameters collected from a production model.

To use it:

```
./benchpress -b ai list
./benchpress -b ai install
./benchpress -b ai run
```

The changes include:
- Add a new parser for the embedding benchmark.
- Add `benchmarks_ai.yml` for the ai benchmark suite.
- Update the `jobs_ai.yml` file to include jobs for the ai benchmark suite.
- Update the `__init__.py` files to register the new parser and the new benchmark suite.
- `install_embedding.sh` builds `fbgemm_gpu` from source.
- `cleanup_embedding.sh` cleans up the executable from the benchmarks directory.

Reviewed By: excelle08
Differential Revision: D76377213
fbshipit-source-id: a97a0b35703365f07aac69ea19ee53874c585e5e
1 parent fd2773a commit 9014cd4

File tree

7 files changed

+1152
-0
lines changed

7 files changed

+1152
-0
lines changed

benchpress/config/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def register_benchmark_suite(name):
7070
register_benchmark_suite("internal")
7171
register_benchmark_suite("wdl")
7272
register_benchmark_suite("system")
73+
register_benchmark_suite("ai")
7374

7475

7576
class BenchpressConfig:

benchpress/config/benchmarks_ai.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fbgemm_embedding:
2+
parser: embedding
3+
install_script: ./packages/fbgemm_gpu/install_embedding.sh
4+
cleanup_script: ./packages/fbgemm_gpu/cleanup_embedding.sh
5+
path: ./benchmarks/fbgemm_embedding/tbe_inference_benchmark
6+
metrics:
7+
- bandwidth

benchpress/config/jobs_ai.yml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
- benchmark: fbgemm_embedding
2+
name: fbgemm_embedding_a_single
3+
description: Embedding workload for ai benchmark with one representative table
4+
args:
5+
- nbit-cpu
6+
- '--num-embeddings={embeddings}'
7+
- '--bag-size={bag_size}'
8+
- '--embedding-dim={embedding_dim}'
9+
- '--batch-size={batch_size}'
10+
- '--num-tables={num_tables}'
11+
- '--weights-precision={weights_precision}'
12+
- '--copies={copies}'
13+
- '--iters={iters}'
14+
15+
vars:
16+
- 'embeddings=40000000'
17+
- 'bag_size=2'
18+
- 'embedding_dim=96'
19+
- 'batch_size=166'
20+
- 'num_tables=1'
21+
- 'weights_precision=int4'
22+
- 'copies=16'
23+
- 'iters=1000'
24+
25+
- benchmark: fbgemm_embedding
26+
name: fbgemm_embedding_a_spec
27+
description: Embedding workload for ai benchmark with 8 representative tables having different bag sizes.
28+
args:
29+
- nbit-device-with-spec
30+
- '--num-embeddings-list={embeddings_list}'
31+
- '--bag-size-list={bag_size_list}'
32+
- '--embedding-dim-list={embedding_dim_list}'
33+
- '--batch-size={batch_size}'
34+
- '--weights-precision={weights_precision}'
35+
- '--cpu-copies={copies}'
36+
- '--iters={iters}'
37+
- '--use-cpu'
38+
39+
vars:
40+
- 'embeddings_list=40000000,40000000,40000000,40000000,40000000,40000000,40000000,40000000'
41+
- 'bag_size_list=1,1,1,1,1,1,1,5'
42+
- 'embedding_dim_list=96,96,96,96,96,96,96,96'
43+
- 'batch_size=166'
44+
- 'weights_precision=int4'
45+
- 'copies=16'
46+
- 'iters=1000'
47+
48+
49+
- benchmark: fbgemm_embedding
50+
name: fbgemm_embedding_b_spec
51+
description: Embedding workload for ai benchmark with 11 representative tables having different numbers of embeddings.
52+
args:
53+
- nbit-device-with-spec
54+
- '--num-embeddings-list={embeddings_list}'
55+
- '--bag-size-list={bag_size_list}'
56+
- '--embedding-dim-list={embedding_dim_list}'
57+
- '--batch-size={batch_size}'
58+
- '--weights-precision={weights_precision}'
59+
- '--cpu-copies={copies}'
60+
- '--iters={iters}'
61+
- '--use-cpu'
62+
63+
vars:
64+
- 'embeddings_list=100000,100000,100000,100000,100000,5000000,5000000,5000000,10000,10000,14794452'
65+
- 'bag_size_list=1,1,1,1,1,1,1,1,1,1,1'
66+
- 'embedding_dim_list=64,64,64,64,64,64,64,64,64,64,64'
67+
- 'batch_size=166'
68+
- 'weights_precision=int4'
69+
- 'copies=16'
70+
- 'iters=1000'

benchpress/plugins/parsers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .cloudsuite_graph import CloudSuiteGraphParser
1616
from .compression_parser import CompressionParser
1717
from .django_workload import DjangoWorkloadParser
18+
from .embedding import EmbeddingParser
1819
from .encryption import EncryptionParser
1920
from .fb_fiosynth import Fiosynth_Parser
2021
from .fbgemm import FbgemmParser
@@ -100,6 +101,7 @@ def register_parsers(factory):
100101
factory.register("wdl_bench", WDLParser)
101102
factory.register("health_check", HealthCheckParser)
102103
factory.register("syscall", SyscallParser)
104+
factory.register("embedding", EmbeddingParser)
103105

104106
if not open_source:
105107
factory.register("adsim", AdSimParser)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
#
4+
# This source code is licensed under the MIT license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# pyre-unsafe
8+
import re
9+
10+
from benchpress.lib.parser import Parser
11+
12+
13+
class EmbeddingParser(Parser):
    """Extract the reported bandwidth from the embedding benchmark output.

    The parser scans ``stderr`` for fragments of the form ``BW: <number> GB/s``
    and reports the number as the ``bandwidth`` metric.
    """

    # Accepts both "BW: 123.45 GB/s" and an integer-valued "BW: 123 GB/s";
    # requiring a decimal point would silently drop the latter.
    _BANDWIDTH_RE = re.compile(r"BW:\s*(\d+(?:\.\d+)?)\s*GB/s")

    def parse(self, stdout, stderr, returncode):
        """Return a dict holding the ``bandwidth`` metric in GB/s.

        Args:
            stdout: Unused by this parser.
            stderr: Iterable of output lines to scan (assumed to be the
                benchmark's stderr split into lines -- confirm against the
                caller).
            returncode: Unused by this parser.

        Returns:
            ``{"bandwidth": <float>}`` if at least one line matched, otherwise
            an empty dict. When several lines match, the last one wins.
        """
        metrics = {}
        for line in stderr:
            match = self._BANDWIDTH_RE.search(line)
            if match:
                metrics["bandwidth"] = float(match.group(1))

        return metrics
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
#
4+
# This source code is licensed under the MIT license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
8+
# Stop at the first failing command or unset variable so that the script does
# not carry on to `rm -rf` after an earlier step has failed.
set -euo pipefail

AI_BENCH_ROOT="$(dirname "$(readlink -f "$0")")" # Path to dir with this file.
BENCHPRESS_ROOT="$(readlink -f "$AI_BENCH_ROOT/../..")"
BENCHMARKS_DIR="${BENCHPRESS_ROOT}/benchmarks/fbgemm_embedding"
# Path of the installed embedding benchmark that this script removes.
EMBEDDING_BENCH_PATH="${BENCHMARKS_DIR}/tbe_inference_benchmark"

# `--` ends option parsing; `:?` aborts if the path is unexpectedly empty.
rm -rf -- "${EMBEDDING_BENCH_PATH:?}"

0 commit comments

Comments
 (0)