Skip to content

Commit 0013e8c

Browse files
rongzha1 authored and TaoLv committed
graph: backend: dnnl: fix bug for pattern lnorm + tc + mul + q
1 parent 2340f5a commit 0013e8c

File tree

3 files changed

+275
-0
lines changed

3 files changed

+275
-0
lines changed

src/graph/backend/dnnl/kernels/layernorm.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ struct layernorm_fwd_t : public kernel_base_t {
9191
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor);
9292
BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect);
9393
BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op);
94+
95+
// broadcast and swap should be before fuse_post_ops
96+
BACKEND_DNNL_ADD_PASS(pipeline, binary_canonicalization);
97+
BACKEND_DNNL_ADD_PASS(pipeline, binary_broadcast_swap);
98+
9499
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
95100
BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales);
96101
BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales);

tests/benchdnn/inputs/graph/pattern/harness_int8_all

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,5 +117,8 @@
117117
# layernorm with zp != 0
118118
--reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json
119119
--reset --case=pattern/int8/int8_lnorm_multiply_quantize.json
120+
--reset --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json
121+
# layernorm with zp != 0 and broadcast binary
122+
--reset --op-attrs=3:zps:1 --in-shapes=5:512 --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json
120123
#softmax
121124
--reset --case=pattern/int8/int8_softmax_add.json
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
{
2+
"version": "3.5.0",
3+
"engine_kind": "cpu",
4+
"fpmath_mode": "strict",
5+
"input_ports": [
6+
0,
7+
1,
8+
2,
9+
5
10+
],
11+
"output_ports": [
12+
7
13+
],
14+
"graph": [
15+
{
16+
"id": 0,
17+
"name": "layernorm",
18+
"kind": "LayerNorm",
19+
"attrs": {
20+
"begin_norm_axis": {
21+
"type": "s64",
22+
"value": -1
23+
},
24+
"use_affine": {
25+
"type": "bool",
26+
"value": 1
27+
},
28+
"keep_stats": {
29+
"type": "bool",
30+
"value": 0
31+
},
32+
"epsilon": {
33+
"type": "f32",
34+
"value": 0.0625
35+
}
36+
},
37+
"inputs": [
38+
{
39+
"id": 0,
40+
"dtype": "bf16",
41+
"shape": [
42+
1,
43+
128,
44+
512
45+
],
46+
"stride": [
47+
65536,
48+
512,
49+
1
50+
],
51+
"layout_type": "strided",
52+
"property_type": "undef"
53+
},
54+
{
55+
"id": 1,
56+
"dtype": "f32",
57+
"shape": [
58+
512
59+
],
60+
"stride": [
61+
1
62+
],
63+
"layout_type": "strided",
64+
"property_type": "undef"
65+
},
66+
{
67+
"id": 2,
68+
"dtype": "f32",
69+
"shape": [
70+
512
71+
],
72+
"stride": [
73+
1
74+
],
75+
"layout_type": "strided",
76+
"property_type": "undef"
77+
}
78+
],
79+
"outputs": [
80+
{
81+
"id": 3,
82+
"dtype": "bf16",
83+
"shape": [
84+
1,
85+
128,
86+
512
87+
],
88+
"stride": [
89+
65536,
90+
512,
91+
1
92+
],
93+
"layout_type": "strided",
94+
"property_type": "undef"
95+
}
96+
]
97+
},
98+
{
99+
"id": 1,
100+
"name": "typecast",
101+
"kind": "TypeCast",
102+
"attrs": {},
103+
"inputs": [
104+
{
105+
"id": 3,
106+
"dtype": "bf16",
107+
"shape": [
108+
1,
109+
128,
110+
512
111+
],
112+
"stride": [
113+
65536,
114+
512,
115+
1
116+
],
117+
"layout_type": "strided",
118+
"property_type": "undef"
119+
}
120+
],
121+
"outputs": [
122+
{
123+
"id": 4,
124+
"dtype": "f32",
125+
"shape": [
126+
1,
127+
128,
128+
512
129+
],
130+
"stride": [
131+
65536,
132+
512,
133+
1
134+
],
135+
"layout_type": "strided",
136+
"property_type": "undef"
137+
}
138+
]
139+
},
140+
{
141+
"id": 2,
142+
"name": "multiply",
143+
"kind": "Multiply",
144+
"attrs": {
145+
"auto_broadcast": {
146+
"type": "string",
147+
"value": "numpy"
148+
}
149+
},
150+
"inputs": [
151+
{
152+
"id": 4,
153+
"dtype": "f32",
154+
"shape": [
155+
1,
156+
128,
157+
512
158+
],
159+
"stride": [
160+
65536,
161+
512,
162+
1
163+
],
164+
"layout_type": "strided",
165+
"property_type": "undef"
166+
},
167+
{
168+
"id": 5,
169+
"dtype": "f32",
170+
"shape": [
171+
1,
172+
128,
173+
512
174+
],
175+
"stride": [
176+
65536,
177+
512,
178+
1
179+
],
180+
"layout_type": "strided",
181+
"property_type": "undef"
182+
}
183+
],
184+
"outputs": [
185+
{
186+
"id": 6,
187+
"dtype": "f32",
188+
"shape": [
189+
1,
190+
128,
191+
512
192+
],
193+
"stride": [
194+
65536,
195+
512,
196+
1
197+
],
198+
"layout_type": "strided",
199+
"property_type": "undef"
200+
}
201+
]
202+
},
203+
{
204+
"id": 3,
205+
"name": "quantize",
206+
"kind": "Quantize",
207+
"attrs": {
208+
"axis": {
209+
"type": "s64",
210+
"value": 0
211+
},
212+
"qtype": {
213+
"type": "string",
214+
"value": "per_tensor"
215+
},
216+
"scales": {
217+
"type": "f32[]",
218+
"value": [
219+
0.5
220+
]
221+
},
222+
"zps": {
223+
"type": "s64[]",
224+
"value": [
225+
0
226+
]
227+
}
228+
},
229+
"inputs": [
230+
{
231+
"id": 6,
232+
"dtype": "f32",
233+
"shape": [
234+
1,
235+
128,
236+
512
237+
],
238+
"stride": [
239+
65536,
240+
512,
241+
1
242+
],
243+
"layout_type": "strided",
244+
"property_type": "undef"
245+
}
246+
],
247+
"outputs": [
248+
{
249+
"id": 7,
250+
"dtype": "s8",
251+
"shape": [
252+
1,
253+
128,
254+
512
255+
],
256+
"stride": [
257+
65536,
258+
512,
259+
1
260+
],
261+
"layout_type": "strided",
262+
"property_type": "undef"
263+
}
264+
]
265+
}
266+
]
267+
}

0 commit comments

Comments
 (0)