[CPU] FullyConnected acceleration for BF16 compressed weights

dmitrygo · dmitrygo · commit 55908c4b3d8b · 2024-05-22T17:40:11.000+04:00
diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
@@ -122,7 +122,7 @@ bool DnnlFCPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputT
             // f16c kernel saves memory footprint with additional decompression computational overhead
             // which is only meaningful on LLM with small batch-size.
             // TODO: fall-back to use f32 weights on large batch-size
-            if (inputType == f32 && weightsType == f16)
+            if (inputType == f32 && one_of(weightsType, f16, bf16))
                 return true;
         }
     }
diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit 373e65b660c0ba274631cf30c422f10606de1618
+Subproject commit 50be010d88432476ac7077ea3ae745386dcfde72

Original file line number	Diff line number	Diff line change
`@@ -122,7 +122,7 @@ bool DnnlFCPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputT`
`122`	`122`	`// f16c kernel saves memory footprint with additional decompression computational overhead`
`123`	`123`	`// which is only meaningful on LLM with small batch-size.`
`124`	`124`	`// TODO: fall-back to use f32 weights on large batch-size`
`125`		`- if (inputType == f32 && weightsType == f16)`
	`125`	`+ if (inputType == f32 && one_of(weightsType, f16, bf16))`
`126`	`126`	`return true;`
`127`	`127`	`}`
`128`	`128`	`}`