@@ -1705,7 +1705,7 @@ private let i8xEncode:
17051705 return 1
17061706 }
17071707
1708- private func i8xDecode (
1708+ private func i8xDecodeImpl (
17091709 _ data: UnsafeRawPointer ? , _ dataSize: Int , _ dataType: Int32 ,
17101710 _ dimensions: UnsafePointer < Int32 > ? , _ dimensionCount: Int32 , _ identifier: UInt32 ,
17111711 _ context: UnsafeMutableRawPointer ? , _ params: ccv_nnc_tensor_param_t ,
@@ -1746,9 +1746,26 @@ private func i8xDecode(
17461746 maxDecodedElements == numberOfElements
17471747 || ( maxDecodedElements > 0 && maxDecodedElements % rowLength == 0 )
17481748 else { return 0 }
1749- ccv_nnc_dequantize_8i_rowwise (
1750- data, dataType, Int32 ( CCV_TENSOR_CPU_MEMORY) , dataSize, rowLength, decoded,
1751- maxDecodedElements)
1749+ let originalScaleOffset = ( numberOfElements + 127 ) & - 128
1750+ let originalScaleSize = ( numberOfElements / rowLength) * elementSize
1751+ guard dataSize >= originalScaleOffset + originalScaleSize else { return 0 }
1752+ if maxDecodedElements == numberOfElements {
1753+ ccv_nnc_dequantize_8i_rowwise (
1754+ data, dataType, Int32 ( CCV_TENSOR_CPU_MEMORY) , dataSize, rowLength, decoded,
1755+ maxDecodedElements)
1756+ } else {
1757+ let partialScaleOffset = ( maxDecodedElements + 127 ) & - 128
1758+ let partialScaleSize = ( maxDecodedElements / rowLength) * elementSize
1759+ let partialDataSize = partialScaleOffset + partialScaleSize
1760+ let partialData = UnsafeMutableRawPointer . allocate (
1761+ byteCount: partialDataSize, alignment: MemoryLayout< UInt64> . alignment)
1762+ defer { partialData. deallocate ( ) }
1763+ memcpy ( partialData, data, maxDecodedElements)
1764+ memcpy ( partialData + partialScaleOffset, data + originalScaleOffset, partialScaleSize)
1765+ ccv_nnc_dequantize_8i_rowwise (
1766+ partialData, dataType, Int32 ( CCV_TENSOR_CPU_MEMORY) , partialDataSize, rowLength, decoded,
1767+ maxDecodedElements)
1768+ }
17521769 decodedSize [ 0 ] = maxDecodedElements * elementSize
17531770 return 1
17541771}
@@ -1762,12 +1779,12 @@ private let i8xDecode:
17621779 data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params, tensorOut,
17631780 decoded, decodedSize
17641781 in
1765- return i8xDecode (
1782+ return i8xDecodeImpl (
17661783 data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
17671784 tensorOut, decoded, decodedSize)
17681785 }
17691786
1770- private func i8xDecodeJit (
1787+ private func i8xDecodeJitImpl (
17711788 _ data: UnsafeRawPointer ? , _ dataSize: Int , _ dataType: Int32 ,
17721789 _ dimensions: UnsafePointer < Int32 > ? , _ dimensionCount: Int32 , _ identifier: UInt32 ,
17731790 _ context: UnsafeMutableRawPointer ? , _ params: ccv_nnc_tensor_param_t ,
@@ -1791,7 +1808,7 @@ private func i8xDecodeJit(
17911808 decodedSize [ 0 ] = dataSize
17921809 return 1
17931810 }
1794- return i8xDecode (
1811+ return i8xDecodeImpl (
17951812 data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
17961813 tensorOut, decoded, decodedSize)
17971814 }
@@ -1800,14 +1817,14 @@ private func i8xDecodeJit(
18001817 numberOfElements *= Int ( dimensions [ i] )
18011818 }
18021819 guard TensorShape ( dims: params. dim) . reduce ( 1 , * ) == numberOfElements else {
1803- return i8xDecode (
1820+ return i8xDecodeImpl (
18041821 data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
18051822 tensorOut, decoded, decodedSize)
18061823 }
18071824 let rowwiseParams = ccv_nnc_tensor_8i_rowwise ( params)
18081825 let encodedDataSize = ccv_nnc_tensor_data_size_without_padding ( rowwiseParams)
18091826 guard dataSize >= encodedDataSize && decodedSize [ 0 ] >= encodedDataSize else {
1810- return i8xDecode (
1827+ return i8xDecodeImpl (
18111828 data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
18121829 tensorOut, decoded, decodedSize)
18131830 }
@@ -1828,7 +1845,7 @@ private let i8xDecodeJit:
18281845 data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params, tensorOut,
18291846 decoded, decodedSize
18301847 in
1831- return i8xDecodeJit (
1848+ return i8xDecodeJitImpl (
18321849 data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
18331850 tensorOut, decoded, decodedSize)
18341851 }
@@ -3420,7 +3437,7 @@ private let i8xDecodeJitWithExternalStore:
34203437 let offset = Int ( data. load ( as: UInt64 . self) )
34213438 let length = Int ( ( data + MemoryLayout < UInt64 > . size) . load ( as: UInt64 . self) )
34223439 let mappedData = store. loadBytes ( offset: offset, length: length)
3423- return i8xDecodeJit (
3440+ return i8xDecodeJitImpl (
34243441 mappedData, length, dataType, dimensions, dimensionCount, identifier, context, params,
34253442 tensorOut, decoded, decodedSize)
34263443 }
@@ -4734,7 +4751,7 @@ private let i8xDecodeWithExternalStore:
47344751 let offset = Int ( data. load ( as: UInt64 . self) )
47354752 let length = Int ( ( data + MemoryLayout < UInt64 > . size) . load ( as: UInt64 . self) )
47364753 let mappedData = store. loadBytes ( offset: offset, length: length)
4737- return i8xDecode (
4754+ return i8xDecodeImpl (
47384755 mappedData, length, dataType, dimensions, dimensionCount, identifier, context, params,
47394756 tensorOut, decoded, decodedSize)
47404757 }
0 commit comments