Skip to content

Commit ac96142

Browse files
committed
Actually fix the failures.
1 parent 003c9a5 commit ac96142

File tree

1 file changed

+29
-12
lines changed

1 file changed

+29
-12
lines changed

nnc/Store.swift

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,7 +1705,7 @@ private let i8xEncode:
17051705
return 1
17061706
}
17071707

1708-
private func i8xDecode(
1708+
private func i8xDecodeImpl(
17091709
_ data: UnsafeRawPointer?, _ dataSize: Int, _ dataType: Int32,
17101710
_ dimensions: UnsafePointer<Int32>?, _ dimensionCount: Int32, _ identifier: UInt32,
17111711
_ context: UnsafeMutableRawPointer?, _ params: ccv_nnc_tensor_param_t,
@@ -1746,9 +1746,26 @@ private func i8xDecode(
17461746
maxDecodedElements == numberOfElements
17471747
|| (maxDecodedElements > 0 && maxDecodedElements % rowLength == 0)
17481748
else { return 0 }
1749-
ccv_nnc_dequantize_8i_rowwise(
1750-
data, dataType, Int32(CCV_TENSOR_CPU_MEMORY), dataSize, rowLength, decoded,
1751-
maxDecodedElements)
1749+
let originalScaleOffset = (numberOfElements + 127) & -128
1750+
let originalScaleSize = (numberOfElements / rowLength) * elementSize
1751+
guard dataSize >= originalScaleOffset + originalScaleSize else { return 0 }
1752+
if maxDecodedElements == numberOfElements {
1753+
ccv_nnc_dequantize_8i_rowwise(
1754+
data, dataType, Int32(CCV_TENSOR_CPU_MEMORY), dataSize, rowLength, decoded,
1755+
maxDecodedElements)
1756+
} else {
1757+
let partialScaleOffset = (maxDecodedElements + 127) & -128
1758+
let partialScaleSize = (maxDecodedElements / rowLength) * elementSize
1759+
let partialDataSize = partialScaleOffset + partialScaleSize
1760+
let partialData = UnsafeMutableRawPointer.allocate(
1761+
byteCount: partialDataSize, alignment: MemoryLayout<UInt64>.alignment)
1762+
defer { partialData.deallocate() }
1763+
memcpy(partialData, data, maxDecodedElements)
1764+
memcpy(partialData + partialScaleOffset, data + originalScaleOffset, partialScaleSize)
1765+
ccv_nnc_dequantize_8i_rowwise(
1766+
partialData, dataType, Int32(CCV_TENSOR_CPU_MEMORY), partialDataSize, rowLength, decoded,
1767+
maxDecodedElements)
1768+
}
17521769
decodedSize[0] = maxDecodedElements * elementSize
17531770
return 1
17541771
}
@@ -1762,12 +1779,12 @@ private let i8xDecode:
17621779
data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params, tensorOut,
17631780
decoded, decodedSize
17641781
in
1765-
return i8xDecode(
1782+
return i8xDecodeImpl(
17661783
data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
17671784
tensorOut, decoded, decodedSize)
17681785
}
17691786

1770-
private func i8xDecodeJit(
1787+
private func i8xDecodeJitImpl(
17711788
_ data: UnsafeRawPointer?, _ dataSize: Int, _ dataType: Int32,
17721789
_ dimensions: UnsafePointer<Int32>?, _ dimensionCount: Int32, _ identifier: UInt32,
17731790
_ context: UnsafeMutableRawPointer?, _ params: ccv_nnc_tensor_param_t,
@@ -1791,7 +1808,7 @@ private func i8xDecodeJit(
17911808
decodedSize[0] = dataSize
17921809
return 1
17931810
}
1794-
return i8xDecode(
1811+
return i8xDecodeImpl(
17951812
data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
17961813
tensorOut, decoded, decodedSize)
17971814
}
@@ -1800,14 +1817,14 @@ private func i8xDecodeJit(
18001817
numberOfElements *= Int(dimensions[i])
18011818
}
18021819
guard TensorShape(dims: params.dim).reduce(1, *) == numberOfElements else {
1803-
return i8xDecode(
1820+
return i8xDecodeImpl(
18041821
data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
18051822
tensorOut, decoded, decodedSize)
18061823
}
18071824
let rowwiseParams = ccv_nnc_tensor_8i_rowwise(params)
18081825
let encodedDataSize = ccv_nnc_tensor_data_size_without_padding(rowwiseParams)
18091826
guard dataSize >= encodedDataSize && decodedSize[0] >= encodedDataSize else {
1810-
return i8xDecode(
1827+
return i8xDecodeImpl(
18111828
data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
18121829
tensorOut, decoded, decodedSize)
18131830
}
@@ -1828,7 +1845,7 @@ private let i8xDecodeJit:
18281845
data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params, tensorOut,
18291846
decoded, decodedSize
18301847
in
1831-
return i8xDecodeJit(
1848+
return i8xDecodeJitImpl(
18321849
data, dataSize, dataType, dimensions, dimensionCount, identifier, context, params,
18331850
tensorOut, decoded, decodedSize)
18341851
}
@@ -3420,7 +3437,7 @@ private let i8xDecodeJitWithExternalStore:
34203437
let offset = Int(data.load(as: UInt64.self))
34213438
let length = Int((data + MemoryLayout<UInt64>.size).load(as: UInt64.self))
34223439
let mappedData = store.loadBytes(offset: offset, length: length)
3423-
return i8xDecodeJit(
3440+
return i8xDecodeJitImpl(
34243441
mappedData, length, dataType, dimensions, dimensionCount, identifier, context, params,
34253442
tensorOut, decoded, decodedSize)
34263443
}
@@ -4734,7 +4751,7 @@ private let i8xDecodeWithExternalStore:
47344751
let offset = Int(data.load(as: UInt64.self))
47354752
let length = Int((data + MemoryLayout<UInt64>.size).load(as: UInt64.self))
47364753
let mappedData = store.loadBytes(offset: offset, length: length)
4737-
return i8xDecode(
4754+
return i8xDecodeImpl(
47384755
mappedData, length, dataType, dimensions, dimensionCount, identifier, context, params,
47394756
tensorOut, decoded, decodedSize)
47404757
}

0 commit comments

Comments
 (0)