diff --git a/backends/apple/coreml/CMakeLists.txt b/backends/apple/coreml/CMakeLists.txt
index 3b3c26ece94..b3d0182999a 100644
--- a/backends/apple/coreml/CMakeLists.txt
+++ b/backends/apple/coreml/CMakeLists.txt
@@ -13,6 +13,8 @@ if(NOT EXECUTORCH_ROOT)
   set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
 endif()
 
+option(COREML_BUILD_EXECUTOR_RUNNER "Build CoreML executor runner." OFF)
+
 # inmemoryfs sources
 set(INMEMORYFS_SOURCES
     runtime/inmemoryfs/inmemory_filesystem.cpp
@@ -181,6 +183,14 @@ target_link_libraries(coremldelegate
     ${SQLITE_LIBRARY}
 )
 
+if(COREML_BUILD_EXECUTOR_RUNNER)
+target_link_libraries(coremldelegate
+    PRIVATE
+    portable_ops_lib
+    portable_kernels
+)
+endif()
+
 target_compile_options(coremldelegate PRIVATE "-fobjc-arc")
 target_compile_options(coremldelegate PRIVATE "-fno-exceptions")
 
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
index da399e80d54..6fe37925d27 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
@@ -630,7 +630,7 @@ - (NSUInteger)_compact:(NSUInteger)sizeInBytes error:(NSError * __autoreleasing
     }
 
     if (_estimatedSizeInBytes <= sizeInBytes) {
-        return YES;
+        return _estimatedSizeInBytes;
     }
 
     std::error_code ec;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
index eab239b496c..78c76fadd04 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
@@ -27,6 +27,9 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLDefaultModelExec
 /// The model.
 @property (readonly, strong, nonatomic) ETCoreMLModel* model;
 
+/// If set to `YES` then output backings are ignored.
+@property (readwrite, atomic) BOOL ignoreOutputBackings;
+
 @end
 
 NS_ASSUME_NONNULL_END
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
index 399c91bd495..57316e28015 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
@@ -26,6 +26,9 @@ - (instancetype)initWithModel:(ETCoreMLModel *)model {
                 loggingOptions:(const executorchcoreml::ModelLoggingOptions& __unused)loggingOptions
                    eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable __unused)eventLogger
                          error:(NSError * __autoreleasing *)error {
+    if (self.ignoreOutputBackings) {
+        predictionOptions.outputBackings = @{};
+    }
     id<MLFeatureProvider> outputs = [self.model.mlModel predictionFromFeatures:inputs
                                                                        options:predictionOptions
                                                                          error:error];
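The `ignoreOutputBackings` flag introduced above pairs with a retry path in `ETCoreMLModelManager.mm` later in this diff: when a prediction that uses output backings fails, the manager clears the error, flips the flag, and re-runs the prediction, and the executor then substitutes an empty `outputBackings` dictionary before calling Core ML. A condensed sketch of that flow (abbreviated from `executeModelUsingExecutor:inputs:outputBackings:loggingOptions:eventLogger:error:` below; not additional PR code):

    // Condensed retry flow; the full version appears later in this diff.
    NSArray<MLMultiArray *> *modelOutputs = [executor executeModelWithInputs:inputFeatures
                                                           predictionOptions:predictionOptions
                                                              loggingOptions:loggingOptions
                                                                 eventLogger:eventLogger
                                                                       error:&localError];
    if (!modelOutputs && predictionOptions.outputBackings.count > 0) {
        localError = nil;
        executor.ignoreOutputBackings = YES;  // executor replaces outputBackings with @{}
        modelOutputs = [executor executeModelWithInputs:inputFeatures
                                      predictionOptions:predictionOptions
                                         loggingOptions:loggingOptions
                                            eventLogger:eventLogger
                                                  error:&localError];
    }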
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h b/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
index 1a1b10848bb..d9c4d4ef638 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
@@ -7,6 +7,7 @@
 
 #import <Foundation/Foundation.h>
+#import <executorch/runtime/platform/log.h>
 #import <os/log.h>
 
 NS_ASSUME_NONNULL_BEGIN
@@ -48,7 +49,11 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
 
 /// Records the error with `os_log_error` and fills `*errorOut` with `NSError`.
 #define ETCoreMLLogErrorAndSetNSError(errorOut, errorCode, formatString, ...) \
-    os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
+    if (ET_LOG_ENABLED) { \
+        ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+    } else { \
+        os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
+    } \
     if (errorOut) { \
         *errorOut = \
             [NSError errorWithDomain:ETCoreMLErrorDomain \
@@ -58,24 +63,31 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
                              }]; \
     }
 
-/// Record the error and its underlying error with `os_log_error` and fills
-/// `*errorOut` with NSError.
+/// Records the error and its underlying error with `os_log_error` and fills `*errorOut` with `NSError`.
 #define ETCoreMLLogUnderlyingErrorAndSetNSError(errorOut, errorCode, underlyingNSError, formatString, ...) \
-    os_log_error(ETCoreMLErrorUtils.loggingChannel, \
-                 formatString ", with underlying error= %@.", \
-                 ##__VA_ARGS__, \
-                 (underlyingNSError).localizedDescription); \
+    if (ET_LOG_ENABLED) { \
+        ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+    } else { \
+        os_log_error(ETCoreMLErrorUtils.loggingChannel, \
+                     formatString ", with underlying error= %@.", \
+                     ##__VA_ARGS__, \
+                     (underlyingNSError).localizedDescription); \
+    } \
     if (errorOut) { \
         *errorOut = [ETCoreMLErrorUtils errorWithCode:errorCode \
                                       underlyingError:underlyingNSError \
                                                format:@formatString, ##__VA_ARGS__]; \
     }
 
-#define ETCoreMLLogError(error, formatString, ...) \
-    os_log_error(ETCoreMLErrorUtils.loggingChannel, \
-                 formatString ", with error= %@.", \
-                 ##__VA_ARGS__, \
-                 (error).localizedDescription);
+#define ETCoreMLLogError(error, formatString, ...) \
+    if (ET_LOG_ENABLED) { \
+        ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+    } else { \
+        os_log_error(ETCoreMLErrorUtils.loggingChannel, \
+                     formatString ", with error= %@.", \
+                     ##__VA_ARGS__, \
+                     (error).localizedDescription); \
+    }
 
 #pragma clang diagnostic pop
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
index 0f8a440c858..14c90694464 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
@@ -6,13 +6,18 @@
 // Please refer to the license found in the LICENSE file in the root directory of the source tree.
 
 #import <CoreML/CoreML.h>
+#import <vector>
 
 NS_ASSUME_NONNULL_BEGIN
 
 @class ETCoreMLAsset;
 
+namespace executorchcoreml {
+class MultiArray;
+}
+
 /// Represents an ML model; the class is a thin wrapper over `MLModel` with additional properties.
-@interface ETCoreMLModel : NSObject
+__attribute__((objc_subclassing_restricted)) @interface ETCoreMLModel : NSObject
 
 - (instancetype)init NS_UNAVAILABLE;
 
@@ -31,6 +36,12 @@ NS_ASSUME_NONNULL_BEGIN
            orderedOutputNames:(NSOrderedSet<NSString *> *)orderedOutputNames
                         error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
 
+- (nullable NSArray<MLMultiArray *> *)prepareInputs:(const std::vector<executorchcoreml::MultiArray>&)inputs
+                                               error:(NSError* __autoreleasing*)error;
+
+- (nullable NSArray<MLMultiArray *> *)prepareOutputBackings:(const std::vector<executorchcoreml::MultiArray>&)outputs
+                                                       error:(NSError* __autoreleasing*)error;
+
 /// The underlying MLModel.
@property (strong, readonly, nonatomic) MLModel* mlModel; diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm index 791fb7c03b6..ee7218bd271 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm @@ -8,6 +8,164 @@ #import #import +#import +#import +#import +#import + +#pragma mark - ETCoreMLMultiArrayDescriptor +__attribute__((objc_subclassing_restricted)) +@interface ETCoreMLMultiArrayDescriptor: NSObject + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +- (instancetype)initWithShape:(NSArray *)shape + dataType:(MLMultiArrayDataType)dataType NS_DESIGNATED_INITIALIZER; + +@property (copy, readonly, nonatomic) NSArray *shape; + +@property (assign, readonly, nonatomic) MLMultiArrayDataType dataType; + +@end + +@implementation ETCoreMLMultiArrayDescriptor + +- (instancetype)initWithShape:(NSArray *)shape + dataType:(MLMultiArrayDataType)dataType { + self = [super init]; + if (self) { + _shape = shape; + _dataType = dataType; + } + + return self; +} + +- (BOOL)isEqual:(id)object { + if (object == self) { + return YES; + } + + if (![object isKindOfClass:self.class]) { + return NO; + } + + ETCoreMLMultiArrayDescriptor *other = (ETCoreMLMultiArrayDescriptor *)object; + return [self.shape isEqualToArray:other.shape] && self.dataType == other.dataType; +} + +- (NSUInteger)hash { + return [self.shape hash] ^ (NSUInteger)self.dataType; +} + +- (instancetype)copyWithZone:(NSZone *)zone { + return [[ETCoreMLMultiArrayDescriptor allocWithZone:zone] initWithShape:self.shape + dataType:self.dataType]; +} + +@end + +namespace { + +using namespace executorchcoreml; + +size_t get_number_of_bytes(MLMultiArrayDataType data_type) { + switch (data_type) { + case MLMultiArrayDataTypeFloat16: { + return 2; + } + case MLMultiArrayDataTypeFloat32: { + return 4; + } + case MLMultiArrayDataTypeInt32: { + return 4; + } + case MLMultiArrayDataTypeFloat64: { + return 8; + } + default: { + return 0; + } + } +} + +std::vector calculate_strides(const std::vector& shape) { + if (shape.size() == 0) { + return {}; + } + + if (shape.size() == 1) { + return {1}; + } + + std::vector strides(shape.size(), 1); + size_t product = 1; + for (size_t i = shape.size(); i > 0; i--) { + strides[i - 1] = product; + product *= shape[i - 1]; + } + + return strides; +} + +MLMultiArray * _Nullable make_ml_multi_array(const std::vector& shape, + MLMultiArrayDataType dataType, + NSCache *cache, + NSError * __autoreleasing *error) { + ETCoreMLMultiArrayDescriptor *descriptor = [[ETCoreMLMultiArrayDescriptor alloc] initWithShape:to_array(shape) + dataType:dataType]; + // Check the cache first otherwise allocate a new backing storage. + NSMutableData *backing_storage = [cache objectForKey:descriptor]; + if (backing_storage) { + [cache removeObjectForKey:descriptor]; + } else { + size_t n = std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies{}); + backing_storage = [[NSMutableData alloc] initWithLength:n * get_number_of_bytes(dataType)]; + } + + __weak NSCache *weakCache = cache; + // Add the storage back to the cache when it gets deallocated, the next prediction would use the same storage. 
+    MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:backing_storage.mutableBytes
+                                                                shape:descriptor.shape
+                                                             dataType:descriptor.dataType
+                                                              strides:to_array(calculate_strides(shape))
+                                                          deallocator:^(void * _Nonnull bytes) { [weakCache setObject:backing_storage forKey:descriptor]; }
+                                                                error:error];
+
+    return result;
+}
+
+NSDictionary<NSString *, MLMultiArrayConstraint *> *
+get_multi_array_constraints_by_name(NSDictionary<NSString *, MLFeatureDescription *> *feature_descriptions) {
+    NSMutableDictionary<NSString *, MLMultiArrayConstraint *> *result = [NSMutableDictionary dictionaryWithCapacity:feature_descriptions.count];
+    [feature_descriptions enumerateKeysAndObjectsUsingBlock:^(NSString *key, MLFeatureDescription *description, BOOL * _Nonnull stop) {
+        result[key] = description.multiArrayConstraint;
+    }];
+
+    return result;
+}
+
+NSDictionary<NSString *, MLMultiArrayConstraint *> *get_multi_array_input_constraints_by_name(MLModelDescription *description) {
+    return get_multi_array_constraints_by_name(description.inputDescriptionsByName);
+}
+
+NSDictionary<NSString *, MLMultiArrayConstraint *> *get_multi_array_output_constraints_by_name(MLModelDescription *description) {
+    return get_multi_array_constraints_by_name(description.outputDescriptionsByName);
+}
+
+}
+
+#pragma mark - ETCoreMLModel
+@interface ETCoreMLModel ()
+
+@property (strong, readonly, nonatomic) NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *cache;
+@property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *inputConstraintsByName;
+@property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *outputConstraintsByName;
+
+@end
+
 @implementation ETCoreMLModel
@@ -33,8 +191,11 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
         _asset = asset;
         _orderedInputNames = [orderedInputNames copy];
         _orderedOutputNames = [orderedOutputNames copy];
+        _cache = [[NSCache alloc] init];
+        _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
+        _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
     }
-    
+
     return self;
 }
 
@@ -42,4 +203,73 @@ - (NSString *)identifier {
     return self.asset.identifier;
 }
 
+- (nullable NSArray<MLMultiArray *> *)prepareArgs:(const std::vector<executorchcoreml::MultiArray>&)args
+                                         argNames:(NSOrderedSet<NSString *> *)argNames
+                             argConstraintsByName:(NSDictionary<NSString *, MLMultiArrayConstraint *> *)argConstraintsByName
+                                         copyData:(const BOOL)copyData
+                                            error:(NSError * __autoreleasing *)error {
+    NSEnumerator<NSString *> *nameEnumerator = [argNames objectEnumerator];
+    NSMutableArray<MLMultiArray *> *result = [NSMutableArray arrayWithCapacity:args.size()];
+    for (const auto& arg : args) {
+        BOOL lCopyData = copyData;
+        NSString *argName = [nameEnumerator nextObject];
+        MLMultiArrayConstraint *constraint = argConstraintsByName[argName];
+        const auto& layout = arg.layout();
+        auto dataType = to_ml_multiarray_data_type(layout.dataType());
+        MLMultiArray *multiArrayArg = nil;
+        if (dataType == constraint.dataType) {
+            // We can use the same data storage.
+            multiArrayArg = [[MLMultiArray alloc] initWithDataPointer:arg.data()
+                                                                shape:to_array(layout.shape())
+                                                             dataType:constraint.dataType
+                                                              strides:to_array(layout.strides())
+                                                          deallocator:^(void * _Nonnull bytes) {}
+                                                                error:error];
+            lCopyData = NO;
+        } else {
+            // We can't use the same data storage because the data types are not the same.
+            multiArrayArg = ::make_ml_multi_array(layout.shape(), constraint.dataType, self.cache, error);
+        }
+
+        if (!multiArrayArg) {
+            return nil;
+        }
+
+        if (multiArrayArg && lCopyData) {
+            [multiArrayArg getMutableBytesWithHandler:^(void *_Nonnull mutableBytes,
+                                                        NSInteger __unused size,
+                                                        NSArray<NSNumber *> *strides) {
+                MultiArray buffer(mutableBytes,
+                                  MultiArray::MemoryLayout(to_multiarray_data_type(constraint.dataType).value(),
+                                                           layout.shape(),
+                                                           to_vector<ssize_t>(strides)));
+                arg.copy(buffer);
+            }];
+        }
+
+        [result addObject:multiArrayArg];
+    }
+
+    return result;
+}
+
+- (nullable NSArray<MLMultiArray *> *)prepareInputs:(const std::vector<executorchcoreml::MultiArray>&)inputs
+                                               error:(NSError * __autoreleasing *)error {
+    return [self prepareArgs:inputs
+                    argNames:self.orderedInputNames
+        argConstraintsByName:self.inputConstraintsByName
+                    copyData:YES
+                       error:error];
+}
+
+- (nullable NSArray<MLMultiArray *> *)prepareOutputBackings:(const std::vector<executorchcoreml::MultiArray>&)outputs
+                                                       error:(NSError * __autoreleasing *)error {
+    return [self prepareArgs:outputs
+                    argNames:self.orderedOutputNames
+        argConstraintsByName:self.outputConstraintsByName
+                    copyData:NO
+                       error:error];
+}
+
 @end
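The cache round-trip in `make_ml_multi_array` above is worth spelling out: each `MLMultiArray` created for a data-type conversion is backed by an `NSMutableData`, and its deallocator block hands that storage back to the `NSCache` keyed by shape and data type, so the next prediction with the same descriptor reuses the allocation. A minimal sketch of that lifecycle (hypothetical standalone usage; in the diff the helper is file-local to ETCoreMLModel.mm):

    NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *cache = [[NSCache alloc] init];
    NSError *error = nil;
    @autoreleasepool {
        // Allocates fresh storage; when `scratch` deallocates, its backing
        // NSMutableData is returned to `cache` by the deallocator block.
        MLMultiArray *scratch = make_ml_multi_array({1, 3, 8, 8}, MLMultiArrayDataTypeFloat32, cache, &error);
        (void)scratch;
    }
    // Same shape and data type, so this allocation is served from the cache.
    MLMultiArray *reused = make_ml_multi_array({1, 3, 8, 8}, MLMultiArrayDataTypeFloat32, cache, &error);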
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
index e6e329c9ddd..2f1b22f456b 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
@@ -35,6 +35,9 @@ NS_ASSUME_NONNULL_BEGIN
 /// The model.
 @property (readonly, strong, nonatomic) ETCoreMLModel* model;
 
+/// If set to `YES` then output backings are ignored.
+@property (readwrite, atomic) BOOL ignoreOutputBackings;
+
 @end
 
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
index fb616c71527..6bfdbade9c4 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
@@ -7,11 +7,14 @@
 
 #import <CoreML/CoreML.h>
 
+#import <vector>
+
 NS_ASSUME_NONNULL_BEGIN
 
 namespace executorchcoreml {
 struct ModelLoggingOptions;
 class ModelEventLogger;
+class MultiArray;
 };
 
 @class ETCoreMLModel;
@@ -49,7 +52,7 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelManager : N
 /// Executes the loaded model.
 ///
 /// @param handle The handle to the loaded model.
-/// @param args The arguments to the model.
+/// @param args The arguments (inputs and outputs) of the model.
 /// @param loggingOptions The model logging options.
 /// @param error On failure, error is filled with the failure information.
 /// @retval `YES` if the execution succeeded otherwise `NO`.
@@ -59,6 +62,19 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelManager : N
                    eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
                          error:(NSError* __autoreleasing*)error;
 
+/// Executes the loaded model.
+///
+/// @param handle The handle to the loaded model.
+/// @param argsVec The arguments (inputs and outputs) of the model.
+/// @param loggingOptions The model logging options.
+/// @param error On failure, error is filled with the failure information.
+/// @retval `YES` if the execution succeeded otherwise `NO`.
+- (BOOL)executeModelWithHandle:(ModelHandle*)handle
+                       argsVec:(const std::vector<executorchcoreml::MultiArray>&)argsVec
+                loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+                   eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+                         error:(NSError* __autoreleasing*)error;
+
 /// Unloads the loaded model.
/// /// @param handle The handle to the loaded model. diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm index 1c0d2a30f97..c51de9d1e14 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm @@ -22,6 +22,8 @@ #import #import #import +#import +#import #import #import #import @@ -98,32 +100,60 @@ BOOL is_backed_by_same_buffer(MLMultiArray *array1, MLMultiArray *array2) { return options; } -BOOL copy(MLMultiArray *src, MLMultiArray *dst, NSError * __autoreleasing *error) { - if (![src.shape isEqualToArray:dst.shape]) { - ETCoreMLLogErrorAndSetNSError(error, 0, "%@: Model is broken", NSStringFromClass(ETCoreMLModelManager.class)); - return NO; - } +void copy(MLMultiArray *src, MLMultiArray *dst) { if (::is_backed_by_same_buffer(src, dst)) { - return YES; - } - @autoreleasepool { - [src copyInto:dst]; + return; } - return YES; + + [src copyInto:dst]; } -BOOL set_outputs(NSArray *outputs, - NSArray *model_outputs, - NSError * __autoreleasing *error) { +void set_outputs(NSArray *outputs, NSArray *model_outputs) { NSEnumerator *enumerator = [model_outputs objectEnumerator]; for (MLMultiArray *output in outputs) { MLMultiArray *model_output = [enumerator nextObject]; - if (!::copy(output, model_output, error)) { - return NO; + ::copy(model_output, output); + } +} + +std::optional get_data_type(MLMultiArrayDataType data_type) { + switch (data_type) { + case MLMultiArrayDataTypeFloat16: { + return MultiArray::DataType::Float16; + } + case MLMultiArrayDataTypeFloat32: { + return MultiArray::DataType::Float32; + } + case MLMultiArrayDataTypeFloat64: { + return MultiArray::DataType::Float64; + } + case MLMultiArrayDataTypeInt32: { + return MultiArray::DataType::Int32; + } + default: { + return std::nullopt; } } - - return YES; +} + +void copy(MLMultiArray *src, executorchcoreml::MultiArray& dst) { + [src getBytesWithHandler:^(const void * _Nonnull bytes, NSInteger size) { + if (bytes == dst.data()) { + return; + } + + MultiArray::MemoryLayout src_layout(get_data_type(src.dataType).value(), to_vector(src.shape), to_vector(src.strides)); + MultiArray(const_cast(bytes), std::move(src_layout)).copy(dst); + }]; +} + +void set_outputs(std::vector& outputs, + NSArray *model_outputs) { + NSEnumerator *enumerator = [model_outputs objectEnumerator]; + for (auto& output : outputs) { + MLMultiArray *model_output = [enumerator nextObject]; + ::copy(model_output, output); + } } NSData * _Nullable get_file_data(const inmemoryfs::InMemoryFileSystem *inMemoryFS, @@ -313,6 +343,7 @@ void add_compute_unit(std::string& identifier, MLComputeUnits compute_units) { return result; } + #endif } //namespace @@ -467,7 +498,7 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier return [[ETCoreMLModelAnalyzer alloc] initWithCompiledModelAsset:compiledModelAsset modelAsset:modelAsset metadata:metadata - operationPathToDebugSymbolMap: operation_path_to_symbol_name_map + operationPathToDebugSymbolMap:operation_path_to_symbol_name_map configuration:configuration assetManager:self.assetManager error:error]; @@ -641,6 +672,48 @@ - (void)addPrewarmedAsset:(ETCoreMLAsset *)asset { os_unfair_lock_unlock(&_lock); } +- (nullable NSArray *)executeModelUsingExecutor:(id)executor + inputs:(NSArray *)inputs + outputBackings:(NSArray *)outputBackings + loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions + eventLogger:(const 
executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+                                                  error:(NSError * __autoreleasing *)error {
+    NSError *localError = nil;
+    ETCoreMLModel *model = executor.model;
+    MLPredictionOptions *predictionOptions = ::get_prediction_options(outputBackings, model.orderedOutputNames, error);
+    if (!predictionOptions) {
+        return nil;
+    }
+
+    id<MLFeatureProvider> inputFeatures = ::get_feature_provider(inputs, model.orderedInputNames, error);
+    if (!inputFeatures) {
+        return nil;
+    }
+
+    NSArray<MLMultiArray *> *modelOutputs = [executor executeModelWithInputs:inputFeatures
+                                                            predictionOptions:predictionOptions
+                                                               loggingOptions:loggingOptions
+                                                                  eventLogger:eventLogger
+                                                                        error:&localError];
+    // Try without output backings; retry only if the first prediction failed.
+    if (!modelOutputs && predictionOptions.outputBackings.count > 0) {
+        localError = nil;
+        executor.ignoreOutputBackings = YES;
+        modelOutputs = [executor executeModelWithInputs:inputFeatures
+                                      predictionOptions:predictionOptions
+                                         loggingOptions:loggingOptions
+                                            eventLogger:eventLogger
+                                                  error:&localError];
+    }
+
+    if (error) {
+        *error = localError;
+    }
+
+    return modelOutputs;
+}
+
 - (BOOL)executeModelWithHandle:(ModelHandle *)handle
                           args:(NSArray<MLMultiArray *> *)args
                 loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
@@ -659,33 +732,91 @@ - (BOOL)executeModelWithHandle:(ModelHandle *)handle
     if (args.count != model.orderedInputNames.count + model.orderedOutputNames.count) {
         ETCoreMLLogErrorAndSetNSError(error,
                                       ETCoreMLErrorCorruptedModel,
-                                      "%@: Model is invalid.",
-                                      NSStringFromClass(self.class));
+                                      "%@: Model is invalid, expected args count to be %lu but got %lu.",
+                                      NSStringFromClass(self.class),
+                                      static_cast<unsigned long>(model.orderedInputNames.count + model.orderedOutputNames.count),
+                                      args.count);
         return NO;
     }
-    
-    NSArray<MLMultiArray *> *inputs = [args subarrayWithRange:NSMakeRange(0, model.orderedInputNames.count)];
-    NSArray<MLMultiArray *> *outputs = [args subarrayWithRange:NSMakeRange(model.orderedInputNames.count, args.count - model.orderedInputNames.count)];
-    id<MLFeatureProvider> inputFeatures = ::get_feature_provider(inputs, model.orderedInputNames, error);
-    if (!inputFeatures) {
-        return NO;
+    @autoreleasepool {
+        NSArray<MLMultiArray *> *inputs = [args subarrayWithRange:NSMakeRange(0, model.orderedInputNames.count)];
+        NSArray<MLMultiArray *> *outputs = [args subarrayWithRange:NSMakeRange(model.orderedInputNames.count, args.count - model.orderedInputNames.count)];
+        NSArray<MLMultiArray *> *outputBackings = @[];
+        if (executor.ignoreOutputBackings == NO) {
+            outputBackings = outputs;
+        }
+
+        NSArray<MLMultiArray *> *modelOutputs = [self executeModelUsingExecutor:executor
+                                                                          inputs:inputs
+                                                                  outputBackings:outputBackings
+                                                                  loggingOptions:loggingOptions
+                                                                     eventLogger:eventLogger
+                                                                           error:error];
+        if (!modelOutputs) {
+            return NO;
+        }
+
+        ::set_outputs(outputs, modelOutputs);
     }
-    MLPredictionOptions *predictionOptions = ::get_prediction_options(outputs, model.orderedOutputNames, error);
-    if (!predictionOptions) {
+    return YES;
+}
+
+- (BOOL)executeModelWithHandle:(ModelHandle *)handle
+                       argsVec:(const std::vector<executorchcoreml::MultiArray>&)argsVec
+                loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+                   eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+                         error:(NSError * __autoreleasing *)error {
+    id<ETCoreMLModelExecutor> executor = [self executorWithHandle:handle];
+    if (!executor) {
+        ETCoreMLLogErrorAndSetNSError(error,
+                                      0,
+                                      "%@: Model is already unloaded.",
+                                      NSStringFromClass(self.class));
         return NO;
     }
-    NSArray<MLMultiArray *> *modelOutputs = [executor executeModelWithInputs:inputFeatures
-                                                            predictionOptions:predictionOptions
-                                                               loggingOptions:loggingOptions
-                                                                  eventLogger:eventLogger
-                                                                        error:error];
-    if
(!outputs) { + ETCoreMLModel *model = executor.model; + if (argsVec.size() != model.orderedInputNames.count + model.orderedOutputNames.count) { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "%@: Model is invalid, expected args count to be %lu but got %lu.", + NSStringFromClass(self.class), + static_cast(model.orderedInputNames.count + model.orderedOutputNames.count), + argsVec.size()); return NO; } - return ::set_outputs(outputs, modelOutputs, error); + std::vector inputArgs(argsVec.begin(), argsVec.begin() + model.orderedInputNames.count); + std::vector outputArgs(argsVec.begin() + model.orderedInputNames.count, argsVec.end()); + @autoreleasepool { + NSArray *inputs = [model prepareInputs:inputArgs error:error]; + if (!inputs) { + return NO; + } + + NSArray *outputBackings = @[]; + if (executor.ignoreOutputBackings == NO) { + outputBackings = [model prepareOutputBackings:outputArgs error:error]; + } + + if (!outputBackings) { + return NO; + } + + NSArray *modelOutputs = [self executeModelUsingExecutor:executor + inputs:inputs + outputBackings:outputBackings + loggingOptions:loggingOptions + eventLogger:eventLogger + error:error]; + if (!modelOutputs) { + return NO; + } + + ::set_outputs(outputArgs, modelOutputs); + return YES; + } } - (BOOL)unloadModelWithHandle:(ModelHandle *)handle { diff --git a/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm b/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm index 4aa5fffe94a..b8a10fcbbbc 100644 --- a/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm +++ b/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm @@ -7,55 +7,17 @@ #import +#import #import namespace { using namespace executorchcoreml; -template -T toValue(NSNumber *value); - -template<> size_t toValue(NSNumber *value) { - return value.unsignedLongValue; -} - -template<> ssize_t toValue(NSNumber *value) { - return value.longLongValue; -} - -template::value, T>::type> -std::vector to_vector(NSArray *numbers) { - std::vector result; - result.reserve(numbers.count); - for (NSNumber *number in numbers) { - result.emplace_back(toValue(number)); - } - - return result; -} - -MultiArray::DataType to_multi_array_data_type(MLMultiArrayDataType data_type) { - switch (data_type) { - case MLMultiArrayDataTypeInt32: { - return MultiArray::DataType::Int; - } - case MLMultiArrayDataTypeFloat: { - return MultiArray::DataType::Float; - } - case MLMultiArrayDataTypeFloat16: { - return MultiArray::DataType::Float16; - } - case MLMultiArrayDataTypeDouble: { - return MultiArray::DataType::Double; - } - } -} - MultiArray to_multi_array(void *data, MLMultiArrayDataType dataType, NSArray *shape, NSArray *strides) { - auto layout = MultiArray::MemoryLayout(to_multi_array_data_type(dataType), + auto layout = MultiArray::MemoryLayout(to_multiarray_data_type(dataType).value(), to_vector(shape), to_vector(strides)); return MultiArray(data, std::move(layout)); diff --git a/backends/apple/coreml/runtime/delegate/backend_delegate.h b/backends/apple/coreml/runtime/delegate/backend_delegate.h index d6a6016c087..ed921fb35bd 100644 --- a/backends/apple/coreml/runtime/delegate/backend_delegate.h +++ b/backends/apple/coreml/runtime/delegate/backend_delegate.h @@ -26,7 +26,7 @@ class BackendDelegate { struct Config { // Max models cache size in bytes. 
- size_t max_models_cache_size = 2 * size_t(1024) * size_t(1024) * size_t(1024); + size_t max_models_cache_size = 10 * size_t(1024) * size_t(1024) * size_t(1024); // If set to `true`, delegate pre-warms the most recently used asset. bool should_prewarm_asset = true; // If set to `true`, delegate pre-warms the model in `init`. diff --git a/backends/apple/coreml/runtime/delegate/backend_delegate.mm b/backends/apple/coreml/runtime/delegate/backend_delegate.mm index b91a6208b6a..1ded4a76b3b 100644 --- a/backends/apple/coreml/runtime/delegate/backend_delegate.mm +++ b/backends/apple/coreml/runtime/delegate/backend_delegate.mm @@ -44,44 +44,6 @@ MLComputeUnits get_compute_units(const Buffer& buffer) { return configuration; } -template::value, T>::type> -NSArray *to_array(const std::vector& array) { - NSMutableArray *result = [NSMutableArray arrayWithCapacity:array.size()]; - for (T value : array) { - [result addObject:@(value)]; - } - - return result; -} - -MLMultiArrayDataType get_data_type(MultiArray::DataType dataType) { - switch (dataType) { - case MultiArray::DataType::Float16: { - return MLMultiArrayDataTypeFloat16; - } - case MultiArray::DataType::Float: { - return MLMultiArrayDataTypeFloat32; - } - case MultiArray::DataType::Double: { - return MLMultiArrayDataTypeDouble; - } - case MultiArray::DataType::Int: { - return MLMultiArrayDataTypeInt32; - } - } -} - -MLMultiArray * _Nullable to_ml_multiarray(const MultiArray& array, NSError * __autoreleasing *error) { - const auto& layout = array.layout(); - MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:array.data() - shape:to_array(layout.shape()) - dataType:get_data_type(layout.dataType()) - strides:to_array(layout.strides()) - deallocator:^(void * _Nonnull bytes) {} - error:error]; - return result; -} - NSURL * _Nullable create_directory_if_needed(NSURL *url, NSFileManager *fileManager, NSError * __autoreleasing *error) { @@ -194,17 +156,8 @@ bool execute(Handle* handle, ModelEventLogger *event_logger, std::error_code& ec) const noexcept override { NSError *error = nil; - NSMutableArray *model_args = [NSMutableArray arrayWithCapacity:args.size()]; - for (const auto& arg : args) { - MLMultiArray *multi_array = to_ml_multiarray(arg, &error); - if (!multi_array) { - return false; - } - [model_args addObject:multi_array]; - } - if (![model_manager_ executeModelWithHandle:handle - args:model_args + argsVec:args loggingOptions:logging_options eventLogger:event_logger error:&error]) { diff --git a/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist b/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist index 7dd12acaaf8..df37a47755f 100644 --- a/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist +++ b/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist @@ -7,6 +7,6 @@ shouldPrewarmModel maxAssetsSizeInBytes - 2147483648 + 1073741824 diff --git a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm index a51e73ee68d..b672d4a08e4 100644 --- a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm +++ b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm @@ -28,16 +28,25 @@ using namespace executorchcoreml; std::optional get_data_type(ScalarType scalar_type) { - if (scalar_type == ScalarType::Float) { - return MultiArray::DataType::Float; - } else if (scalar_type == ScalarType::Double) { - return 
MultiArray::DataType::Double; - } else if (scalar_type == ScalarType::Half) { - return MultiArray::DataType::Float16; - } else if (scalar_type == ScalarType::Int) { - return MultiArray::DataType::Int; - } else { - return std::nullopt; + switch (scalar_type) { + case ScalarType::Bool: + return MultiArray::DataType::Bool; + case ScalarType::Byte: + return MultiArray::DataType::Byte; + case ScalarType::Short: + return MultiArray::DataType::Short; + case ScalarType::Int: + return MultiArray::DataType::Int32; + case ScalarType::Long: + return MultiArray::DataType::Int64; + case ScalarType::Half: + return MultiArray::DataType::Float16; + case ScalarType::Float: + return MultiArray::DataType::Float32; + case ScalarType::Double: + return MultiArray::DataType::Float64; + default: + return std::nullopt; } } @@ -54,6 +63,7 @@ auto tensor = eValue->toTensor(); auto dataType = get_data_type(tensor.scalar_type()); if (!dataType.has_value()) { + ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type()); return std::nullopt; } @@ -167,7 +177,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) { auto multi_array = get_multi_array(args[i], ArgType::Input); ET_CHECK_OR_RETURN_ERROR(multi_array.has_value(), Internal, - "%s: Expected tensor at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i); + "%s: Failed to create multiarray from input at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i); delegate_args.emplace_back(std::move(multi_array.value())); } @@ -176,7 +186,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) { auto multi_array = get_multi_array(args[i], ArgType::Output); ET_CHECK_OR_RETURN_ERROR(multi_array.has_value(), Internal, - "%s: Expected tensor at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i); + "%s: Failed to create multiarray from output at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i); delegate_args.emplace_back(std::move(multi_array.value())); } diff --git a/backends/apple/coreml/runtime/delegate/multiarray.h b/backends/apple/coreml/runtime/delegate/multiarray.h index cd165373dc8..70a2a08a2f7 100644 --- a/backends/apple/coreml/runtime/delegate/multiarray.h +++ b/backends/apple/coreml/runtime/delegate/multiarray.h @@ -7,6 +7,9 @@ #pragma once +#import +#import +#import #import namespace executorchcoreml { @@ -29,13 +32,33 @@ class Buffer { }; /// A class representing a MultiArray. -class MultiArray { +class MultiArray final { public: /// The MultiArray datatype. - enum class DataType : uint8_t { Int = 0, Double, Float, Float16 }; + enum class DataType : uint8_t { + Bool = 0, + Byte, + Char, + Short, + Int32, + Int64, + Float16, + Float32, + Float64, + }; + + /// Options for copying. + struct CopyOptions { + inline CopyOptions() noexcept : use_bnns(true), use_memcpy(true) { } + + inline CopyOptions(bool use_bnns, bool use_memcpy) noexcept : use_bnns(use_bnns), use_memcpy(use_memcpy) { } + + bool use_bnns = true; + bool use_memcpy = true; + }; /// A class describing the memory layout of a MultiArray. - class MemoryLayout { + class MemoryLayout final { public: MemoryLayout(DataType dataType, std::vector shape, std::vector strides) : dataType_(dataType), shape_(std::move(shape)), strides_(std::move(strides)) { } @@ -53,7 +76,10 @@ class MultiArray { inline size_t rank() const noexcept { return shape_.size(); } /// Returns the number of elements in the MultiArray. 
- size_t get_num_elements() const noexcept; + size_t num_elements() const noexcept; + + /// Returns the byte size of an element. + size_t num_bytes() const noexcept; /// Returns `true` if the memory layout is packed otherwise `false`. bool is_packed() const noexcept; @@ -78,11 +104,42 @@ class MultiArray { /// Copies this into another `MultiArray`. /// /// @param dst The destination `MultiArray`. - bool copy(MultiArray& dst) const noexcept; + void copy(MultiArray& dst, CopyOptions options = CopyOptions()) const noexcept; + + /// Get the value at `indices`. + template inline T value(const std::vector& indices) const noexcept { + return *(static_cast(data(indices))); + } + + /// Set the value at `indices`. + template inline void set_value(const std::vector& indices, T value) const noexcept { + T* ptr = static_cast(data(indices)); + *ptr = value; + } + + /// Get the value at `index`. + template inline T value(size_t index) const noexcept { return *(static_cast(data(index))); } + + /// Set the value at `index`. + template inline void set_value(size_t index, T value) const noexcept { + T* ptr = static_cast(data(index)); + *ptr = value; + } private: + void* data(const std::vector& indices) const noexcept; + + void* data(size_t index) const noexcept; + void* data_; MemoryLayout layout_; }; +/// Converts `MultiArray::DataType` to `MLMultiArrayDataType`. +std::optional to_ml_multiarray_data_type(MultiArray::DataType data_type); + +/// Converts `MLMultiArrayDataType` to `MultiArray::DataType`. +std::optional to_multiarray_data_type(MLMultiArrayDataType data_type); + + } // namespace executorchcoreml diff --git a/backends/apple/coreml/runtime/delegate/multiarray.mm b/backends/apple/coreml/runtime/delegate/multiarray.mm index 3b8dcb98a30..74996fb8d5a 100644 --- a/backends/apple/coreml/runtime/delegate/multiarray.mm +++ b/backends/apple/coreml/runtime/delegate/multiarray.mm @@ -10,120 +10,16 @@ #import #import - #import #import +#import +#import #import namespace { using namespace executorchcoreml; -template -struct TypedMultiArray { - explicit TypedMultiArray(T *data, MultiArray::MemoryLayout layout) noexcept - :data(data), layout(std::move(layout)) - {} - - T *data; - MultiArray::MemoryLayout layout; -}; - -#pragma mark - BNNS - -template -struct BNNSCopier { - static bool supported() noexcept { - return false; - } - - static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dstNNSDesc) noexcept {} -}; - -// float -> _Float16 -template<> -struct BNNSCopier { - static bool supported() noexcept { - return true; - } - - static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept { - src_bnns_desc->data_type = BNNSDataTypeFloat32; - dst_bnns_desc->data_type = BNNSDataTypeFloat16; - BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL); - } -}; - -// float -> int32_t -template<> -struct BNNSCopier { - static bool supported() noexcept { - return true; - } - - static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept { - src_bnns_desc->data_type = BNNSDataTypeFloat32; - dst_bnns_desc->data_type = BNNSDataTypeInt32; - BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL); - } -}; - -// _Float16 -> float -template<> -struct BNNSCopier<_Float16, float> { - static bool supported() noexcept { - return true; - } - - static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept { - src_bnns_desc->data_type = BNNSDataTypeFloat16; - dst_bnns_desc->data_type = BNNSDataTypeFloat32; - 
BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL); - } -}; - -// _Float16 -> int32_t -template<> -struct BNNSCopier<_Float16, int32_t> { - static bool supported() noexcept { - return true; - } - - static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept { - src_bnns_desc->data_type = BNNSDataTypeFloat16; - dst_bnns_desc->data_type = BNNSDataTypeInt32; - BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL); - } -}; - -// int32_t -> _Float16 -template<> -struct BNNSCopier { - static bool supported() noexcept { - return true; - } - - static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept { - src_bnns_desc->data_type = BNNSDataTypeInt32; - dst_bnns_desc->data_type = BNNSDataTypeFloat16; - BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL); - } -}; - -// int32_t -> float -template<> -struct BNNSCopier { - static bool supported() noexcept { - return true; - } - - static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept { - src_bnns_desc->data_type = BNNSDataTypeInt32; - dst_bnns_desc->data_type = BNNSDataTypeFloat32; - BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL); - } -}; - -/// Returns BNNSDataLayout and sets strides from the multi-array strides. +// Returns BNNSDataLayout and sets strides from the multi-array strides. /// /// BNNS requires strides to be non-decreasing order; /// `bnns_strides[i] <= bnns_strides[i + 1]`. BNNSDataLayout defines @@ -132,408 +28,491 @@ static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *ds /// @param multi_array_strides The multiarray strides. /// @param bnns_strides The bnns strides. /// @retval The `BNNSDataLayout`. -BNNSDataLayout get_bnns_data_layout(const std::vector& multi_array_strides, size_t *bnns_strides) { - uint32_t firstMajorFlag = 1; +std::optional get_bnns_data_layout(const std::vector& multi_array_strides, + size_t *bnns_strides) { + bool first_major = false; uint32_t rank = static_cast(multi_array_strides.size()); if (rank > BNNS_MAX_TENSOR_DIMENSION) { - return (BNNSDataLayout)-1; + return std::nullopt; } if (std::is_sorted(multi_array_strides.begin(), multi_array_strides.end(), std::less())) { - firstMajorFlag = 0; + first_major = false; std::copy(multi_array_strides.begin(), multi_array_strides.end(), bnns_strides); } else if (std::is_sorted(multi_array_strides.begin(), multi_array_strides.end(), std::greater()) ) { - firstMajorFlag = 1; + first_major = true; std::copy(multi_array_strides.rbegin(), multi_array_strides.rend(), bnns_strides); } else { - return (BNNSDataLayout)-1; + return std::nullopt; } // See BNNSDataLayout's raw value how this bitwise-or makes sense. - return (BNNSDataLayout)((rank << 16) | (8 << 12) | firstMajorFlag); + return (BNNSDataLayout) (0x08000 + // flags as canonical first/last major type + 0x10000 * rank + // set dimensionality + (first_major ? 1 : 0)); // set first/last major bit } -/// Initializes BNNSNDArrayDescriptor for the shape and strides. +/// Returns `BNNSDataType` from `MultiArray::DataType`. /// -/// @param layout The memory layout. -/// @param desc The ``BNNSNDArrayDescriptor` to be initialized. -/// @retval `true` if the initialization succeeded otherwise `false`. 
-bool init_bnns_array_descriptor(const MultiArray::MemoryLayout& layout, BNNSNDArrayDescriptor *desc) { - BNNSDataLayout bnns_layout = get_bnns_data_layout(layout.strides(), desc->stride); - if (bnns_layout == (BNNSDataLayout)-1) { - return false; - } - - std::memset(desc, 0, sizeof(*desc)); - const auto& shape = layout.shape(); - std::copy(shape.begin(), shape.end(), desc->size); - desc->layout = bnns_layout; - desc->data_scale = 1.0f; - desc->data_bias = 0.0f; - - return true; -} - -template -struct MultiArrayBNNSCopier { - static bool copy(TypedMultiArray& src, TypedMultiArray& dst) { - if (!BNNSCopier::supported()) { - return false; +/// @param datatype The multiarray datatype. +/// @retval The `BNNSDataType`. +std::optional get_bnns_data_type(MultiArray::DataType datatype) { + switch (datatype) { + case MultiArray::DataType::Bool: { + return BNNSDataTypeBoolean; } - - BNNSNDArrayDescriptor src_bnns_array; - BNNSNDArrayDescriptor dst_bnns_array; - if (!init_bnns_array_descriptor(src.layout, &src_bnns_array) || !init_bnns_array_descriptor(dst.layout, &dst_bnns_array)) { - return false; + case MultiArray::DataType::Byte: { + return BNNSDataTypeUInt8; + } + case MultiArray::DataType::Char: { + return BNNSDataTypeInt8; + } + case MultiArray::DataType::Short: { + return BNNSDataTypeInt16; + } + case MultiArray::DataType::Int32: { + return BNNSDataTypeInt32; + } + case MultiArray::DataType::Int64: { + return BNNSDataTypeInt64; + } + case MultiArray::DataType::Float16: { + return BNNSDataTypeFloat16; + } + case MultiArray::DataType::Float32: { + return BNNSDataTypeFloat32; + } + default: { + return std::nullopt; } - - BNNSCopier::copy(&src_bnns_array, &dst_bnns_array); - return true; } -}; - -#pragma mark - VImageCopier +} -bool init_vi_Buffer(const MultiArray::MemoryLayout& layout, vImage_Buffer *viBuf, size_t bytesPerScalar) { - size_t rank = layout.rank(); - const auto& shape = layout.shape(); - const auto& strides = layout.strides(); - - if (rank < 2) { - // vImage path requires at least two dimensions. - return false; - } - - // vImage blitter requires first major and every dimension except row (shape[rank - 2]) is contiguous. - if (!std::is_sorted(strides.begin(), strides.end(), std::greater())) { +/// Initializes BNNS array descriptor from multi array. +/// +/// @param bnns_descriptor The descriptor to be initialized. +/// @param multi_array The multiarray. +/// @retval `true` if the initialization succeeded otherwise `false`. 
+bool init_bnns_descriptor(BNNSNDArrayDescriptor& bnns_descriptor, const MultiArray& multi_array) { + const auto& layout = multi_array.layout(); + if (layout.num_elements() == 1) { return false; } - if (strides[rank - 1] != 1) { + auto bnns_datatype = get_bnns_data_type(layout.dataType()); + if (!bnns_datatype) { return false; } - size_t height = std::accumulate(shape.begin(), shape.end() - 1, size_t(1), std::multiplies()); - if (height * strides[rank - 2] != strides[0] * shape[0]) { + std::memset(&bnns_descriptor, 0, sizeof(bnns_descriptor)); + auto bnns_layout = get_bnns_data_layout(layout.strides(), bnns_descriptor.stride); + if (!bnns_layout) { return false; } - size_t width = shape[rank - 1]; - size_t rowBytes = strides[rank - 2] * bytesPerScalar; - - viBuf->data = NULL; - viBuf->height = height; - viBuf->width = width; - viBuf->rowBytes = rowBytes; + const auto& shape = layout.shape(); + std::copy(shape.begin(), shape.end(), bnns_descriptor.size); + bnns_descriptor.layout = bnns_layout.value(); + bnns_descriptor.data_scale = 1.0f; + bnns_descriptor.data_bias = 0.0f; + bnns_descriptor.data_type = bnns_datatype.value(); + bnns_descriptor.data = multi_array.data(); return true; } -template -struct VImageCopier { - static bool supported() noexcept { +bool copy_using_bnns(const MultiArray& src, MultiArray& dst) { + if (dst.layout().num_bytes() < src.layout().num_bytes()) { return false; } - - static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {} -}; - -template -struct VImageCopier { - static bool supported() noexcept { - return true; + BNNSNDArrayDescriptor src_descriptor; + if (!init_bnns_descriptor(src_descriptor, src)) { + return false; } - static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept { - vImageCopyBuffer(src_vi_buffer, dst_vi_buffer, sizeof(T), kvImageDoNotTile); - } -}; - -// float -> _Float16 -template <> -struct VImageCopier { - static bool supported() noexcept { - return true; + BNNSNDArrayDescriptor dst_descriptor; + if (!init_bnns_descriptor(dst_descriptor, dst)) { + return false; } - static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept { - vImageConvert_PlanarFtoPlanar16F(src_vi_buffer, dst_vi_buffer, kvImageDoNotTile); - } -}; + return BNNSCopy(&dst_descriptor, &src_descriptor, NULL) == 0; +} -// _Float16 -> float -template <> -struct VImageCopier<_Float16, float> { - static bool supported() noexcept { - return true; - } +std::vector get_layouts(const std::vector& arrays) { + std::vector result; + result.reserve(arrays.size()); - static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept { - vImageConvert_Planar16FtoPlanarF(src_vi_buffer, dst_vi_buffer, kvImageDoNotTile); - } -}; - -template -struct MultiArrayVImageCopier { - static bool copy(TypedMultiArray& src, TypedMultiArray& dst) { - if (!VImageCopier::supported()) { - return false; - } - - vImage_Buffer src_vi_buffer; - vImage_Buffer dst_vi_buffer; - if (!init_vi_Buffer(src.layout, &src_vi_buffer, sizeof(T1))) { - return false; - } - - if (!init_vi_Buffer(dst.layout, &dst_vi_buffer, sizeof(T2))) { - return false; - } - - VImageCopier::copy(&src_vi_buffer, &dst_vi_buffer); - return true; - } -}; - -#pragma mark - VDSPCopier - -template -struct VDSPCopier { - static bool supported() noexcept { - return false; - } + std::transform(arrays.begin(), arrays.end(), std::back_inserter(result), [](const auto& array) { + return array.layout(); + }); - static void copy(const T1 *src_data, T2 
*dst_data, size_t num_elements) noexcept {} -}; + return result; +} -// Double -> Float -template<> -struct VDSPCopier { - static bool supported() noexcept { - return true; - } +std::vector get_datas(const std::vector& arrays) { + std::vector result; + result.reserve(arrays.size()); - static void copy(const double *src_data, float *dst_data, size_t num_elements) noexcept { - vDSP_vdpsp(src_data, 1, dst_data, 1, num_elements); - } -}; - -// Float -> Double -template<> -struct VDSPCopier { - static bool supported() noexcept { - return true; - } + std::transform(arrays.begin(), arrays.end(), std::back_inserter(result), [](const auto& array) { + return array.data(); + }); - static void copy(const float *src_data, double *dst_data, size_t num_elements) noexcept { - vDSP_vspdp(src_data, 1, dst_data, 1, num_elements); - } -}; + return result; +} -// Float -> Int32 -template<> -struct VDSPCopier { - static bool supported() noexcept { +// We can coalesce two adjacent dimensions if either dim has size 1 or if `shape[n] * stride[n] == stride[n + 1]`. +bool can_coalesce_dimensions(const std::vector& shape, + const std::vector& strides, + size_t dim1, + size_t dim2) { + auto shape1 = shape[dim1]; + auto shape2 = shape[dim2]; + if (shape1 == 1 || shape2 == 1) { return true; } - static void copy(const float *src_data, int32_t *dst_data, size_t num_elements) noexcept { - vDSP_vfix32(src_data, 1, dst_data, 1, num_elements); - } -}; + auto stride1 = strides[dim1]; + auto stride2 = strides[dim2]; + return shape1 * stride1 == stride2; +} -// Int32 -> Double -template<> -struct VDSPCopier { - static bool supported() noexcept { - return true; +bool can_coalesce_dimensions(const std::vector& shape, + const std::vector>& all_strides, + size_t dim1, + size_t dim2) { + for (const auto& strides : all_strides) { + if (!::can_coalesce_dimensions(shape, strides, dim1, dim2)) { + return false; + } } - static void copy(const int32_t *src_data, double *dst_data, size_t num_elements) noexcept { - vDSP_vflt32D(src_data, 1, dst_data, 1, num_elements); - } -}; + return true; +} -// Int32 -> Float -template<> -struct VDSPCopier { - static bool supported() noexcept { - return true; - } - - static void copy(const int32_t *src_data, float *dst_data, size_t num_elements) noexcept { - vDSP_vflt32(src_data, 1, dst_data, 1, num_elements); +void update_strides(std::vector>& all_strides, + size_t dim1, + size_t dim2) { + for (auto& strides : all_strides) { + strides[dim1] = strides[dim2]; } -}; +} -template -struct MultiArrayVDSPCopier { - static bool copy(TypedMultiArray& src, TypedMultiArray& dst) { - if (!VDSPCopier::supported()) { - return false; - } - - if (!src.layout.is_packed() || !dst.layout.is_packed()) { - return false; +std::vector coalesce_dimensions(std::vector layouts) { + if (layouts.size() == 0) { + return {}; + } + + std::vector shape = layouts.back().shape(); + // reverse shape. + std::reverse(shape.begin(), shape.end()); + std::vector> all_strides; + // reverse strides. 
+ all_strides.reserve(layouts.size()); + std::transform(layouts.begin(), layouts.end(), std::back_inserter(all_strides), [](const MultiArray::MemoryLayout& layout) { + auto strides = layout.strides(); + std::reverse(strides.begin(), strides.end()); + return strides; + }); + size_t rank = layouts[0].rank(); + size_t prev_dim = 0; + for (size_t dim = 1; dim < rank; ++dim) { + if (::can_coalesce_dimensions(shape, all_strides, prev_dim, dim)) { + if (shape[prev_dim] == 1) { + ::update_strides(all_strides, prev_dim, dim); + } + shape[prev_dim] *= shape[dim]; + } else { + ++prev_dim; + if (prev_dim != dim) { + ::update_strides(all_strides, prev_dim, dim); + shape[prev_dim] = shape[dim]; + } } - - VDSPCopier::copy(src.data, dst.data, src.layout.get_num_elements()); - return true; } -}; - -#pragma mark - MemCopy - -template -struct MemCopier { - static bool supported() noexcept { - return false; + + if (rank == prev_dim + 1) { + return layouts; } - static void copy(const T1 *src_data, T2 *dst_data, size_t num_elements) noexcept {} -}; - -template -struct MemCopier { - static bool supported() noexcept { - return true; + shape.resize(prev_dim + 1); + for (auto& strides : all_strides) { + strides.resize(prev_dim + 1); } - static void copy(const T *src_data, T *dst_data, size_t num_elements) noexcept { - std::memcpy(dst_data, src_data, num_elements); + std::vector result; + result.reserve(layouts.size()); + std::reverse(shape.begin(), shape.end()); + for (size_t i = 0; i < layouts.size(); ++i) { + std::reverse(all_strides[i].begin(), all_strides[i].end()); + result.emplace_back(layouts[i].dataType(), shape, std::move(all_strides[i])); } + + return result; +} + +enum class Direction : uint8_t { + Forward = 0, + Backward }; -template -struct MultiArrayMemCopier { - static bool copy(TypedMultiArray& src, TypedMultiArray& dst) { - if (!MemCopier::supported()) { - return false; - } - - if (!src.layout.is_packed() || !dst.layout.is_packed()) { - return false; +void set_data_pointers(std::vector& data_pointers, + ssize_t index, + size_t dim, + Direction direction, + const std::vector& layouts) { + for (size_t i = 0; i < layouts.size(); ++i) { + const auto& layout = layouts[i]; + const ssize_t stride = layout.strides()[dim]; + const size_t num_bytes = layout.num_bytes(); + ssize_t offset = 0; + switch (direction) { + case Direction::Forward: { + offset = stride * index * num_bytes; + break; + } + case Direction::Backward: { + offset = - stride * index * num_bytes; + break; + } } - - MemCopier::copy(src.data, dst.data, src.layout.get_num_elements()); - return true; + data_pointers[i] = (void *)(static_cast(data_pointers[i]) + offset); } -}; +} + +void increment_data_pointers(std::vector& data_pointers, + size_t index, + size_t dim, + const std::vector& layouts) { + set_data_pointers(data_pointers, index, dim, Direction::Forward, layouts); +} -#pragma mark - MultiArrayIterator -/// TODO - remove recursion and coalesce contiguous dimensions. 
-template -struct MultiArrayIterator { - explicit MultiArrayIterator(TypedMultiArray& array1, TypedMultiArray& array2) - :array1(array1), array2(array2) +void decrement_data_pointers(std::vector& data_pointers, + size_t index, + size_t dim, + const std::vector& layouts) { + set_data_pointers(data_pointers, index, dim, Direction::Backward, layouts); +} + +class MultiArrayIterator final { +public: + explicit MultiArrayIterator(const std::vector& arrays) + :datas_(get_datas(arrays)), + layouts_(coalesce_dimensions(get_layouts(arrays))) {} +private: template - void loop(FN&& fn, T1 *data1, T2 *data2, size_t dim) { - const size_t index = dim - 1; - const auto& layout1 = array1.layout; - const auto& layout2 = array2.layout; - const ssize_t stride1 = layout1.strides()[index]; - const ssize_t stride2 = layout2.strides()[index]; - const size_t bound = layout1.shape()[index]; - - if (index == 0) { - for (size_t i = 0; i < bound; i++) { - if (fn(data1 + stride1 * i, data2 + stride2 * i)) { - break; + void exec(FN&& fn, const std::vector& layouts, std::vector datas, size_t n) { + const auto& layout = layouts.back(); + // Avoid function call for rank <= 2. + switch (n) { + case 0: { + break; + } + case 1: { + for (size_t i = 0; i < layout.shape()[0]; ++i) { + ::increment_data_pointers(datas, i, 0, layouts); + fn(datas); + ::decrement_data_pointers(datas, i, 0, layouts); + } + break; + } + case 2: { + for (size_t i = 0; i < layout.shape()[1]; ++i) { + ::increment_data_pointers(datas, i, 1, layouts); + for (size_t j = 0; j < layout.shape()[0]; ++j) { + ::increment_data_pointers(datas, j, 0, layouts); + fn(datas); + ::decrement_data_pointers(datas, j, 0, layouts); + } + ::decrement_data_pointers(datas, i, 1, layouts); + } + + break; + } + + default: { + const size_t bound = layouts.back().shape()[n - 1]; + for (size_t index = 0; index < bound; ++index) { + ::increment_data_pointers(datas, index, n - 1, layouts); + exec(std::forward(fn), layouts, datas, n - 1); + ::decrement_data_pointers(datas, index, n - 1, layouts); } } - return; - } - - for (size_t i = 0; i < bound; i++) { - loop(fn, data1 + stride1 * i, data2 + stride2 * i, dim - 1); } } +public: template - void loop(FN&& fn) { - loop(fn, array1.data, array2.data, array1.layout.rank()); + void exec(FN&& fn) { + std::vector datas = datas_; + exec(fn, layouts_, datas, layouts_[0].rank()); } - TypedMultiArray array1; - TypedMultiArray array2; +private: + std::vector datas_; + std::vector layouts_; }; +/// BNNS has no double type, so we handle the conversions here. 
 template <typename T1, typename T2>
-struct MultiArrayLoopingCopier {
-    static bool copy(TypedMultiArray<T1>& src, TypedMultiArray<T2>& dst) {
-        auto looper = MultiArrayIterator<T1, T2>(src, dst);
-        looper.loop([](T1 *src, T2 *dst){
-            *dst = static_cast<T2>(*src);
-            return true;
-        });
-
-        return true;
-    }
-};
+inline void copy_value(void *dst, const void *src) {
+    const T2 *src_ptr = static_cast<const T2 *>(src);
+    T1 *dst_ptr = static_cast<T1 *>(dst);
+    *dst_ptr = static_cast<T1>(*src_ptr);
+}
 
-template <typename T1, typename T2>
-struct MultiArrayCopier {
-    static bool copy(TypedMultiArray<T1>& src, TypedMultiArray<T2>& dst) {
-        if (src.layout.shape() != dst.layout.shape()) {
-            return false;
+template <typename T>
+void copy(void *dst,
+          MultiArray::DataType dst_data_type,
+          const void *src) {
+    switch (dst_data_type) {
+        case MultiArray::DataType::Bool: {
+            ::copy_value<bool, T>(dst, src);
+            break;
+        }
+
+        case MultiArray::DataType::Byte: {
+            ::copy_value<uint8_t, T>(dst, src);
+            break;
+        }
+
+        case MultiArray::DataType::Char: {
+            ::copy_value<int8_t, T>(dst, src);
+            break;
+        }
+
+        case MultiArray::DataType::Short: {
+            ::copy_value<int16_t, T>(dst, src);
+            break;
         }
-
-        if (src.layout.get_num_elements() == 0) {
-            return true;
+
+        case MultiArray::DataType::Int32: {
+            ::copy_value<int32_t, T>(dst, src);
+            break;
         }
-
-        if (MultiArrayBNNSCopier<T1, T2>::copy(src, dst)) {
-            return true;
+
+        case MultiArray::DataType::Int64: {
+            ::copy_value<int64_t, T>(dst, src);
+            break;
         }
-
-        if (MultiArrayVImageCopier<T1, T2>::copy(src, dst)) {
-            return true;
+
+        case MultiArray::DataType::Float16: {
+            ::copy_value<_Float16, T>(dst, src);
+            break;
         }
-
-        if (MultiArrayVDSPCopier<T1, T2>::copy(src, dst)) {
-            return true;
+
+        case MultiArray::DataType::Float32: {
+            ::copy_value<float, T>(dst, src);
+            break;
         }
-
-        if (MultiArrayMemCopier<T1, T2>::copy(src, dst)) {
-            return true;
+
+        case MultiArray::DataType::Float64: {
+            ::copy_value<double, T>(dst, src);
+            break;
         }
-
-        return MultiArrayLoopingCopier<T1, T2>::copy(src, dst);
     }
-};
+}
 
-template <typename T>
-bool copy(TypedMultiArray<T>& src, MultiArray& dst) {
-    const auto& dstLayout = dst.layout();
-    switch (dstLayout.dataType()) {
-        case MultiArray::DataType::Int: {
-            auto dst_array = TypedMultiArray<int32_t>(reinterpret_cast<int32_t *>(dst.data()), dstLayout);
-            return MultiArrayCopier<T, int32_t>::copy(src, dst_array);
+void copy(void *dst,
+          MultiArray::DataType dst_data_type,
+          const void *src,
+          MultiArray::DataType src_data_type) {
+    switch (src_data_type) {
+        case MultiArray::DataType::Bool: {
+            ::copy<bool>(dst, dst_data_type, src);
+            break;
+        }
+
+        case MultiArray::DataType::Byte: {
+            ::copy<uint8_t>(dst, dst_data_type, src);
+            break;
+        }
+
+        case MultiArray::DataType::Char: {
+            ::copy<int8_t>(dst, dst_data_type, src);
+            break;
+        }
+
+        case MultiArray::DataType::Short: {
+            ::copy<int16_t>(dst, dst_data_type, src);
+            break;
+        }
+
+        case MultiArray::DataType::Int32: {
+            ::copy<int32_t>(dst, dst_data_type, src);
+            break;
+        }
+
+        case MultiArray::DataType::Int64: {
+            ::copy<int64_t>(dst, dst_data_type, src);
+            break;
         }
 
         case MultiArray::DataType::Float16: {
-            auto dst_array = TypedMultiArray<_Float16>(reinterpret_cast<_Float16 *>(dst.data()), dstLayout);
-            return MultiArrayCopier<T, _Float16>::copy(src, dst_array);
+            ::copy<_Float16>(dst, dst_data_type, src);
+            break;
         }
 
-        case MultiArray::DataType::Float: {
-            auto dst_array = TypedMultiArray<float>(reinterpret_cast<float *>(dst.data()), dstLayout);
-            return MultiArrayCopier<T, float>::copy(src, dst_array);
+        case MultiArray::DataType::Float32: {
+            ::copy<float>(dst, dst_data_type, src);
+            break;
         }
 
-        case MultiArray::DataType::Double: {
-            auto dst_array = TypedMultiArray<double>(reinterpret_cast<double *>(dst.data()), dstLayout);
-            return MultiArrayCopier<T, double>::copy(src, dst_array);
+        case MultiArray::DataType::Float64: {
+            ::copy<double>(dst, dst_data_type, src);
+            break;
         }
     }
 }
-} //namespace
 
+void copy(const MultiArray& src, MultiArray& dst, MultiArray::CopyOptions options) {
+    if (options.use_bnns && copy_using_bnns(src, dst)) {
+        return;
+    }
+
+    if (options.use_memcpy &&
+        src.layout().dataType() == dst.layout().dataType() &&
+        src.layout().is_packed() &&
+        dst.layout().is_packed()) {
+        std::memcpy(dst.data(), src.data(), src.layout().num_elements() * src.layout().num_bytes());
+        return;
+    }
+
+    auto iterator = MultiArrayIterator({src, dst});
+    iterator.exec([&](const std::vector<void*>& datas){
+        void *src_data = datas[0];
+        void *dst_data = datas[1];
+        ::copy(dst_data, dst.layout().dataType(), src_data, src.layout().dataType());
+    });
+}
+
+ssize_t get_data_offset(const std::vector<size_t>& indices, const std::vector<ssize_t>& strides) {
+    ssize_t offset = 0;
+    for (size_t i = 0; i < indices.size(); ++i) {
+        offset += static_cast<ssize_t>(indices[i]) * strides[i];
+    }
+
+    return offset;
+}
+
+ssize_t get_data_offset(size_t index, const std::vector<size_t>& shape, const std::vector<ssize_t>& strides) {
+    size_t div = std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies<size_t>());
+    size_t offset = 0;
+    for (size_t i = 0; i < shape.size(); ++i) {
+        div /= shape[i];
+        size_t dim_index = index / div;
+        offset += dim_index * strides[i];
+        index %= div;
+    }
+
+    return offset;
+}
+} // namespace
 
 namespace executorchcoreml {
 
-size_t MultiArray::MemoryLayout::get_num_elements() const noexcept {
+size_t MultiArray::MemoryLayout::num_elements() const noexcept {
     if (shape_.size() == 0) {
         return 0;
     }
@@ -553,32 +532,101 @@ bool copy(TypedMultiArray<T>& src, MultiArray& dst) {
             return false;
         }
         expectedStride = expectedStride * (*shapeIt);
+        stridesIt++;
     }
     return true;
 }
 
-bool MultiArray::copy(MultiArray& dst) const noexcept {
-    switch (layout().dataType()) {
-        case MultiArray::DataType::Int: {
-            auto src = TypedMultiArray<int32_t>(reinterpret_cast<int32_t *>(data()), layout());
-            return ::copy(src, dst);
+size_t MultiArray::MemoryLayout::num_bytes() const noexcept {
+    switch (dataType()) {
+        case MultiArray::DataType::Bool: {
+            return 1;
+        }
+        case MultiArray::DataType::Byte: {
+            return 1;
+        }
+        case MultiArray::DataType::Char: {
+            return 1;
+        }
+        case MultiArray::DataType::Short: {
+            return 2;
+        }
+        case MultiArray::DataType::Int32: {
+            return 4;
+        }
+        case MultiArray::DataType::Int64: {
+            return 8;
         }
-
         case MultiArray::DataType::Float16: {
-            auto src = TypedMultiArray<_Float16>(reinterpret_cast<_Float16 *>(data()), layout());
-            return ::copy(src, dst);
+            return 2;
         }
-
-        case MultiArray::DataType::Float: {
-            auto src = TypedMultiArray<float>(reinterpret_cast<float *>(data()), layout());
-            return ::copy(src, dst);
+        case MultiArray::DataType::Float32: {
+            return 4;
         }
-
-        case MultiArray::DataType::Double: {
-            auto src = TypedMultiArray<double>(reinterpret_cast<double *>(data()), layout());
-            return ::copy(src, dst);
+        case MultiArray::DataType::Float64: {
+            return 8;
+        }
+    }
+}
+
+void MultiArray::copy(MultiArray& dst, CopyOptions options) const noexcept {
+    assert(layout().shape() == dst.layout().shape());
+    ::copy(*this, dst, options);
+}
+
+std::optional<MLMultiArrayDataType> to_ml_multiarray_data_type(MultiArray::DataType data_type) {
+    switch (data_type) {
+        case MultiArray::DataType::Float16: {
+            return MLMultiArrayDataTypeFloat16;
+        }
+        case MultiArray::DataType::Float32: {
+            return MLMultiArrayDataTypeFloat32;
+        }
+        case MultiArray::DataType::Float64: {
+            return MLMultiArrayDataTypeDouble;
+        }
+        case MultiArray::DataType::Int32: {
+            return MLMultiArrayDataTypeInt32;
+        }
+        default: {
+            return std::nullopt;
+        }
+    }
+}
+
+std::optional<MultiArray::DataType> to_multiarray_data_type(MLMultiArrayDataType data_type) {
+    switch (data_type) {
+        case MLMultiArrayDataTypeFloat16: {
+            return MultiArray::DataType::Float16;
+        }
+        case MLMultiArrayDataTypeFloat32: {
+            return MultiArray::DataType::Float32;
+        }
+        case MLMultiArrayDataTypeFloat64: {
+            return MultiArray::DataType::Float64;
+        }
+        case MLMultiArrayDataTypeInt32: {
+            return MultiArray::DataType::Int32;
+        }
+        default: {
+            return std::nullopt;
+        }
+    }
+}
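+
+/// Both accessors below turn a logical position into an element offset using the
+/// layout's strides; e.g. shape {2, 3} with strides {4, 1} maps indices {1, 2} to
+/// offset 1 * 4 + 2 * 1 = 6, which is then scaled by num_bytes().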
+void *MultiArray::data(const std::vector<size_t>& indices) const noexcept {
+    assert(indices.size() == layout().shape().size());
+    uint8_t *ptr = static_cast<uint8_t *>(data());
+    ssize_t offset = ::get_data_offset(indices, layout().strides());
+    return ptr + offset * layout().num_bytes();
+}
+
+void *MultiArray::data(size_t index) const noexcept {
+    assert(index < layout().num_elements());
+    uint8_t *ptr = static_cast<uint8_t *>(data());
+    ssize_t offset = ::get_data_offset(index, layout().shape(), layout().strides());
+    return ptr + offset * layout().num_bytes();
+}
+
 } // namespace executorchcoreml
diff --git a/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.h b/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.h
index 51204e34387..4048dae5fea 100644
--- a/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.h
+++ b/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.h
@@ -48,6 +48,9 @@ __attribute__((objc_subclassing_restricted))
 /// The model.
 @property (readonly, strong, nonatomic) ETCoreMLModel* model;
 
+/// If set to `YES`, output backings are ignored.
+@property (readwrite, atomic) BOOL ignoreOutputBackings;
+
 @end
 
 NS_ASSUME_NONNULL_END
diff --git a/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.mm b/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.mm
index e7f05662d28..57212445e55 100644
--- a/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.mm
+++ b/backends/apple/coreml/runtime/sdk/ETCoreMLModelAnalyzer.mm
@@ -170,6 +170,10 @@ - (nullable instancetype)initWithCompiledModelAsset:(ETCoreMLAsset *)compiledMod
                    loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
                       eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
                             error:(NSError * __autoreleasing *)error {
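+    // An empty dictionary opts out of caller-provided output buffers; Core ML then
+    // allocates the outputs itself.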
+    if (self.ignoreOutputBackings) {
+        predictionOptions.outputBackings = @{};
+    }
+
     NSError *localError = nil;
     NSArray *outputs = nil;
     if (loggingOptions.log_profiling_info) {
diff --git a/backends/apple/coreml/runtime/test/BackendDelegateTests.mm b/backends/apple/coreml/runtime/test/BackendDelegateTests.mm
index c74cb564495..6f0e3cff31f 100644
--- a/backends/apple/coreml/runtime/test/BackendDelegateTests.mm
+++ b/backends/apple/coreml/runtime/test/BackendDelegateTests.mm
@@ -14,69 +14,32 @@
 #import
 #import
 #import
+#import
 
 using namespace executorchcoreml;
 
 namespace {
-template <typename T>
-T toValue(NSNumber *value);
-
-template<>
-size_t toValue<size_t>(NSNumber *value) {
-    return value.unsignedLongLongValue;
-}
-
-template<>
-ssize_t toValue<ssize_t>(NSNumber *value) {
-    return value.longLongValue;
-}
-
-template <typename T>
-std::vector<T> toVector(NSArray *values) {
-    std::vector<T> result;
-    result.reserve(values.count);
-    for (NSNumber *value in values) {
-        result.emplace_back(toValue<T>(value));
-    }
-
-    return result;
-}
-
-MultiArray::DataType toDataType(MLMultiArrayDataType dataType) {
-    switch (dataType) {
-        case MLMultiArrayDataTypeFloat: {
-            return MultiArray::DataType::Float;
-        }
-        case MLMultiArrayDataTypeFloat16: {
-            return MultiArray::DataType::Float16;
-        }
-        case MLMultiArrayDataTypeDouble: {
-            return MultiArray::DataType::Double;
-        }
-        case MLMultiArrayDataTypeInt32: {
-            return MultiArray::DataType::Int;
-        }
-    }
-}
-
-MultiArray toMultiArray(MLMultiArray *mlMultiArray) {
-    auto shape = toVector<size_t>(mlMultiArray.shape);
-    auto strides = toVector<ssize_t>(mlMultiArray.strides);
-    auto layout = MultiArray::MemoryLayout(toDataType(mlMultiArray.dataType), std::move(shape), std::move(strides));
+MultiArray to_multiarray(MLMultiArray *ml_multiarray) {
+    auto shape = to_vector<size_t>(ml_multiarray.shape);
+    auto strides = to_vector<ssize_t>(ml_multiarray.strides);
+    auto layout = MultiArray::MemoryLayout(to_multiarray_data_type(ml_multiarray.dataType).value(),
+                                           std::move(shape),
+                                           std::move(strides));
     __block void *bytes = nullptr;
-    [mlMultiArray getMutableBytesWithHandler:^(void *mutableBytes, __unused NSInteger size, __unused NSArray *strides) {
+    [ml_multiarray getMutableBytesWithHandler:^(void *mutableBytes, __unused NSInteger size, __unused NSArray *strides) {
         bytes = mutableBytes;
     }];
 
     return MultiArray(bytes, std::move(layout));
 }
 
-std::vector<MultiArray> toMultiArrays(NSArray *mlMultiArrays) {
+std::vector<MultiArray> to_multiarrays(NSArray *ml_multiarrays) {
     std::vector<MultiArray> result;
-    result.reserve(mlMultiArrays.count);
+    result.reserve(ml_multiarrays.count);
 
-    for (MLMultiArray *mlMultiArray in mlMultiArrays) {
-        result.emplace_back(toMultiArray(mlMultiArray));
+    for (MLMultiArray *ml_multiarray in ml_multiarrays) {
+        result.emplace_back(to_multiarray(ml_multiarray));
     }
 
     return result;
 }
@@ -198,7 +161,7 @@ - (void)testAddModelExecution {
     NSArray *args = [inputs arrayByAddingObject:output];
     std::error_code errorCode;
     XCTAssertTrue(_delegate->execute(handle,
-                                     toMultiArrays(args),
+                                     to_multiarrays(args),
                                      ModelLoggingOptions(),
                                      nullptr,
                                      errorCode));
@@ -223,7 +186,7 @@ - (void)testMulModelExecution {
     NSArray *args = [inputs arrayByAddingObject:output];
     std::error_code errorCode;
     XCTAssertTrue(_delegate->execute(handle,
-                                     toMultiArrays(args),
+                                     to_multiarrays(args),
                                      ModelLoggingOptions(),
                                      nullptr,
                                      errorCode));
diff --git a/backends/apple/coreml/runtime/test/CoreMLBackendDelegateTests.mm b/backends/apple/coreml/runtime/test/CoreMLBackendDelegateTests.mm
index 13f8343adf2..94b862d8424 100644
--- a/backends/apple/coreml/runtime/test/CoreMLBackendDelegateTests.mm
+++ b/backends/apple/coreml/runtime/test/CoreMLBackendDelegateTests.mm
@@ -15,7 +15,7 @@
 #import
 #import
 
-static constexpr size_t kRuntimeMemorySize = 10 * 1024U * 1024U; // 10 MB
+static constexpr size_t kRuntimeMemorySize = 50 * 1024U * 1024U; // 50 MB
 
 using namespace torch::executor;
 using torch::executor::testing::TensorFactory;
@@ -104,7 +104,7 @@
             ET_LOG(Info, "Skipping non-tensor input %zu", i);
             continue;
         }
-        Buffer buffer(tensor_meta->nbytes(), 1);
+        Buffer buffer(tensor_meta->nbytes(), 0);
         auto sizes = tensor_meta->sizes();
         exec_aten::TensorImpl tensor_impl(tensor_meta->scalar_type(), std::size(sizes), const_cast<exec_aten::SizesType *>(sizes.data()), buffer.data());
         exec_aten::Tensor tensor(&tensor_impl);
@@ -155,8 +155,8 @@ - (void)testProgramLoad {
     XCTAssert(method.ok());
 }
 
-- (void)executeModelAtURL:(NSURL *)modelURL nTimes:(NSUInteger)nTimes {
-    for (NSUInteger i = 0; i < nTimes; i++) {
+- (void)executeModelAtURL:(NSURL *)modelURL nLoads:(NSUInteger)nLoads nExecutions:(NSUInteger)nExecutions {
+    for (NSUInteger i = 0; i < nLoads; ++i) {
         auto loader = std::make_unique(modelURL.path.UTF8String);
         auto program = get_program(loader.get());
         XCTAssert(program != nullptr);
@@ -165,41 +165,44 @@ - (void)executeModelAtURL:(NSURL *)modelURL nTimes:(NSUInteger)nTimes {
         auto plannedBuffers = get_planned_buffers(methodName.get(), program.get());
         XCTAssert(plannedBuffers.ok());
         Buffer methodBuffer(kRuntimeMemorySize, 0);
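+        // methodAllocator backs the runtime's per-method allocations; storage for the
+        // memory-planned tensors comes from the program's planned buffers below.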
-        MemoryAllocator methodAllocator(static_cast<int32_t>(methodBuffer.size()), methodBuffer.data());
+        __block MemoryAllocator methodAllocator(static_cast<int32_t>(methodBuffer.size()), methodBuffer.data());
         auto spans = to_spans(plannedBuffers.get());
         HierarchicalAllocator plannedAllocator({spans.data(), spans.size()});
         MemoryManager memoryManger(&methodAllocator, &plannedAllocator);
-        auto method = program->load_method(methodName.get().c_str(), &memoryManger);
+        __block auto method = program->load_method(methodName.get().c_str(), &memoryManger);
         XCTAssert(method.ok());
         auto inputs = ::prepare_input_tensors(method.get());
-        auto status = method->execute();
-        XCTAssertEqual(status, Error::Ok);
         auto outputs = methodAllocator.allocateList<EValue>(method->outputs_size());
-        status = method->get_outputs(outputs, method->outputs_size());
-        XCTAssertEqual(status, Error::Ok);
+        for (NSUInteger j = 0; j < nExecutions; ++j) {
+            auto status = method->execute();
+            XCTAssertEqual(status, Error::Ok);
+            status = method->get_outputs(outputs, method->outputs_size());
+            XCTAssertEqual(status, Error::Ok);
+        }
     }
 }
 
 - (void)testAddProgramExecute {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"pte"];
     XCTAssertNotNil(modelURL);
-    [self executeModelAtURL:modelURL nTimes:10];
+    [self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
 }
 
 - (void)testMulProgramExecute {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"pte"];
     XCTAssertNotNil(modelURL);
-    [self executeModelAtURL:modelURL nTimes:10];
+    [self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
 }
 
 - (void)testMV3ProgramExecute {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"mv3_coreml_all" extension:@"pte"];
     XCTAssertNotNil(modelURL);
-    [self executeModelAtURL:modelURL nTimes:10];
+    [self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
 }
 
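+// Loads and runs every model concurrently on the default-priority global queue,
+// fulfilling one XCTestExpectation per model URL.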
 - (void)executeMultipleModelsConcurrently:(NSArray *)modelURLs
-                                   nTimes:(NSUInteger)nTimes
+                                   nLoads:(NSUInteger)nLoads
+                              nExecutions:(NSUInteger)nExecutions
                                   timeout:(NSTimeInterval)timeout {
     NSMutableArray *expectations = [NSMutableArray arrayWithCapacity:modelURLs.count];
     dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
@@ -208,7 +211,7 @@ - (void)executeMultipleModelsConcurrently:(NSArray *)modelURLs
         XCTestExpectation *expectation = [[XCTestExpectation alloc] initWithDescription:description];
         [expectations addObject:expectation];
         dispatch_async(queue, ^{
-            [self executeModelAtURL:modelURL nTimes:nTimes];
+            [self executeModelAtURL:modelURL nLoads:nLoads nExecutions:nExecutions];
             [expectation fulfill];
         });
     }
@@ -221,7 +224,8 @@ - (void)testMultipleModelExecutionConcurrently {
     NSURL *modelURL2 = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"pte"];
     NSURL *modelURL3 = [[self class] bundledResourceWithName:@"mv3_coreml_all" extension:@"pte"];
     [self executeMultipleModelsConcurrently:@[modelURL1, modelURL2, modelURL3]
-                                     nTimes:10
+                                     nLoads:5
+                                nExecutions:2
                                     timeout:5 * 60];
 }
 
@@ -229,7 +233,8 @@ - (void)testSameModelExecutionConcurrently {
     NSURL *modelURL1 = [[self class] bundledResourceWithName:@"mv3_coreml_all" extension:@"pte"];
     NSURL *modelURL2 = [[self class] bundledResourceWithName:@"mv3_coreml_all" extension:@"pte"];
     [self executeMultipleModelsConcurrently:@[modelURL1, modelURL2]
-                                     nTimes:10
+                                     nLoads:5
+                                nExecutions:2
                                     timeout:5 * 60];
 }
 
diff --git a/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm b/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
index d20d292cf69..8ad712497ea 100644
--- a/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
+++ b/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
@@ -115,7 +115,7 @@ - (void)testAddModelExecution {
     NSArray *args = [inputs arrayByAddingObject:output];
     XCTAssertTrue([self.modelManager executeModelWithHandle:handle
                                                        args:args
-                                              loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                             loggingOptions:executorchcoreml::ModelLoggingOptions()
                                                 eventLogger:nullptr
                                                       error:&localError]);
     for (NSUInteger i = 0; i < output.count; i++) {
diff --git a/backends/apple/coreml/runtime/test/MultiArrayTests.mm b/backends/apple/coreml/runtime/test/MultiArrayTests.mm
new file mode 100644
index 00000000000..895702ae154
--- /dev/null
+++ b/backends/apple/coreml/runtime/test/MultiArrayTests.mm
@@ -0,0 +1,133 @@
+//
+//  MultiArrayTests.mm
+//
+// Copyright © 2024 Apple Inc. All rights reserved.
+//
+// Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+#import
+#import
+#import
+
+#import
+
+using namespace executorchcoreml;
+
+namespace {
+size_t get_buffer_size(const std::vector<size_t>& shape, const std::vector<ssize_t>& strides) {
+    auto max_stride_it = std::max_element(strides.begin(), strides.end());
+    size_t max_stride_axis = static_cast<size_t>(std::distance(strides.begin(), max_stride_it));
+    size_t dimension_with_max_stride = shape[max_stride_axis];
+    return dimension_with_max_stride * (*max_stride_it);
+}
+
+template <typename T>
+MultiArray::DataType get_multiarray_data_type();
+
+template<> MultiArray::DataType get_multiarray_data_type<float>() {
+    return MultiArray::DataType::Float32;
+}
+
+template<> MultiArray::DataType get_multiarray_data_type<double>() {
+    return MultiArray::DataType::Float64;
+}
+
+template<> MultiArray::DataType get_multiarray_data_type<int64_t>() {
+    return MultiArray::DataType::Int64;
+}
+
+template<> MultiArray::DataType get_multiarray_data_type<int32_t>() {
+    return MultiArray::DataType::Int32;
+}
+
+template<> MultiArray::DataType get_multiarray_data_type<int16_t>() {
+    return MultiArray::DataType::Short;
+}
+
+template<> MultiArray::DataType get_multiarray_data_type<_Float16>() {
+    return MultiArray::DataType::Float16;
+}
+
+template <typename T1, typename T2>
+void verify_values(const MultiArray& multiarray1, const MultiArray& multiarray2) {
+    for (size_t i = 0; i < multiarray1.layout().num_elements(); ++i) {
+        XCTAssertEqual(multiarray1.value<T1>(i), multiarray2.value<T2>(i));
+    }
+}
+
+template <typename T>
+MultiArray make_multi_array(const std::vector<size_t>& shape, const std::vector<ssize_t>& strides, std::vector<uint8_t>& storage) {
+    storage.resize(get_buffer_size(shape, strides) * sizeof(T), 0);
+    MultiArray::MemoryLayout layout(get_multiarray_data_type<T>(), shape, strides);
+    return MultiArray(storage.data(), std::move(layout));
+}
+
+template <typename T>
+MultiArray make_multi_array_and_fill(const std::vector<size_t>& shape, const std::vector<ssize_t>& strides, std::vector<uint8_t>& storage) {
+    auto result = make_multi_array<T>(shape, strides, storage);
+    for (size_t i = 0; i < result.layout().num_elements(); ++i) {
+        T value = static_cast<T>(i);
+        result.set_value(i, value);
+    }
+
+    return result;
+}
+
+template <typename T1, typename T2>
+void verify_copy_(const std::vector<size_t>& shape,
+                  const std::vector<ssize_t>& src_strides,
+                  const std::vector<ssize_t>& dst_strides) {
+    std::vector<uint8_t> src_storage;
+    auto src_multiarray = make_multi_array_and_fill<T1>(shape, src_strides, src_storage);
+
+    std::vector<uint8_t> dst_storage;
+    auto dst_multiarray = make_multi_array<T2>(shape, dst_strides, dst_storage);
+    src_multiarray.copy(dst_multiarray, MultiArray::CopyOptions(true, false));
+    verify_values<T1, T2>(src_multiarray, dst_multiarray);
+
+    dst_storage.clear();
+    dst_storage.resize(get_buffer_size(shape, dst_strides) * sizeof(T2), 0);
+    src_multiarray.copy(dst_multiarray, MultiArray::CopyOptions(false, false));
+    verify_values<T1, T2>(src_multiarray, dst_multiarray);
+}
+
+template <typename T1, typename T2>
+void verify_copy(const std::vector<size_t>& shape,
+                 const std::vector<ssize_t>& src_strides,
+                 const std::vector<ssize_t>& dst_strides) {
+    verify_copy_<T1, T2>(shape, src_strides, dst_strides);
+    verify_copy_<T2, T1>(shape, src_strides, dst_strides);
+}
+} //namespace
+
+@interface MultiArrayTests : XCTestCase
+
+@end
+
+@implementation MultiArrayTests
+
+- (void)verifyDataCopyWithShape:(const std::vector<size_t>&)shape
+                     srcStrides:(const std::vector<ssize_t>&)srcStrides
+                     dstStrides:(const std::vector<ssize_t>&)dstStrides {
+    verify_copy(shape, srcStrides, dstStrides);
+    verify_copy(shape, srcStrides, dstStrides);
+    verify_copy(shape, srcStrides, dstStrides);
+    verify_copy(shape, srcStrides, srcStrides);
+    verify_copy(shape, srcStrides, dstStrides);
+    verify_copy(shape, srcStrides, srcStrides);
+}
+
+- (void)testAdjacentDataCopy {
+    std::vector<size_t> shape = {1, 3, 10, 10};
+    std::vector<ssize_t> strides = {3 * 10 * 10, 10 * 10, 10, 1};
+    [self verifyDataCopyWithShape:shape srcStrides:strides dstStrides:strides];
+}
+
+- (void)testNonAdjacentDataCopy {
+    std::vector<size_t> shape = {1, 3, 10, 10};
+    std::vector<ssize_t> srcStrides = {3 * 10 * 64, 10 * 64, 64, 1};
+    std::vector<ssize_t> dstStrides = {3 * 10 * 10 * 10, 10 * 10 * 10, 100, 10};
+    [self verifyDataCopyWithShape:shape srcStrides:srcStrides dstStrides:dstStrides];
+}
+
+@end
diff --git a/backends/apple/coreml/runtime/util/objc_array_util.h b/backends/apple/coreml/runtime/util/objc_array_util.h
new file mode 100644
index 00000000000..5f4c8c7bc26
--- /dev/null
+++ b/backends/apple/coreml/runtime/util/objc_array_util.h
@@ -0,0 +1,42 @@
+//
+//  objc_array_util.h
+//  util
+//
+// Copyright © 2024 Apple Inc. All rights reserved.
+//
+// Please refer to the license found in the LICENSE file in the root directory of the source tree.
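+//
+// NSNumber <-> integer conversion helpers shared by the Core ML delegate sources and tests.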
+
+#import <Foundation/Foundation.h>
+#import <type_traits>
+#import <vector>
+
+namespace executorchcoreml {
+
+template <typename T> T to_value(NSNumber* value);
+
+template <> inline size_t to_value<size_t>(NSNumber* value) { return value.unsignedLongValue; }
+
+template <> inline ssize_t to_value<ssize_t>(NSNumber* value) { return value.longLongValue; }
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value, T>::type>
+inline NSArray* to_array(const std::vector<T>& array) {
+    NSMutableArray* result = [NSMutableArray arrayWithCapacity:array.size()];
+    for (T value: array) {
+        [result addObject:@(value)];
+    }
+
+    return result;
+}
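+
+// Example: to_array(std::vector<size_t>{1, 3, 224, 224}) returns @[ @1, @3, @224, @224 ].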
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value, T>::type>
+inline std::vector<T> to_vector(NSArray* numbers) {
+    std::vector<T> result;
+    result.reserve(numbers.count);
+    for (NSNumber* number in numbers) {
+        result.emplace_back(to_value<T>(number));
+    }
+
+    return result;
+}
+
+}
diff --git a/backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj b/backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj
index cba1bfab8b0..d8ee4ea693a 100644
--- a/backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj
+++ b/backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj
@@ -101,6 +101,7 @@
 		C9E7D7962AB3F9BF00CCAE5D /* KeyValueStoreTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = C9E7D78E2AB3F9BF00CCAE5D /* KeyValueStoreTests.mm */; };
 		C9E7D7A22AB3FBB200CCAE5D /* CoreMLBackendDelegateTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = C9E7D7A12AB3FBB200CCAE5D /* CoreMLBackendDelegateTests.mm */; };
 		F24817E52BC655E100E80D98 /* libexecutorch_no_prim_ops.a in Frameworks */ = {isa = PBXBuildFile; fileRef = F24817E42BC655E100E80D98 /* libexecutorch_no_prim_ops.a */; };
+		C9EC7E1B2BC73B3200A6B166 /* MultiArrayTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = C9EC7E1A2BC73B3200A6B166 /* MultiArrayTests.mm */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXCopyFilesBuildPhase section */
@@ -299,6 +300,8 @@
 		C9EA3FDE2B73EEA000B7D7BD /* libsqlite3.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libsqlite3.tbd; path = usr/lib/libsqlite3.tbd; sourceTree = SDKROOT; };
 		C9EA3FE52B73EF6300B7D7BD /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
 		F24817E42BC655E100E80D98 /* libexecutorch_no_prim_ops.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libexecutorch_no_prim_ops.a; path = ../libraries/libexecutorch_no_prim_ops.a; sourceTree = "<group>"; };
+		C9EC7E092BC662A300A6B166 /* objc_array_util.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = objc_array_util.h; path = ../util/objc_array_util.h; sourceTree = "<group>"; };
+		C9EC7E1A2BC73B3200A6B166 /* MultiArrayTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; name = MultiArrayTests.mm; path = ../test/MultiArrayTests.mm; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -540,6 +543,7 @@
 			C97716DB2AF44D9A00FC0DAC /* objc_json_serde.h */,
 			C97716DC2AF44E7B00FC0DAC /* objc_json_serde.mm */,
 			C97716DE2AF44FC400FC0DAC /* objc_safe_cast.h */,
+			C9EC7E092BC662A300A6B166 /* objc_array_util.h */,
 		);
 		name = util;
 		sourceTree = "<group>";
@@ -578,6 +582,7 @@
 			C998838C2B96841D000953A3 /* ETCoreMLModelStructurePathTests.mm */,
 			C998838E2B96999F000953A3 /* ETCoreMLModelProfilerTests.mm */,
 			C962271A2B984FB9002D13B7 /* ETCoreMLModelDebuggerTests.mm */,
+			C9EC7E1A2BC73B3200A6B166 /* MultiArrayTests.mm */,
 		);
 		name = test;
 		sourceTree = "<group>";
@@ -728,6 +733,7 @@
 				C945E9372B997EEE009C3FAC /* FeatureTypes.pb.cc in Sources */,
 				C945E9402B997EEE009C3FAC /* OneHotEncoder.pb.cc in Sources */,
 				C94D50E82ABDF81100AF47FD /* key_value_store.cpp in Sources */,
+				C9EC7E1B2BC73B3200A6B166 /* MultiArrayTests.mm in Sources */,
 				C945E9452B997EEE009C3FAC /* BayesianProbitRegressor.pb.cc in Sources */,
 				C945E8E52B997ECE009C3FAC /* ETCoreMLOperationProfilingInfo.mm in Sources */,
 				C945E9312B997EEE009C3FAC /* DataStructures.pb.cc in Sources */,
diff --git a/examples/apple/coreml/executor_runner/coreml_executor_runner.xcodeproj/project.pbxproj b/examples/apple/coreml/executor_runner/coreml_executor_runner.xcodeproj/project.pbxproj
index 9f52b0e1e07..16e9e590027 100644
--- a/examples/apple/coreml/executor_runner/coreml_executor_runner.xcodeproj/project.pbxproj
+++ b/examples/apple/coreml/executor_runner/coreml_executor_runner.xcodeproj/project.pbxproj
@@ -15,6 +15,8 @@
 		C94D51642ACFCBC500AF47FD /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C94D51632ACFCBC500AF47FD /* CoreML.framework */; };
 		C94D51662ACFCBCB00AF47FD /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C94D51652ACFCBCB00AF47FD /* Accelerate.framework */; };
 		C94D51682ACFCC7100AF47FD /* libcoremldelegate.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C94D51672ACFCC7100AF47FD /* libcoremldelegate.a */; };
+		C97BFFA42BC0C17300F55BAC /* libportable_kernels.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C97BFFA32BC0C17300F55BAC /* libportable_kernels.a */; };
+		C97BFFA62BC0C1F200F55BAC /* libportable_ops_lib.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C97BFFA52BC0C1F200F55BAC /* libportable_ops_lib.a */; };
 		C988D69D2B998CDE00979CF6 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C988D69C2B998CD700979CF6 /* libprotobuf-lite.a */; };
 		F24817E72BC65B2000E80D98 /* libexecutorch_no_prim_ops.a in Frameworks */ = {isa = PBXBuildFile; fileRef = F24817E62BC65B2000E80D98 /* libexecutorch_no_prim_ops.a */; };
 /* End PBXBuildFile section */
@@ -41,6 +43,8 @@
 		C94D51632ACFCBC500AF47FD /* CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = System/Library/Frameworks/CoreML.framework; sourceTree = SDKROOT; };
 		C94D51652ACFCBCB00AF47FD /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
 		C94D51672ACFCC7100AF47FD /* libcoremldelegate.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libcoremldelegate.a; path = libraries/libcoremldelegate.a; sourceTree = "<group>"; };
+		C97BFFA32BC0C17300F55BAC /* libportable_kernels.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libportable_kernels.a; path = libraries/libportable_kernels.a; sourceTree = "<group>"; };
+		C97BFFA52BC0C1F200F55BAC /* libportable_ops_lib.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libportable_ops_lib.a; path = libraries/libportable_ops_lib.a; sourceTree = "<group>"; };
 		C988D69C2B998CD700979CF6 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "libraries/libprotobuf-lite.a"; sourceTree = "<group>"; };
 		F24817E62BC65B2000E80D98 /* libexecutorch_no_prim_ops.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libexecutorch_no_prim_ops.a; path = libraries/libexecutorch_no_prim_ops.a; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -56,7 +60,9 @@
 				C94D51682ACFCC7100AF47FD /* libcoremldelegate.a in Frameworks */,
 				C94D51662ACFCBCB00AF47FD /* Accelerate.framework in Frameworks */,
 				C988D69D2B998CDE00979CF6 /* libprotobuf-lite.a in Frameworks */,
+				C97BFFA62BC0C1F200F55BAC /* libportable_ops_lib.a in Frameworks */,
 				C94D51642ACFCBC500AF47FD /* CoreML.framework in Frameworks */,
+				C97BFFA42BC0C17300F55BAC /* libportable_kernels.a in Frameworks */,
 				C94D51622ACFCBBA00AF47FD /* libsqlite3.tbd in Frameworks */,
 				C94D515E2ACFCBA000AF47FD /* libexecutorch.a in Frameworks */,
 			);
@@ -94,6 +100,8 @@
 				C94D51612ACFCBBA00AF47FD /* libsqlite3.tbd */,
 				C94D51672ACFCC7100AF47FD /* libcoremldelegate.a */,
 				F24817E62BC65B2000E80D98 /* libexecutorch_no_prim_ops.a */,
+				C97BFFA32BC0C17300F55BAC /* libportable_kernels.a */,
+				C97BFFA52BC0C1F200F55BAC /* libportable_ops_lib.a */,
 			);
 			name = Frameworks;
 			sourceTree = "<group>";
diff --git a/examples/apple/coreml/scripts/build_executor_runner.sh b/examples/apple/coreml/scripts/build_executor_runner.sh
index d47bdf8b0ff..347f3b4474f 100755
--- a/examples/apple/coreml/scripts/build_executor_runner.sh
+++ b/examples/apple/coreml/scripts/build_executor_runner.sh
@@ -37,6 +37,7 @@ cmake "$EXECUTORCH_ROOT_PATH" -B"$CMAKE_BUILD_DIR_PATH" \
 -DEXECUTORCH_BUILD_XNNPACK=OFF \
 -DEXECUTORCH_BUILD_SDK=ON \
 -DEXECUTORCH_BUILD_COREML=ON \
+-DCOREML_BUILD_EXECUTOR_RUNNER=ON \
 -Dprotobuf_BUILD_TESTS=OFF \
 -Dprotobuf_BUILD_EXAMPLES=OFF \
 -DCMAKE_MACOSX_BUNDLE=OFF \
@@ -60,13 +61,15 @@ cp -rf "$COREML_DIR_PATH/runtime/include/" "$INCLUDE_DIR_PATH"
 # Copy required libraries
 echo "ExecuTorch: Copying libraries"
 mkdir "$LIBRARIES_DIR_PATH"
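+# Copy to explicit destination file names so that a debug build's libprotobuf-lited.a
+# is still picked up as libprotobuf-lite.a.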
-find "$CMAKE_BUILD_DIR_PATH/" -name 'libexecutorch.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH" \;
-find "$CMAKE_BUILD_DIR_PATH/" -name 'libexecutorch_no_prim_ops.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH" \;
-find "$CMAKE_BUILD_DIR_PATH/" -name 'libetdump.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH" \;
-find "$CMAKE_BUILD_DIR_PATH/" -name 'libcoremldelegate.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH" \;
-find "$CMAKE_BUILD_DIR_PATH/" -name 'libprotobuf-lite.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH" \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libexecutorch.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libexecutorch.a" \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libexecutorch_no_prim_ops.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libexecutorch_no_prim_ops.a" \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libprotobuf-lite.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libprotobuf-lite.a" \;
 find "$CMAKE_BUILD_DIR_PATH/" -name 'libprotobuf-lited.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libprotobuf-lite.a" \;
-cp -f "$EXECUTORCH_ROOT_PATH/third-party/flatcc/lib/libflatccrt.a" "$LIBRARIES_DIR_PATH"
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libetdump.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libetdump.a" \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libcoremldelegate.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libcoremldelegate.a" \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libportable_ops_lib.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libportable_ops_lib.a" \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libportable_kernels.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libportable_kernels.a" \;
+cp -f "$EXECUTORCH_ROOT_PATH/third-party/flatcc/lib/libflatccrt.a" "$LIBRARIES_DIR_PATH/libflatccrt.a"
 
 # Build the runner
 echo "ExecuTorch: Building runner"
diff --git a/examples/apple/coreml/scripts/extract_coreml_models.py b/examples/apple/coreml/scripts/extract_coreml_models.py
index 32c750196dd..6317b0f3d3f 100644
--- a/examples/apple/coreml/scripts/extract_coreml_models.py
+++ b/examples/apple/coreml/scripts/extract_coreml_models.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-#
 # Copyright © 2024 Apple Inc. All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -55,7 +53,8 @@ def extract_coreml_models(pte_data: bytes):
         if executorchcoreml.unflatten_directory_contents(
             coreml_processed_bytes, str(model_path.absolute())
         ):
-            print(f"CoreML model is extracted and saved to path = {model_path}")
+            print(f"Core ML model is extracted and saved to path = {model_path}")
+            model_index += 1
 
     if len(coreml_delegates) == 0:
         print("The model isn't delegated to CoreML.")
@@ -63,7 +62,7 @@ def extract_coreml_models(pte_data: bytes):
 
 if __name__ == "__main__":
     """
-    Extracts the CoreML models embedded in the ``.pte`` file and saves them to the
+    Extracts the Core ML models embedded in the ``.pte`` file and saves them to the
     file system.
     """
     parser = argparse.ArgumentParser()
diff --git a/examples/models/llama2/export_llama_lib.py b/examples/models/llama2/export_llama_lib.py
index bc9f1cef513..eef0bcee965 100644
--- a/examples/models/llama2/export_llama_lib.py
+++ b/examples/models/llama2/export_llama_lib.py
@@ -775,7 +775,8 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
         # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`.
         compile_specs = CoreMLBackend.generate_compile_specs(
             compute_precision=ct.precision(ct.precision.FLOAT16.value),
-            compute_unit=ct.ComputeUnit[ct.ComputeUnit.ALL.name.upper()],
+            # Using `ComputeUnit.ALL` can increase model load time, so default to `ComputeUnit.CPU_AND_GPU`.
+            compute_unit=ct.ComputeUnit[ct.ComputeUnit.CPU_AND_GPU.name.upper()],
             # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
             model_type=CoreMLBackend.MODEL_TYPE.MODEL,
         )