Skip to content

ex.extract时,没有任何报错信息,返回0xC0000005(STATUS_ACCESS_VIOLATION)。input正常。 #6622

@emofalling

Description

@emofalling

error log | 日志或报错信息 | ログ

ex.extract时,没有任何报错信息,返回0xC0000005。input正常。

context | 编译/运行环境 | バックグラウンド

平台:Windows 11 专业工作站版
可用设备:Intel Core Ultra 9, Intel Graphics, RTX4070Super
Python版本:3.14.2

how to reproduce | 复现步骤 | 再現方法

如下代码:

import os, sys
import ncnn
import pnnx
import numpy as np

# Directory used to cache converted models
cache_directory = ".cache/ncnn/"

# Model family name
model_name = "rife_v4.25_lite"

# Quantization precision used when converting the model. Lowercase.
precision = "fp16"

# Path to the source ONNX model file
model_path = f"models/{model_name}/model.onnx"

# Output directory for the converted NCNN model
ncnn_model_dir = f"models/{model_name}/{precision}/ncnn"

# Which device PNNX uses when converting the model
cvt_device = "gpu"

# Inference device string
# NOTE(review): OpenVINO-style device name; it is never passed to ncnn
# anywhere in this script — confirm it is intentional leftover config.
device = "GPU.1"

# Paths of the previous / next frame images
frame_0_path = f"f0.jpg"
frame_1_path = f"f1.jpg"

# Interpolation timestamp in [0, 1]
timestamp = 0.5

# Width/height alignment in pixels (1 = no padding)
alignment_byte = 1

# Default input size baked in at model-conversion time
default_input_width = 1920
default_input_height = 1080

# NCNN-specific: whether to enable Vulkan compute
Use_Vulkan = False
# NCNN-specific: number of inference threads
num_threads = 4

def onnx_to_ncnn():
    """Convert the ONNX model to NCNN format and store it under ncnn_model_dir.

    Called only when the NCNN model files do not exist yet. Raises ValueError
    for an unknown `precision` and RuntimeError if conversion produced no
    model files.
    """
    if precision == "fp16":
        use_fp16 = True
        print("量化到FP16精度")
    elif precision == "fp32":
        use_fp16 = False
        print("不进行量化,使用原始的FP32精度")
    else:
        raise ValueError(f"未知的量化精度 {precision}")

    # Make sure the output directory exists before pnnx writes into it.
    os.makedirs(ncnn_model_dir, exist_ok=True)

    def _out(name):
        # One place to build output paths inside the NCNN model directory.
        return os.path.join(ncnn_model_dir, name)

    # Run the ONNX -> NCNN conversion.
    # NOTE: passing optlevel would suppress generation of the ncnn model,
    # so it is deliberately not specified here.
    pnnx.convert(
        model_path,
        input_shapes=[[1, 7, default_input_height, default_input_width]],
        input_types=["f32"],
        ncnnparam=_out("model.param"),
        ncnnbin=_out("model.bin"),
        ncnnpy=_out("example.py"),
        pnnxbin=_out("model.pnnx.bin"),
        pnnxparam=_out("model.pnnx.param"),
        pnnxonnx=_out("model.pnnx.onnx"),
        pnnxpy=_out("example_pnnx.py"),
        device=cvt_device,
        fp16=use_fp16,
        customop=["rife.Warp"],
    )

    # Verify the conversion actually produced both model files.
    missing = [f for f in ("model.bin", "model.param") if not os.path.exists(_out(f))]
    if missing:
        raise RuntimeError("NCNN模型转换失败")


# Main program
# Convert the ONNX model first if the NCNN model files are not on disk yet.
if not os.path.exists(os.path.join(ncnn_model_dir, "model.bin")) or \
   not os.path.exists(os.path.join(ncnn_model_dir, "model.param")):
    print(f"NCNN模型文件 {ncnn_model_dir} 不存在,正在转换ONNX模型...")
    onnx_to_ncnn()
    print(f"ONNX模型已成功转换为NCNN模型,并保存到 {ncnn_model_dir}")
# Enumerate the GPUs visible to ncnn's Vulkan backend.
print("可用设备列表:")
for i in range(ncnn.get_gpu_count()): # type: ignore
    info = ncnn.get_gpu_info(i) # type: ignore
    print(f"""设备 {i}: 
        Device ID: {info.device_id()}
        Device Name: {info.device_name()}
        Driver Version: {info.driver_version()}
        Type: {info.type()}
        Vendor ID: {info.vendor_id()}
    """)
# Load the converted NCNN model.
print(f"正在加载 {model_name} 模型...")
model = ncnn.Net() # type: ignore
model.opt.use_vulkan_compute = Use_Vulkan
model.opt.num_threads = num_threads

# Force full-precision storage and arithmetic.
model.opt.use_fp16_storage = False
model.opt.use_fp16_arithmetic = False

# load_param is called before load_model (param describes the graph, bin the weights).
model.load_param(os.path.join(ncnn_model_dir, "model.param")) # type: ignore
model.load_model(os.path.join(ncnn_model_dir, "model.bin")) # type: ignore

print(f"{model_name} 模型加载完成")

# Load both frames, split RGB channels, convert to numpy, normalize to [0, 1].
from PIL import Image
frame_0_img = Image.open(frame_0_path)
frame_1_img = Image.open(frame_1_path)
# NOTE(review): the images are not forced to RGB — a file with an alpha
# channel or palette would not produce an (H, W, 3) array below; confirm the
# inputs are plain RGB, or add .convert("RGB").
img_width = frame_0_img.width
img_height = frame_0_img.height
# Round dimensions up to the next multiple of alignment_byte.
img_width_aligned = (img_width + alignment_byte - 1) // alignment_byte * alignment_byte
img_height_aligned = (img_height + alignment_byte - 1) // alignment_byte * alignment_byte
print(f"图像尺寸: {img_width}x{img_height}")
print(f"对齐后图像尺寸: {img_width_aligned}x{img_height_aligned}")
if frame_1_img.size != (img_width, img_height):
    raise ValueError("前后帧图像尺寸不一致")
# (H, W, 3) float32 RGB arrays (NHWC-style layout), normalized to [0, 1].
f0_rgb_arr = np.array(frame_0_img, dtype=np.float32) / 255.0
f1_rgb_arr = np.array(frame_1_img, dtype=np.float32) / 255.0
# Zero-pad up to the aligned size when padding is required.
if img_width_aligned != img_width or img_height_aligned != img_height:
    f0_rgb_arr_padded = np.zeros((img_height_aligned, img_width_aligned, 3), dtype=np.float32)
    f1_rgb_arr_padded = np.zeros((img_height_aligned, img_width_aligned, 3), dtype=np.float32)
    f0_rgb_arr_padded[:img_height, :img_width, :] = f0_rgb_arr
    f1_rgb_arr_padded[:img_height, :img_width, :] = f1_rgb_arr
else:
    f0_rgb_arr_padded = f0_rgb_arr
    f1_rgb_arr_padded = f1_rgb_arr
# Split the R/G/B planes of each frame.
r_f0 = f0_rgb_arr_padded[:, :, 0]
g_f0 = f0_rgb_arr_padded[:, :, 1]
b_f0 = f0_rgb_arr_padded[:, :, 2]
r_f1 = f1_rgb_arr_padded[:, :, 0]
g_f1 = f1_rgb_arr_padded[:, :, 1]
b_f1 = f1_rgb_arr_padded[:, :, 2]
# Model input layout [batch, 7, H, W]:
#   channels 0-2 = frame 0 RGB, channels 3-5 = frame 1 RGB, channel 6 = timestep.
input_data = np.zeros((1, 7, img_height_aligned, img_width_aligned), dtype=np.float32)
# Fill the six image channels.
input_data[0, 0, :, :] = r_f0
input_data[0, 1, :, :] = g_f0
input_data[0, 2, :, :] = b_f0
input_data[0, 3, :, :] = r_f1
input_data[0, 4, :, :] = g_f1
input_data[0, 5, :, :] = b_f1
# Channel 6 (the timestep) is filled per call inside new_frame().

def new_frame(t: float, change_img: np.ndarray):
    """Run one RIFE interpolation pass at timestep *t* and write the frame
    into *change_img* in place.

    Parameters:
        t: interpolation timestamp in [0, 1]; broadcast into channel 6 of the
           shared module-level input tensor.
        change_img: (img_height, img_width, 3) uint8 array receiving the
           interpolated frame.

    Raises:
        ValueError: if the ncnn input Mat reports invalid dimensions.
    """
    print("正在进行计算...")

    # Channel 6 carries the timestep; channels 0-5 already hold both frames.
    input_data[0, 6, :, :] = t

    with model.create_extractor() as ex:
        input_mat = ncnn.Mat(input_data).clone()  # type: ignore

        # Validate the Mat explicitly instead of `assert`, which is stripped
        # when Python runs with -O.
        if not (input_mat.w > 0 and input_mat.h > 0 and input_mat.c > 0):
            raise ValueError("输入Mat尺寸无效")

        input_names = model.input_names()
        output_names = model.output_names()

        input_name = 'in0'
        output_name = 'out0'
        print(f"模型输入名称: {input_name}, 可用输入名称列表: {input_names}")
        print(f"模型输出名称: {output_name}, 可用输出名称列表: {output_names}")

        print("开始推理")
        ex.input(input_name, input_mat)  # type: ignore

        print("开始导出")
        _, output_mat = ex.extract(output_name)  # type: ignore
        print("计算完成")

        output_data = np.array(output_mat)

    # Crop the padded border away and pull out the RGB planes.
    # NOTE(review): assumes the output keeps a leading batch axis like the
    # input ([1, C, H, W]) — verify against the actual rank of the ncnn Mat.
    r_out = output_data[0, 0, :img_height, :img_width]
    g_out = output_data[0, 1, :img_height, :img_width]
    b_out = output_data[0, 2, :img_height, :img_width]

    # Merge the planes back into a compact HWC image.
    output_img_arr = np.stack((r_out, g_out, b_out), axis=-1)

    # De-normalize to [0, 255]. Clip before the uint8 cast: a raw cast wraps
    # out-of-range model outputs modulo 256 (e.g. 1.01 -> 257 -> 1), producing
    # speckle artifacts.
    output_img_arr *= 255.0
    np.copyto(change_img, np.clip(output_img_arr, 0.0, 255.0).astype(np.uint8))

# Warm-up self-test: run one interpolation before starting the GUI so that
# inference problems surface early.
test_frame = np.zeros((img_height, img_width, 3), dtype=np.uint8)
new_frame(timestamp, test_frame)

print("初始化完成,即将进入GUI")

from PySide6.QtWidgets import QApplication, QMainWindow, QLabel, QVBoxLayout, QHBoxLayout, QWidget, QSlider
from PySide6.QtGui import QPixmap, QImage
from PySide6.QtCore import Qt
from CustomWidgets import CustomUI

class MainWindow(QMainWindow):
    """Main window: shows the previous frame, the interpolated frame, the next
    frame, and a slider that selects the interpolation timestep."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("RIFE Test for Python OpenVINO")
        # Buffer holding the current interpolated frame; updated in place.
        self.frame = np.zeros((img_height, img_width, 3), dtype=np.uint8)
        new_frame(timestamp, self.frame)
        self.setcontent()

    def setcontent(self):
        """Build the widget tree and wire up the slider signal."""
        container = QWidget()
        self.setCentralWidget(container)
        outer = QVBoxLayout()
        container.setLayout(outer)

        # Row of three image viewers: previous / interpolated / next.
        row = QHBoxLayout()
        outer.addLayout(row)

        fmt = QImage.Format.Format_RGB888
        self.img_0 = CustomUI.GenerelPicViewer(
            np.array(frame_0_img, dtype=np.uint8), "前帧", fmt)
        self.img_1 = CustomUI.GenerelPicViewer(
            np.array(frame_1_img, dtype=np.uint8), "后帧", fmt)
        self.img_i = CustomUI.GenerelPicViewer(self.frame, "插帧", fmt)

        # Interpolated frame sits between the two source frames.
        for viewer in (self.img_0, self.img_i, self.img_1):
            row.addWidget(viewer)

        # Horizontal slider mapping [0, 100] onto the timestep [0.0, 1.0].
        self.slider = QSlider()
        self.slider.setOrientation(Qt.Orientation.Horizontal)
        self.slider.setMinimum(0)
        self.slider.setMaximum(100)
        self.slider.setValue(int(timestamp * 100))
        outer.addWidget(self.slider)

        self.slider.valueChanged.connect(self.update_img_i)

    def update_img_i(self, value):
        """Slider callback: re-interpolate at value/100 and refresh the view."""
        t = value / 100.0
        new_frame(t, self.frame)
        self.img_i.update_()

# Qt application entry point: start the event loop and exit with its status.
app = QApplication(sys.argv)
window = MainWindow()
window.show()

sys.exit(app.exec())

编译过程能够顺利进行。

但是在开始导出时产生错误码0xC0000005(STATUS_ACCESS_VIOLATION)。

运行结果:

PS D:\emofalling\LimitRIFE\LimitRIFE_Env> & "D:/Program Files/Python314/python.exe" d:/emofalling/LimitRIFE/LimitRIFE_Env/rife_demo_ncnn.py
可用设备列表:
[0 NVIDIA GeForce RTX 4070 SUPER]  queueC=2[8]  queueT=1[2]  rebar=1  r-score=80
[0 NVIDIA GeForce RTX 4070 SUPER]  fp16-p/s/u/a=1/1/1/1  int8-p/s/u/a=1/1/1/1  bf16-p/s=1/1
[0 NVIDIA GeForce RTX 4070 SUPER]  subgroup=32(32~32)  ops=1/1/1/1/1/1/1/1/1/1
[0 NVIDIA GeForce RTX 4070 SUPER]  fp16-cm=16x16x16/16x8x16/16x8x8  int8-cm=16x16x32/16x8x32  bf16-cm=16x16x16  fp8-cm=16x16x32
[1 Intel(R) Graphics]  queueC=1[4]  queueT=2[2]  rebar=1  r-score=10
[1 Intel(R) Graphics]  fp16-p/s/u/a=1/1/1/1  int8-p/s/u/a=1/1/1/1  bf16-p/s=1/0
[1 Intel(R) Graphics]  subgroup=32(8~32)  ops=1/1/1/1/1/1/1/1/1/1
[1 Intel(R) Graphics]  fp16-cm=0  int8-cm=0  bf16-cm=0  fp8-cm=0
设备 0:
        Device ID: 10115
        Device Name: NVIDIA GeForce RTX 4070 SUPER
        Driver Version: 2496905216
        Type: 0
        Vendor ID: 4318

设备 1:
        Device ID: 32103
        Device Name: Intel(R) Graphics
        Driver Version: 1663410
        Type: 1
        Vendor ID: 32902

正在加载 rife_v4.25_lite 模型...
rife_v4.25_lite 模型加载完成
图像尺寸: 1920x1080
对齐后图像尺寸: 1920x1080
正在进行计算...
模型输入名称: in0, 可用输入名称列表: ['in0']
模型输出名称: out0, 可用输出名称列表: ['172', 'out0']
开始推理
开始导出
PS D:\emofalling\LimitRIFE\LimitRIFE_Env> $LASTEXITCODE                                                                                           
-1073741819

more | 其他 | その他

源ONNX模型:rife_v4.25_lite.7z下的rife_v2\rife_v4.25_lite.onnx

我尝试使用了一个成功案例现成的ncnn模型
VapourSynth-RIFE-ncnn-Vulkan/tree/master/models/rife-v4.25-lite_ensembleFalse

问题仍旧。

输出:

PS D:\emofalling\LimitRIFE\LimitRIFE_Env> & "D:/Program Files/Python314/python.exe" d:/emofalling/LimitRIFE/LimitRIFE_Env/rife_demo_ncnn.py
可用设备列表:
[0 NVIDIA GeForce RTX 4070 SUPER]  queueC=2[8]  queueT=1[2]  rebar=1  r-score=80
[0 NVIDIA GeForce RTX 4070 SUPER]  fp16-p/s/u/a=1/1/1/1  int8-p/s/u/a=1/1/1/1  bf16-p/s=1/1
[0 NVIDIA GeForce RTX 4070 SUPER]  subgroup=32(32~32)  ops=1/1/1/1/1/1/1/1/1/1
[0 NVIDIA GeForce RTX 4070 SUPER]  fp16-cm=16x16x16/16x8x16/16x8x8  int8-cm=16x16x32/16x8x32  bf16-cm=16x16x16  fp8-cm=16x16x32
[1 Intel(R) Graphics]  queueC=1[4]  queueT=2[2]  rebar=1  r-score=10
[1 Intel(R) Graphics]  fp16-p/s/u/a=1/1/1/1  int8-p/s/u/a=1/1/1/1  bf16-p/s=1/0
[1 Intel(R) Graphics]  subgroup=32(8~32)  ops=1/1/1/1/1/1/1/1/1/1
[1 Intel(R) Graphics]  fp16-cm=0  int8-cm=0  bf16-cm=0  fp8-cm=0
设备 0:
        Device ID: 10115
        Device Name: NVIDIA GeForce RTX 4070 SUPER
        Driver Version: 2496905216
        Type: 0
        Vendor ID: 4318

设备 1:
        Device ID: 32103
        Device Name: Intel(R) Graphics
        Driver Version: 1663410
        Type: 1
        Vendor ID: 32902

正在加载 rife_v4.25_lite 模型...
layer rife.Warp not exists or registered
network graph not ready
rife_v4.25_lite 模型加载完成
图像尺寸: 1920x1080
对齐后图像尺寸: 1920x1080
正在进行计算...
模型输入名称: in0, 可用输入名称列表: []
模型输出名称: out0, 可用输出名称列表: []
开始推理
find_blob_index_by_name in0 failed
Try
开始导出
find_blob_index_by_name out0 failed
Try
计算完成
PS D:\emofalling\LimitRIFE\LimitRIFE_Env> $LASTEXITCODE
-1073741819

于是我自己尝试编译NCNN并自行加载,NCNN里面的所有example都通过了,但我的程序仍然会返回这个错误码,问题仍未解决。
经过Visual Studio的调试,找到了函数点

int ncnn::Reshape_x86_fma::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions