yolov11.rknn object detection in a Python environment
- 2025-09-01 12:36:01

1. First, here is the yolo11 .rknn model I generated:

2. Compare it with the yolov5 model:
2.1 Post-processing for the yolov5 model:
```python
outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
np.save('./onnx_yolov5_0.npy', outputs[0])
np.save('./onnx_yolov5_1.npy', outputs[1])
np.save('./onnx_yolov5_2.npy', outputs[2])
print('done')

# post process
input0_data = outputs[0]
input1_data = outputs[1]
input2_data = outputs[2]

input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))
input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))
input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))

input_data = list()
input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))
```
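To make that reshape/transpose concrete, here is a minimal sketch on a dummy tensor. The (1, 255, 80, 80) shape is an assumption based on the stock 80-class yolov5 head (3 anchors x 85 values per anchor):

```python
import numpy as np

# One yolov5 head output: (batch, 3 * (5 + 80), grid_h, grid_w)
dummy = np.zeros((1, 255, 80, 80), dtype=np.float32)

# Split the 255 channels into 3 anchors x 85 values per anchor
per_anchor = dummy.reshape([3, -1] + list(dummy.shape[-2:]))
print(per_anchor.shape)  # (3, 85, 80, 80)

# Move the grid dimensions to the front: (grid_h, grid_w, anchor, 85)
decoded = np.transpose(per_anchor, (2, 3, 0, 1))
print(decoded.shape)     # (80, 80, 3, 85)
```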
Then:

```python
def yolov5_post_process(input_data):
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    boxes, classes, scores = [], [], []
    for input, mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)
```
Then:

```python
def process(input, mask, anchors):
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = input[..., 4]
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = input[..., 5:]

    box_xy = input[..., :2] * 2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = pow(input[..., 2:4] * 2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs
```

3. Modification 1 - based on the ONNX code that ran successfully on the host machine yesterday:

```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import sys
from math import exp

import cv2
import numpy as np

ROOT = os.getcwd()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

RKNN_MODEL = r'/home/firefly/app/models/sim_moonpie-640-640_rk3588.rknn'
IMG_PATH = '/home/firefly/app/images/cake26.jpg'
QUANTIZE_ON = False

CLASSES = ['moonpie', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
           'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
           'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
           'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
           'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
           'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
           'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
           'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
           'toothbrush', 'moonpie']

meshgrid = []

class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
nmsThresh = 0.45
objectThresh = 0.5

input_imgH = 640
input_imgW = 640

from rknn.api import RKNN


def rk3588_detect(model, pic, classes):
    rknn = RKNN(verbose=True)
    '''
    # model config
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]],
                quant_img_RGB2BGR=False, target_platform='rk3588')
    '''
    rknn.load_rknn(path=model)
    rknn.init_runtime(target="rk3588", core_mask=RKNN.NPU_CORE_AUTO)
    outputs = rknn.inference(inputs=[pic], data_format=['nhwc'])
    return outputs


class DetectBox:
    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        self.classId = classId
        self.score = score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax


class YOLOV11DetectObj:
    def __init__(self):
        pass

    def GenerateMeshgrid(self):
        for index in range(headNum):
            for i in range(mapSize[index][0]):
                for j in range(mapSize[index][1]):
                    meshgrid.append(j + 0.5)
                    meshgrid.append(i + 0.5)

    def IOU(self, xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
        xmin = max(xmin1, xmin2)
        ymin = max(ymin1, ymin2)
        xmax = min(xmax1, xmax2)
        ymax = min(ymax1, ymax2)

        innerWidth = xmax - xmin
        innerHeight = ymax - ymin
        innerWidth = innerWidth if innerWidth > 0 else 0
        innerHeight = innerHeight if innerHeight > 0 else 0
        innerArea = innerWidth * innerHeight

        area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
        area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
        total = area1 + area2 - innerArea

        return innerArea / total

    def NMS(self, detectResult):
        predBoxs = []
        sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
        for i in range(len(sort_detectboxs)):
            xmin1 = sort_detectboxs[i].xmin
            ymin1 = sort_detectboxs[i].ymin
            xmax1 = sort_detectboxs[i].xmax
            ymax1 = sort_detectboxs[i].ymax
            classId = sort_detectboxs[i].classId
            if sort_detectboxs[i].classId != -1:
                predBoxs.append(sort_detectboxs[i])
                for j in range(i + 1, len(sort_detectboxs), 1):
                    if classId == sort_detectboxs[j].classId:
                        xmin2 = sort_detectboxs[j].xmin
                        ymin2 = sort_detectboxs[j].ymin
                        xmax2 = sort_detectboxs[j].xmax
                        ymax2 = sort_detectboxs[j].ymax
                        # was a bare IOU() call, which raises NameError inside a class
                        iou = self.IOU(xmin1, ymin1, xmax1, ymax1,
                                       xmin2, ymin2, xmax2, ymax2)
                        if iou > nmsThresh:
                            sort_detectboxs[j].classId = -1
        return predBoxs

    def sigmoid(self, x):
        return 1 / (1 + exp(-x))

    def postprocess(self, out, img_h, img_w):
        print('postprocess ... ')

        detectResult = []
        output = []
        for i in range(len(out)):
            print(out[i].shape)
            output.append(out[i].reshape((-1)))

        scale_h = img_h / input_imgH
        scale_w = img_w / input_imgW

        gridIndex = -2
        cls_index = 0
        cls_max = 0

        for index in range(headNum):
            reg = output[index * 2 + 0]
            cls = output[index * 2 + 1]

            for h in range(mapSize[index][0]):
                for w in range(mapSize[index][1]):
                    gridIndex += 2

                    if 1 == class_num:
                        # was a bare sigmoid() call; the method lives on self
                        cls_max = self.sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                        cls_index = 0
                    else:
                        for cl in range(class_num):
                            cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                            if 0 == cl:
                                cls_max = cls_val
                                cls_index = cl
                            else:
                                if cls_val > cls_max:
                                    cls_max = cls_val
                                    cls_index = cl
                        cls_max = self.sigmoid(cls_max)

                    if cls_max > objectThresh:
                        regdfl = []
                        for lc in range(4):
                            sfsum = 0
                            locval = 0
                            for df in range(16):
                                temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                                reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp
                                sfsum += temp
                            for df in range(16):
                                sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum
                                locval += sfval * df
                            regdfl.append(locval)

                        x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index]
                        y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index]
                        x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index]
                        y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index]

                        xmin = x1 * scale_w
                        ymin = y1 * scale_h
                        xmax = x2 * scale_w
                        ymax = y2 * scale_h

                        xmin = xmin if xmin > 0 else 0
                        ymin = ymin if ymin > 0 else 0
                        xmax = xmax if xmax < img_w else img_w
                        ymax = ymax if ymax < img_h else img_h

                        box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax)
                        detectResult.append(box)

        # NMS
        print('detectResult:', len(detectResult))
        predBox = self.NMS(detectResult)
        return predBox

    def precess_image(self, img_src, resize_w, resize_h):
        print(f'{type(img_src)}')
        image = cv2.resize(img_src, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32)
        image /= 255.0
        return image

    def detect(self, img_path):
        self.GenerateMeshgrid()

        orig = cv2.imread(img_path)
        if orig is None:
            print(f"Failed to read image: {img_path}")
            return
        img_h, img_w = orig.shape[:2]
        image = self.precess_image(orig, input_imgW, input_imgH)

        image = image.transpose((2, 0, 1))
        image = np.expand_dims(image, axis=0)

        # image = np.ones((1, 3, 640, 640), dtype=np.uint8)
        # print(image.shape)

        # ort_session = ort.InferenceSession(ONNX_MODEL)
        # pred_results = (ort_session.run(None, {'data': image}))
        pred_results = rk3588_detect(RKNN_MODEL, image, CLASSES)

        out = []
        for i in range(len(pred_results)):
            out.append(pred_results[i])
        predbox = self.postprocess(out, img_h, img_w)

        print('obj num is :', len(predbox))

        for i in range(len(predbox)):
            xmin = int(predbox[i].xmin)
            ymin = int(predbox[i].ymin)
            xmax = int(predbox[i].xmax)
            ymax = int(predbox[i].ymax)
            classId = predbox[i].classId
            score = predbox[i].score

            cv2.rectangle(orig, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            ptext = (xmin, ymin)
            title = CLASSES[classId] + "%.2f" % score
            cv2.putText(orig, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

        cv2.imwrite('./test_onnx_result.jpg', orig)


if __name__ == '__main__':
    print('This is main ....')
    img_path = IMG_PATH
    obj = YOLOV11DetectObj()
    obj.detect(img_path)
```
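The per-cell triple loop in `postprocess` decodes the DFL (Distribution Focal Loss) regression head one grid cell at a time, which is easy to follow but slow in pure Python. As a point of comparison, here is a minimal vectorized sketch of the same softmax-weighted-bin decode for one head; it assumes the (1, 64, H, W) reg layout (4 box sides x 16 bins) that the loop above indexes into, and `dfl_decode` is a name I made up:

```python
import numpy as np

def dfl_decode(reg, stride):
    """Vectorized DFL decode for one detection head.

    reg: (1, 64, H, W) raw regression output (4 sides x 16 bins each).
    Returns (H, W, 4) distances [left, top, right, bottom] in input pixels.
    """
    _, _, H, W = reg.shape
    r = reg.reshape(4, 16, H, W)                   # split channels: side, bin
    r = np.exp(r - r.max(axis=1, keepdims=True))   # numerically stable softmax
    r /= r.sum(axis=1, keepdims=True)              # softmax over the 16 bins
    bins = np.arange(16).reshape(1, 16, 1, 1)
    dist = (r * bins).sum(axis=1)                  # expected bin index, (4, H, W)
    return dist.transpose(1, 2, 0) * stride

# e.g. dists = dfl_decode(outputs[0], 8) for the 80x80 head
```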
Running it, the output is wrong:

```
firefly@firefly:~/app/test$ python3 ./detect_rk3588.py
This is main ....
<class 'numpy.ndarray'>
I rknn-toolkit2 version: 2.3.0
I target set by user is: rk3588
postprocess ...
(1, 64, 80, 80)
(1, 81, 80, 80)
(1, 64, 40, 40)
(1, 81, 40, 40)
(1, 64, 20, 20)
(1, 81, 20, 20)
detectResult: 0
obj num is : 0
```
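With zero boxes surviving, a useful first check is whether any class score ever clears `objectThresh`, or whether the score channels are simply not where the post-processing expects them. A small hypothetical probe (`probe_scores` is my own helper, reusing the `outputs` list returned by `rk3588_detect`) might look like this; only the cls tensors carry meaningful scores, but printing all six also shows which ones those are:

```python
import numpy as np

def probe_scores(outputs, obj_thresh=0.5):
    """Print the best post-sigmoid value in every output tensor."""
    for i, t in enumerate(outputs):
        best = 1.0 / (1.0 + np.exp(-float(np.max(t))))
        status = "PASSES" if best > obj_thresh else "below"
        print(f'output[{i}] shape={t.shape} max_sigmoid={best:.3f} ({status} threshold)')
```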
4. Modification 2 - based on the yolov5.rknn code

4.1 There seems to be the following mapping: yolo11.output5 == yolo5.output2, yolo11.output3 == yolo5.output1, yolo11.output1 == yolo5.output0
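Before hard-coding the 1/3/5 indices, the mapping can also be confirmed programmatically: in this export the cls tensors are recognizable by their channel count (len(CLASSES)), and each pairs with the reg tensor of the same grid size. A sketch under that assumption (`map_outputs` is a hypothetical helper):

```python
def map_outputs(outputs, num_classes):
    """Group the 6 YOLO11 output tensors into (reg, cls) pairs per grid size."""
    heads = {}
    for i, t in enumerate(outputs):
        _, ch, h, w = t.shape
        key = 'cls' if ch == num_classes else 'reg'  # 64 channels -> reg (DFL)
        heads.setdefault((h, w), {})[key] = i
    return heads  # e.g. {(80, 80): {'reg': 0, 'cls': 1}, ...}
```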
```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import sys
import urllib
import traceback
import time

import numpy as np
import cv2
from rknn.api import RKNN

ROOT = os.getcwd()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# Model from github /airockchip/rknn_model_zoo, yolov11 -> self-trained. yolo11s?
ONNX_MODEL = r'/home/firefly/app/models/yolo11_selfgen.onnx'
RKNN_MODEL = r'/home/firefly/app/models/new_moonpie_yolo11_640x640.rknn'
IMG_PATH = r'/home/firefly/app/images/cake26.jpg'
QUANTIZE_ON = True
DATASET = r'./dataset.txt'

CLASSES = ['moonpie', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
           'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
           'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
           'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
           'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
           'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
           'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
           'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
           'toothbrush']

meshgrid = []

class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]

input_imgH = 640
input_imgW = 640
IMG_SIZE = input_imgH

OBJ_THRESH = 0.25
NMS_THRESH = 0.45


def xywh2xyxy(x):
    # Convert [x, y, w, h] to [x1, y1, x2, y2]
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def process(input, mask, anchors):
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = input[..., 4]
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = input[..., 5:]

    box_xy = input[..., :2] * 2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = pow(input[..., 2:4] * 2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with object threshold. It's a bit different from the original yolov5 post-process!

    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.

    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, classes for boxes.
        scores: ndarray, scores for boxes.
    """
    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])

    _box_pos = np.where(box_confidences >= OBJ_THRESH)
    boxes = boxes[_box_pos]
    box_confidences = box_confidences[_box_pos]
    box_class_probs = box_class_probs[_box_pos]

    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score >= OBJ_THRESH)

    boxes = boxes[_class_pos]
    classes = classes[_class_pos]
    scores = (class_max_score * box_confidences)[_class_pos]

    return boxes, classes, scores


def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.

    # Arguments
        boxes: ndarray, boxes of objects.
        scores: ndarray, scores of objects.

    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep


def yolov5_post_process(input_data):
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    boxes, classes, scores = [], [], []
    for input, mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]

        keep = nms_boxes(b, s)

        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores


def draw(image, boxes, scores, classes):
    """Draw the boxes on the image.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
    """
    print("{:^12} {:^12} {}".format('class', 'score', 'xmin, ymin, xmax, ymax'))
    print('-' * 50)
    for box, score, cl in zip(boxes, scores, classes):
        # despite the variable names, box holds [xmin, ymin, xmax, ymax]
        top, left, right, bottom = box
        top = int(top)
        left = int(left)
        right = int(right)
        bottom = int(bottom)

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (top, left - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        print("{:^12} {:^12.3f} [{:>4}, {:>4}, {:>4}, {:>4}]".format(
            CLASSES[cl], score, top, left, right, bottom))


def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right,
                            cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


def detect_on_ubuntu(onnx_model_path=ONNX_MODEL, rknn_path=RKNN_MODEL,
                     image_path=IMG_PATH, classes=CLASSES):
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
    print('done')

    # Load ONNX model
    print('--> Loading model')
    ret = rknn.load_onnx(model=onnx_model_path)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export rknn model')
    ret = rknn.export_rknn(rknn_path)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Set inputs
    img = cv2.imread(image_path)
    # img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

    # Inference
    print('--> Running model')
    img2 = np.expand_dims(img, 0)
    np.save('./raw_yolov11_in.npy', [img2])
    outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
    np.save('./onnx_yolov11_0_raw.npy', outputs[1])
    np.save('./onnx_yolov11_1_raw.npy', outputs[3])
    np.save('./onnx_yolov11_2_raw.npy', outputs[5])
    print('done')

    # post process
    input0_data = outputs[1]
    input1_data = outputs[3]
    input2_data = outputs[5]

    # Create zero arrays to pad the class channels back up to 255,
    # so the yolov5-style reshape below still works
    pad_channels = 255 - len(classes)
    padding = np.zeros((1, pad_channels, 80, 80), dtype=np.float32)
    input0_data = np.concatenate((input0_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 40, 40), dtype=np.float32)
    input1_data = np.concatenate((input1_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 20, 20), dtype=np.float32)
    input2_data = np.concatenate((input2_data, padding), axis=1)

    np.save('./onnx_yolov11_0.npy', input0_data)
    np.save('./onnx_yolov11_1.npy', input1_data)
    np.save('./onnx_yolov11_2.npy', input2_data)

    input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))

    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

    boxes, classes, scores = yolov5_post_process(input_data)

    img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
    cv2.imwrite('result.jpg', img_1)
    print('Save results to result.jpg!')

    rknn.release()


def precess_image(img_src, resize_w, resize_h):
    orig = cv2.imread(img_src)
    if orig is None:
        # the original message referenced an undefined img_path variable
        print(f"Failed to read image: {img_src}")
        return
    img_h, img_w = orig.shape[:2]
    print(f'{type(orig)}')
    image = cv2.resize(orig, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32)
    image /= 255.0
    return image


def detect_on_rk3588(rknn_path=RKNN_MODEL, image_path=IMG_PATH, classes=CLASSES):
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
    print('done')

    # Load RKNN model
    ret = rknn.load_rknn(rknn_path)
    if ret != 0:
        print('Load rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime(target="rk3588", core_mask=RKNN.NPU_CORE_AUTO)
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    img2 = precess_image(image_path, input_imgW, input_imgH)

    np.save('./raw_yolov11_in.npy', [img2])
    outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
    np.save('./raw_yolov11_0.npy', outputs[1])
    np.save('./raw_yolov11_1.npy', outputs[3])
    np.save('./raw_yolov11_2.npy', outputs[5])
    print('done')

    # post process (these three assignments were missing in the first draft,
    # which made the concatenate calls below fail with a NameError)
    input0_data = outputs[1]
    input1_data = outputs[3]
    input2_data = outputs[5]

    # Create zero arrays to pad the class channels back up to 255
    pad_channels = 255 - len(classes)
    padding = np.zeros((1, pad_channels, 80, 80), dtype=np.float32)
    input0_data = np.concatenate((input0_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 40, 40), dtype=np.float32)
    input1_data = np.concatenate((input1_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 20, 20), dtype=np.float32)
    input2_data = np.concatenate((input2_data, padding), axis=1)

    np.save('./modified_yolov11_0.npy', input0_data)
    np.save('./modified_yolov11_1.npy', input1_data)
    np.save('./modified_yolov11_2.npy', input2_data)

    input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))

    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

    boxes, classes, scores = yolov5_post_process(input_data)

    # imread already returns BGR; the original RGB2BGR conversion here swapped channels
    img_1 = cv2.imread(image_path)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
        cv2.imwrite('rknn_detect_result.jpg', img_1)
        print('Save results to rknn_detect_result.jpg!')
    else:
        print('target can not be found!')

    rknn.release()


if __name__ == '__main__':
    detect_on_ubuntu(ONNX_MODEL, RKNN_MODEL, IMG_PATH, CLASSES)
    # detect_on_rk3588(RKNN_MODEL, IMG_PATH, CLASSES)
```

5. Conclusions: after carefully cross-checking the input and output parameters of the yolov5, yolo11, and onnx models, I found the following:
- YOLOv11 has six output tensors; per the mapping above, outputs 1, 3, 5 most likely correspond to the original outputs 0, 1, 2.
- In YOLOv11's six outputs, the second dimension is no longer 255 (i.e. -1), but len(classes). My problem was that I set classes to 81 during training but still exported 80 classes when exporting the .rknn. As a result, the ONNX path worked while the .rknn path failed.
- The np.save calls in the yolo detect code are debug statements left behind by the Rockchip engineers while they were debugging the interface; they don't need to stay.
- The rknn input and output parameters should preferably be uint8; in the simulation environment float32 is passed. Once that change is made, things should be noticeably faster (see the sketch after this list).
- Also, the matrix pack-in/pack-out in the yolo detect code is inefficient: it still performs float conversion. I haven't studied that code closely, but in theory the ONNX-style extraction of the detection info is faster. Still testing.
- If the conclusions of this final draft are confirmed, I will leave a note in this post. The two code listings above have no major problems; I will update them once testing succeeds, but as they stand they are correct.
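On the uint8 point: since the model was built with mean_values 0 and std_values 255, the divide-by-255 normalization already happens inside the RKNN graph, so the raw uint8 image can be fed directly instead of a pre-normalized float32 array. A minimal sketch of that idea, untested here, so treat it as an assumption (`infer_u8` is a name I made up):

```python
import cv2
import numpy as np
from rknn.api import RKNN

def infer_u8(rknn, image_path, size=(640, 640)):
    """Feed the raw uint8 image; mean/std scaling runs on the NPU."""
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, size)        # HWC, dtype stays uint8
    img = np.expand_dims(img, 0)       # NHWC batch of 1
    return rknn.inference(inputs=[img], data_format=['nhwc'])
```

This is also what `detect_on_ubuntu` above effectively does (it never divides by 255 in Python), whereas `precess_image` normalizes to float32 first; keeping the input uint8 end to end avoids both the Python-side conversion and the extra data copy.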