训练yolov8n

发表于 2025-12-14 更新于 2026-01-44

训练yolov8n

第一步：创建并激活新环境

创建名为 yolo_rk 的环境：conda create -n yolo_rk python=3.9 -y
激活环境：conda activate yolo_rk

第二步：安装 PyTorch (GPU版)

显卡是 RTX 3090，驱动支持 CUDA 12.6。我们安装适配 CUDA 12.4 的 PyTorch（向下兼容，非常稳定）。

pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

第三步：安装 YOLOv8 库

pip install ultralytics

第四步：验证环境 (关键)

验证 GPU：输入 python 进入交互模式，然后输入：

import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
exit()

验证 YOLO：在命令行输入：

yolo version

都成功后：

训练数据集：yolo detect train model=yolov8n.pt data=coco128.yaml epochs=50 imgsz=640 device=0 batch=64 workers=8 name=rk3566_test

导出onnx模型：yolo export model="D:\deeplearning\yolov8n\runs\detect\rk3566_test\weights\best.pt" format=onnx opset=12

转化为rknn

from rknn.api import RKNN

# --- 修改这里 ---
ONNX_MODEL = 'best.onnx'          # 你的ONNX文件名
RKNN_MODEL = 'yolov8_rk3566.rknn' # 导出的文件名
# ----------------

if __name__ == '__main__':
    # 1. 初始化
    rknn = RKNN(verbose=True)

    # 2. 配置 (RK3566)
    # 输入给网络的数据 = (原始像素 - mean) / std,后续输入图片不需要归一化
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3566')

    # 3. 加载 ONNX
    print('--> Loading ONNX')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load failed!')
        exit(ret)

    # 4. 构建 (FP16模式)  True (INT8 模式) false是fp16模式
    print('--> Building')
    ret = rknn.build(do_quantization=False)
    if ret != 0:
        print('Build failed!')
        exit(ret)

    # 5. 导出
    print('--> Exporting RKNN')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export failed!')
        exit(ret)

    print(f"\n成功！文件已保存为: {RKNN_MODEL}")

使用代码如下：

#include <iostream>
#include <vector>
#include <chrono>
#include <string>
#include <thread>
#include <opencv2/opencv.hpp>
#include "rknn_api.h"
#include "camera_runner.h"

// ================= 配置区域 =================
#define MODEL_PATH   "./model/yolov8_rk3566.rknn" // 你的模型路径
#define DEVICE_ID    14
#define CONF_THRESH  0.45f
#define NMS_THRESH   0.45f

// 【修改点1】这里定义你的3个类别
// 如果以后增加了类别，只需要在这里加字符串，代码逻辑不用动
std::vector<std::string> CLASSES = {
    "wire",    // ID 0
    "slipper", // ID 1
    "dock"     // ID 2
};

// ================= 辅助函数 =================

static unsigned char* load_model(const char* filename, int* model_size) {
    FILE* fp = fopen(filename, "rb");
    if(fp == nullptr) { printf("Open file %s failed.\n", filename); return nullptr; }
    fseek(fp, 0, SEEK_END);
    int size = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    unsigned char* data = (unsigned char*)malloc(size);
    fread(data, 1, size, fp);
    fclose(fp);
    *model_size = size;
    return data;
}

// 【修改点2】后处理函数完全动态化
// 增加了两个参数: model_height (特征维度, 如 7或84), model_anchors (框数量, 如 6300或8400)
void post_process(float* output, cv::Mat& img, float scale_w, float scale_h, int model_height, int model_anchors) {
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // 1. 动态计算类别数量
    // YOLOv8 输出结构: [1, 4 + classes, anchors]
    // 所以: 类别数 = model_height - 4
    int num_classes = model_height - 4;
    
    // 安全检查：防止代码里的 CLASSES 列表和模型对不上
    if (CLASSES.size() < num_classes) {
        printf("[Error] 代码里的类别名只有 %lu 个，但模型预测了 %d 个类别！\n", CLASSES.size(), num_classes);
        return;
    }

    // 2. 遍历所有锚点 (6300 或 8400 由 model_anchors 决定)
    for (int i = 0; i < model_anchors; ++i) {
        float max_score = 0.0f;
        int class_id = -1;

        // 3. 遍历所有类别 (动态循环)
        for (int c = 0; c < num_classes; ++c) {
            // 访问位置: output[(4 + c) * 总列数 + 当前列]
            float score = output[(4 + c) * model_anchors + i];
            if (score > max_score) {
                max_score = score;
                class_id = c;
            }
        }

        if (max_score > CONF_THRESH) {
            float cx = output[0 * model_anchors + i];
            float cy = output[1 * model_anchors + i];
            float w  = output[2 * model_anchors + i];
            float h  = output[3 * model_anchors + i];

            int left   = int((cx - 0.5 * w) * scale_w);
            int top    = int((cy - 0.5 * h) * scale_h);
            int width  = int(w * scale_w);
            int height = int(h * scale_h);

            boxes.push_back(cv::Rect(left, top, width, height));
            confidences.push_back(max_score);
            classIds.push_back(class_id);
        }
    }

    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, CONF_THRESH, NMS_THRESH, indices);

    for (int idx : indices) {
        cv::Rect box = boxes[idx];
        std::string label = CLASSES[classIds[idx]]; // 从列表中取名字
        float score = confidences[idx];

        printf("   -> 检测到: %s (%.2f)\n", label.c_str(), score);

        cv::rectangle(img, box, cv::Scalar(0, 255, 0), 2);
        std::string text = label + ": " + std::to_string(score).substr(0, 4);
        cv::putText(img, text, cv::Point(box.x, box.y - 5), 
                    cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 255, 0), 2);
    }
}

// ================= 主函数 =================
int main() {
    printf("--- Dynamic YOLOv8 Demo ---\n");

    int model_size = 0;
    unsigned char* model_data = load_model(MODEL_PATH, &model_size);
    if (!model_data) return -1;

    rknn_context ctx;
    int ret = rknn_init(&ctx, model_data, model_size, 0, nullptr);
    if (ret < 0) { printf("rknn_init failed!\n"); return -1; }

    // ============================================================
    // 【修改点3】 关键步骤：查询模型信息 (Introspection)
    // ============================================================
    
    // A. 查询输入张量属性 (自动获取模型需要的宽和高)
    rknn_tensor_attr input_attrs[1];
    input_attrs[0].index = 0;
    rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, input_attrs, sizeof(input_attrs));

    int model_width  = input_attrs[0].dims[2]; // 获取模型宽 (640)
    int model_height = input_attrs[0].dims[1]; // 获取模型高 (480 或 640)
    // 注意：dims顺序取决于 fmt，通常 NHWC 下 [1]是H, [2]是W；NCHW 下 [2]是H, [3]是W
    // RKNN 默认输入通常是 NHWC，所以 dims[1]=H, dims[2]=W
    
    printf("Model Input: %d x %d\n", model_width, model_height);

    // B. 查询输出张量属性 (自动获取锚点数和类别信息)
    rknn_tensor_attr output_attrs[1];
    output_attrs[0].index = 0;
    rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, output_attrs, sizeof(output_attrs));

    // YOLOv8 输出通常是 [1, 84, 8400] 这种形状
    // dims[1] = 84 (4个坐标 + 80个类)
    // dims[2] = 8400 (锚点数量)
    int out_features = output_attrs[0].dims[1]; 
    int out_anchors  = output_attrs[0].dims[2];

    printf("Model Output: %d features, %d anchors\n", out_features, out_anchors);
    printf("Detected Classes: %d\n", out_features - 4);

    // ============================================================

    // 2. 初始化相机 (这里写死物理相机的采集分辨率，通常是 640x480)
    int cam_width = 640;
    int cam_height = 480;
    CameraRunner camera(DEVICE_ID, cam_width, cam_height);
    if (!camera.start()) return -1;
    std::this_thread::sleep_for(std::chrono::milliseconds(1000));

    // 3. 准备输入
    rknn_input inputs[1]; // 1. 声明一个数组，因为可能有多个输入（比如双目相机），但这里只有1个
	memset(inputs, 0, sizeof(inputs)); // 2. 清零！这是C语言的好习惯，防止里面有随机垃圾数据

	inputs[0].index = 0; // 3. 指定入口号。告诉 NPU：“这是给第 0 号入口的数据”

	inputs[0].type = RKNN_TENSOR_UINT8; 
// 4. 数据类型。
// 这里的 UINT8 对应 OpenCV 的 CV_8U。
// 意思是：我喂给你的每个像素点都是 0~255 的整数，没有小数点。
	
	inputs[0].size = model_width * model_height * 3; 
// 5. 数据包总大小 (字节数)。
// 宽 x 高 x 3通道 (RGB)。NPU 会严格检查这个大小，差一个字节都会报错。

	inputs[0].fmt = RKNN_TENSOR_NHWC;
// 6. 数据排列格式。
// NHWC 意思是：[Batch, Height, Width, Channel]。
// 这正是 OpenCV (cv::Mat) 在内存里的默认排列方式：先放第一个像素的RGB，再放第二个像素的RGB...
// 如果你选错了（比如 NCHW），图像就会花屏。

    rknn_output outputs[1]; // 1. 声明输出数组
	memset(outputs, 0, sizeof(outputs)); // 2. 清零

	outputs[0].want_float = 1; 
// 3. 【核心参数】“请给我说人话”。
// NPU 内部算出来的可能是 INT8（整数）或者 FP16（半精度浮点）。
// 设置 want_float = 1，NPU 驱动会自动把这些乱七八糟的格式转成标准的 float 给我们。
// 如果不设这个，你拿到的数据还需要自己写公式去转换，非常麻烦。

    cv::Mat frame, img_input;
    long long timestamp_ms;

    while (true) {
        auto start = std::chrono::steady_clock::now();

        if (!camera.get_latest_frame(frame, timestamp_ms)) {
            std::this_thread::sleep_for(std::chrono::milliseconds(5));
            continue;
        }

        // 动态 Resize: 缩放到模型需要的大小
        cv::cvtColor(frame, img_input, cv::COLOR_BGR2RGB);
        if (frame.cols != model_width || frame.rows != model_height) {
            cv::resize(img_input, img_input, cv::Size(model_width, model_height));
        }

        inputs[0].buf = img_input.data;
        rknn_inputs_set(ctx, 1, inputs);//这里有拷贝消耗

        ret = rknn_run(ctx, nullptr);
        if(ret < 0) printf("Run error\n");//这里是阻塞的

        ret = rknn_outputs_get(ctx, 1, outputs, nullptr);

        // 计算 FPS
        auto end = std::chrono::steady_clock::now();
        double fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();

        // 动态后处理: 传入动态计算的 scale 和 查询到的模型参数
        float scale_w = (float)cam_width / model_width;
        float scale_h = (float)cam_height / model_height;

        // 【关键】传入动态获取的 out_features 和 out_anchors
        post_process((float*)outputs[0].buf, frame, scale_w, scale_h, out_features, out_anchors);

        printf("FPS: %.1f\n", fps);
        
        // 调试显示
        // cv::imshow("Web", frame); // 如果没屏幕注释掉
        // cv::waitKey(1);

        rknn_outputs_release(ctx, 1, outputs);
    }

    camera.stop();
    rknn_destroy(ctx);
    free(model_data);
    return 0;
}