カスタムデータで学習させた YOLOX で推論【Python】

はじめに
前提条件
推論プログラムの作成
おわりに

はじめに

前回は YOLOX をカスタムデータで学習させる方法について説明しました。

今回は、カスタムデータで学習させた YOLOX を使って推論する方法について説明します。

前提条件

前提条件は以下の通りです。

Windows11
Python3.9
torch==1.12.1+cu113

推論プログラムの作成

まずは前回の学習結果である YOLOX_outputs/custom_yolox_s/best_ckpt.pth を custom_yolox_s.py と同じフォルダに移動させておいてください。

inference.py

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

import os
import time
from loguru import logger
import cv2
import torch
import glob

from yolox.data.data_augment import ValTransform
# Class names used by this script instead of the upstream COCO list (the
# original import is kept below for reference). The upstream name is kept
# because it is the default value of ``Predictor(cls_names=...)``.
# from yolox.data.datasets import COCO_CLASSES
COCO_CLASSES = ("finger","__ignore__")
from yolox.exp import get_exp
from yolox.utils import postprocess, vis

# Lower-case file extensions regarded as images.
# NOTE(review): nothing in the code shown here references this constant --
# presumably carried over from the upstream demo; confirm before removing.
IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]


class Predictor(object):
    """Run single-image inference with a YOLOX model and draw the results.

    The thresholds and the network input size are read once from the
    experiment object passed to the constructor.
    """

    def __init__(
        self,
        model,
        exp,
        cls_names=COCO_CLASSES,
        trt_file=None,
        decoder=None,
        device="gpu",
        fp16=True,
        legacy=False,
    ):
        """Store the model and the inference settings.

        Args:
            model: Network invoked as ``model(img)``. The caller is
                responsible for placing it on the device / precision that
                matches ``device`` and ``fp16``.
            exp: Experiment object providing ``num_classes``, ``test_conf``,
                ``nmsthre`` and ``test_size``.
            cls_names: Class names handed to ``vis`` when drawing.
            trt_file: Accepted for signature compatibility; not used here.
            decoder: Optional callable applied to the raw model output.
            device: ``"gpu"`` moves the input tensor to CUDA; any other
                value leaves it on the CPU.
            fp16: When true (and ``device == "gpu"``) the input tensor is
                cast to half precision.
            legacy: Forwarded to ``ValTransform``.
        """
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform(legacy=legacy)

    def inference(self, img):
        """Run the model on one image.

        Args:
            img: Either a path to an image file or an already decoded image
                array (as returned by ``cv2.imread``).

        Returns:
            A ``(outputs, img_info)`` tuple. ``outputs`` is whatever
            ``postprocess`` returns; ``img_info`` is a dict with the keys
            ``id``, ``file_name``, ``height``, ``width``, ``raw_img`` and
            ``ratio`` (the resize factor applied to fit ``test_size``).

        Raises:
            ValueError: If the image could not be read (``cv2.imread``
                returned ``None``) or ``None`` was passed directly.
        """
        img_info = {"id": 0}
        source = img
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
        else:
            img_info["file_name"] = None

        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an AttributeError on ``.shape`` below.
        if img is None:
            raise ValueError("Could not read image: {!r}".format(source))

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        # Scale factor that fits the image inside test_size while keeping
        # the aspect ratio; visual() uses it to map boxes back.
        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)  # add a leading batch axis
        img = img.float()

        # Honour the constructor settings instead of forcing CUDA + FP16.
        if self.device == "gpu":
            img = img.cuda()
            if self.fp16:
                img = img.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            outputs = postprocess(
                outputs, self.num_classes, self.confthre,
                self.nmsthre, class_agnostic=True
            )
            logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        return outputs, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        """Draw the detections of one image.

        Args:
            output: Detection tensor for a single image, or ``None`` when
                there is nothing to draw.
            img_info: The dict returned by :meth:`inference`.
            cls_conf: Score threshold forwarded to ``vis``.

        Returns:
            The raw image unchanged when ``output`` is ``None``; otherwise
            the result of ``vis``.
        """
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img
        output = output.cpu()

        # Undo the preprocessing resize. The division is done out of place
        # because ``.cpu()`` returns the very same tensor when it already
        # lives on the CPU, and ``/=`` would then rescale the caller's data
        # on every call.
        bboxes = output[:, 0:4] / ratio

        cls = output[:, 6]
        # Final score is the product of columns 4 and 5.
        scores = output[:, 4] * output[:, 5]

        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
        return vis_res


def image_demo(predictor, path, save_dir="YOLOX_outputs/results"):
    """Run inference on one image file, save the annotated image and show it.

    Args:
        predictor: Object exposing ``inference(img)``, ``visual(output,
            img_info, cls_conf)`` and a ``confthre`` attribute.
        path: Path of the image file to process.
        save_dir: Directory the annotated image is written to (created when
            missing). The file keeps the base name of ``path``.

    The function blocks in ``cv2.waitKey(0)`` until a key is pressed in the
    result window.
    """
    basename = os.path.basename(path)
    frame = cv2.imread(path)
    if frame is None:
        # cv2.imread returns None for missing or undecodable files.
        logger.warning("Could not read image, skipping: {}".format(path))
        return

    outputs, img_info = predictor.inference(frame)

    # outputs[0] is handed over untouched: visual() itself copes with a
    # None entry (no detections) and performs the rescaling of the boxes.
    result_image = predictor.visual(outputs[0], img_info, predictor.confthre)

    # cv2.imwrite does not create directories; make sure the target exists.
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, basename)
    if not cv2.imwrite(save_path, result_image):
        logger.warning("Failed to write result image: {}".format(save_path))

    cv2.imshow("result", result_image)
    cv2.waitKey(0)

def main(exp):
    """Build the model, load ``./best_ckpt.pth`` and demo every validation image.

    Args:
        exp: Experiment object; its ``test_conf`` and ``nmsthre`` attributes
            are overwritten here before the predictor reads them.
    """
    # Thresholds used by the predictor for this demo.
    exp.test_conf = 0.5
    exp.nmsthre = 0.3

    # The network runs on CUDA in half precision, in evaluation mode.
    net = exp.get_model()
    net.cuda()
    net.half()
    net.eval()

    # Read the checkpoint on the CPU, then copy its weights into the model.
    checkpoint = torch.load("./best_ckpt.pth", map_location="cpu")
    net.load_state_dict(checkpoint["model"])

    predictor = Predictor(
        net,
        exp,
        cls_names=COCO_CLASSES,
        trt_file=None,
        decoder=None,
        device="gpu",
        fp16=True,
        legacy=False,
    )

    for image_path in glob.glob("./datasets/train_COCO_format/val2017/*.jpg"):
        image_demo(predictor, image_path)

if __name__ == "__main__":
    # Use a forward slash: ".\c..." contains the invalid escape sequence
    # "\c" (Python warns about it) and a backslash is only a path separator
    # on Windows, whereas "./" is accepted on Windows and POSIX alike.
    exp = get_exp("./custom_yolox_s.py", None)
    main(exp)

上記を実行すると、以下のように結果が出力されます。

python inference.py
2023-10-29 06:55:19.725 | INFO     | __main__:inference:80 - Infer time: 2.2135s
2023-10-29 06:55:55.856 | INFO     | __main__:inference:80 - Infer time: 0.0851s
2023-10-29 06:55:55.991 | INFO     | __main__:inference:80 - Infer time: 0.0920s
2023-10-29 06:55:56.110 | INFO     | __main__:inference:80 - Infer time: 0.0930s
2023-10-29 06:55:56.225 | INFO     | __main__:inference:80 - Infer time: 0.0880s
2023-10-29 06:55:56.277 | INFO     | __main__:inference:80 - Infer time: 0.0210s
2023-10-29 06:55:56.394 | INFO     | __main__:inference:80 - Infer time: 0.0180s
2023-10-29 06:55:56.522 | INFO     | __main__:inference:80 - Infer time: 0.0190s
2023-10-29 06:55:56.955 | INFO     | __main__:inference:80 - Infer time: 0.0200s
2023-10-29 06:55:57.418 | INFO     | __main__:inference:80 - Infer time: 0.0190s