はじめに
前回は YOLOX をカスタムデータで学習させる方法について説明しました。
今回はカスタムデータで学習させた YOLOX を推論する方法について説明します。
前提条件
前提条件は以下の通りです。
- Windows11
- Python3.9
- torch==1.12.1+cu113
推論プログラムの作成
まずは前回の学習結果である YOLOX_outputs/custom_yolox_s/best_ckpt.pth を custom_yolox_s.py と同じフォルダに移動させておいてください。
inference.py
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import os
import time
from loguru import logger
import cv2
import torch
import glob
from yolox.data.data_augment import ValTransform
# from yolox.data.datasets import COCO_CLASSES
COCO_CLASSES = ("finger","__ignore__")
from yolox.exp import get_exp
from yolox.utils import postprocess, vis
IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]
class Predictor(object):
def __init__(
self,
model,
exp,
cls_names=COCO_CLASSES,
trt_file=None,
decoder=None,
device="gpu",
fp16=True,
legacy=False,
):
self.model = model
self.cls_names = cls_names
self.decoder = decoder
self.num_classes = exp.num_classes
self.confthre = exp.test_conf
self.nmsthre = exp.nmsthre
self.test_size = exp.test_size
self.device = device
self.fp16 = fp16
self.preproc = ValTransform(legacy=legacy)
def inference(self, img):
img_info = {"id": 0}
if isinstance(img, str):
img_info["file_name"] = os.path.basename(img)
img = cv2.imread(img)
else:
img_info["file_name"] = None
height, width = img.shape[:2]
img_info["height"] = height
img_info["width"] = width
img_info["raw_img"] = img
ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
img_info["ratio"] = ratio
img, _ = self.preproc(img, None, self.test_size)
img = torch.from_numpy(img).unsqueeze(0)
img = img.float()
img = img.cuda()
img = img.half()
# if self.device == "gpu":
# img = img.cuda()
# if self.fp16:
# img = img.half() # to FP16
with torch.no_grad():
t0 = time.time()
outputs = self.model(img)
if self.decoder is not None:
outputs = self.decoder(outputs, dtype=outputs.type())
outputs = postprocess(
outputs, self.num_classes, self.confthre,
self.nmsthre, class_agnostic=True
)
logger.info("Infer time: {:.4f}s".format(time.time() - t0))
return outputs, img_info
def visual(self, output, img_info, cls_conf=0.35):
ratio = img_info["ratio"]
img = img_info["raw_img"]
if output is None:
return img
output = output.cpu()
bboxes = output[:, 0:4]
# preprocessing: resize
bboxes /= ratio
cls = output[:, 6]
scores = output[:, 4] * output[:, 5]
vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
return vis_res
def image_demo(predictor, path):
basename = os.path.basename(path)
frame = cv2.imread(path)
outputs, img_info = predictor.inference(frame)
ratio = img_info["ratio"]
output = outputs[0].cpu()
bboxes = output[:, 0:4]
# preprocessing: resize
bboxes /= ratio
bboxes = bboxes.numpy()
cls = output[:, 6].numpy()
scores = (output[:, 4] * output[:, 5]).numpy()
# print(bboxes, cls, scores)
result_image = predictor.visual(outputs[0], img_info, predictor.confthre)
cv2.imwrite("YOLOX_outputs/results/"+basename, result_image)
cv2.imshow("result", result_image)
cv2.waitKey(0)
def main(exp):
exp.test_conf = 0.5
exp.nmsthre = 0.3
model = exp.get_model()
model.cuda()
model.half()
model.eval()
ckpt_file = "./best_ckpt.pth"
ckpt = torch.load(ckpt_file, map_location="cpu")
model.load_state_dict(ckpt["model"])
predictor = Predictor(
model, exp, COCO_CLASSES, None, None, "gpu", True, False,
)
filepath = glob.glob("./datasets/train_COCO_format/val2017/*.jpg")
for f in filepath:
image_demo(predictor, f)
if __name__ == "__main__":
exp = get_exp(".\custom_yolox_s.py", None)
main(exp)
上記を実行すると、以下のように結果が出力されます。
python inference.py
2023-10-29 06:55:19.725 | INFO | __main__:inference:80 - Infer time: 2.2135s
2023-10-29 06:55:55.856 | INFO | __main__:inference:80 - Infer time: 0.0851s
2023-10-29 06:55:55.991 | INFO | __main__:inference:80 - Infer time: 0.0920s
2023-10-29 06:55:56.110 | INFO | __main__:inference:80 - Infer time: 0.0930s
2023-10-29 06:55:56.225 | INFO | __main__:inference:80 - Infer time: 0.0880s
2023-10-29 06:55:56.277 | INFO | __main__:inference:80 - Infer time: 0.0210s
2023-10-29 06:55:56.394 | INFO | __main__:inference:80 - Infer time: 0.0180s
2023-10-29 06:55:56.522 | INFO | __main__:inference:80 - Infer time: 0.0190s
2023-10-29 06:55:56.955 | INFO | __main__:inference:80 - Infer time: 0.0200s
2023-10-29 06:55:57.418 | INFO | __main__:inference:80 - Infer time: 0.0190s
無事に検出できています!
おわりに
今回は YOLOX の推論プログラムを作成しました。
次回は物体の三次元姿勢推定AIのCenterSnapのデモを試していきます。
コメント