import cv2
import torch
import onnxruntime
import numpy as np
import pandas as pd
import albumentations as A

from torchvision import transforms, ops
from albumentations.pytorch import ToTensorV2

from utils.conversions import scale_bboxes
from utils.manipulations import get_cutout


def detect(img_path: str, yolo_path: str, resnet_path: str):
    """Load an image, detect individual plants and label them as
    healthy or wilted.

    :param str img_path: path to image
    :param str yolo_path: path to yolo weights
    :param str resnet_path: path to resnet weights
    :returns: pandas dataframe of bounding boxes, each with its
        predicted class and confidence
    """
    img = cv2.imread(img_path)

    # Get bounding boxes from object detection model
    box_coords = get_boxes(yolo_path, img.copy())

    # Process detections left to right
    box_coords.sort_values(by=['xmin'], ignore_index=True, inplace=True)

    predictions = []
    for _, row in box_coords.iterrows():
        xmin, xmax = int(row['xmin']), int(row['xmax'])
        ymin, ymax = int(row['ymin']), int(row['ymax'])

        # Get cutout of ROI in BGR
        cropped_image = get_cutout(img.copy(), xmin, xmax, ymin, ymax)

        # Classify ROI in RGB
        predictions.append(classify(resnet_path, cropped_image[..., ::-1]))

    # Gather top class and confidence values
    cls = []
    cls_conf = []
    for pred in predictions:
        ans, index = torch.topk(pred, 1)
        cls.append(index.int().item())
        cls_conf.append(round(ans.double().item(), 6))

    # Add predicted classes and confidence values to pandas dataframe
    box_coords['cls'] = cls
    box_coords['cls_conf'] = cls_conf

    return box_coords
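# Illustrative usage of detect() (an assumption, not from the original module;
# the file names are placeholders for your own image and exported ONNX weights):
#
#   boxes = detect('plants.jpg', 'yolo.onnx', 'resnet.onnx')
#   print(boxes[['xmin', 'ymin', 'xmax', 'ymax', 'box_conf', 'cls', 'cls_conf']])
#
# Each row is one detected plant; 'cls' is the predicted label index and
# 'cls_conf' its softmax confidence in percent.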
def classify(resnet_path, img):
    """Classify img with object classification model.

    :param resnet_path: path to onnx object classification model (ResNet)
    :param img: opencv2 image object in RGB
    :returns: 1-D tensor of class confidences in percent
    """
    # Transform image for ResNet
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
        transforms.Resize((224, 224)),
    ])

    # Copy to drop the negative strides left by the BGR -> RGB flip
    img = transform(img.copy())
    batch = img.unsqueeze(0)

    # Do inference
    session = onnxruntime.InferenceSession(resnet_path)
    outname = [i.name for i in session.get_outputs()]
    inname = [i.name for i in session.get_inputs()]
    inp = {inname[0]: batch.numpy()}
    out = torch.tensor(np.array(session.run(outname, inp)))[0]

    # Apply softmax to get percentage confidence of classes
    out = torch.nn.functional.softmax(out, dim=1)[0] * 100
    return out
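# Illustrative note (assumed usage): since classify() returns a 1-D tensor of
# per-class confidences in percent, the top label can be read the same way
# detect() does, e.g. with a placeholder weights path and an RGB crop:
#
#   conf, label = torch.topk(classify('resnet.onnx', rgb_crop), 1)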
def apply_nms(predictions,
              confidence_threshold: float = 0.3,
              nms_threshold: float = 0.65):
    """Apply Non-Maximum Suppression to a list of bboxes.

    :param List[Tensor[N, 7]] predictions: predicted bboxes
    :param float confidence_threshold: discard all bboxes with lower
        confidence
    :param float nms_threshold: discard all overlapping bboxes with
        higher IoU
    :returns List[Tensor[N, 7]]: filtered bboxes
    """
    preds_nms = []
    for pred in predictions:
        # Drop detections below the confidence threshold
        pred = pred[pred[:, 6] > confidence_threshold]

        # Run class-aware NMS on the surviving boxes
        nms_idx = ops.batched_nms(
            boxes=pred[:, 1:5],
            scores=pred[:, 6],
            idxs=pred[:, 5],
            iou_threshold=nms_threshold,
        )
        preds_nms.append(pred[nms_idx])

    return preds_nms
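# Worked example for apply_nms() (illustrative, not from the original module).
# Each row is assumed to be [image_idx, xmin, ymin, xmax, ymax, class_id, conf],
# matching the column indexing above:
#
#   dummy = torch.tensor([
#       [0, 10., 10., 50., 50., 0., 0.90],    # kept: highest-scoring box
#       [0, 12., 11., 51., 49., 0., 0.85],    # suppressed: IoU ~0.88 with box above
#       [0, 80., 80., 120., 120., 1., 0.20],  # dropped: below confidence_threshold
#   ])
#   filtered = apply_nms([dummy])[0]  # -> only the first row survives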
def get_boxes(yolo_path, image):
    """Run object detection model on an image and get the bounding box
    coordinates of all matches.

    :param yolo_path: path to onnx object detection model (YOLO)
    :param image: opencv2 image object in BGR
    :returns: pandas dataframe of matches
    """
    # Convert from BGR to RGB
    img = image[..., ::-1].copy()

    resized_hw = (640, 640)
    original_hw = (image.shape[0], image.shape[1])

    transform = [
        A.LongestMaxSize(max(resized_hw)),
        A.PadIfNeeded(
            resized_hw[0],
            resized_hw[1],
            border_mode=0,
            value=(114, 114, 114),
        ),
        A.ToFloat(max_value=255),
        ToTensorV2(transpose_mask=True),
    ]

    # Pad (letterbox) and transform image to correct dims
    transform = A.Compose(transform)
    img = transform(image=img)

    # Add batch dimension
    img['image'] = img['image'].unsqueeze(0)

    # Do inference
    session = onnxruntime.InferenceSession(yolo_path)
    outname = [i.name for i in session.get_outputs()]
    inname = [i.name for i in session.get_inputs()]
    inp = {inname[0]: img['image'].numpy()}
    out = torch.tensor(np.array(session.run(outname, inp)))[0]

    # Apply NMS to results
    preds_nms = apply_nms([out])[0]

    # Convert boxes from resized img to original img
    xyxy_boxes = preds_nms[:, [1, 2, 3, 4]]  # xmin, ymin, xmax, ymax
    bboxes = scale_bboxes(xyxy_boxes, resized_hw, original_hw).int().numpy()

    # Construct DataFrame with bboxes and their confidence
    box_coords = pd.DataFrame(np.c_[bboxes, preds_nms[:, 6]])
    box_coords.columns = ['xmin', 'ymin', 'xmax', 'ymax', 'box_conf']

    return box_coords
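# Minimal runnable sketch (an addition, not part of the original file): the
# paths below are placeholders -- substitute your own test image and the
# exported YOLO / ResNet ONNX weights.
if __name__ == '__main__':
    boxes = detect(
        img_path='example.jpg',     # placeholder image path
        yolo_path='yolo.onnx',      # placeholder detector weights
        resnet_path='resnet.onnx',  # placeholder classifier weights
    )
    print(boxes)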