diff --git a/evaluation/detection.py b/evaluation/detection.py
new file mode 100644
index 0000000..4fd1deb
--- /dev/null
+++ b/evaluation/detection.py
@@ -0,0 +1,167 @@
+import cv2
+import torch
+import onnxruntime
+import numpy as np
+import pandas as pd
+import albumentations as A
+
+from torchvision import transforms, ops
+from albumentations.pytorch import ToTensorV2
+
+from utils.conversions import scale_bboxes
+from utils.manipulations import get_cutout
+
+def detect(img_path: str, yolo_path: str, resnet_path: str):
+    """Load an image, detect individual plants and label them as
+    healthy or stressed.
+
+    :param str img_path: path to image
+    :param str yolo_path: path to YOLO model in ONNX format
+    :param str resnet_path: path to ResNet model in ONNX format
+    :returns: pandas DataFrame of bounding boxes with predicted
+        class and confidence values
+
+    """
+    img = cv2.imread(img_path)
+
+    # Get bounding boxes from object detection model
+    box_coords = get_boxes(yolo_path, img.copy())
+
+    box_coords.sort_values(by=['xmin'], ignore_index=True, inplace=True)
+
+    predictions = []
+    for _, row in box_coords.iterrows():
+        xmin, xmax = int(row['xmin']), int(row['xmax'])
+        ymin, ymax = int(row['ymin']), int(row['ymax'])
+
+        # Get cropped ROI in BGR
+        cropped_image = get_cutout(img.copy(), xmin, xmax, ymin, ymax)
+
+        # Classify ROI in RGB
+        predictions.append(classify(resnet_path, cropped_image[..., ::-1]))
+
+    # Gather top class and confidence values
+    cls = []
+    cls_conf = []
+    for pred in predictions:
+        ans, index = torch.topk(pred, 1)
+        cls.append(index.int().item())
+        cls_conf.append(round(ans.double().item(), 6))
+
+    # Add predicted classes and confidence values to pandas dataframe
+    box_coords['cls'] = cls
+    box_coords['cls_conf'] = cls_conf
+
+    return box_coords
+
+
+def classify(resnet_path, img):
+    """Classify img with the object classification model.
+
+    :param resnet_path: path to ResNet model in ONNX format
+    :param img: opencv2 image object in RGB
+    :returns: tensor of class confidences in percent
+    """
+
+    # Transform image for ResNet
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                             std=[0.229, 0.224, 0.225]),
+        transforms.Resize((224, 224))
+    ])
+
+    img = transform(img.copy())
+    batch = img.unsqueeze(0)
+
+    # Do inference
+    session = onnxruntime.InferenceSession(resnet_path)
+    outname = [i.name for i in session.get_outputs()]
+    inname = [i.name for i in session.get_inputs()]
+    inp = {inname[0]: batch.numpy()}
+    out = torch.tensor(np.array(session.run(outname, inp)))[0]
+
+    # Apply softmax to get percentage confidence of classes
+    out = torch.nn.functional.softmax(out, dim=1)[0] * 100
+    return out
+
+
+def apply_nms(predictions,
+              confidence_threshold: float = 0.3,
+              nms_threshold: float = 0.65):
+    """Apply Non-Maximum Suppression to a list of bboxes.
+
+    :param predictions List[Tensor[N, 7]]: predicted bboxes
+    :param confidence_threshold float: discard all bboxes with lower
+        confidence
+    :param nms_threshold float: discard all overlapping bboxes with
+        higher IoU
+    :returns List[Tensor[N, 7]]: filtered bboxes
+    """
+    preds_nms = []
+    for pred in predictions:
+        pred = pred[pred[:, 6] > confidence_threshold]
+
+        nms_idx = ops.batched_nms(
+            boxes=pred[:, 1:5],
+            scores=pred[:, 6],
+            idxs=pred[:, 5],
+            iou_threshold=nms_threshold,
+        )
+        preds_nms.append(pred[nms_idx])
+
+    return preds_nms
+
+
+def get_boxes(yolo_path, image):
+    """Run object detection model on an image and get the bounding box
+    coordinates of all matches.
+ + :param model: path to onnx object detection model (YOLO) + :param img: opencv2 image object + :returns: pandas dataframe of matches + """ + # Convert from BGR to RGB + img = image[..., ::-1].copy() + + resized_hw = (640, 640) + original_hw = (image.shape[0], image.shape[1]) + + transform = [ + A.LongestMaxSize(max(resized_hw)), + A.PadIfNeeded( + resized_hw[0], + resized_hw[1], + border_mode=0, + value=(114, 114, 114), + ), + A.ToFloat(max_value=255), + ToTensorV2(transpose_mask=True), + ] + + # Pad (letterbox) and transform image to correct dims + transform = A.Compose(transform) + img = transform(image=img) + + # Add batch dimension + img['image'] = img['image'].unsqueeze(0) + + # Do inference + session = onnxruntime.InferenceSession(yolo_path) + outname = [i.name for i in session.get_outputs()] + inname = [i.name for i in session.get_inputs()] + inp = {inname[0]: img['image'].numpy()} + out = torch.tensor(np.array(session.run(outname, inp)))[0] + + # Apply NMS to results + preds_nms = apply_nms([out])[0] + + # Convert boxes from resized img to original img + xyxy_boxes = preds_nms[:, [1, 2, 3, 4]] # xmin, ymin, xmax, ymax + bboxes = scale_bboxes(xyxy_boxes, resized_hw, original_hw).int().numpy() + + # Construct DataFrame with bboxes and their confidence + box_coords = pd.DataFrame(np.c_[bboxes, preds_nms[:, 6]]) + box_coords.columns = ['xmin', 'ymin', 'xmax', 'ymax', 'box_conf'] + + return box_coords diff --git a/evaluation/evaluation-end2end.ipynb b/evaluation/evaluation-end2end.ipynb new file mode 100644 index 0000000..27cadeb --- /dev/null +++ b/evaluation/evaluation-end2end.ipynb @@ -0,0 +1,469 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "3fe8177c", + "metadata": {}, + "outputs": [], + "source": [ + "import fiftyone as fo\n", + "from PIL import Image\n", + "from detection import detect" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "32f0f8ec", + "metadata": {}, + "outputs": [], + "source": [ + "name = \"dataset-small\"\n", + "dataset_dir = \"/home/zenon/Documents/master-thesis/evaluation/dataset-small\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6343aa55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 100% |█████████████████| 401/401 [633.3ms elapsed, 0s remaining, 633.2 samples/s] \n" + ] + } + ], + "source": [ + "# The splits to load\n", + "splits = [\"val\"]\n", + "\n", + "# Load the dataset, using tags to mark the samples in each split\n", + "dataset = fo.Dataset(name)\n", + "for split in splits:\n", + " dataset.add_dir(\n", + " dataset_dir=dataset_dir,\n", + " dataset_type=fo.types.YOLOv5Dataset,\n", + " split=split,\n", + " tags=split,\n", + " )\n", + "\n", + "classes = dataset.default_classes\n", + "predictions_view = dataset.view()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "29827e3f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 100% |█████████████████| 401/401 [5.4m elapsed, 0s remaining, 1.4 samples/s] \n" + ] + } + ], + "source": [ + "# Do detections with model and save bounding boxes\n", + "with fo.ProgressBar() as pb:\n", + " for sample in pb(predictions_view):\n", + " image = Image.open(sample.filepath)\n", + " w, h = image.size\n", + " pred = detect(sample.filepath, 'yolo.onnx', 'resnet.onnx')\n", + "\n", + " detections = []\n", + " for _, row in pred.iterrows():\n", + " xmin, xmax = int(row['xmin']), int(row['xmax'])\n", + " ymin, ymax = 
int(row['ymin']), int(row['ymax'])\n", + " rel_box = [\n", + " xmin / w, ymin / h, (xmax - xmin) / w, (ymax - ymin) / h\n", + " ]\n", + " detections.append(\n", + " fo.Detection(label=classes[int(row['cls'])],\n", + " bounding_box=rel_box,\n", + " confidence=int(row['cls_conf'])))\n", + "\n", + " sample[\"yolo_resnet\"] = fo.Detections(detections=detections)\n", + " sample.save()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8ad67806", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Evaluating detections...\n", + " 100% |█████████████████| 401/401 [1.2s elapsed, 0s remaining, 339.9 samples/s] \n", + "Performing IoU sweep...\n", + " 100% |█████████████████| 401/401 [1.4s elapsed, 0s remaining, 288.5 samples/s] \n" + ] + } + ], + "source": [ + "results = predictions_view.evaluate_detections(\n", + " \"yolo_resnet\",\n", + " gt_field=\"ground_truth\",\n", + " eval_key=\"eval\",\n", + " compute_mAP=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b180420b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " Healthy 0.80 0.81 0.81 430\n", + " Stressed 0.77 0.72 0.75 315\n", + "\n", + " micro avg 0.79 0.77 0.78 745\n", + " macro avg 0.79 0.77 0.78 745\n", + "weighted avg 0.79 0.77 0.78 745\n", + "\n", + "0.6336217415940075\n" + ] + }, + { + "data": { + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8e819dea581e48d69877fb551a949e49", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FigureWidget({\n", + " 'data': [{'mode': 'markers',\n", + " 'opacity': 0.1,\n", + " 'type': 'scatter',\n", + " 'uid': '918e4315-b093-4d2b-8af4-789b5a5d5152',\n", + " 'x': array([0, 1, 2, 0, 1, 2, 0, 1, 2]),\n", + " 'y': array([0, 0, 0, 1, 1, 1, 2, 2, 2])},\n", + " {'colorscale': [[0.0, 'rgb(255,245,235)'], [0.125,\n", + " 'rgb(254,230,206)'], [0.25, 'rgb(253,208,162)'],\n", + " [0.375, 'rgb(253,174,107)'], [0.5, 'rgb(253,141,60)'],\n", + " [0.625, 'rgb(241,105,19)'], [0.75, 'rgb(217,72,1)'],\n", + " [0.875, 'rgb(166,54,3)'], [1.0, 'rgb(127,39,4)']],\n", + " 'hoverinfo': 'skip',\n", + " 'showscale': False,\n", + " 'type': 'heatmap',\n", + " 'uid': 'c2b76e66-be9d-4ca2-9060-d350e1d5e030',\n", + " 'z': array([[ 86, 68, 0],\n", + " [ 0, 228, 87],\n", + " [348, 0, 82]]),\n", + " 'zmax': 348,\n", + " 'zmin': 0},\n", + " {'colorbar': {'len': 1, 'lenmode': 'fraction'},\n", + " 'colorscale': [[0.0, 'rgb(255,245,235)'], [0.125,\n", + " 'rgb(254,230,206)'], [0.25, 'rgb(253,208,162)'],\n", + " [0.375, 'rgb(253,174,107)'], [0.5, 'rgb(253,141,60)'],\n", + " [0.625, 'rgb(241,105,19)'], [0.75, 'rgb(217,72,1)'],\n", + " [0.875, 'rgb(166,54,3)'], [1.0, 'rgb(127,39,4)']],\n", + " 'hovertemplate': 'count: %{z}
<br>truth: %{y}<br>
predicted: %{x}',\n", + " 'opacity': 0.25,\n", + " 'type': 'heatmap',\n", + " 'uid': 'faa2cc05-e7dd-41df-8b22-533e4ae70f67',\n", + " 'z': array([[ 86, 68, 0],\n", + " [ 0, 228, 87],\n", + " [348, 0, 82]]),\n", + " 'zmax': 348,\n", + " 'zmin': 0}],\n", + " 'layout': {'clickmode': 'event',\n", + " 'margin': {'b': 0, 'l': 0, 'r': 0, 't': 30},\n", + " 'template': '...',\n", + " 'title': {},\n", + " 'xaxis': {'constrain': 'domain',\n", + " 'range': [-0.5, 2.5],\n", + " 'tickmode': 'array',\n", + " 'ticktext': [Healthy, Stressed, (none)],\n", + " 'tickvals': array([0, 1, 2])},\n", + " 'yaxis': {'constrain': 'domain',\n", + " 'range': [-0.5, 2.5],\n", + " 'scaleanchor': 'x',\n", + " 'scaleratio': 1,\n", + " 'tickmode': 'array',\n", + " 'ticktext': array(['(none)', 'Stressed', 'Healthy'], dtype=object),\n", + " 'tickvals': array([0, 1, 2])}}\n", + "})" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "34fe31e66ebc452aba7202b9d206dee8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FigureWidget({\n", + " 'data': [{'customdata': array([99. , 99. , 99. , 99. , 99. , 99. , 99. , 99. , 99. , 99. , 99. , 98. ,\n", + " 98. , 98. , 97. , 97. , 97. , 97. , 97. , 96. , 96. , 95. , 94.2, 94. ,\n", + " 93.2, 93. , 92.9, 91.9, 91.2, 91. , 91. , 90.8, 89.8, 88.8, 88. , 87.9,\n", + " 87. , 86.9, 86. , 85.9, 85.8, 85. , 84.8, 84.6, 83.1, 81.9, 81.8, 81.1,\n", + " 80.1, 79.9, 79.7, 78.9, 78.6, 77.9, 77.7, 76.8, 76. , 75.6, 74.6, 74.2,\n", + " 73.3, 71.9, 70.7, 69.5, 68.3, 67.1, 66.2, 65.1, 64. , 62.9, 61.2, 60.6,\n", + " 59.7, 58.9, 52.6, 51.1, 49.4, 47.2, 46.4, 40.7, 30.5, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ]),\n", + " 'hovertemplate': ('class: %{text}
recal' ... 'customdata:.3f}'),\n", + " 'line': {'color': '#3366CC'},\n", + " 'mode': 'lines',\n", + " 'name': 'Healthy (AP = 0.674)',\n", + " 'text': array(['Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy',\n", + " 'Healthy', 'Healthy', 'Healthy', 'Healthy', 'Healthy'], dtype='class: %{text}
recal' ... 'customdata:.3f}'),\n", + " 'line': {'color': '#DC3912'},\n", + " 'mode': 'lines',\n", + " 'name': 'Stressed (AP = 0.593)',\n", + " 'text': array(['Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed',\n", + " 'Stressed', 'Stressed', 'Stressed', 'Stressed', 'Stressed'], dtype='" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "session = fo.launch_app(dataset, auto=False)\n", + "session.view = predictions_view\n", + "session.open_tab()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "535003f4", + "metadata": {}, + "outputs": [], + "source": [ + "session.plots.attach(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3ba32f0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/evaluation/evaluation.py b/evaluation/evaluation.py new file mode 100644 index 0000000..dc15c6e --- /dev/null +++ b/evaluation/evaluation.py @@ -0,0 +1,64 @@ +import fiftyone as fo +from PIL import Image +from evaluate import detect + +name = "dataset-small" +dataset_dir = "/home/zenon/Documents/master-thesis/evaluation/dataset-small" + +# The splits to load +splits = ["val"] + +# Load the dataset, using tags to mark the samples in each split +dataset = fo.Dataset(name) +for split in splits: + dataset.add_dir( + dataset_dir=dataset_dir, + dataset_type=fo.types.YOLOv5Dataset, + split=split, + tags=split, + ) + +classes = dataset.default_classes +predictions_view = dataset.view() + +with fo.ProgressBar() as pb: + for sample in pb(predictions_view): + image = Image.open(sample.filepath) + w, h = image.size + pred = detect(sample.filepath, 'yolo.onnx', 'resnet.onnx') + + detections = [] + for _, row in pred.iterrows(): + xmin, xmax = int(row['xmin']), int(row['xmax']) + ymin, ymax = int(row['ymin']), int(row['ymax']) + rel_box = [ + xmin / w, ymin / h, (xmax - xmin) / w, (ymax - ymin) / h + ] 
+ detections.append( + fo.Detection(label=classes[int(row['cls'])], + bounding_box=rel_box, + confidence=int(row['cls_conf']))) + + sample["yolo_resnet"] = fo.Detections(detections=detections) + sample.save() + +results = predictions_view.evaluate_detections( + "yolo_resnet", + gt_field="ground_truth", + eval_key="eval", + compute_mAP=True, +) + +# Get the 10 most common classes in the dataset +counts = dataset.count_values("ground_truth.detections.label") +classes_top10 = sorted(counts, key=counts.get, reverse=True)[:10] + +# Print a classification report for the top-10 classes +results.print_report(classes=classes_top10) + +plot = results.plot_pr_curves(classes=["Healthy", "Stressed"]) +plot.show() + +session = fo.launch_app(dataset) +session.view = predictions_view +session.wait() diff --git a/evaluation/labeling.py b/evaluation/labeling.py new file mode 100644 index 0000000..26f699c --- /dev/null +++ b/evaluation/labeling.py @@ -0,0 +1,135 @@ +import logging +import argparse +import cv2 +import json +import os + +from utils.conversions import convert_to_yolo +from detection import detect + +template = [{ + "data": { + "image": "/data/local-files/?d=evaluation/images/0.jpg" + }, + "predictions": [{ + "model_version": + "one", + "score": + 0.0, + "result": [{ + "id": "result1", + "type": "rectanglelabels", + "from_name": "label", + "to_name": "image", + "original_width": 474, + "original_height": 266, + "image_rotation": 0, + "confidence": 0, + "value": { + "rotation": 0, + "x": 19.62, + "y": 15.04, + "width": 55.06, + "height": 78.2, + "rectanglelabels": ["Stressed"] + } + }] + }] +}] + + +def create_json_annotation(image_path, yolo_path, resnet_path): + """Create a JSON representation of identified bounding boxes. + + :param image_path str: path to image + :param yolo_path str: path to YOLO model in ONNX format + :param resnet_path str: path to ResNet model in ONNX format + :returns Dict: bounding boxes in labelstudio JSON format + """ + template[0]['data'][ + 'image'] = "/data/local-files/?d=evaluation/" + image_path + img = cv2.imread(image_path) + (height, width) = img.shape[0], img.shape[1] + bboxes = detect(image_path, yolo_path, resnet_path) + result = template[0]['predictions'][0]['result'] + + results = [] + for idx, row in bboxes.iterrows(): + modified = convert_to_yolo(row, width, height) + json_result = {} + json_result['id'] = 'result' + str(idx + 1) + json_result['type'] = 'rectanglelabels' + json_result['from_name'] = 'label' + json_result['to_name'] = 'image' + json_result['original_width'] = width + json_result['original_height'] = height + json_result['image_rotation'] = 0 + json_result['value'] = {} + json_result['value']['rotation'] = 0 + json_result['value']['x'] = modified['xmin%'] + json_result['value']['y'] = modified['ymin%'] + json_result['value']['width'] = modified['width%'] + json_result['value']['height'] = modified['height%'] + if modified['cls'] == 0: + json_result['value']['rectanglelabels'] = ['Healthy'] + else: + json_result['value']['rectanglelabels'] = ['Stressed'] + results.append(json_result) + + template[0]['predictions'][0]['result'] = results + return template + + +def write_labels_to_disk(image_dir, output_dir, yolo_path, resnet_path): + """Read images from disk, classify them and output bounding boxes + in labelstudio JSON format. 
+
+    :param image_dir str: directory containing images to label
+    :param output_dir str: directory to save JSON files to
+    :param yolo_path str: path to YOLO model in ONNX format
+    :param resnet_path str: path to ResNet model in ONNX format
+    :returns: None
+    """
+    image_dir = os.path.join(image_dir, '')
+    for file in os.listdir(image_dir):
+        filename = os.fsdecode(file)
+        filename_wo_ext = os.path.splitext(filename)[0]
+        rel_output_path = os.path.join(output_dir, filename_wo_ext + '.json')
+        json_data = create_json_annotation(image_dir + filename, yolo_path,
+                                           resnet_path)
+        # Make sure the output directory exists
+        os.makedirs(output_dir, exist_ok=True)
+        logging.info('Writing json file for %s', filename)
+        with open(rel_output_path, 'w') as f:
+            json.dump(json_data, f)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--source',
+                        type=str,
+                        help='source folder with images',
+                        required=True)
+    parser.add_argument('--output',
+                        type=str,
+                        help='output folder for json files',
+                        required=True)
+    parser.add_argument('--yolo',
+                        type=str,
+                        help='path to YOLO model in ONNX format',
+                        required=True)
+    parser.add_argument('--resnet',
+                        type=str,
+                        help='path to ResNet model in ONNX format',
+                        required=True)
+    parser.add_argument(
+        '--log',
+        type=str,
+        help='log level (debug, info, warning, error, critical)',
+        default='warning')
+    opt = parser.parse_args()
+    numeric_level = getattr(logging, opt.log.upper(), None)
+    logging.basicConfig(format='%(levelname)s::%(asctime)s::%(message)s',
+                        datefmt='%Y-%m-%dT%H:%M:%S',
+                        level=numeric_level)
+    write_labels_to_disk(opt.source, opt.output, opt.yolo, opt.resnet)
diff --git a/evaluation/utils/conversions.py b/evaluation/utils/conversions.py
new file mode 100644
index 0000000..02039e2
--- /dev/null
+++ b/evaluation/utils/conversions.py
@@ -0,0 +1,51 @@
+def convert_to_yolo(bbox, width, height):
+    """Convert absolute xmin/ymin/xmax/ymax pixel coordinates to the
+    percentage-based x, y, width and height values expected by
+    labelstudio.
+
+    :param bbox: pandas Series with xmin, ymin, xmax and ymax entries
+    :param width: width of the original image in pixels
+    :param height: height of the original image in pixels
+    :returns: copy of bbox with xmin%, ymin%, width% and height% added
+    """
+    modified = bbox.copy()
+    modified['xmin%'] = round(bbox['xmin'] / width * 100, 2)
+    modified['ymin%'] = round(bbox['ymin'] / height * 100, 2)
+    modified['width%'] = round((bbox['xmax'] - bbox['xmin']) / width * 100, 2)
+    modified['height%'] = round((bbox['ymax'] - bbox['ymin']) / height * 100,
+                                2)
+    return modified
+
+
+def scale_bboxes(bboxes, resized_hw, original_hw):
+    """Scale bounding boxes from a padded and resized image to fit on
+    the original image.
+
+    :param bboxes Tensor[N, 4]: tensor of xmin, ymin, xmax, ymax
+        per bounding box
+    :param resized_hw Tuple: height and width of the resized image
+    :param original_hw Tuple: height and width of the original image
+    :returns Tensor[N, 4]: tensor of xmin, ymin, xmax, ymax per
+        bounding box
+    """
+    scaled_boxes = bboxes.clone()
+    scale_ratio = resized_hw[0] / original_hw[0], resized_hw[1] / original_hw[1]
+
+    # Remove padding
+    pad_scale = min(scale_ratio)
+    padding = (resized_hw[1] - original_hw[1] * pad_scale) / 2, (
+        resized_hw[0] - original_hw[0] * pad_scale) / 2
+    scaled_boxes[:, [0, 2]] -= padding[0]  # x padding
+    scaled_boxes[:, [1, 3]] -= padding[1]  # y padding
+    scale_ratio = (pad_scale, pad_scale)
+
+    scaled_boxes[:, [0, 2]] /= scale_ratio[1]
+    scaled_boxes[:, [1, 3]] /= scale_ratio[0]
+
+    # Clip xyxy bounding boxes to image shape (height, width)
+    scaled_boxes[:, 0].clamp_(0, original_hw[1])  # xmin
+    scaled_boxes[:, 1].clamp_(0, original_hw[0])  # ymin
+    scaled_boxes[:, 2].clamp_(0, original_hw[1])  # xmax
+    scaled_boxes[:, 3].clamp_(0, original_hw[0])  # ymax
+
+    return scaled_boxes
diff --git a/evaluation/utils/manipulations.py b/evaluation/utils/manipulations.py
new file mode 100644
index 0000000..78308e9
--- /dev/null
+++ b/evaluation/utils/manipulations.py
@@ -0,0 +1,37 @@
+import cv2
+
+
+def draw_boxes(image, bboxes):
+    """Draw numbered bounding boxes on a copy of an image.
+
+    :param image: opencv2 image object
+    :param bboxes: iterable of (xmin, ymin, xmax, ymax) coordinates
+    :returns: copy of the image with boxes and their indices drawn
+    """
+    img = image.copy()
+    for idx, bbox in enumerate(bboxes):
+        xmin, ymin, xmax, ymax = bbox
+        # Draw bounding box and number on original image
+        img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
+        img = cv2.putText(img, str(idx), (xmin + 5, ymin + 25),
+                          cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 0), 4,
+                          cv2.LINE_AA)
+        img = cv2.putText(img, str(idx), (xmin + 5, ymin + 25),
+                          cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2,
+                          cv2.LINE_AA)
+    return img
+
+
+def get_cutout(img, xmin, xmax, ymin, ymax):
+    """Cut out a bounding box region from an image to pass to the
+    object classification model.
+
+    :param img: opencv2 image object in BGR
+    :param int xmin: start of bounding box on x axis
+    :param int xmax: end of bounding box on x axis
+    :param int ymin: start of bounding box on y axis
+    :param int ymax: end of bounding box on y axis
+    :returns: cropped opencv2 image in BGR
+    """
+    cropped_image = img[ymin:ymax, xmin:xmax]
+    return cropped_image
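
The modules above are only exercised from the notebook and the evaluation/labeling scripts, so here is a minimal usage sketch (not part of the diff) showing the two-stage pipeline on a single image. The file names sample.jpg, yolo.onnx and resnet.onnx are placeholders, and it assumes it is run from the evaluation/ directory so that the utils package resolves.

import cv2

from detection import detect
from utils.manipulations import draw_boxes

# Run YOLO + ResNet on one image; returns a pandas DataFrame with
# xmin, ymin, xmax, ymax, box_conf, cls and cls_conf per detection.
boxes = detect('sample.jpg', 'yolo.onnx', 'resnet.onnx')
print(boxes)

# Draw the numbered boxes on the original image and save the result.
img = cv2.imread('sample.jpg')
xyxy = [(int(r['xmin']), int(r['ymin']), int(r['xmax']), int(r['ymax']))
        for _, r in boxes.iterrows()]
cv2.imwrite('sample-annotated.jpg', draw_boxes(img, xyxy))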
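
get_boxes letterboxes the input with LongestMaxSize and PadIfNeeded before inference, and scale_bboxes undoes that mapping. The sketch below (also not part of the diff) works the round trip through for a hypothetical 1280x960 source image; the numbers follow directly from the padding formula in scale_bboxes.

import torch

from utils.conversions import scale_bboxes

resized_hw = (640, 640)    # letterboxed canvas (height, width)
original_hw = (960, 1280)  # source image (height, width)

# The 1280x960 image is scaled by 0.5 to 640x480 and centred with
# 80 px of padding above and below, so a box at (100, 180, 300, 400)
# in the letterboxed image maps back to (200, 200, 600, 640).
letterboxed = torch.tensor([[100.0, 180.0, 300.0, 400.0]])
restored = scale_bboxes(letterboxed, resized_hw, original_hw)
print(restored)  # tensor([[200., 200., 600., 640.]])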