diff --git a/thesis/references.bib b/thesis/references.bib index 02cc869..f0ab05c 100644 --- a/thesis/references.bib +++ b/thesis/references.bib @@ -703,6 +703,14 @@ file = {/home/zenon/Zotero/storage/DQAJEA4B/Kingma and Ba - 2017 - Adam A Method for Stochastic Optimization.pdf} } +@dataset{krasin2017, + title = {{{OpenImages}}: {{A}} Public Dataset for Large-Scale Multi-Label and Multi-Class Image Classification}, + author = {Krasin, Ivan and Duerig, Tom and Alldrin, Neil and Ferrari, Vittorio and Abu-El-Haija, Sami and Kuznetsova, Alina and Rom, Hassan and Uijlings, Jasper and Popov, Stefan and Kamali, Shahab and Malloci, Matteo and Pont-Tuset, Jordi and Veit, Andreas and Belongie, Serge and Gomes, Victor and Gupta, Abhinav and Sun, Chen and Chechik, Gal and Cai, David and Feng, Zheyun and Narayanan, Dhyanesh and Murphy, Kevin}, + date = {2017}, + url = {https://storage.googleapis.com/openimages/web/index.html}, + urldate = {2023-12-06} +} + @inproceedings{krizhevsky2012, title = {{{ImageNet Classification}} with {{Deep Convolutional Neural Networks}}}, booktitle = {Advances in {{Neural Information Processing Systems}}}, @@ -1572,16 +1580,18 @@ } @online{wang2022, - title = {{{YOLOv7}}: {{Trainable Bag-of-Freebies Sets New State-of-the-Art}} for {{Real-Time Object Detectors}}}, + title = {{{YOLOv7}}: {{Trainable}} Bag-of-Freebies Sets New State-of-the-Art for Real-Time Object Detectors}, shorttitle = {{{YOLOv7}}}, author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, date = {2022-07-06}, eprint = {2207.02696}, eprinttype = {arxiv}, + eprintclass = {cs}, doi = {10.48550/arXiv.2207.02696}, - issue = {arXiv:2207.02696}, + abstract = {YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8\% AP among all known real-time object detectors with 30 FPS or higher on GPU V100. YOLOv7-E6 object detector (56 FPS V100, 55.9\% AP) outperforms both transformer-based detector SWIN-L Cascade-Mask R-CNN (9.2 FPS A100, 53.9\% AP) by 509\% in speed and 2\% in accuracy, and convolutional-based detector ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS A100, 55.2\% AP) by 551\% in speed and 0.7\% AP in accuracy, as well as YOLOv7 outperforms: YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, DETR, Deformable DETR, DINO-5scale-R50, ViT-Adapter-B and many other object detectors in speed and accuracy. Moreover, we train YOLOv7 only on MS COCO dataset from scratch without using any other datasets or pre-trained weights. Source code is released in https://github.com/WongKinYiu/yolov7.}, + pubstate = {preprint}, keywords = {Computer Science - Computer Vision and Pattern Recognition}, - file = {/home/zenon/Zotero/storage/G27M4VFA/Wang et al. - 2022 - YOLOv7 Trainable Bag-of-Freebies Sets New State-o.pdf} + file = {/home/zenon/Zotero/storage/TF6HV2VR/Wang et al. - 2022 - YOLOv7 Trainable bag-of-freebies sets new state-o.pdf;/home/zenon/Zotero/storage/4H69GZYY/2207.html} } @online{wang2022a, @@ -1598,21 +1608,6 @@ file = {/home/zenon/Zotero/storage/TFTTKYRE/Wang et al. 
- 2022 - Designing Network Design Strategies Through Gradie.pdf;/home/zenon/Zotero/storage/5I43K6ZQ/2211.html} } -@online{wang2022b, - title = {{{YOLOv7}}: {{Trainable}} Bag-of-Freebies Sets New State-of-the-Art for Real-Time Object Detectors}, - shorttitle = {{{YOLOv7}}}, - author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, - date = {2022-07-06}, - eprint = {2207.02696}, - eprinttype = {arxiv}, - eprintclass = {cs}, - doi = {10.48550/arXiv.2207.02696}, - abstract = {YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8\% AP among all known real-time object detectors with 30 FPS or higher on GPU V100. YOLOv7-E6 object detector (56 FPS V100, 55.9\% AP) outperforms both transformer-based detector SWIN-L Cascade-Mask R-CNN (9.2 FPS A100, 53.9\% AP) by 509\% in speed and 2\% in accuracy, and convolutional-based detector ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS A100, 55.2\% AP) by 551\% in speed and 0.7\% AP in accuracy, as well as YOLOv7 outperforms: YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, DETR, Deformable DETR, DINO-5scale-R50, ViT-Adapter-B and many other object detectors in speed and accuracy. Moreover, we train YOLOv7 only on MS COCO dataset from scratch without using any other datasets or pre-trained weights. Source code is released in https://github.com/WongKinYiu/yolov7.}, - pubstate = {preprint}, - keywords = {Computer Science - Computer Vision and Pattern Recognition}, - file = {/home/zenon/Zotero/storage/TF6HV2VR/Wang et al. - 2022 - YOLOv7 Trainable bag-of-freebies sets new state-o.pdf;/home/zenon/Zotero/storage/4H69GZYY/2207.html} -} - @inproceedings{woo2018, title = {{{CBAM}}: {{Convolutional Block Attention Module}}}, shorttitle = {{{CBAM}}}, @@ -1759,15 +1754,6 @@ file = {/home/zenon/Zotero/storage/CLHDBTJ2/qWPwnQEACAAJ.html} } -@article{zotero-338, - title = {Ultralytics/Yolov5: V7.0 - {{YOLOv5 SOTA Realtime Instance Segmentation}}}, - shorttitle = {Ultralytics/Yolov5}, - doi = {10.5281/zenodo.7347926}, - abstract = {{$<$}div align="center"{$>$} {$<$}a align="center" href="https://ultralytics.com/yolov5" target="\_blank"{$>$} {$<$}img width="850" src="https://github.com/ultralytics/assets/blob/master/yolov5/v70/splash.png"{$><$}/a{$>$} {$<$}/div{$>$} {$<$}br{$>$} Our new YOLOv5 v7.0 instance segmentation models are the fastest and most accurate in the world, beating all current SOTA benchmarks. We've made them super simple to train, validate and deploy. See full details in our Release Notes and visit our YOLOv5 Segmentation Colab Notebook for quickstart tutorials. {$<$}div align="center"{$>$} {$<$}a align="center" href="https://ultralytics.com/yolov5" target="\_blank"{$>$} {$<$}img width="800" src="https://user-images.githubusercontent.com/26833433/203348073-9b85607b-03e2-48e1-a6ba-fe1c1c31749c.png"{$><$}/a{$>$} {$<$}/div{$>$} {$<$}br{$>$} Our primary goal with this release is to introduce super simple YOLOv5 segmentation workflows just like our existing object detection models. The new v7.0 YOLOv5-seg models below are just a start, we will continue to improve these going forward together with our existing detection and classification models. We'd love your feedback and contributions on this effort! This release incorporates 280 PRs from 41 contributors since our last release in August 2022. 
}}, - langid = {english}, - file = {/home/zenon/Zotero/storage/IWYM45AP/7347926.html} -} - @article{zou2023, title = {Object {{Detection}} in 20 {{Years}}: {{A Survey}}}, shorttitle = {Object {{Detection}} in 20 {{Years}}}, diff --git a/thesis/thesis.pdf b/thesis/thesis.pdf index c25fc45..89d7890 100644 Binary files a/thesis/thesis.pdf and b/thesis/thesis.pdf differ diff --git a/thesis/thesis.tex b/thesis/thesis.tex index 9cabe14..3110e3f 100644 --- a/thesis/thesis.tex +++ b/thesis/thesis.tex @@ -1598,7 +1598,7 @@ computational cost of between eight to nine times. MobileNet v2 \emph{squeeze and excitation layers} among other improvements. These concepts led to better classification accuracy at the same or smaller model size. The authors evaluate a large and a small variant of -MobileNet v3 on Imagenet on single-core phone processors and achieve a +MobileNet v3 on ImageNet on single-core phone processors and achieve a top-1 accuracy of 75.2\% and 67.4\% respectively. \section{Transfer Learning} @@ -1664,7 +1664,7 @@ which have to be made as a result of using transfer learning can introduce more complexity than would otherwise be necessary for a particular problem. It does, however, allow researchers to get started quickly and to iterate faster because popular network architectures -pretrained on Imagenet are integrated into the major machine learning +pretrained on ImageNet are integrated into the major machine learning frameworks. Transfer learning is used extensively in this work to train a classifier as well as an object detection model. @@ -2300,7 +2300,7 @@ the \gls{coco} test data set.
The authors of \gls{yolo}v6 \cite{li2022a} use a new backbone based on RepVGG \cite{ding2021} which they call EfficientRep. They also use -different losses for classification (Varifocal loss \cite{zhang2021}) +different losses for classification (varifocal loss \cite{zhang2021}) and bounding box regression (\gls{siou} \cite{gevorgyan2022}/\gls{giou} \cite{rezatofighi2019}). \gls{yolo}v6 is made available in eight scaled versions, of which the largest achieves a \gls{map} of 57.2\% on the \gls{coco} test set. @@ -2310,7 +2310,7 @@ achieves a \gls{map} of 57.2\% on the \gls{coco} test set. \subsubsection{YOLOv7} \label{sssec:yolov7} At the time of implementation of our own plant detector, \gls{yolo}v7 -\cite{wang2022b} was the newest version within the \gls{yolo} +\cite{wang2022} was the newest version within the \gls{yolo} family. Similarly to \gls{yolo}v4, it introduces more trainable bag of freebies which do not impact inference time. The improvements include the use of \glspl{eelan} (based on \glspl{elan} \cite{wang2022a}), @@ -2444,31 +2444,79 @@ random value within a range with a specified probability. \chapter{Prototype Implementation} \label{chap:implementation} +In this chapter we describe the implementation of the prototype: how +the two models were trained and with which data sets, how they are +deployed to the \gls{sbc}, and how they were optimized. + \section{Object Detection} \label{sec:development-detection} -Describe how the object detection model was trained and what the -training set looks like. Include a section on hyperparameter -optimization and go into detail about how the detector was optimized. +As mentioned before, our approach is split into a detection and a +classification stage. In the first stage, the object detector locates +all plants in an image and passes the cutouts on to the +classifier. In this section, we describe the data set the object +detector was trained on, the results of the training phase, and how +the model was optimized with respect to its hyperparameters. -The object detection model was trained for 300 epochs on 79204 images -with 284130 ground truth labels. The weights from the best-performing -epoch were saved. The model's fitness for each epoch is calculated as -the weighted average of \textsf{mAP}@0.5 and \textsf{mAP}@0.5:0.95: +\subsection{Data Set} +\label{ssec:obj-train-dataset} + +The object detection model has to correctly detect plants in various +locations, under different lighting conditions, and in partially +occluded settings. Fortunately, there are many data sets available +which contain a large number of classes and samples of common everyday +objects. Most of these data sets contain at least one plant-related +class, and related classes such as \emph{houseplant} and \emph{potted +plant} can be merged into a single \emph{plant} class which exhibits a +great variety of samples. One such data set which includes the +aforementioned classes is the \gls{oid} \cite{kuznetsova2020,krasin2017}. + +The \gls{oid} has been published in multiple versions, starting with +version one in 2016. The most recent iteration, version seven, was +released in October 2022. In our own work, we use version six of the +data set, which contains \num{9011219} training, \num{41620} +validation, and \num{125436} testing images. The data set provides +image-level labels, bounding boxes, object segmentations, visual +relationships, and localized narratives on those images.
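Such a plant-only subset can be assembled in several ways. As one illustrative route (an assumption for the sake of example, not necessarily the download procedure used in this work), the FiftyOne dataset zoo can fetch only the bounding boxes of the two plant-related classes and export them in a \gls{yolo}-style layout; the label handling actually used here is described in the following paragraph.

\begin{verbatim}
# Sketch: fetch only the Open Images boxes for the two plant-related
# classes and export them in a YOLO-style layout (illustrative only;
# paths, the label field name, and the sample cap are assumptions).
import fiftyone as fo
import fiftyone.zoo as foz

subset = foz.load_zoo_dataset(
    "open-images-v6",
    split="train",
    label_types=["detections"],        # only bounding boxes are needed
    classes=["Plant", "Houseplant"],
    max_samples=500,                   # drop the cap for the full subset
)

# Collapse the two labels into a single "Plant" class and export in the
# directory structure and label format expected by YOLOv5/YOLOv7.
subset = subset.map_labels("detections", {"Houseplant": "Plant"})
subset.export(
    export_dir="datasets/plants",      # illustrative output path
    dataset_type=fo.types.YOLOv5Dataset,
    label_field="detections",
    classes=["Plant"],
)
\end{verbatim}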
For our own +work, we are only interested in the labeled bounding boxes of all +images which belong to the classes \emph{Houseplant} and \emph{Plant} +with their respective class identifiers \texttt{/m/03fp41} and +\texttt{/m/05s2s}. These images have been extracted from the data set +and arranged in the directory structure which \gls{yolo}v7 +requires. The bounding boxes themselves are collapsed into a single +label \emph{Plant} and converted to the \gls{yolo}v7 label format. In +total, there are \num{79204} images with \num{284130} bounding boxes +in the training set. \gls{yolo}v7 continuously validates the training +progress after every epoch on a validation set of \num{3091} images +with \num{4092} bounding boxes. + +\subsection{Training Phase} +\label{ssec:obj-training-phase} + +We use the smallest \gls{yolo}v7 model, which has \num{36.9e6} +parameters \cite{wang2022} and has been pretrained on the \gls{coco} +data set \cite{lin2015} with an input size of \num{640} by \num{640} +pixels. The object detection model was then fine-tuned for \num{300} +epochs on the training set. The weights from the best-performing epoch +were saved. The model's fitness for each epoch is calculated as the +weighted average of \gls{map}@0.5 and \gls{map}@0.5:0.95: \begin{equation} \label{eq:fitness} - f_{epoch} = 0.1 \cdot \mathsf{mAP}@0.5 + 0.9 \cdot \mathsf{mAP}@0.5\mathrm{:}0.95 + f_{epoch} = 0.1 \cdot \mathrm{\gls{map}}@0.5 + 0.9 \cdot \mathrm{\gls{map}}@0.5\mathrm{:}0.95 \end{equation} Figure~\ref{fig:fitness} shows the model's fitness over the training -period of 300 epochs. The gray vertical line indicates the maximum -fitness of 0.61 at epoch 133. The weights of that epoch were frozen to -be the final model parameters. Since the fitness metric assigns the -\textsf{mAP} at the higher range the overwhelming weight, the -\textsf{mAP}@0.5 starts to decrease after epoch 30, but the -\textsf{mAP}@0.5:0.95 picks up the slack until the maximum fitness at -epoch 133. This is an indication that the model achieves good +period of \num{300} epochs. The gray vertical line indicates the +maximum fitness of \num{0.61} at epoch \num{133}. The weights of that +epoch were frozen to be the final model parameters. Since the fitness +metric assigns the overwhelming weight to the \gls{map} at the higher +\gls{iou} thresholds, the \gls{map}@0.5 starts to decrease after epoch +\num{30}, but the \gls{map}@0.5:0.95 picks up the slack until the +maximum fitness at epoch \num{133}. This is an indication that the +model achieves good performance early on and continues to gain higher confidence values until performance deteriorates due to overfitting. @@ -2477,8 +2525,8 @@ until performance deteriorates due to overfitting. \includegraphics{graphics/model_fitness.pdf} \caption[Object detection fitness per epoch.]{Object detection model fitness for each epoch calculated as in - equation~\ref{eq:fitness}. The vertical gray line at 133 marks the - epoch with the highest fitness.} + equation~\ref{eq:fitness}. The vertical gray line at \num{133} + marks the epoch with the highest fitness.} \label{fig:fitness} \end{figure} @@ -2489,11 +2537,11 @@ starts to decrease from the beginning, while recall experiences a barely noticeable increase.
Taken together with the box and object loss from figure~\ref{fig:box-obj-loss}, we speculate that the pre-trained model already generalizes well to plant detection because -one of the categories in the COCO~\cite{lin2015} dataset is +one of the categories in the \gls{coco} \cite{lin2015} dataset is \emph{potted plant}. Any further training solely impacts the confidence of detection, but does not lead to higher detection rates. This conclusion is supported by the increasing -\textsf{mAP}@0.5:0.95 until epoch 133. +\gls{map}@0.5:0.95 until epoch \num{133}. \begin{figure} \centering @@ -2524,226 +2572,67 @@ the bounding boxes become tighter around objects of interest. With increasing training time, however, the object loss increases, indicating that less and less plants are present in the predicted bounding boxes. It is likely that overfitting is a cause for the -increasing object loss from epoch 40 onward. Since the best weights as -measured by fitness are found at epoch 133 and the object loss -accelerates from that point, epoch 133 is probably the correct cutoff -before overfitting occurs. +increasing object loss from epoch \num{40} onward. Since the best +weights as measured by fitness are found at epoch \num{133} and the +object loss accelerates from that point, epoch \num{133} is arguably +the correct cutoff before overfitting occurs. \begin{figure} \centering \includegraphics{graphics/val_box_obj_loss.pdf} \caption[Object detection box and object loss.]{Box and object loss - measured against the validation set of 3091 images and 4092 ground - truth labels. The class loss is omitted because there is only one - class in the dataset and the loss is therefore always zero.} + measured against the validation set of \num{3091} images and + \num{4092} ground truth labels. The class loss is omitted because + there is only one class in the dataset and the loss is therefore + always zero.} \label{fig:box-obj-loss} \end{figure} -Estimated 2 pages for this section. - -\section{Classification} -\label{sec:development-classification} - -Describe how the classification model was trained and what the -training set looks like. Include a subsection hyperparameter -optimization and go into detail about how the classifier was -optimized. - -The dataset was split 85/15 into training and validation sets. The -images in the training set were augmented with a random crop to arrive -at the expected image dimensions of 224 pixels. Additionally, the -training images were modified with a random horizontal flip to -increase the variation in the set and to train a rotation invariant -classifier. All images, regardless of their membership in the training -or validation set, were normalized with the mean and standard -deviation of the ImageNet~\cite{deng2009} dataset, which the original -\gls{resnet} model was pre-trained with. Training was done for 50 -epochs and the best-performing model as measured by validation -accuracy was selected as the final version. - -Figure~\ref{fig:classifier-training-metrics} shows accuracy and loss -on the training and validation sets. There is a clear upwards trend -until epoch 20 when validation accuracy and loss stabilize at around -0.84 and 0.3, respectively. The quick convergence and resistance to -overfitting can be attributed to the model already having robust -feature extraction capabilities. 
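The preprocessing described above corresponds to a fairly standard torchvision input pipeline; the following is only a sketch under that reading (the choice of RandomResizedCrop over a plain random crop, and the validation-side resizing, are assumptions rather than details taken from the training code).

\begin{verbatim}
# Sketch of the classifier input pipeline: random 224-pixel crop and
# horizontal flip for training, ImageNet normalization everywhere.
# (RandomResizedCrop and the validation resize are assumptions.)
from torchvision import transforms

imagenet_mean = [0.485, 0.456, 0.406]   # ImageNet channel statistics
imagenet_std = [0.229, 0.224, 0.225]

train_tf = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])
\end{verbatim}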
- -\begin{figure} - \centering - \includegraphics{graphics/classifier-metrics.pdf} - \caption[Classifier accuracy and loss during training.]{Accuracy and - loss during training of the classifier. The model converges - quickly, but additional epochs do not cause validation loss to - increase, which would indicate overfitting. The maximum validation - accuracy of 0.9118 is achieved at epoch 27.} - \label{fig:classifier-training-metrics} -\end{figure} - -Estimated 2 pages for this section. - -\section{Deployment} - -Describe the Jetson Nano, how the model is deployed to the device and -how it reports its results (REST API). - -Estimated 2 pages for this section. - -\chapter{Evaluation} -\label{chap:evaluation} - -The following sections contain a detailed evaluation of the model in -various scenarios. First, we present metrics from the training phases -of the constituent models. Second, we employ methods from the field of -\gls{xai} such as \gls{grad-cam} to get a better understanding of the -models' abstractions. Finally, we turn to the models' aggregate -performance on the test set. - -\section{Methodology} -\label{sec:methodology} - -Go over the evaluation methodology by explaining the test datasets, -where they come from, and how they're structured. Explain how the -testing phase was done and which metrics are employed to compare the -models to the SOTA. - -Estimated 2 pages for this section. - -\section{Results} -\label{sec:results} - -Systematically go over the results from the testing phase(s), show the -plots and metrics, and explain what they contain. - -Estimated 4 pages for this section. - -\subsection{Object Detection} -\label{ssec:yolo-eval} - -The following parapraph should probably go into -section~\ref{sec:development-detection}. - -The object detection model was pre-trained on the COCO~\cite{lin2015} -dataset and fine-tuned with data from the \gls{oid} -\cite{kuznetsova2020} in its sixth version. Since the full \gls{oid} -dataset contains considerably more classes and samples than would be -feasibly trainable on a small cluster of \glspl{gpu}, only images from -the two classes \emph{Plant} and \emph{Houseplant} have been -downloaded. The samples from the Houseplant class are merged into the -Plant class because the distinction between the two is not necessary -for our model. Furthermore, the \gls{oid} contains not only bounding -box annotations for object detection tasks, but also instance -segmentations, classification labels and more. These are not needed -for our purposes and are omitted as well. In total, the dataset -consists of 91479 images with a roughly 85/5/10 split for training, -validation and testing, respectively. - -\subsubsection{Test Phase} -\label{sssec:yolo-test} - -Of the 91479 images around 10\% were used for the test phase. These -images contain a total of 12238 ground truth -labels. Table~\ref{tab:yolo-metrics} shows precision, recall and the -harmonic mean of both ($\mathrm{F}_1$-score). The results indicate -that the model errs on the side of sensitivity because recall is -higher than precision. Although some detections are not labeled as -plants in the dataset, if there is a labeled plant in the ground truth -data, the chance is high that it will be detected. This behavior is in -line with how the model's detections are handled in practice. The -detections are drawn on the original image and the user is able to -check the bounding boxes visually. If there are wrong detections, the -user can ignore them and focus on the relevant ones instead. 
A higher -recall will thus serve the user's needs better than a high precision. - -\begin{table}[h] - \centering - \begin{tabular}{lrrrr} - \toprule - {} & Precision & Recall & $\mathrm{F}_1$-score & Support \\ - \midrule - Plant & 0.547571 & 0.737866 & 0.628633 & 12238.0 \\ - \bottomrule - \end{tabular} - \caption{Precision, recall and $\mathrm{F}_1$-score for the object - detection model.} - \label{tab:yolo-metrics} -\end{table} - -Figure~\ref{fig:yolo-ap} shows the \gls{ap} for the \gls{iou} -thresholds of 0.5 and 0.95. Predicted bounding boxes with an \gls{iou} -of less than 0.5 are not taken into account for the precision and -recall values of table~\ref{tab:yolo-metrics}. The lower the detection -threshold, the more plants are detected. Conversely, a higher -detection threshold leaves potential plants undetected. The -precision-recall curves confirm this behavior because the area under -the curve for the threshold of 0.5 is higher than for the threshold of -0.95 ($0.66$ versus $0.41$). These values are combined in COCO's -\cite{lin2015} main evaluation metric which is the \gls{ap} averaged -across the \gls{iou} thresholds from 0.5 to 0.95 in 0.05 steps. This -value is then averaged across all classes and called \gls{map}. The -object detection model achieves a state-of-the-art \gls{map} of 0.5727 -for the \emph{Plant} class. - -\begin{figure} - \centering - \includegraphics{graphics/APpt5-pt95.pdf} - \caption[Object detection AP@0.5 and AP@0.95.]{Precision-recall - curves for \gls{iou} thresholds of 0.5 and 0.95. The \gls{ap} of a - specific threshold is defined as the area under the - precision-recall curve of that threshold. The \gls{map} across - \gls{iou} thresholds from 0.5 to 0.95 in 0.05 steps - \textsf{mAP}@0.5:0.95 is 0.5727.} - \label{fig:yolo-ap} -\end{figure} - -\subsubsection{Hyperparameter Optimization} -\label{sssec:yolo-hyp-opt} - -This section should be moved to the hyperparameter optimization -section in the development chapter -(section~\ref{sec:development-detection}). +\subsection{Hyperparameter Optimization} +\label{ssec:obj-hypopt} To further improve the object detection performance, we perform -hyper-parameter optimization using a genetic algorithm. Evolution of -the hyper-parameters starts from the initial 30 default values -provided by the authors of YOLO. Of those 30 values, 26 are allowed to -mutate. During each generation, there is an 80\% chance that a -mutation occurs with a variance of 0.04. To determine which generation -should be the parent of the new mutation, all previous generations are -ordered by fitness in decreasing order. At most five top generations -are selected and one of them is chosen at random. Better generations -have a higher chance of being selected as the selection is weighted by -fitness. The parameters of that chosen generation are then mutated -with the aforementioned probability and variance. Each generation is -trained for three epochs and the fitness of the best epoch is -recorded. +hyperparameter optimization using a genetic algorithm. Evolution of +the hyperparameters starts from the initial \num{30} default values +provided by the authors of \gls{yolo}. Of those \num{30} values, +\num{26} are allowed to mutate. During each generation, there is an +80\% chance that a mutation occurs with a variance of \num{0.04}. To +determine which generation should be the parent of the new mutation, +all previous generations are ordered by fitness in decreasing +order. 
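Together with the parent-selection rules described in the remainder of this paragraph (a pool of at most five top generations, chosen at random with fitness-proportional weights), one step of this scheme can be sketched as follows. This is a simplified reading of the procedure, not the actual \gls{yolo}v7 evolution code, and the clipping bounds and per-value interpretation of the mutation probability are assumptions.

\begin{verbatim}
# Sketch of one hyperparameter-evolution step (simplified; not the
# actual YOLOv7 code). history holds (fitness, hyperparameters) pairs
# from previous generations; mutable_keys are the 26 values that may mutate.
import numpy as np

rng = np.random.default_rng()

def evolve_step(history, defaults, mutable_keys,
                p_mutate=0.8, sigma=0.2, top_k=5):
    if not history:
        parent = dict(defaults)                 # start from the defaults
    else:
        top = sorted(history, key=lambda t: t[0], reverse=True)[:top_k]
        weights = np.array([f for f, _ in top], dtype=float)
        weights /= weights.sum()                # fitness-weighted choice
        parent = dict(top[rng.choice(len(top), p=weights)][1])

    child = dict(parent)
    for key in mutable_keys:
        if rng.random() < p_mutate:             # 80% mutation chance
            # multiplicative Gaussian noise, variance sigma**2 = 0.04;
            # the clipping range is an assumption
            factor = float(np.clip(rng.normal(1.0, sigma), 0.3, 3.0))
            child[key] = parent[key] * factor
    return child

# Each returned candidate is trained for three epochs and
# (best_fitness, child) is appended to history.
\end{verbatim}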
At most five top generations are selected and one of them is +chosen at random. Better generations have a higher chance of being +selected as the selection is weighted by fitness. The parameters of +that chosen generation are then mutated with the aforementioned +probability and variance. Each generation is trained for three epochs +and the fitness of the best epoch is recorded. -In total, we ran 87 iterations of which the 34\textsuperscript{th} -generation provides the best fitness of 0.6076. Due to time -constraints, it was not possible to train each generation for more -epochs or to run more iterations in total. We assume that the -performance of the first few epochs is a reasonable proxy for model -performance overall. The optimized version of the object detection -model is then trained for 70 epochs using the parameters of the -34\textsuperscript{th} generation. +In total, we ran \num{87} iterations of which the +\num{34}\textsuperscript{th} generation provides the best fitness of +\num{0.6076}. Due to time constraints, it was not possible to train +each generation for more epochs or to run more iterations in total. We +assume that the performance of the first few epochs is a reasonable +proxy for model performance overall. The optimized version of the +object detection model is then trained for \num{70} epochs using the +parameters of the \num{34}\textsuperscript{th} generation. \begin{figure} \centering \includegraphics{graphics/model_fitness_final.pdf} \caption[Optimized object detection fitness per epoch.]{Object detection model fitness for each epoch calculated as in - equation~\ref{eq:fitness}. The vertical gray line at 27 marks the - epoch with the highest fitness of 0.6172.} + equation~\ref{eq:fitness}. The vertical gray line at \num{27} + marks the epoch with the highest fitness of \num{0.6172}.} \label{fig:hyp-opt-fitness} \end{figure} Figure~\ref{fig:hyp-opt-fitness} shows the model's fitness during -training for each epoch. After the highest fitness of 0.6172 at epoch -27, the performance quickly declines and shows that further training -would likely not yield improved results. The model converges to its -highest fitness much earlier than the non-optimized version, which -indicates that the adjusted parameters provide a better starting point -in general. Furthermore, the maximum fitness is 0.74\% higher than in -the non-optimized version. +training for each epoch. After the highest fitness of \num{0.6172} at +epoch \num{27}, the performance quickly declines and shows that +further training would likely not yield improved results. The model +converges to its highest fitness much earlier than the non-optimized +version, which indicates that the adjusted parameters provide a better +starting point in general. Furthermore, the maximum fitness is 0.74 +percentage points higher than in the non-optimized version. \begin{figure} \centering @@ -2751,7 +2640,7 @@ the non-optimized version. \caption[Hyper-parameter optimized object detection precision and recall during training.]{Overall precision and recall during training for each epoch of the optimized model. The vertical gray - line at 27 marks the epoch with the highest fitness.} + line at \num{27} marks the epoch with the highest fitness.} \label{fig:hyp-opt-prec-rec} \end{figure} @@ -2766,9 +2655,9 @@ non-optimized version and recall hovers at the same levels. 
\includegraphics{graphics/val_box_obj_loss_final.pdf}
   \caption[Hyper-parameter optimized object detection box and object
     loss.]{Box and object loss measured against the validation set of
-    3091 images and 4092 ground truth labels. The class loss is
-    omitted because there is only one class in the dataset and the
-    loss is therefore always zero.}
+    \num{3091} images and \num{4092} ground truth labels. The class
+    loss is omitted because there is only one class in the dataset and
+    the loss is therefore always zero.}
   \label{fig:hyp-opt-box-obj-loss}
 \end{figure}
 
@@ -2777,96 +2666,84 @@ figure~\ref{fig:hyp-opt-box-obj-loss}. Both losses start from a lower
 level which suggests that the initial optimized parameters allow the
 model to converge quicker. The object loss exhibits a similar slope to
 the non-optimized model in figure~\ref{fig:box-obj-loss}. The vertical
-gray line again marks epoch 27 with the highest fitness. The box loss
-reaches its lower limit at that point and the object loss starts to
-increase again after epoch 27.
+gray line again marks epoch \num{27} with the highest fitness. The box
+loss reaches its lower limit at that point and the object loss starts
+to increase again after epoch \num{27}.
 
-\begin{table}[h]
-  \centering
-  \begin{tabular}{lrrrr}
-    \toprule
-    {} & Precision & Recall & $\mathrm{F}_1$-score & Support \\
-    \midrule
-    Plant & 0.633358 & 0.702811 & 0.666279 & 12238.0 \\
-    \bottomrule
-  \end{tabular}
-  \caption{Precision, recall and $\mathrm{F}_1$-score for the
-    optimized object detection model.}
-  \label{tab:yolo-metrics-hyp}
-\end{table}
+\section{Classification}
+\label{sec:development-classification}
 
-Turning to the evaluation of the optimized model on the test dataset,
-table~\ref{tab:yolo-metrics-hyp} shows precision, recall and the
-$\mathrm{F}_1$-score for the optimized model. Comparing these metrics
-with the non-optimized version from table~\ref{tab:yolo-metrics},
-precision is significantly higher by more than 8.5\%. Recall, however,
-is 3.5\% lower. The $\mathrm{F}_1$-score is higher by more than 3.7\%
-which indicates that the optimized model is better overall despite the
-lower recall. We feel that the lower recall value is a suitable trade
-off for the substantially higher precision considering that the
-non-optimized model's precision is quite low at 0.55.
+The second stage of our approach consists of the classification model,
+which determines whether the plant in question is water-stressed or
+not. The classifier receives the cutouts for each plant from stage one
+(object detection). We chose a \gls{resnet}-50 model (see
+section~\ref{sec:methods-classification}) which has been pretrained on
+ImageNet. The \gls{resnet} architecture was selected for its popularity
+and ease of implementation as well as its consistently high
+performance on various classification tasks. While its classification
+speed in comparison with networks optimized for mobile and edge
+devices (e.g.\ MobileNet) is significantly lower, the deeper structure
+and the additional parameters are necessary for the fairly complex
+task at hand. Furthermore, the generous time budget for object
+detection \emph{and} classification allows for more accurate results
+at the expense of speed. The \num{50}-layer architecture
+(\gls{resnet}-50) is adequate for our use case. In the following
+sections we describe the data set the classifier was trained on, the
+metrics of the training phase and how the performance of the model was
+further improved with hyperparameter optimization.
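+
+The implementation framework is not fixed by this chapter; as a
+minimal sketch, assuming PyTorch and torchvision (an assumption on our
+part, not necessarily the setup used here), the pretrained
+\gls{resnet}-50 could be adapted to the binary healthy/stressed task
+by replacing its final fully connected layer:
+
+\begin{verbatim}
+import torch
+from torch import nn
+from torchvision import models
+
+# Sketch only: load an ImageNet-pretrained ResNet-50 and swap the
+# 1000-class head for a two-class (healthy / stressed) head.
+model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
+model.fc = nn.Linear(model.fc.in_features, 2)
+
+# Loss and optimizer are illustrative choices, not values reported
+# in this thesis.
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+\end{verbatim}
+
+Starting from the pretrained feature extractor in this way is
+consistent with the quick convergence discussed below.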
-The precision-recall curves in figure~\ref{fig:yolo-ap-hyp} for the
-optimized model show that the model draws looser bounding boxes than
-the optimized model. The \gls{ap} for both \gls{iou} thresholds of 0.5
-and 0.95 is lower indicating worse performance. It is likely that more
-iterations during evolution would help increase the \gls{ap} values as
-well. Even though the precision and recall values from
-table~\ref{tab:yolo-metrics-hyp} are better, the \textsf{mAP}@0.5:0.95
-is lower by 1.8\%.
+\subsection{Data Set}
+\label{ssec:class-train-dataset}
+
+The data set we used for training the classifier consists of \num{452}
+images of healthy plants and \num{452} images of stressed plants.
+
+%% TODO: write about data set
+
+The dataset was split 85/15 into training and validation sets. The
+images in the training set were augmented with a random crop to arrive
+at the expected input dimensions of $\num{224} \times \num{224}$
+pixels. Additionally, the training images were modified with a random
+horizontal flip to increase the variation in the set and to make the
+classifier invariant to horizontal reflections. All images, regardless
+of their membership in the training or validation set, were normalized
+with the mean and standard deviation of the ImageNet \cite{deng2009}
+dataset, which the original \gls{resnet}-50 model was pretrained
+with. Training was done for \num{50} epochs and the best-performing
+model as measured by validation accuracy was selected as the final
+version.
+
+Figure~\ref{fig:classifier-training-metrics} shows accuracy and loss
+on the training and validation sets. Accuracy shows a clear upward
+trend until epoch \num{20}, when validation accuracy and loss
+stabilize at around \num{0.84} and \num{0.3}, respectively. The quick
+convergence and resistance to overfitting can be attributed to the
+model already having robust feature extraction capabilities from its
+ImageNet pretraining.
 
 \begin{figure}
   \centering
-  \includegraphics{graphics/APpt5-pt95-final.pdf}
-  \caption[Hyper-parameter optimized object detection AP@0.5 and
-    AP@0.95.]{Precision-recall curves for \gls{iou} thresholds of 0.5
-    and 0.95. The \gls{ap} of a specific threshold is defined as the
-    area under the precision-recall curve of that threshold. The
-    \gls{map} across \gls{iou} thresholds from 0.5 to 0.95 in 0.05
-    steps \textsf{mAP}@0.5:0.95 is 0.5546.}
-  \label{fig:yolo-ap-hyp}
+  \includegraphics{graphics/classifier-metrics.pdf}
+  \caption[Classifier accuracy and loss during training.]{Accuracy and
+    loss during training of the classifier. The model converges
+    quickly, but additional epochs do not cause validation loss to
+    increase, which would indicate overfitting. The maximum validation
+    accuracy of \num{0.9118} is achieved at epoch \num{27}.}
+  \label{fig:classifier-training-metrics}
 \end{figure}
 
-\subsection{Classification}
-\label{ssec:classifier-eval}
-
-The classifier receives cutouts from the object detection model and
-determines whether the image shows a stressed plant or not. To achieve
-this goal, we trained a \gls{resnet} \cite{he2016} on a dataset of 452
-images of healthy and 452 stressed plants. We chose the \gls{resnet}
-architecture due to its popularity and ease of implementation as well
-as its consistently high performance on various classification
-tasks. While its classification speed in comparison with networks
-optimized for mobile and edge devices (e.g. MobileNet) is
-significantly lower, the deeper structure and the additional
-parameters are necessary for the fairly complex task at
-hand. 
Furthermore, the generous time budget for object detection -\emph{and} classification allows for more accurate results at the -expense of speed. The architecture allows for multiple different -structures, depending on the amount of layers. The smallest one has 18 -and the largest 152 layers with 34, 50 and 101 in-between. The larger -networks have better accuracy in general, but come with trade-offs -regarding training and inference time as well as required space. The -50 layer architecture (\gls{resnet}50) is adequate for our use case. - -\subsubsection{Hyperparameter Optimization} -\label{sssec:classifier-hyp-opt} - -This section should be moved to the hyperparameter optimization -section in the development chapter -(section~\ref{sec:development-classification}). +\subsection{Hyperparameter Optimization} +\label{ssec:class-hypopt} In order to improve the aforementioned accuracy values, we perform -hyper-parameter optimization across a wide range of -parameters. Table~\ref{tab:classifier-hyps} lists the hyper-parameters +hyperparameter optimization across a wide range of +parameters. Table~\ref{tab:classifier-hyps} lists the hyperparameters and their possible values. Since the number of all combinations of -values is 11520 and each combination is trained for 10 epochs with a -training time of approximately six minutes per combination, exhausting -the search space would take 48 days. Due to time limitations, we have -chosen to not search exhaustively but to pick random combinations -instead. Random search works surprisingly well---especially compared to -grid search---in a number of domains, one of which is hyper-parameter -optimization~\cite{bergstra2012}. +values is \num{11520} and each combination is trained for \num{10} +epochs with a training time of approximately six minutes per +combination, exhausting the search space would take \num{48} days. Due +to time limitations, we have chosen to not search exhaustively but to +pick random combinations instead. Random search works surprisingly +well---especially compared to grid search---in a number of domains, one of +which is hyperparameter optimization \cite{bergstra2012}. \begin{table}[h] \centering @@ -3010,6 +2887,186 @@ $\mathrm{F}_1$-score of 1 on the training set. \label{fig:classifier-hyp-folds} \end{figure} +\section{Deployment} + +Describe the Jetson Nano, how the model is deployed to the device and +how it reports its results (REST API). + +Estimated 2 pages for this section. + +\chapter{Evaluation} +\label{chap:evaluation} + +The following sections contain a detailed evaluation of the model in +various scenarios. First, we present metrics from the training phases +of the constituent models. Second, we employ methods from the field of +\gls{xai} such as \gls{grad-cam} to get a better understanding of the +models' abstractions. Finally, we turn to the models' aggregate +performance on the test set. + +\section{Methodology} +\label{sec:methodology} + +Go over the evaluation methodology by explaining the test datasets, +where they come from, and how they're structured. Explain how the +testing phase was done and which metrics are employed to compare the +models to the SOTA. + +Estimated 2 pages for this section. + +\section{Results} +\label{sec:results} + +Systematically go over the results from the testing phase(s), show the +plots and metrics, and explain what they contain. + +Estimated 4 pages for this section. 
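+
+The object detection results below are reported as precision, recall,
+$\mathrm{F}_1$-score and \gls{ap}, where the \gls{ap} at a given
+\gls{iou} threshold is the area under the precision-recall curve. As a
+rough illustration (assuming Python with NumPy; function and variable
+names are placeholders, not the evaluation code actually used), such a
+value could be computed as follows:
+
+\begin{verbatim}
+import numpy as np
+
+def average_precision(confidences, is_true_positive, num_ground_truth):
+    """AP for one class at one IoU threshold (illustrative sketch).
+
+    Detections are assumed to be matched against the ground truth at
+    the chosen IoU threshold already; `num_ground_truth` is the number
+    of labeled boxes.
+    """
+    order = np.argsort(-np.asarray(confidences, dtype=float))
+    tp = np.asarray(is_true_positive, dtype=float)[order]
+    cum_tp = np.cumsum(tp)
+    cum_fp = np.cumsum(1.0 - tp)
+    recall = cum_tp / max(num_ground_truth, 1)
+    precision = cum_tp / (cum_tp + cum_fp)
+    # Precision envelope: make precision non-increasing in recall,
+    # then integrate the area under the precision-recall curve.
+    precision = np.maximum.accumulate(precision[::-1])[::-1]
+    return float(np.trapz(precision, recall))
+\end{verbatim}
+
+Averaging this value over the \gls{iou} thresholds from 0.5 to 0.95 in
+0.05 steps and over all classes yields the \textsf{mAP}@0.5:0.95
+reported below.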
+
+\subsection{Object Detection}
+\label{ssec:yolo-eval}
+
+The following paragraph should probably go into
+section~\ref{sec:development-detection}.
+
+The object detection model was pre-trained on the COCO~\cite{lin2015}
+dataset and fine-tuned with data from the \gls{oid}
+\cite{kuznetsova2020} in its sixth version. Since the full \gls{oid}
+dataset contains considerably more classes and samples than would be
+feasibly trainable on a small cluster of \glspl{gpu}, only images from
+the two classes \emph{Plant} and \emph{Houseplant} have been
+downloaded. The samples from the Houseplant class are merged into the
+Plant class because the distinction between the two is not necessary
+for our model. Furthermore, the \gls{oid} contains not only bounding
+box annotations for object detection tasks, but also instance
+segmentations, classification labels and more. These are not needed
+for our purposes and are omitted as well. In total, the dataset
+consists of 91479 images with a roughly 85/5/10 split for training,
+validation and testing, respectively.
+
+\subsubsection{Test Phase}
+\label{sssec:yolo-test}
+
+Of the 91479 images, around 10\% were used for the test phase. These
+images contain a total of 12238 ground truth
+labels. Table~\ref{tab:yolo-metrics} shows precision, recall and the
+harmonic mean of both ($\mathrm{F}_1$-score). The results indicate
+that the model errs on the side of sensitivity because recall is
+higher than precision. Although some detections correspond to objects
+that are not labeled as plants in the dataset, a plant that is labeled
+in the ground truth data is very likely to be detected. This behavior
+is in line with how the model's detections are handled in practice. The
+detections are drawn on the original image and the user is able to
+check the bounding boxes visually. If there are wrong detections, the
+user can ignore them and focus on the relevant ones instead. A higher
+recall will thus serve the user's needs better than a high precision.
+
+\begin{table}[h]
+  \centering
+  \begin{tabular}{lrrrr}
+    \toprule
+    {} & Precision & Recall & $\mathrm{F}_1$-score & Support \\
+    \midrule
+    Plant & 0.547571 & 0.737866 & 0.628633 & 12238.0 \\
+    \bottomrule
+  \end{tabular}
+  \caption{Precision, recall and $\mathrm{F}_1$-score for the object
+    detection model.}
+  \label{tab:yolo-metrics}
+\end{table}
+
+Figure~\ref{fig:yolo-ap} shows the \gls{ap} for the \gls{iou}
+thresholds of 0.5 and 0.95. Predicted bounding boxes with an \gls{iou}
+of less than 0.5 are not taken into account for the precision and
+recall values of table~\ref{tab:yolo-metrics}. The lower the \gls{iou}
+threshold, the more plants are considered detected. Conversely, a
+higher threshold leaves potential plants undetected. The
+precision-recall curves confirm this behavior because the area under
+the curve for the threshold of 0.5 is higher than for the threshold of
+0.95 ($0.66$ versus $0.41$). These values are combined in COCO's
+\cite{lin2015} main evaluation metric, which is the \gls{ap} averaged
+across the \gls{iou} thresholds from 0.5 to 0.95 in 0.05 steps. This
+value is then averaged across all classes and called \gls{map}. The
+object detection model achieves a state-of-the-art \gls{map} of 0.5727
+for the \emph{Plant} class.
+
+\begin{figure}
+  \centering
+  \includegraphics{graphics/APpt5-pt95.pdf}
+  \caption[Object detection AP@0.5 and AP@0.95.]{Precision-recall
+    curves for \gls{iou} thresholds of 0.5 and 0.95. 
The \gls{ap} of a
+    specific threshold is defined as the area under the
+    precision-recall curve of that threshold. The \gls{map} across
+    \gls{iou} thresholds from 0.5 to 0.95 in 0.05 steps
+    \textsf{mAP}@0.5:0.95 is 0.5727.}
+  \label{fig:yolo-ap}
+\end{figure}
+
+\subsubsection{Hyperparameter Optimization}
+\label{sssec:yolo-hyp-opt}
+
+This section should be moved to the hyperparameter optimization
+section in the development chapter
+(section~\ref{sec:development-detection}).
+
+
+\begin{table}[h]
+  \centering
+  \begin{tabular}{lrrrr}
+    \toprule
+    {} & Precision & Recall & $\mathrm{F}_1$-score & Support \\
+    \midrule
+    Plant & 0.633358 & 0.702811 & 0.666279 & 12238.0 \\
+    \bottomrule
+  \end{tabular}
+  \caption{Precision, recall and $\mathrm{F}_1$-score for the
+    optimized object detection model.}
+  \label{tab:yolo-metrics-hyp}
+\end{table}
+
+Turning to the evaluation of the optimized model on the test dataset,
+table~\ref{tab:yolo-metrics-hyp} shows precision, recall and the
+$\mathrm{F}_1$-score for the optimized model. Comparing these metrics
+with the non-optimized version from table~\ref{tab:yolo-metrics},
+precision is higher by more than 8.5 percentage points. Recall,
+however, is 3.5 percentage points lower. The $\mathrm{F}_1$-score is
+higher by more than 3.7 percentage points, which indicates that the
+optimized model is better overall despite the lower recall. We
+consider the lower recall a suitable trade-off for the substantially
+higher precision, given that the non-optimized model's precision is
+quite low at 0.55.
+
+The precision-recall curves in figure~\ref{fig:yolo-ap-hyp} for the
+optimized model show that it draws looser bounding boxes than the
+non-optimized model. The \gls{ap} for both \gls{iou} thresholds of 0.5
+and 0.95 is lower, indicating worse performance. It is likely that
+more iterations during evolution would help increase the \gls{ap}
+values as well. Even though the precision and recall values from
+table~\ref{tab:yolo-metrics-hyp} are better, the \textsf{mAP}@0.5:0.95
+is lower by 1.8 percentage points.
+
+\begin{figure}
+  \centering
+  \includegraphics{graphics/APpt5-pt95-final.pdf}
+  \caption[Hyper-parameter optimized object detection AP@0.5 and
+    AP@0.95.]{Precision-recall curves for \gls{iou} thresholds of 0.5
+    and 0.95. The \gls{ap} of a specific threshold is defined as the
+    area under the precision-recall curve of that threshold. The
+    \gls{map} across \gls{iou} thresholds from 0.5 to 0.95 in 0.05
+    steps \textsf{mAP}@0.5:0.95 is 0.5546.}
+  \label{fig:yolo-ap-hyp}
+\end{figure}
+
+\subsection{Classification}
+\label{ssec:classifier-eval}
+
+
+\subsubsection{Hyperparameter Optimization}
+\label{sssec:classifier-hyp-opt}
+
+This section should be moved to the hyperparameter optimization
+section in the development chapter
+(section~\ref{sec:development-classification}).
+
+
 \subsubsection{Class Activation Maps}
 \label{sssec:classifier-cam}
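+
+As a point of reference for this subsection, the following is a
+minimal Grad-CAM sketch for the \gls{resnet}-50 classifier (assuming
+PyTorch; the layer name follows torchvision's \gls{resnet}
+implementation, and the checkpoint path and class index are
+placeholders rather than the actual artifacts used here):
+
+\begin{verbatim}
+import torch
+import torch.nn.functional as F
+from torchvision import models
+
+# Two-class ResNet-50 as sketched in the classification section.
+model = models.resnet50()
+model.fc = torch.nn.Linear(model.fc.in_features, 2)
+# model.load_state_dict(torch.load("classifier.pt"))  # placeholder path
+model.eval()
+
+activations, gradients = {}, {}
+
+def save_activation(module, inputs, output):
+    activations["value"] = output.detach()
+
+def save_gradient(module, grad_input, grad_output):
+    gradients["value"] = grad_output[0].detach()
+
+# layer4 is the last convolutional stage of torchvision's ResNet-50.
+model.layer4.register_forward_hook(save_activation)
+model.layer4.register_full_backward_hook(save_gradient)
+
+def grad_cam(image, target_class):
+    """Heat map in [0, 1] for `target_class`; `image` is a normalized
+    (3, H, W) tensor preprocessed like the validation images."""
+    scores = model(image.unsqueeze(0))
+    model.zero_grad()
+    scores[0, target_class].backward()
+    acts = activations["value"][0]          # (C, h, w)
+    grads = gradients["value"][0]           # (C, h, w)
+    weights = grads.mean(dim=(1, 2))        # pooled gradients per channel
+    cam = F.relu((weights[:, None, None] * acts).sum(dim=0))
+    cam = F.interpolate(cam[None, None], size=image.shape[1:],
+                        mode="bilinear", align_corners=False)[0, 0]
+    return (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
+\end{verbatim}
+
+Overlaying such a heat map on the input image indicates which regions
+of the plant the classifier bases its healthy/stressed decision on.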