Move AlexNet to classification section

Tobias Eidelpes 2023-11-08 10:48:39 +01:00
parent e30879f9e2
commit 89404df619
4 changed files with 44 additions and 13 deletions


@@ -1048,6 +1048,20 @@
   file = {/home/zenon/Zotero/storage/S6SE8F56/Simard et al. - 2003 - Best practices for convolutional neural networks a.pdf;/home/zenon/Zotero/storage/FQHDISEK/1227801.html}
 }
+@online{simonyan2015,
+  title = {Very {{Deep Convolutional Networks}} for {{Large-Scale Image Recognition}}},
+  author = {Simonyan, Karen and Zisserman, Andrew},
+  date = {2015-04-10},
+  eprint = {1409.1556},
+  eprinttype = {arxiv},
+  eprintclass = {cs},
+  doi = {10.48550/arXiv.1409.1556},
+  abstract = {In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3) convolution filters, which shows that a significant improvement on the prior-art configurations can be achieved by pushing the depth to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations generalise well to other datasets, where they achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision.},
+  pubstate = {preprint},
+  keywords = {Computer Science - Computer Vision and Pattern Recognition},
+  file = {/home/zenon/Zotero/storage/CW73C5N3/Simonyan and Zisserman - 2015 - Very Deep Convolutional Networks for Large-Scale I.pdf;/home/zenon/Zotero/storage/LQIW4V7G/1409.html}
+}
 @article{su2020,
   title = {Machine {{Learning-Based Crop Drought Mapping System}} by {{UAV Remote Sensing RGB Imagery}}},
   author = {Su, Jinya and Coombes, Matthew and Liu, Cunjia and Zhu, Yongchao and Song, Xingyang and Fang, Shibo and Guo, Lei and Chen, Wen-Hua},
@@ -1064,6 +1078,19 @@
   file = {/home/zenon/Zotero/storage/KUHDEQJF/Su et al. - 2020 - Machine Learning-Based Crop Drought Mapping System.pdf}
 }
+@inproceedings{szegedy2015,
+  title = {Going Deeper with Convolutions},
+  booktitle = {2015 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
+  author = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
+  date = {2015-06},
+  pages = {1--9},
+  issn = {1063-6919},
+  doi = {10.1109/CVPR.2015.7298594},
+  abstract = {We propose a deep convolutional neural network architecture codenamed Inception that achieves the new state of the art for classification and detection in the ImageNet Large-Scale Visual Recognition Challenge 2014 (ILSVRC14). The main hallmark of this architecture is the improved utilization of the computing resources inside the network. By a carefully crafted design, we increased the depth and width of the network while keeping the computational budget constant. To optimize quality, the architectural decisions were based on the Hebbian principle and the intuition of multi-scale processing. One particular incarnation used in our submission for ILSVRC14 is called GoogLeNet, a 22 layers deep network, the quality of which is assessed in the context of classification and detection.},
+  eventtitle = {2015 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
+  file = {/home/zenon/Zotero/storage/VL2YIAAN/Szegedy et al. - 2015 - Going deeper with convolutions.pdf;/home/zenon/Zotero/storage/GWTG8T26/7298594.html}
+}
 @article{uijlings2013,
   title = {Selective {{Search}} for {{Object Recognition}}},
   author = {Uijlings, J. R. R. and family=Sande, given=K. E. A., prefix=van de, useprefix=true and Gevers, T. and Smeulders, A. W. M.},
@@ -1150,18 +1177,22 @@
   file = {/home/zenon/Zotero/storage/G27M4VFA/Wang et al. - 2022 - YOLOv7 Trainable Bag-of-Freebies Sets New State-o.pdf}
 }
-@online{zeiler2013,
-  title = {Visualizing and {{Understanding Convolutional Networks}}},
-  author = {Zeiler, Matthew D. and Fergus, Rob},
-  date = {2013-11-28},
-  eprint = {1311.2901},
-  eprinttype = {arxiv},
-  eprintclass = {cs},
-  doi = {10.48550/arXiv.1311.2901},
-  abstract = {Large Convolutional Network models have recently demonstrated impressive classification performance on the ImageNet benchmark. However there is no clear understanding of why they perform so well, or how they might be improved. In this paper we address both issues. We introduce a novel visualization technique that gives insight into the function of intermediate feature layers and the operation of the classifier. We also perform an ablation study to discover the performance contribution from different model layers. This enables us to find model architectures that outperform Krizhevsky \textbackslash etal on the ImageNet classification benchmark. We show our ImageNet model generalizes well to other datasets: when the softmax classifier is retrained, it convincingly beats the current state-of-the-art results on Caltech-101 and Caltech-256 datasets.},
-  pubstate = {preprint},
-  keywords = {Computer Science - Computer Vision and Pattern Recognition},
-  file = {/home/zenon/Zotero/storage/XIE8AWCP/Zeiler and Fergus - 2013 - Visualizing and Understanding Convolutional Networ.pdf;/home/zenon/Zotero/storage/2SFHRHUU/1311.html}
+@inproceedings{zeiler2014,
+  title = {Visualizing and {{Understanding Convolutional Networks}}},
+  booktitle = {Computer {{Vision}} – {{ECCV}} 2014},
+  author = {Zeiler, Matthew D. and Fergus, Rob},
+  editor = {Fleet, David and Pajdla, Tomas and Schiele, Bernt and Tuytelaars, Tinne},
+  date = {2014},
+  series = {Lecture {{Notes}} in {{Computer Science}}},
+  pages = {818--833},
+  publisher = {{Springer International Publishing}},
+  location = {{Cham}},
+  doi = {10.1007/978-3-319-10590-1_53},
+  abstract = {Large Convolutional Network models have recently demonstrated impressive classification performance on the ImageNet benchmark Krizhevsky et al. [18]. However there is no clear understanding of why they perform so well, or how they might be improved. In this paper we explore both issues. We introduce a novel visualization technique that gives insight into the function of intermediate feature layers and the operation of the classifier. Used in a diagnostic role, these visualizations allow us to find model architectures that outperform Krizhevsky et al on the ImageNet classification benchmark. We also perform an ablation study to discover the performance contribution from different model layers. We show our ImageNet model generalizes well to other datasets: when the softmax classifier is retrained, it convincingly beats the current state-of-the-art results on Caltech-101 and Caltech-256 datasets.},
+  isbn = {978-3-319-10590-1},
+  langid = {english},
+  keywords = {Convolutional Neural Network,Input Image,Pixel Space,Stochastic Gradient Descent,Training Image},
+  file = {/home/zenon/Zotero/storage/5LSEHN2T/Zeiler and Fergus - 2014 - Visualizing and Understanding Convolutional Networ.pdf}
 }
 @online{zheng2019,
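
Note that renaming the entry key from zeiler2013 to zeiler2014 means every citation of the old key in the thesis sources must be updated as well, or biblatex reports an undefined reference. A minimal sketch of how the updated entries surface in the text, assuming only the standard biblatex commands the prose below already uses (the bibliography file name is a placeholder, not taken from this repository):

% Sketch only: standard biblatex usage, not copied from the thesis sources.
\documentclass{article}
\usepackage[style=numeric]{biblatex}
\addbibresource{references.bib} % placeholder name for the file diffed above

\begin{document}
% \textcite puts the authors in the sentence: "Simonyan and Zisserman [1]"
\textcite{simonyan2015} push depth to 16--19 weight layers with 3x3 filters.
% \cite prints only the label: "[2]"
GoogLeNet increases depth and width at constant cost \cite{szegedy2015}.
% After this commit the renamed key must be used: zeiler2013 -> zeiler2014
Feature visualizations guided such designs \cite{zeiler2014}.
\printbibliography
\end{document}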

Binary file not shown.


@@ -105,8 +105,8 @@
 \newacronym{elu}{ELU}{Exponential Linear Unit}
 \newacronym{silu}{SiLU}{Sigmoid Linear Unit}
 \newacronym{mse}{MSE}{mean squared error}
-\newacronym{ilsvrc2012}{ILSVRC2012}{ImageNet Large Scale Visual
-Recognition Challenge}
+\newacronym{ilsvrc}{ILSVRC}{ImageNet Large Scale Visual Recognition
+Challenge}
 \newacronym{lrn}{LRN}{Local Response Normalization}
 \newacronym[plural=ROIs,longplural=Regions of Interest]{roi}{ROI}{Region of Interest}
 \newacronym{spp}{SPP}{Spatial Pyramid Pooling}
@@ -972,7 +972,7 @@ After the publication of the \gls{dpm}, the field of object detection
 did not make significant advances regarding speed or accuracy. Only
 the (re-)introduction of \glspl{cnn} by \textcite{krizhevsky2012} with
 their AlexNet architecture and their subsequent win of the
-\gls{ilsvrc2012} gave the field a new influx of ideas. The
+\gls{ilsvrc} 2012 gave the field a new influx of ideas. The
 availability of the 12 million labeled images in the ImageNet dataset
 \cite{deng2009} allowed a shift from focusing on better methods to
 being able to use more data to train models. Earlier models had

Binary file not shown.