[1] KRIZHEVSKY A, SUTSKEVER I, HINTON G E. ImageNet classification with deep convolutional neural networks[J]. Communications of the ACM, 2017, 60(6): 84-90.
[2] SIMONYAN K, ZISSERMAN A. Very deep convolutional networks for large-scale image recognition[EB/OL]. [2023-01-10]. https://arxiv.org/abs/1409.1556.pdf.
[3] SZEGEDY C, LIU W, JIA Y Q, et al. Going deeper with convolutions[C]// 2015 IEEE Conference on Computer Vision and Pattern Recognition. New York: IEEE Press, 2015: 1-9.
[4] HE K M, ZHANG X Y, REN S Q, et al. Deep residual learning for image recognition[C]// 2016 IEEE Conference on Computer Vision and Pattern Recognition. New York: IEEE Press, 2016: 770-778.
[5] LIU W, ANGUELOV D, ERHAN D, et al. SSD: single shot MultiBox detector[C]// European Conference on Computer Vision. Cham: Springer, 2016: 21-37.
[6] REDMON J, DIVVALA S, GIRSHICK R, et al. You only look once: unified, real-time object detection[C]// 2016 IEEE Conference on Computer Vision and Pattern Recognition. New York: IEEE Press, 2016: 779-788.
[7] HUANG G, LIU Z, VAN DER MAATEN L, et al. Densely connected convolutional networks[C]// 2017 IEEE Conference on Computer Vision and Pattern Recognition. New York: IEEE Press, 2017: 2261-2269.
[8] REDMON J, FARHADI A. YOLOv3: an incremental improvement[EB/OL]. [2023-03-10]. https://arxiv.org/abs/1804.02767.pdf.
[9] BOCHKOVSKIY A, WANG C Y, LIAO H Y M. YOLOv4: optimal speed and accuracy of object detection[EB/OL]. [2023-03-10]. https://arxiv.org/abs/2004.10934.pdf.
[10] GLENN R J. YOLOv5[EB/OL]. [2023-03-10]. https://github.com/ultralytics/yolov5.
[11] REN S Q, HE K M, GIRSHICK R, et al. Faster R-CNN: towards real-time object detection with region proposal networks[J]. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2017, 39(6): 1137-1149.
[12] HU J, SHEN L, SUN G. Squeeze-and-excitation networks[C]// 2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition. New York: IEEE Press, 2018: 7132-7141.
[13] JADERBERG M, SIMONYAN K, ZISSERMAN A, et al. Spatial transformer networks[EB/OL]. [2023-03-10]. https://arxiv.org/abs/1506.02025.pdf.
[14] WOO S, PARK J, LEE J Y, et al. CBAM: convolutional block attention module[C]// European Conference on Computer Vision. Cham: Springer, 2018: 3-19.
[15] DAI J F, QI H Z, XIONG Y W, et al. Deformable convolutional networks[C]// 2017 IEEE International Conference on Computer Vision. New York: IEEE Press, 2017: 764-773.
[16] ZHU L, WANG X J, KE Z H, et al. BiFormer: vision transformer with Bi-level routing attention[C]// 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition. New York: IEEE Press, 2023: 10323-10333.
[17] YU F, CHEN H F, WANG X, et al. BDD100K: a diverse driving dataset for heterogeneous multitask learning[C]// 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition. New York: IEEE Press, 2020: 2636-2645.
[18] KLEIN I. NEXET-the largest and most diverse road dataset in the world[EB/OL]. [2023-03-10]. https://www.kaggle.com/datasets/solesensei/nexet-original.
[19] TIAN Z, SHEN C H, CHEN H, et al. FCOS: fully convolutional one-stage object detection[C]// 2019 IEEE/CVF International Conference on Computer Vision. New York: IEEE Press, 2020: 9626-9635.