Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

get_voc.sh 4.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
  1. #!/bin/bash
  2. # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
  3. # Download command: bash data/scripts/get_voc.sh
  4. # Train command: python train.py --data voc.yaml
  5. # Default dataset location is next to /yolov5:
  6. # /parent_folder
  7. # /VOC
  8. # /yolov5
  9. start=$(date +%s)
  10. mkdir -p ../tmp
  11. cd ../tmp/
  12. # Download/unzip images and labels
  13. d='.' # unzip directory
  14. url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
  15. f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
  16. f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
  17. f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
  18. for f in $f1 $f2 $f3; do
  19. echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
  20. done
  21. end=$(date +%s)
  22. runtime=$((end - start))
  23. echo "Completed in" $runtime "seconds"
  24. echo "Splitting dataset..."
  25. python3 - "$@" <<END
  26. import xml.etree.ElementTree as ET
  27. import pickle
  28. import os
  29. from os import listdir, getcwd
  30. from os.path import join
  31. sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
  32. classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
  33. def convert(size, box):
  34. dw = 1./(size[0])
  35. dh = 1./(size[1])
  36. x = (box[0] + box[1])/2.0 - 1
  37. y = (box[2] + box[3])/2.0 - 1
  38. w = box[1] - box[0]
  39. h = box[3] - box[2]
  40. x = x*dw
  41. w = w*dw
  42. y = y*dh
  43. h = h*dh
  44. return (x,y,w,h)
  45. def convert_annotation(year, image_id):
  46. in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
  47. out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
  48. tree=ET.parse(in_file)
  49. root = tree.getroot()
  50. size = root.find('size')
  51. w = int(size.find('width').text)
  52. h = int(size.find('height').text)
  53. for obj in root.iter('object'):
  54. difficult = obj.find('difficult').text
  55. cls = obj.find('name').text
  56. if cls not in classes or int(difficult)==1:
  57. continue
  58. cls_id = classes.index(cls)
  59. xmlbox = obj.find('bndbox')
  60. b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
  61. bb = convert((w,h), b)
  62. out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
  63. wd = getcwd()
  64. for year, image_set in sets:
  65. if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
  66. os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
  67. image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
  68. list_file = open('%s_%s.txt'%(year, image_set), 'w')
  69. for image_id in image_ids:
  70. list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
  71. convert_annotation(year, image_id)
  72. list_file.close()
  73. END
  74. cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
  75. cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
  76. python3 - "$@" <<END
  77. import shutil
  78. import os
  79. os.system('mkdir ../VOC/')
  80. os.system('mkdir ../VOC/images')
  81. os.system('mkdir ../VOC/images/train')
  82. os.system('mkdir ../VOC/images/val')
  83. os.system('mkdir ../VOC/labels')
  84. os.system('mkdir ../VOC/labels/train')
  85. os.system('mkdir ../VOC/labels/val')
  86. import os
  87. print(os.path.exists('../tmp/train.txt'))
  88. f = open('../tmp/train.txt', 'r')
  89. lines = f.readlines()
  90. for line in lines:
  91. line = "/".join(line.split('/')[-5:]).strip()
  92. if (os.path.exists("../" + line)):
  93. os.system("cp ../"+ line + " ../VOC/images/train")
  94. line = line.replace('JPEGImages', 'labels')
  95. line = line.replace('jpg', 'txt')
  96. if (os.path.exists("../" + line)):
  97. os.system("cp ../"+ line + " ../VOC/labels/train")
  98. print(os.path.exists('../tmp/2007_test.txt'))
  99. f = open('../tmp/2007_test.txt', 'r')
  100. lines = f.readlines()
  101. for line in lines:
  102. line = "/".join(line.split('/')[-5:]).strip()
  103. if (os.path.exists("../" + line)):
  104. os.system("cp ../"+ line + " ../VOC/images/val")
  105. line = line.replace('JPEGImages', 'labels')
  106. line = line.replace('jpg', 'txt')
  107. if (os.path.exists("../" + line)):
  108. os.system("cp ../"+ line + " ../VOC/labels/val")
  109. END
  110. rm -rf ../tmp # remove temporary directory
  111. echo "VOC download done."
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...