Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dev_misc.py 20 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
  1. import traceback
  2. import json
  3. import multiprocessing
  4. import shutil
  5. from pathlib import Path
  6. import cv2
  7. import numpy as np
  8. from core import imagelib, pathex
  9. from core.cv2ex import *
  10. from core.interact import interact as io
  11. from core.joblib import Subprocessor
  12. from core.leras import nn
  13. from DFLIMG import *
  14. from facelib import FaceType, LandmarksProcessor
  15. from . import Extractor, Sorter
  16. from .Extractor import ExtractSubprocessor
  17. def extract_vggface2_dataset(input_dir, device_args={} ):
  18. multi_gpu = device_args.get('multi_gpu', False)
  19. cpu_only = device_args.get('cpu_only', False)
  20. input_path = Path(input_dir)
  21. if not input_path.exists():
  22. raise ValueError('Input directory not found. Please ensure it exists.')
  23. bb_csv = input_path / 'loose_bb_train.csv'
  24. if not bb_csv.exists():
  25. raise ValueError('loose_bb_train.csv found. Please ensure it exists.')
  26. bb_lines = bb_csv.read_text().split('\n')
  27. bb_lines.pop(0)
  28. bb_dict = {}
  29. for line in bb_lines:
  30. name, l, t, w, h = line.split(',')
  31. name = name[1:-1]
  32. l, t, w, h = [ int(x) for x in (l, t, w, h) ]
  33. bb_dict[name] = (l,t,w, h)
  34. output_path = input_path.parent / (input_path.name + '_out')
  35. dir_names = pathex.get_all_dir_names(input_path)
  36. if not output_path.exists():
  37. output_path.mkdir(parents=True, exist_ok=True)
  38. data = []
  39. for dir_name in io.progress_bar_generator(dir_names, "Collecting"):
  40. cur_input_path = input_path / dir_name
  41. cur_output_path = output_path / dir_name
  42. if not cur_output_path.exists():
  43. cur_output_path.mkdir(parents=True, exist_ok=True)
  44. input_path_image_paths = pathex.get_image_paths(cur_input_path)
  45. for filename in input_path_image_paths:
  46. filename_path = Path(filename)
  47. name = filename_path.parent.name + '/' + filename_path.stem
  48. if name not in bb_dict:
  49. continue
  50. l,t,w,h = bb_dict[name]
  51. if min(w,h) < 128:
  52. continue
  53. data += [ ExtractSubprocessor.Data(filename=filename,rects=[ (l,t,l+w,t+h) ], landmarks_accurate=False, force_output_path=cur_output_path ) ]
  54. face_type = FaceType.fromString('full_face')
  55. io.log_info ('Performing 2nd pass...')
  56. data = ExtractSubprocessor (data, 'landmarks', 256, face_type, debug_dir=None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
  57. io.log_info ('Performing 3rd pass...')
  58. ExtractSubprocessor (data, 'final', 256, face_type, debug_dir=None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=None).run()
  59. """
  60. import code
  61. code.interact(local=dict(globals(), **locals()))
  62. data_len = len(data)
  63. i = 0
  64. while i < data_len-1:
  65. i_name = Path(data[i].filename).parent.name
  66. sub_data = []
  67. for j in range (i, data_len):
  68. j_name = Path(data[j].filename).parent.name
  69. if i_name == j_name:
  70. sub_data += [ data[j] ]
  71. else:
  72. break
  73. i = j
  74. cur_output_path = output_path / i_name
  75. io.log_info (f"Processing: {str(cur_output_path)}, {i}/{data_len} ")
  76. if not cur_output_path.exists():
  77. cur_output_path.mkdir(parents=True, exist_ok=True)
  78. for dir_name in dir_names:
  79. cur_input_path = input_path / dir_name
  80. cur_output_path = output_path / dir_name
  81. input_path_image_paths = pathex.get_image_paths(cur_input_path)
  82. l = len(input_path_image_paths)
  83. #if l < 250 or l > 350:
  84. # continue
  85. io.log_info (f"Processing: {str(cur_input_path)} ")
  86. if not cur_output_path.exists():
  87. cur_output_path.mkdir(parents=True, exist_ok=True)
  88. data = []
  89. for filename in input_path_image_paths:
  90. filename_path = Path(filename)
  91. name = filename_path.parent.name + '/' + filename_path.stem
  92. if name not in bb_dict:
  93. continue
  94. bb = bb_dict[name]
  95. l,t,w,h = bb
  96. if min(w,h) < 128:
  97. continue
  98. data += [ ExtractSubprocessor.Data(filename=filename,rects=[ (l,t,l+w,t+h) ], landmarks_accurate=False ) ]
  99. io.log_info ('Performing 2nd pass...')
  100. data = ExtractSubprocessor (data, 'landmarks', 256, face_type, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False).run()
  101. io.log_info ('Performing 3rd pass...')
  102. data = ExtractSubprocessor (data, 'final', 256, face_type, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, final_output_path=cur_output_path).run()
  103. io.log_info (f"Sorting: {str(cur_output_path)} ")
  104. Sorter.main (input_path=str(cur_output_path), sort_by_method='hist')
  105. import code
  106. code.interact(local=dict(globals(), **locals()))
  107. #try:
  108. # io.log_info (f"Removing: {str(cur_input_path)} ")
  109. # shutil.rmtree(cur_input_path)
  110. #except:
  111. # io.log_info (f"unable to remove: {str(cur_input_path)} ")
  112. def extract_vggface2_dataset(input_dir, device_args={} ):
  113. multi_gpu = device_args.get('multi_gpu', False)
  114. cpu_only = device_args.get('cpu_only', False)
  115. input_path = Path(input_dir)
  116. if not input_path.exists():
  117. raise ValueError('Input directory not found. Please ensure it exists.')
  118. output_path = input_path.parent / (input_path.name + '_out')
  119. dir_names = pathex.get_all_dir_names(input_path)
  120. if not output_path.exists():
  121. output_path.mkdir(parents=True, exist_ok=True)
  122. for dir_name in dir_names:
  123. cur_input_path = input_path / dir_name
  124. cur_output_path = output_path / dir_name
  125. l = len(pathex.get_image_paths(cur_input_path))
  126. if l < 250 or l > 350:
  127. continue
  128. io.log_info (f"Processing: {str(cur_input_path)} ")
  129. if not cur_output_path.exists():
  130. cur_output_path.mkdir(parents=True, exist_ok=True)
  131. Extractor.main( str(cur_input_path),
  132. str(cur_output_path),
  133. detector='s3fd',
  134. image_size=256,
  135. face_type='full_face',
  136. max_faces_from_image=1,
  137. device_args=device_args )
  138. io.log_info (f"Sorting: {str(cur_input_path)} ")
  139. Sorter.main (input_path=str(cur_output_path), sort_by_method='hist')
  140. try:
  141. io.log_info (f"Removing: {str(cur_input_path)} ")
  142. shutil.rmtree(cur_input_path)
  143. except:
  144. io.log_info (f"unable to remove: {str(cur_input_path)} ")
  145. """
  146. #unused in end user workflow
  147. def dev_test_68(input_dir ):
  148. # process 68 landmarks dataset with .pts files
  149. input_path = Path(input_dir)
  150. if not input_path.exists():
  151. raise ValueError('input_dir not found. Please ensure it exists.')
  152. output_path = input_path.parent / (input_path.name+'_aligned')
  153. io.log_info(f'Output dir is % {output_path}')
  154. if output_path.exists():
  155. output_images_paths = pathex.get_image_paths(output_path)
  156. if len(output_images_paths) > 0:
  157. io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." % (str(output_path)), False )
  158. for filename in output_images_paths:
  159. Path(filename).unlink()
  160. else:
  161. output_path.mkdir(parents=True, exist_ok=True)
  162. images_paths = pathex.get_image_paths(input_path)
  163. for filepath in io.progress_bar_generator(images_paths, "Processing"):
  164. filepath = Path(filepath)
  165. pts_filepath = filepath.parent / (filepath.stem+'.pts')
  166. if pts_filepath.exists():
  167. pts = pts_filepath.read_text()
  168. pts_lines = pts.split('\n')
  169. lmrk_lines = None
  170. for pts_line in pts_lines:
  171. if pts_line == '{':
  172. lmrk_lines = []
  173. elif pts_line == '}':
  174. break
  175. else:
  176. if lmrk_lines is not None:
  177. lmrk_lines.append (pts_line)
  178. if lmrk_lines is not None and len(lmrk_lines) == 68:
  179. try:
  180. lmrks = [ np.array ( lmrk_line.strip().split(' ') ).astype(np.float32).tolist() for lmrk_line in lmrk_lines]
  181. except Exception as e:
  182. print(e)
  183. print(filepath)
  184. continue
  185. rect = LandmarksProcessor.get_rect_from_landmarks(lmrks)
  186. output_filepath = output_path / (filepath.stem+'.jpg')
  187. img = cv2_imread(filepath)
  188. img = imagelib.normalize_channels(img, 3)
  189. cv2_imwrite(output_filepath, img, [int(cv2.IMWRITE_JPEG_QUALITY), 95] )
  190. raise Exception("unimplemented")
  191. #DFLJPG.x(output_filepath, face_type=FaceType.toString(FaceType.MARK_ONLY),
  192. # landmarks=lmrks,
  193. # source_filename=filepath.name,
  194. # source_rect=rect,
  195. # source_landmarks=lmrks
  196. # )
  197. io.log_info("Done.")
  198. #unused in end user workflow
  199. def extract_umd_csv(input_file_csv,
  200. face_type='full_face',
  201. device_args={} ):
  202. #extract faces from umdfaces.io dataset csv file with pitch,yaw,roll info.
  203. multi_gpu = device_args.get('multi_gpu', False)
  204. cpu_only = device_args.get('cpu_only', False)
  205. face_type = FaceType.fromString(face_type)
  206. input_file_csv_path = Path(input_file_csv)
  207. if not input_file_csv_path.exists():
  208. raise ValueError('input_file_csv not found. Please ensure it exists.')
  209. input_file_csv_root_path = input_file_csv_path.parent
  210. output_path = input_file_csv_path.parent / ('aligned_' + input_file_csv_path.name)
  211. io.log_info("Output dir is %s." % (str(output_path)) )
  212. if output_path.exists():
  213. output_images_paths = pathex.get_image_paths(output_path)
  214. if len(output_images_paths) > 0:
  215. io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." % (str(output_path)), False )
  216. for filename in output_images_paths:
  217. Path(filename).unlink()
  218. else:
  219. output_path.mkdir(parents=True, exist_ok=True)
  220. try:
  221. with open( str(input_file_csv_path), 'r') as f:
  222. csv_file = f.read()
  223. except Exception as e:
  224. io.log_err("Unable to open or read file " + str(input_file_csv_path) + ": " + str(e) )
  225. return
  226. strings = csv_file.split('\n')
  227. keys = strings[0].split(',')
  228. keys_len = len(keys)
  229. csv_data = []
  230. for i in range(1, len(strings)):
  231. values = strings[i].split(',')
  232. if keys_len != len(values):
  233. io.log_err("Wrong string in csv file, skipping.")
  234. continue
  235. csv_data += [ { keys[n] : values[n] for n in range(keys_len) } ]
  236. data = []
  237. for d in csv_data:
  238. filename = input_file_csv_root_path / d['FILE']
  239. x,y,w,h = float(d['FACE_X']), float(d['FACE_Y']), float(d['FACE_WIDTH']), float(d['FACE_HEIGHT'])
  240. data += [ ExtractSubprocessor.Data(filename=filename, rects=[ [x,y,x+w,y+h] ]) ]
  241. images_found = len(data)
  242. faces_detected = 0
  243. if len(data) > 0:
  244. io.log_info ("Performing 2nd pass from csv file...")
  245. data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run()
  246. io.log_info ('Performing 3rd pass...')
  247. data = ExtractSubprocessor (data, 'final', face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
  248. faces_detected += sum([d.faces_detected for d in data])
  249. io.log_info ('-------------------------')
  250. io.log_info ('Images found: %d' % (images_found) )
  251. io.log_info ('Faces detected: %d' % (faces_detected) )
  252. io.log_info ('-------------------------')
  253. def dev_test1(input_dir):
  254. # LaPa dataset
  255. image_size = 1024
  256. face_type = FaceType.HEAD
  257. input_path = Path(input_dir)
  258. images_path = input_path / 'images'
  259. if not images_path.exists:
  260. raise ValueError('LaPa dataset: images folder not found.')
  261. labels_path = input_path / 'labels'
  262. if not labels_path.exists:
  263. raise ValueError('LaPa dataset: labels folder not found.')
  264. landmarks_path = input_path / 'landmarks'
  265. if not landmarks_path.exists:
  266. raise ValueError('LaPa dataset: landmarks folder not found.')
  267. output_path = input_path / 'out'
  268. if output_path.exists():
  269. output_images_paths = pathex.get_image_paths(output_path)
  270. if len(output_images_paths) != 0:
  271. io.input(f"\n WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.\n")
  272. for filename in output_images_paths:
  273. Path(filename).unlink()
  274. output_path.mkdir(parents=True, exist_ok=True)
  275. data = []
  276. img_paths = pathex.get_image_paths (images_path)
  277. for filename in img_paths:
  278. filepath = Path(filename)
  279. landmark_filepath = landmarks_path / (filepath.stem + '.txt')
  280. if not landmark_filepath.exists():
  281. raise ValueError(f'no landmarks for {filepath}')
  282. #img = cv2_imread(filepath)
  283. lm = landmark_filepath.read_text()
  284. lm = lm.split('\n')
  285. if int(lm[0]) != 106:
  286. raise ValueError(f'wrong landmarks format in {landmark_filepath}')
  287. lmrks = []
  288. for i in range(106):
  289. x,y = lm[i+1].split(' ')
  290. x,y = float(x), float(y)
  291. lmrks.append ( (x,y) )
  292. lmrks = np.array(lmrks)
  293. l,t = np.min(lmrks, 0)
  294. r,b = np.max(lmrks, 0)
  295. l,t,r,b = ( int(x) for x in (l,t,r,b) )
  296. #for x, y in lmrks:
  297. # x,y = int(x), int(y)
  298. # cv2.circle(img, (x, y), 1, (0,255,0) , 1, lineType=cv2.LINE_AA)
  299. #imagelib.draw_rect(img, (l,t,r,b), (0,255,0) )
  300. data += [ ExtractSubprocessor.Data(filepath=filepath, rects=[ (l,t,r,b) ]) ]
  301. #cv2.imshow("", img)
  302. #cv2.waitKey(0)
  303. if len(data) > 0:
  304. device_config = nn.DeviceConfig.BestGPU()
  305. io.log_info ("Performing 2nd pass...")
  306. data = ExtractSubprocessor (data, 'landmarks', image_size, 95, face_type, device_config=device_config).run()
  307. io.log_info ("Performing 3rd pass...")
  308. data = ExtractSubprocessor (data, 'final', image_size, 95, face_type, final_output_path=output_path, device_config=device_config).run()
  309. for filename in pathex.get_image_paths (output_path):
  310. filepath = Path(filename)
  311. dflimg = DFLJPG.load(filepath)
  312. src_filename = dflimg.get_source_filename()
  313. image_to_face_mat = dflimg.get_image_to_face_mat()
  314. label_filepath = labels_path / ( Path(src_filename).stem + '.png')
  315. if not label_filepath.exists():
  316. raise ValueError(f'{label_filepath} does not exist')
  317. mask = cv2_imread(label_filepath)
  318. #mask[mask == 10] = 0 # remove hair
  319. mask[mask > 0] = 1
  320. mask = cv2.warpAffine(mask, image_to_face_mat, (image_size, image_size), cv2.INTER_LINEAR)
  321. mask = cv2.blur(mask, (3,3) )
  322. #cv2.imshow("", (mask*255).astype(np.uint8) )
  323. #cv2.waitKey(0)
  324. dflimg.set_xseg_mask(mask)
  325. dflimg.save()
  326. import code
  327. code.interact(local=dict(globals(), **locals()))
  328. def dev_resave_pngs(input_dir):
  329. input_path = Path(input_dir)
  330. if not input_path.exists():
  331. raise ValueError('input_dir not found. Please ensure it exists.')
  332. images_paths = pathex.get_image_paths(input_path, image_extensions=['.png'], subdirs=True, return_Path_class=True)
  333. for filepath in io.progress_bar_generator(images_paths,"Processing"):
  334. cv2_imwrite(filepath, cv2_imread(filepath))
  335. def dev_segmented_trash(input_dir):
  336. input_path = Path(input_dir)
  337. if not input_path.exists():
  338. raise ValueError('input_dir not found. Please ensure it exists.')
  339. output_path = input_path.parent / (input_path.name+'_trash')
  340. output_path.mkdir(parents=True, exist_ok=True)
  341. images_paths = pathex.get_image_paths(input_path, return_Path_class=True)
  342. trash_paths = []
  343. for filepath in images_paths:
  344. json_file = filepath.parent / (filepath.stem +'.json')
  345. if not json_file.exists():
  346. trash_paths.append(filepath)
  347. for filepath in trash_paths:
  348. try:
  349. filepath.rename ( output_path / filepath.name )
  350. except:
  351. io.log_info ('fail to trashing %s' % (src.name) )
  352. def dev_test(input_dir):
  353. """
  354. extract FaceSynthetics dataset https://github.com/microsoft/FaceSynthetics
  355. BACKGROUND = 0
  356. SKIN = 1
  357. NOSE = 2
  358. RIGHT_EYE = 3
  359. LEFT_EYE = 4
  360. RIGHT_BROW = 5
  361. LEFT_BROW = 6
  362. RIGHT_EAR = 7
  363. LEFT_EAR = 8
  364. MOUTH_INTERIOR = 9
  365. TOP_LIP = 10
  366. BOTTOM_LIP = 11
  367. NECK = 12
  368. HAIR = 13
  369. BEARD = 14
  370. CLOTHING = 15
  371. GLASSES = 16
  372. HEADWEAR = 17
  373. FACEWEAR = 18
  374. IGNORE = 255
  375. """
  376. image_size = 1024
  377. face_type = FaceType.WHOLE_FACE
  378. input_path = Path(input_dir)
  379. output_path = input_path.parent / f'{input_path.name}_out'
  380. if output_path.exists():
  381. output_images_paths = pathex.get_image_paths(output_path)
  382. if len(output_images_paths) != 0:
  383. io.input(f"\n WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.\n")
  384. for filename in output_images_paths:
  385. Path(filename).unlink()
  386. output_path.mkdir(parents=True, exist_ok=True)
  387. data = []
  388. for filepath in io.progress_bar_generator(pathex.get_paths(input_path), "Processing"):
  389. if filepath.suffix == '.txt':
  390. image_filepath = filepath.parent / f'{filepath.name.split("_")[0]}.png'
  391. if not image_filepath.exists():
  392. print(f'{image_filepath} does not exist, skipping')
  393. lmrks = []
  394. for lmrk_line in filepath.read_text().split('\n'):
  395. if len(lmrk_line) == 0:
  396. continue
  397. x, y = lmrk_line.split(' ')
  398. x, y = float(x), float(y)
  399. lmrks.append( (x,y) )
  400. lmrks = np.array(lmrks[:68], np.float32)
  401. rect = LandmarksProcessor.get_rect_from_landmarks(lmrks)
  402. data += [ ExtractSubprocessor.Data(filepath=image_filepath, rects=[rect], landmarks=[ lmrks ] ) ]
  403. if len(data) > 0:
  404. io.log_info ("Performing 3rd pass...")
  405. data = ExtractSubprocessor (data, 'final', image_size, 95, face_type, final_output_path=output_path, device_config=nn.DeviceConfig.CPU()).run()
  406. for filename in io.progress_bar_generator(pathex.get_image_paths (output_path), "Processing"):
  407. filepath = Path(filename)
  408. dflimg = DFLJPG.load(filepath)
  409. src_filename = dflimg.get_source_filename()
  410. image_to_face_mat = dflimg.get_image_to_face_mat()
  411. seg_filepath = input_path / ( Path(src_filename).stem + '_seg.png')
  412. if not seg_filepath.exists():
  413. raise ValueError(f'{seg_filepath} does not exist')
  414. seg = cv2_imread(seg_filepath)
  415. seg_inds = np.isin(seg, [1,2,3,4,5,6,9,10,11])
  416. seg[~seg_inds] = 0
  417. seg[seg_inds] = 1
  418. seg = seg.astype(np.float32)
  419. seg = cv2.warpAffine(seg, image_to_face_mat, (image_size, image_size), cv2.INTER_LANCZOS4)
  420. dflimg.set_xseg_mask(seg)
  421. dflimg.save()
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...