์ด์ ํฌ์คํ
[Python] YOLO ๋ชจ๋ธ ํ์ต์ ์ํ ์คํ ๋ฐ์ดํฐ ์ ์ค๋น (1)
[Python] YOLO ๋ชจ๋ธ ํ์ต์ ์ํ ์คํ ๋ฐ์ดํฐ ์ ์ค๋น (1)
=============== ๋ชฉํ =============== YOLO ๋ชจ๋ธ ํ์ต์ ์ํ ์คํ ๋ฐ์ดํฐ์ ์ค๋น ================================== 1. roboflow๋ฅผ ํตํด ๋ฐฐํฌ๋์ด ์๋ ์คํ ์ด๋ฏธ์ง ๋ฐ์ดํฐ ์ ๋ค์ด 2. ๊ตฌ๊ธ OpenImageDataset๊ณผ Google BigQuery
coding-gym.tistory.com
ใ OIDv4_ToolKit ํ์ฉ ใ
GitHub - EscVM/OIDv4_ToolKit: Download and visualize single or multiple classes from the huge Open Images v4 dataset
Download and visualize single or multiple classes from the huge Open Images v4 dataset - GitHub - EscVM/OIDv4_ToolKit: Download and visualize single or multiple classes from the huge Open Images v4...
github.com
์ค๋ช ์ ์ํ๋ฉด ์ฝ 600๊ฐ ํด๋์ค๋ฅผ
ํฌํจํ ์ด๋ฏธ์ง ๋ฐ์ดํฐ ์ ์ ๋ค์ด๋ก๋ ํ ์ ์๋ค๊ณ ํ๋ค์.
๊ธฐ๋ณธ์ ์ธ ํ๋ก์ธ์ค๋ฅผ ๋ณด๋ฉด Google Open Images Dataset V4์
๊ธฐ๋ฐํด ์ํ๋ ํน์ ๋ฐ์ดํฐ ์ ์ ์ถ์ถํ๋ ๊ฒ ๊ฐ์ต๋๋ค.
์ฐ์ ํด๋น GitHub๋ฅผ ๋ณธ์ธ์ ํ๋ก์ ํธ ์์น์ cloneํฉ๋๋ค.
git clone https://github.com/EscVM/OIDv4_ToolKit.git
pip install -r requirements.txt
Git clone๊ณผ ํ์ ํจํค์ง ๋ค์ pip install์ ํตํด ๋ค์ด๋ก๋ ๋ฐ์ ํ
๋์ผํ๊ฒ Google Open Images Dataset V4์์ ์ ๊ณตํ๋
ํด๋์ค ํ์ผ์ ํ์ธ ํ ๋ณธ์ธ์๊ฒ ํด๋น ํ๋ ์ด๋ฏธ์ง ํด๋์ค๋ฅผ ํ์ํฉ๋๋ค.
์ด์ ๋ณธ๊ฒฉ์ ์ผ๋ก ์ด๋ฏธ์ง ๋ฐ์ดํฐ์ ์ ๋ค์ด๋ก๋ ๋ฐ๊ฒ ์ต๋๋ค.
--type_csv ํ๋ผ๋ฏธํฐ๋ฅผ ํตํด train ํ์ผ test ํ์ผ val ํ์ผ
ํน์ ์ ์ฒด ํ์ผ์ ๊ฐ๋จํ๊ฒ ๋ค์ด๋ก๋ ๋ฐ์ ์ ์์ต๋๋ค.
๋ช ๋ น์ด | ์ค๋ช | ์์ |
<command> | 'downloader' 'visualizer' 'ill_downloader' | downloader |
ํ์ดํผ ํ๋ผ๋ฏธํฐ | ์ค๋ช | ์์ |
--classes | list of classes | Jellyfish |
--type_csv | 'train' 'test' 'validation' 'all' | test |
--sub | Subset of human verified images or machine generated h or m | h |
--image_IsOccluded, --image_IsTruncated --image_IsGroupOf --image_IsDepiction --image_IsInside |
1 or 0 | |
--multiclasses | default or 1 | 0 [default] |
--n_threads | default 20 | |
--noLabels | no labels | |
--iimit | integer number | 20 |
# OIDv4_TooKit ํด๋ ๊ฒฝ๋ก๋ก ์ด๋
cd OIDv4_ToolKit
# main.py ์คํ
python main.py downloader --classes Jellyfish --type_csv test
ํดํ๋ฆฌ์ ํ ์คํธ ์ด๋ฏธ์ง ๋ฐ์ดํฐ์ ํ์ผ์ ๋ค์ด๋ก๋ ๋ฐ์๋ณด๋ 103๊ฐ์ ๋๋ค.
์ ์์ ์ผ๋ก ํด๋น ํด๋์ค์ ํ ์คํธ ์ด๋ฏธ์ง ํ์ผ๊ณผ label ํ์ผ์
๋ค์ด๋ก๋ ๋ฐ์ ๊ฒ์ ํ์ธํ ์ ์์ต๋๋ค.
์ด ๋ ๋ผ๋ฒจ ํ์ผ์ ๊ฒฝ์ฐ ์ ๊ทํ ๋์ด ์์ง ์์ผ๋ฏ๋ก
๋ฐ๋ก yolo ํ์์ ๋ผ๋ฒจ ๋ฐ์ดํฐ๋ก ์ ๊ทํ ์์ผ์ผ ํฉ๋๋ค.
ํด๋น ์ ๊ทํ ์ฝ๋๋ ์๋ ๊นํ๋ธ์ ์ฝ๋๋ฅผ ์ฐธ์กฐํ์ต๋๋ค.
https://gist.github.com/EscVM/b6e5d60343c88f358742aa9e0de2cc3f
Translate OIDv4 labels to YOLO format
Translate OIDv4 labels to YOLO format. GitHub Gist: instantly share code, notes, and snippets.
gist.github.com
ํด๋น ์ฝ๋๋ฅผ ์์ฑํ ํ์ผ์ ์์๋ก oid_to_yolo.py๋ก ๋ณ๊ฒฝํ์ฌ
OIDv4_ToolKit ํด๋ ํ์์์ ์คํํด ์ค๋๋ค.
# ---------------
# Date: 7/25/2018
# Place: Biella
# Author: EscVM
# Project: OID v4 to Yolo
# ---------------
import os
from tqdm import tqdm
from sys import exit
import argparse
import cv2
from textwrap import dedent
ROOT_DIR = 'OID' # ๋ฃจํธ ํด๋ ๊ฒฝ๋ก ์ค์
OUTPUT_DIR = 'OUTPUT' # OUTPUT ๊ฒฝ๋ก ํด๋๋ช
์ค์
NAME_DIR = 'To_Yolo' # OUTPUT ๊ฒฝ๋ก ํ์ ํด๋๋ช
์ค์
def argument_parser():
parser = argparse.ArgumentParser(description='Convert OID format to Yolo')
parser.add_argument("command",
metavar="<command> 'convert' or 'dummy'",
help="'convert oid format to yolo' or 'dummy', add no target images")
parser.add_argument('--dataset', required=True,
metavar="type of dataset: 'validation', 'test', 'train', 'all'",
help='"validation" or "train" or "test" or "all"')
parser.add_argument('--class_name', required=True,
metavar="name of the class to convert",
help='Convert: name of the class Ex: "Apple" Dummy: where to add images')
parser.add_argument('--class_number', required=False,
default=0,
metavar="dictionary value of the class",
help="It's the value assigned to the class")
parser.add_argument('--copy', required=False,
default=False,
metavar="boolean: copy images in a common folder",
help="copy images with labels in a common folder")
parser.add_argument('--names', required=False,
default=False,
metavar="boolean: create .names file with classes",
help="boolean: create .names file with classes")
parser.add_argument('--dummy_name', required=False,
default=False,
metavar="class_name to add as dummy to class_name",
help="add images to class_name with void labels")
parser.add_argument('--move', required=False,
default=False,
metavar="boolean: move images from original folder",
help="if '1' moves imase to To_YOLO folder")
args = parser.parse_args()
return args
class Engine(object):
global ROOT_DIR
global OUTPUT_DIR
global NAME_DIR
def __init__(self, dataset, class_name, class_number, copy, names, move):
self.dataset = dataset
self.class_name = class_name
self.class_number = class_number
self.copy = copy
self.names = names
self.move = move
self.class_list = []
def run_converter(self):
self.make_start()
if self.dataset != 'all':
self.dataset_dir = os.path.join(ROOT_DIR, 'Dataset', self.dataset, self.class_name)
self.label_dir = os.path.join(self.dataset_dir, 'Label')
self.output_dataset_dir = os.path.join(OUTPUT_DIR, NAME_DIR, self.dataset, self.class_name)
if not os.path.exists(self.output_dataset_dir):
os.makedirs(self.output_dataset_dir)
self.img_file = os.listdir(self.dataset_dir)
print("[INFO] {} images found".format(len(self.img_file) - 1))
print("[INFO] ---- x | y | width | height ---- output format".format(len(self.img_file) - 1))
self.make_labels()
if self.names == '1':
self.make_names()
else:
dataset_DIR = os.path.join(ROOT_DIR, 'Dataset')
dataset_list = tuple(os.listdir(dataset_DIR))
for dataset in dataset_list:
self.dataset_dir = os.path.join(ROOT_DIR, 'Dataset', dataset, self.class_name)
self.label_dir = os.path.join(self.dataset_dir, 'Label')
self.output_dataset_dir = os.path.join(OUTPUT_DIR, NAME_DIR, dataset, self.class_name)
if not os.path.exists(self.output_dataset_dir):
os.makedirs(self.output_dataset_dir)
self.img_file = os.listdir(self.dataset_dir)
print("[INFO] {} images found".format(len(self.img_file) - 1))
print("[INFO] ---- x | y | width | height ---- output format".format(len(self.img_file) - 1))
self.make_labels()
if self.names == '1':
self.make_names()
def run_dummy(self, dummy_name):
self.make_start()
dataset_dir = os.path.join(ROOT_DIR, 'Dataset', self.dataset, dummy_name)
self.output_dataset_dir = os.path.join(OUTPUT_DIR, NAME_DIR, self.dataset, self.class_name)
if not os.path.exists(self.output_dataset_dir):
print("The selected output folder does not exists")
exit(1)
img_file = os.listdir(dataset_dir)
print("[INFO] {} images found".format(len(img_file) - 1))
print("[INFO] VOID output format".format(len(img_file) - 1))
for element in tqdm(img_file):
if element.endswith('.jpg'):
img_path = os.path.join(dataset_dir, element)
self.img_path_yolo = os.path.join(self.output_dataset_dir, element)
img_name = str(element.split('.')[0]) + '.txt'
label_path_yolo = os.path.join(self.output_dataset_dir, img_name)
self.img = cv2.imread(img_path)
label_yolo = open(label_path_yolo, 'w')
label_yolo.close()
if self.copy == '1':
self.make_copy()
if self.move == '1':
self.make_copy()
self.make_end()
def make_labels(self):
for element in tqdm(self.img_file):
if element.endswith('.jpg'):
self.img_path = os.path.join(self.dataset_dir, element)
self.img_path_yolo = os.path.join(self.output_dataset_dir, element)
img_name = str(element.split('.')[0]) + '.txt'
self.label_path_original = os.path.join(self.label_dir, img_name)
label_path_yolo = os.path.join(self.output_dataset_dir, img_name)
self.img = cv2.imread(self.img_path)
label_original = open(self.label_path_original, 'r')
label_yolo = open(label_path_yolo, 'w')
for line in label_original:
# name_of_class X_min Y_min X_max Y_max
line = line.strip()
l = line.split(' ')
class_name = l.pop(0)
try:
float(l[0])
except ValueError:
class_name += ' ' + l.pop(0)
if class_name not in self.class_list:
self.class_list.append(class_name)
for i in range(len(l)):
l[i] = float(l[i])
x = ((l[2] + l[0]) / 2)
y = ((l[3] + l[1]) / 2)
width = (l[2] - l[0])
height = (l[3] - l[1])
img_width = 1 / self.img.shape[1]
img_height = 1 / self.img.shape[0]
x *= img_width
y *= img_height
width *= img_width
height *= img_height
if self.class_number != 0:
c_name = self.class_number
else:
c_name = self.class_list.index(class_name)
print("{0} {1} {2} {3} {4}".format(c_name, x, y, width, height), file=label_yolo)
if self.copy == '1':
self.make_copy()
if self.move == '1':
self.make_copy()
self.make_move()
label_yolo.close()
label_original.close()
self.make_end()
def make_copy(self):
cv2.imwrite(self.img_path_yolo, self.img)
def make_move(self):
os.remove(self.img_path)
os.remove(self.label_path_original)
def make_names(self):
file_path = os.path.join(OUTPUT_DIR, NAME_DIR, 'obj.names')
f = open(file_path, 'w')
for i in range(len(self.class_list)):
print("{}".format((self.class_list[i]).lower()), file=f)
f.close()
def make_end(self):
if self.copy == '1':
print("[INFO] Done!")
print("[INFO] There are {} images in your dataset".format(len(os.listdir(self.output_dataset_dir)) / 2))
else:
print("[INFO] Done!")
print("[INFO] There are {} images in your dataset".format(len(os.listdir(self.output_dataset_dir))))
def make_start(self):
print(dedent("""
_____ ____ ____ _ _ __
( _ )(_ _)( _ \( \/ )/. |
)(_)( _)(_ )(_) )\ /(_ _)
(_____)(____)(____/ \/ (_)
____ _____
(_ _)( _ )
)( )(_)(
(__) (_____)
_ _ _____ __ _____
( \/ )( _ )( ) ( _ )
\ / )(_)( )(__ )(_)(
(__) (_____)(____)(_____)
"""))
if __name__ == '__main__':
args = argument_parser()
if args.command == 'convert':
converter = Engine(args.dataset, args.class_name, args.class_number, args.copy, args.names, args.move)
converter.run_converter()
if args.command == 'dummy':
dummy_generator = Engine(args.dataset, args.class_name, args.class_number, args.copy, args.names, args.move)
dummy_generator.run_dummy(args.dummy_name)
ํด๋น ํ์ด์ฌ ์คํ ํ์ผ์ ํ๋ผ๋ฏธํฐ์ ๊ฒฝ์ฐ ํ์ผ์
์๋จ์ ์ฐธ๊ณ ํ์๋ฉด ๋ฉ๋๋ค.
# OIDv4_ToolKit ์์น๋ก ์ด๋
cd OIDv4_ToolKit
# oid_to_yolo.py ์คํ
python oid_to_yolo.py convert --dataset test --class_name Jellyfish --copy True
ํด๋น ํ์ผ ์คํ ๊ฒฐ๊ณผ ์ ์์ ์ผ๋ก OUTPUT ํ๋ ๋ด์
๋ณต์ฌ ๋ ์ด๋ฏธ์ง ํ์ผ๊ณผ YOLO ํ์์ผ๋ก ์ ๊ทํ ๋ ๋ผ๋ฒจ ํ์ผ 104์์ด ์๋ฃ ๋์์ต๋๋ค.
๊ฐ์ธ์ ์ผ๋ก๋ OIDv4_ToolKit์ ๋ฐฉ์์ด ์ ๊ทํ๋ ๊ฐํธํ๊ณ
์ํ๋ ์ด๋ฏธ์ง ๋ฐ์ดํฐ์ ๋ง ์ป์ ์ ์์ด ํธ๋ฆฌํ ๊ฒ ๊ฐ์ต๋๋ค.