imgaug 라는 좋은 라이브러리가 있어 이를 활용해 기존 데이터셋의 불균형을 해소하고자 한다
darknet 플랫폼 자체에서 augmentation 기능을 지원해서 특별히 필요 없을 줄 알았는데 하위 버전의 YOLO를 사용해야하니 cfg 설정의 한계를 느껴 데이터 증강 라이브러리를 따로 찾게 되었다.
아래 코드는 폴더 내 이미지를 검색하고, augmentation_count 설정에 따라 반복하여 이미지와 라벨 데이터를 증강시킨다.
imgaug의 Documentation과 https://junyoung-jamong.github.io/ 님의 글을 참고하여 작성하였다.
최신 소스 링크 : https://github.com/fishduke/imgaug_darknet
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox
from imgaug.augmentables.bbs import BoundingBoxesOnImage
import os
import glob
import cv2
import argparse
import gc
import psutil
def parser():
parser = argparse.ArgumentParser()
parser.add_argument('--path', type=str, default='/home/fishduke/Desktop/darknet/data/person',
help="Where is images directory? ex)home/user/imagefolder")
parser.add_argument('--count', type=int, default=2,
help="how many will you augmentate? ex)2")
parser.add_argument('--mode', type=int, default=0,
help="test augmentation:0 / create augmented files:1")
return parser.parse_args()
def img_seq():
# Sometimes(0.5, ...) applies the given augmenter in 50% of all cases,
# e.g. Sometimes(0.5, GaussianBlur(0.3)) would blur roughly every second image.
sometimes = lambda aug: iaa.Sometimes(0.5, aug)
# Define our sequence of augmentation steps that will be applied to every image
# All augmenters with per_channel=0.5 will sample one value _per image_
# in 50% of all cases. In all other cases they will sample new values
# _per channel_.
seq = iaa.Sequential(
[
# apply the following augmenters to most images
iaa.Fliplr(0.5), # horizontally flip 50% of all images
iaa.Flipud(0.2), # vertically flip 20% of all images
# crop images by -5% to 10% of their height/width
sometimes(iaa.CropAndPad(
percent=(-0.05, 0.1),
pad_mode=ia.ALL,
pad_cval=(0, 255)
)),
sometimes(iaa.Affine(
scale={"x": (0.8, 1.0), "y": (0.8, 1.0)}, # scale images to 80-120% of their size, individually per axis
translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)}, # translate by -20 to +20 percent (per axis)
rotate=(0, 5), # rotate by -45 to +45 degrees
shear=(-16, 16), # shear by -16 to +16 degrees
order=[0, 1], # use nearest neighbour or bilinear interpolation (fast)
cval=(0, 255), # if mode is constant, use a cval between 0 and 255
mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples)
)),
# execute 0 to 5 of the following (less important) augmenters per image
# don't execute all of them, as that would often be way too strong
iaa.SomeOf((0, 5),
[
sometimes(iaa.Superpixels(p_replace=(0, 0.1), n_segments=(20, 20))), # convert images into their superpixel representation
iaa.OneOf([
iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
iaa.AverageBlur(k=(2, 2)), # blur image using local means with kernel sizes between 2 and 7
iaa.MedianBlur(k=(3, 3)), # blur image using local medians with kernel sizes between 2 and 7
]),
iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images
iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images
# search either for all edges or for directed edges,
# blend the result with the original image using a blobby mask
iaa.SimplexNoiseAlpha(iaa.OneOf([
iaa.EdgeDetect(alpha=(0.5, 1.0)),
iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
])),
iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5), # add gaussian noise to images
iaa.OneOf([
iaa.Dropout((0.01, 0.05), per_channel=0.5), # randomly remove up to 10% of the pixels
iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
]),
iaa.Invert(0.05, per_channel=True), # invert color channels
iaa.Add((-10, 10), per_channel=0.5), # change brightness of images (by -10 to 10 of original value)
iaa.AddToHueAndSaturation((-20, 20)), # change hue and saturation
# either change the brightness of the whole image (sometimes
# per channel) or change the brightness of subareas
iaa.OneOf([
iaa.Multiply((0.5, 1.0), per_channel=0.5),
iaa.FrequencyNoiseAlpha(
exponent=(-4, 0),
first=iaa.Multiply((0.5, 1.5), per_channel=True),
second=iaa.ContrastNormalization((0.5, 2.0))
)
]),
iaa.OneOf([
iaa.Clouds(),
iaa.FastSnowyLandscape(
lightness_threshold=(100, 255),
lightness_multiplier=(1.0, 4.0)
),
#iaa.Fog(),
iaa.Snowflakes(flake_size=(0.7, 0.95), speed=(0.001, 0.03)),
iaa.Rain(drop_size=(0.10, 0.20))
]),
iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast
iaa.Grayscale(alpha=(0.0, 1.0)),
sometimes(iaa.ElasticTransformation(alpha=(0.5, 1.5), sigma=0.25)), # move pixels locally around (with random strengths)
sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))), # sometimes move parts of the image around
sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1)))
],
random_order=True
)
],
random_order=True
)
return seq
def make_aug(image, count, labeling):
seq = img_seq()
img = cv2.imread(image)
img_path = image
txt_path = img_path[:img_path.find('.')] + '.txt'
txt = open(txt_path, "r")
lines = txt.readlines()
boxes = []
class_name = []
for i in range(len(lines)):
label = lines[i].split(' ')
x = float(label[1]) * img.shape[1]
y = float(label[2]) * img.shape[0]
w = int(float(label[3]) * img.shape[1])
h = int(float(label[4][:len(label[4])-1]) * img.shape[0])
if(labeling):
#for labeling
x1 = x-w/2
y1 = y-h/2
x2 = x+w/2
y2 = y+h/2
else:
#for rectangle
x1 = int(x-w/2)
y1 = int(y-h/2)
x2 = int(x+w/2)
y2 = int(y+h/2)
cv2.rectangle(img, (x1,y1), (x2,y2), (255,0,0), 1)
class_name.append(label[0])
boxes.append(BoundingBox(x1,y1,x2,y2))
txt.close()
bounding_boxes = ia.BoundingBoxesOnImage(boxes, img.shape)
seq = seq.to_deterministic()
img_np = np.array(img)
images_aug = seq.augment_image(img_np)
bbs_aug = seq.augment_bounding_boxes([bounding_boxes])[0]
del img_np
aug = images_aug.astype(np.uint8)
new_path = image[:image.rfind('.')]+"_"+str(count)
if(labeling):
new_txt = open(new_path+".txt", "w")
for i in range(len(bbs_aug.bounding_boxes)):
bb_box = bbs_aug.bounding_boxes[i]
if(labeling):
x1 = bb_box[0][0] / img.shape[1]
y1 = bb_box[0][1] / img.shape[0]
x2 = bb_box[1][0] / img.shape[1]
y2 = bb_box[1][1] / img.shape[0]
w = int(float(label[3]) * img.shape[1])
h = int(float(label[4][:len(label[4])-1]) * img.shape[0])
wid = x2-x1
hei = y2-y1
x = x1+wid/2
y = y1+hei/2
new_line = class_name[i] + " " + str(format(round(x,6), ".6f")) + " " + str(format(round(y,6),".6f")) + " " + str(format(round(wid,6),".6f")) + " " + str(format(round(hei,6),".6f")) + "\n"
new_txt.write(new_line)
else:
x1 = int(bb_box[0][0])
y1 = int(bb_box[0][1])
x2 = int(bb_box[1][0])
y2 = int(bb_box[1][1])
cv2.rectangle(aug, (x1, y1), (x2, y2), (255,0,255), 1)
if(labeling==0):
add = cv2.hconcat([img,aug])
cv2.imshow("add",add)
cv2.waitKey(0)
cv2.destroyAllWindows()
else:
new_txt.close()
cv2.imwrite(new_path+".jpg",aug)
seq.clear()
def memory_usage(message: str = 'debug'):
# current process RAM usage
p = psutil.Process()
rss = p.memory_info().rss / 2 ** 20 # Bytes to MB
memory_use = f"[{message}] memory usage: {rss: 10.5f} MB"
return memory_use
def main():
args = parser()
labeling = args.mode
augmentation_count = args.count
folder_path = args.path
folders = os.listdir(folder_path)
# for folder in folders:
# path = os.path.join(folder_path, folder) ## if images hold seperated, gethering all
path = folder_path
images = glob.glob(f'{path}/*.jpg') + glob.glob(f'{path}/*.png')
print("original images size : ", len(images))
p = psutil.Process()
p.memory_info()
num = 0
for image in images:
num += 1
for count in range(augmentation_count):
make_aug(image, count, labeling)
gc.collect()
msg = "\rprocessed : %.0f%%" % (num/len(images)*100.0) + " / " + memory_usage(str(num))
print(msg,end='')
images = glob.glob(f'{path}/*.jpg') + glob.glob(f'{path}/*.png')
print("original+aug images size : ", len(images))
if __name__=="__main__":
main()
원본이미지
증강이미지
labeling = 1의 경우
darknet에서 바로 학습가능한 형태로 augmentation 적용된 라벨링으로 txt 파일도 추가 생성
0의 경우,
증강된 이미지를 확인하는 경우 (증강 옵션 설정 후 체크용)
imgaug Document
https://imgaug.readthedocs.io/en/latest/source/overview/blend.html