공부정리/Computer Vision
[Vision] albumentations 라이브러리와 CutMix를 통한 이미지 증강
sillon
2023. 2. 15. 23:02
728x90
반응형
본 게시글은 이미지 증강 기법을 적용한 예시를 시각화하여 정리한 글입니다.
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np
import warnings
warnings.filterwarnings('ignore')
In [ ]:
from typing import List, Optional
import cv2
import torch
from albumentations import (
Compose,
RandomRotate90,
HorizontalFlip,
VerticalFlip,
Resize,
Normalize
)
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset
# Select the compute device name: 'cuda' when torch reports a usable CUDA
# runtime, otherwise 'cpu'.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
In [ ]:
# One sample image per class directory (0..3); labels[k] belongs to img_paths[k].
img_paths = [
    "./utils/data/dataset/0/0.jpg",
    "./utils/data/dataset/1/0.jpg",
    "./utils/data/dataset/2/0.jpg",
    "./utils/data/dataset/3/0.jpg",
]
labels = [0, 1, 2, 3]

# Edge length (pixels) passed to the resize / mix helpers.
img_size = 224
In [ ]:
# Load every sample image untouched (via matplotlib.image) so the originals
# can be shown next to their augmented versions.
origin_images = [img.imread(path) for path in img_paths]
albumentations 라이브러리와 CutMix를 통한 이미지 증강¶
CutMix¶
In [ ]:
import os
import numpy as np
import random
import cv2
import matplotlib.pyplot as plt
# Both names are aliases of the same img_paths list object (no copy is made).
image_list = image_path = img_paths
# Number of available sample images.
index_len = len(image_path)
def load_image(path, index):
    """Read an image file and return it as a float32 RGB array divided by 255.

    Args:
        path: File path handed to ``cv2.imread``.
        index: Not used by this function; kept so existing two-argument
            calls keep working.

    Returns:
        A ``np.float32`` array in RGB channel order whose values are the
        decoded pixel values divided by 255.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising; fail here with a message naming the path
        # instead of letting cvtColor raise an opaque assertion error.
        raise FileNotFoundError(f"Could not read image: {path}")
    # OpenCV decodes to BGR; convert to RGB for matplotlib.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0
    return image
def rand_bbox(size, lam):
    """Sample a random CutMix patch and return its clipped corner coordinates.

    Args:
        size: Indexable laid out as ``[B, C, W, H]``; only ``size[2]`` (width)
            and ``size[3]`` (height) are read.
        lam: Mixing ratio. The patch side lengths are ``sqrt(1 - lam)`` times
            the width/height, so before clipping the patch covers a
            ``1 - lam`` fraction of the area.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)`` with the x values clipped to ``[0, W]``
        and the y values clipped to ``[0, H]``.
    """
    W = size[2]  # image width
    H = size[3]  # image height

    # Ratio of the patch side to the image side.
    cut_rat = np.sqrt(1. - lam)
    # `np.int` was only an alias of the builtin `int` and was removed in
    # NumPy 1.24, so use the builtin directly (same truncation).
    cut_w = int(W * cut_rat)  # patch width
    cut_h = int(H * cut_rat)  # patch height

    # Draw the patch centre uniformly inside the image.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # Corner coordinates of the patch, clipped to the image bounds.
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    return bbx1, bby1, bbx2, bby2
def cutmix(path, index, imsize):
    """Create four CutMix-style images from the first four entries of ``path``.

    For each position ``i`` in 0..3 the image at ``path[i]`` is loaded and a
    random rectangular region of it is overwritten with the same region of
    the image at ``path[i - 1]`` (for ``i == 0`` that is the last entry).

    Args:
        path: Sequence of image file paths; entries 0..3 are read.
        index: Seeds the list of four sample indexes (itself plus three
            ``random.randint(0, index)`` draws). Those values are only
            forwarded to ``load_image``.
        imsize: Side length of the square area in which the patch is drawn.

    Returns:
        A list with four images as returned by ``load_image``, each with one
        region replaced.
    """
    # NOTE(review): assumes every image is at least imsize x imsize and that
    # paired images have matching shapes inside the patch - confirm against
    # the dataset.
    indexes = [index] + [random.randint(0, index) for _ in range(3)]
    result_imgs = []

    for i, sample_index in enumerate(indexes):
        # Prints the whole path list on every iteration.
        print(path)
        image = load_image(path[i], sample_index)
        # Mixing ratio; Beta(1, 1) is uniform on [0, 1].
        lam = np.random.beta(1.0, 1.0)
        # Donor image: the previous path, wrapping to the last one for i == 0.
        rand_index = i - 1
        rand_cut_img = load_image(path[rand_index], i)
        # The patch is drawn within the requested imsize (was hard-coded 224).
        bbx1, bby1, bbx2, bby2 = rand_bbox([0, 0, imsize, imsize], lam)
        # The x range indexes the first array axis and the y range the second.
        image[bbx1:bbx2, bby1:bby2] = rand_cut_img[bbx1:bbx2, bby1:bby2]
        result_imgs.append(image)
    return result_imgs
# Run CutMix on the sample paths and display the first mixed image.
test = cutmix(path=image_path, index=3, imsize=224)
first_mixed = test[0]
plt.imshow(first_mixed)
plt.show()
['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg'] ['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg'] ['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg'] ['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg']
- 모자이크 믹스
In [ ]:
import os
import numpy as np
import random
import cv2
import matplotlib.pyplot as plt
# Both names are aliases of the same img_paths list object (no copy is made).
image_list = image_path = img_paths
# Number of available sample images.
index_len = len(image_path)
def load_image(path, index):
    """Print ``path``, read the image and return it as float32 RGB over 255.

    Args:
        path: File path handed to ``cv2.imread``; it is also printed.
        index: Not used by this function; kept so existing two-argument
            calls keep working.

    Returns:
        A ``np.float32`` array in RGB channel order whose values are the
        decoded pixel values divided by 255.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``path``.
    """
    print(path)
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising; fail here with a message naming the path
        # instead of letting cvtColor raise an opaque assertion error.
        raise FileNotFoundError(f"Could not read image: {path}")
    # OpenCV decodes to BGR; convert to RGB for matplotlib.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0
    return image
# Mosaic mix
def mozamix(path, index, imsize):
    """Tile the first four images of ``path`` around a random split point.

    A square ``imsize`` x ``imsize`` float32 canvas filled with ones is
    created, a split point ``(xc, yc)`` is drawn between 25% and 75% of
    ``imsize`` on each axis, and the images at ``path[0]``..``path[3]`` are
    pasted into the top-left, top-right, bottom-left and bottom-right
    quadrants respectively.

    Args:
        path: Sequence of image file paths; entries 0..3 are read.
        index: Seeds the list of four sample indexes (itself plus three
            ``random.randint(0, index)`` draws); each value is forwarded to
            ``load_image``.
        imsize: Side length of the output canvas.

    Returns:
        The ``(imsize, imsize, 3)`` float32 canvas with the four crops pasted.
    """
    # NOTE(review): the source crops are taken inside an imsize x imsize
    # window of each image, so images are presumably at least that large -
    # confirm against the dataset.
    tile_w = tile_h = imsize
    # Right/bottom clamp for the destination rectangles (2 * (imsize // 2)).
    clamp = (imsize // 2) * 2

    # Random split point: x first, then y.
    lower, upper = imsize * 0.25, imsize * 0.75
    xc = int(random.uniform(lower, upper))
    yc = int(random.uniform(lower, upper))

    sample_ids = [index]
    for _ in range(3):
        sample_ids.append(random.randint(0, index))

    # Canvas filled with ones; the crops are pasted onto it.
    canvas = np.ones((imsize, imsize, 3), dtype=np.float32)

    for quadrant, sample_id in enumerate(sample_ids):
        tile = load_image(path[quadrant], sample_id)

        # dst_* : rectangle on the canvas, src_* : rectangle in the tile,
        # both as (xmin, ymin, xmax, ymax).
        if quadrant == 0:  # top left
            dst_x1, dst_y1 = max(xc - tile_w, 0), max(yc - tile_h, 0)
            dst_x2, dst_y2 = xc, yc
            src_x1, src_y1 = tile_w - (dst_x2 - dst_x1), tile_h - (dst_y2 - dst_y1)
            src_x2, src_y2 = tile_w, tile_h
        elif quadrant == 1:  # top right
            dst_x1, dst_y1 = xc, max(yc - tile_h, 0)
            dst_x2, dst_y2 = min(xc + tile_w, clamp), yc
            src_x1, src_y1 = 0, tile_h - (dst_y2 - dst_y1)
            src_x2, src_y2 = min(tile_w, dst_x2 - dst_x1), tile_h
        elif quadrant == 2:  # bottom left
            dst_x1, dst_y1 = max(xc - tile_w, 0), yc
            dst_x2, dst_y2 = xc, min(clamp, yc + tile_h)
            src_x1, src_y1 = tile_w - (dst_x2 - dst_x1), 0
            src_x2, src_y2 = max(xc, tile_w), min(dst_y2 - dst_y1, tile_h)
        elif quadrant == 3:  # bottom right
            dst_x1, dst_y1 = xc, yc
            dst_x2, dst_y2 = min(xc + tile_w, clamp), min(clamp, yc + tile_h)
            src_x1, src_y1 = 0, 0
            src_x2, src_y2 = min(tile_w, dst_x2 - dst_x1), min(dst_y2 - dst_y1, tile_h)

        canvas[dst_y1:dst_y2, dst_x1:dst_x2] = tile[src_y1:src_y2, src_x1:src_x2]
    return canvas
# Build one mosaic from the sample paths and display it.
test = mozamix(path=image_path, index=3, imsize=224)
plt.imshow(test)
plt.show()
./utils/data/dataset/0/0.jpg ./utils/data/dataset/1/0.jpg ./utils/data/dataset/2/0.jpg ./utils/data/dataset/3/0.jpg
augmentation¶
In [ ]:
def _get_augmentations(use_augmentation: bool, img_size: int) -> Compose:
    """Build the albumentations pipeline for one image.

    With ``use_augmentation`` true the steps are, in order:
    RandomRotate90(p=0.5), Resize, HorizontalFlip(p=0.5),
    VerticalFlip(p=0.5), Normalize, ToTensorV2.
    Otherwise only Resize, Normalize and ToTensorV2 are applied.

    Args:
        use_augmentation: Whether to include the random rotate/flip steps.
        img_size: Target height and width passed to ``Resize``.

    Returns:
        The composed transform.
    """
    steps = []
    if use_augmentation:
        steps.append(RandomRotate90(p=0.5))
    steps.append(Resize(img_size, img_size))
    if use_augmentation:
        steps.extend([HorizontalFlip(p=0.5), VerticalFlip(p=0.5)])
    steps.append(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    )
    # Last step: convert the array into a torch tensor.
    steps.append(ToTensorV2())
    return Compose(steps)
In [ ]:
# Random rotation
def compose1(use_augmentation: bool, img_size: int) -> Compose:
    """Build a pipeline that isolates the random 90-degree rotation.

    With ``use_augmentation`` true: RandomRotate90(p=1), Resize, ToTensorV2
    (no normalization). Otherwise: Resize, Normalize, ToTensorV2.

    Args:
        use_augmentation: Whether to use the rotation-only pipeline.
        img_size: Target height and width passed to ``Resize``.

    Returns:
        The composed transform.
    """
    if use_augmentation:
        steps = [
            RandomRotate90(p=1),         # always rotate
            Resize(img_size, img_size),  # resize to the target size
        ]
    else:
        steps = [
            Resize(img_size, img_size),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    # Both variants end by converting the array into a torch tensor.
    steps.append(ToTensorV2())
    return Compose(steps)
# Horizontal flip
def compose2(use_augmentation: bool, img_size: int) -> Compose:
    """Build a pipeline that isolates the horizontal flip.

    With ``use_augmentation`` true: Resize, HorizontalFlip(p=1), ToTensorV2
    (no normalization). Otherwise: Resize, Normalize, ToTensorV2.

    Args:
        use_augmentation: Whether to use the flip-only pipeline.
        img_size: Target height and width passed to ``Resize``.

    Returns:
        The composed transform.
    """
    # Both variants start with the resize step.
    steps = [Resize(img_size, img_size)]
    if use_augmentation:
        steps.append(HorizontalFlip(p=1))  # always flip
    else:
        steps.append(
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        )
    # Both variants end by converting the array into a torch tensor.
    steps.append(ToTensorV2())
    return Compose(steps)
# Vertical flip
def compose3(use_augmentation: bool, img_size: int) -> Compose:
    """Build a pipeline that isolates the vertical flip.

    With ``use_augmentation`` true: Resize, VerticalFlip(p=1), ToTensorV2
    (no normalization). Otherwise: Resize, Normalize, ToTensorV2.

    Args:
        use_augmentation: Whether to use the flip-only pipeline.
        img_size: Target height and width passed to ``Resize``.

    Returns:
        The composed transform.
    """
    # Both variants start with the resize step.
    steps = [Resize(img_size, img_size)]
    if use_augmentation:
        steps.append(VerticalFlip(p=1))  # always flip
    else:
        steps.append(
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        )
    # Both variants end by converting the array into a torch tensor.
    steps.append(ToTensorV2())
    return Compose(steps)
# Normalization only
def compose4(use_augmentation: bool, img_size: int) -> Compose:
    """Build a pipeline that applies normalization only.

    With ``use_augmentation`` true: Normalize, ToTensorV2 (no resize, so
    ``img_size`` is not used). Otherwise: Resize, Normalize, ToTensorV2.

    Args:
        use_augmentation: Whether to skip the resize step.
        img_size: Target height and width passed to ``Resize`` in the
            non-augmented variant.

    Returns:
        The composed transform.
    """
    # Only the non-augmented variant resizes first.
    steps = [] if use_augmentation else [Resize(img_size, img_size)]
    steps.append(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    )
    # Convert the array into a torch tensor.
    steps.append(ToTensorV2())
    return Compose(steps)
원본¶
In [ ]:
# Show the first four untouched images side by side on a 2x4 subplot grid.
fig = plt.figure(figsize=(14, 10))
for slot in range(4):
    ax = plt.subplot(2, 4, slot + 1)  # subplot numbers start at 1
    plt.imshow(origin_images[slot])
무작위 회전¶
In [ ]:
# Show the first four images after the rotation-only pipeline (compose1).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once.
transform = compose1(use_augmentation=True, img_size=img_size)
for i, path in enumerate(img_paths[:4], start=1):
    ax = plt.subplot(2, 4, i)
    # Named `image`, not `img`, so the `matplotlib.image as img` module alias
    # imported earlier is not overwritten.
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    augmented = transform(image=image)
    image = augmented["image"]
    # ToTensorV2 outputs channels-first (C, H, W); imshow wants (H, W, C).
    plt.imshow(np.transpose(image, (1, 2, 0)))
수평 반전¶
In [ ]:
# Show the first four images after the horizontal-flip pipeline (compose2).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once.
transform = compose2(use_augmentation=True, img_size=img_size)
for i, path in enumerate(img_paths[:4], start=1):
    ax = plt.subplot(2, 4, i)
    # Named `image`, not `img`, so the `matplotlib.image as img` module alias
    # imported earlier is not overwritten.
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    augmented = transform(image=image)
    image = augmented["image"]
    # ToTensorV2 outputs channels-first (C, H, W); imshow wants (H, W, C).
    plt.imshow(np.transpose(image, (1, 2, 0)))
수직 반전¶
In [ ]:
# Show the first four images after the vertical-flip pipeline (compose3).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once.
transform = compose3(use_augmentation=True, img_size=img_size)
for i, path in enumerate(img_paths[:4], start=1):
    ax = plt.subplot(2, 4, i)
    # Named `image`, not `img`, so the `matplotlib.image as img` module alias
    # imported earlier is not overwritten.
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    augmented = transform(image=image)
    image = augmented["image"]
    # ToTensorV2 outputs channels-first (C, H, W); imshow wants (H, W, C).
    plt.imshow(np.transpose(image, (1, 2, 0)))
무작위 회전 + 크기 조절 + 수평 반전 + 수직 반전 + 정규화
In [ ]:
# Show the first four images after the full pipeline (_get_augmentations).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once.
transform = _get_augmentations(use_augmentation=True, img_size=img_size)
for i, path in enumerate(img_paths[:4], start=1):
    ax = plt.subplot(2, 4, i)
    # Named `image`, not `img`, so the `matplotlib.image as img` module alias
    # imported earlier is not overwritten.
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    augmented = transform(image=image)
    image = augmented["image"]
    # ToTensorV2 outputs channels-first (C, H, W); imshow wants (H, W, C).
    # The normalized values fall outside [0, 1], so imshow clips them and
    # emits a "Clipping input data" warning for each image.
    plt.imshow(np.transpose(image, (1, 2, 0)))
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
In [ ]:
자세한 코드는 여기에..
728x90
반응형