728x90

본 게시글은 이미지 증강 과정의 예시를 시각화한 글입니다.

augment_test copy

In [ ]:

import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [ ]:

from typing import List, Optional
import cv2
import torch
from albumentations import (
    Compose,
    RandomRotate90,
    HorizontalFlip,
    VerticalFlip,
    Resize,
    Normalize
)
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset

# Pick the torch device string: use CUDA when torch reports it as available, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [ ]:

# Sample inputs for the demos: one image from each numbered dataset folder.
img_paths = [
    "./utils/data/dataset/0/0.jpg",
    "./utils/data/dataset/1/0.jpg",
    "./utils/data/dataset/2/0.jpg",
    "./utils/data/dataset/3/0.jpg",
]
# One label per path, in the same order.
labels = list(range(4))
# Side length handed to the resize step of the augmentation pipelines.
img_size = 224

In [ ]:

# Read every sample image once so the untouched originals can be shown later.
origin_images = [img.imread(image_file) for image_file in img_paths]

albumentations라이브러리와 CutMix를 통한 이미지 증강¶

CutMix¶

In [ ]:

import os
import numpy as np
import random
import cv2
import matplotlib.pyplot as plt

# Aliases of the shared sample path list used by the CutMix demo below.
image_list = image_path = img_paths
# Number of sample images available.
index_len = len(image_path)

def load_image(path, index):
    """Read an image file and return it as a float32 RGB array divided by 255.

    Args:
        path: Location of the image file on disk.
        index: Not used by this function; kept so existing callers keep working.

    Returns:
        A numpy float32 array in RGB channel order whose values were divided
        by 255.

    Raises:
        FileNotFoundError: If OpenCV could not read an image from ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {path}")

    # OpenCV loads channels as BGR; convert to RGB before scaling.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0

    return image

def rand_bbox(size, lam):  # size : [B, C, W, H]
    """Sample a random patch box for CutMix, clipped to the image bounds.

    Args:
        size: Sequence laid out as [B, C, W, H]; only ``size[2]`` (width) and
            ``size[3]`` (height) are read.
        lam: Mixing ratio; the patch side is ``sqrt(1 - lam)`` times the image
            side, so ``lam=1`` yields an empty patch and ``lam=0`` a full-size
            one (before clipping).

    Returns:
        Tuple ``(bbx1, bby1, bbx2, bby2)`` with the x values clipped to
        ``[0, W]`` and the y values clipped to ``[0, H]``.
    """
    W = size[2]  # image width
    H = size[3]  # image height
    cut_rat = np.sqrt(1. - lam)  # side-length ratio of the patch
    # Use the builtin int: the np.int alias was removed in NumPy 1.24.
    cut_w = int(W * cut_rat)  # patch width
    cut_h = int(H * cut_rat)  # patch height

    # Draw the patch centre uniformly from inside the image.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # Expand the centre into box corners and clip them to the image.
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

def cutmix(path, index, imsize):
    """Create CutMix-style samples by pasting a random patch from a neighbouring image.

    For each of four positions ``i`` the image at ``path[i]`` is loaded, a
    random box is drawn with ``rand_bbox`` and that region is overwritten with
    the same region of the image at ``path[i - 1]`` (position 0 therefore
    borrows from the last path).

    Args:
        path: Sequence of image file paths; entries 0..3 are read.
        index: First entry of the drawn index list and inclusive upper bound
            for the three random entries.
        imsize: Side length used as the sampling range for the patch box.

    Returns:
        List with one mixed image per position, in path order.
    """
    # NOTE(review): these indices are only forwarded to load_image, which in
    # this file ignores them, so images are always taken in path order.
    # Confirm whether path[picked] was intended.
    indexes = [index] + [random.randint(0, index) for _ in range(3)]
    result_imgs = []

    for i, picked in enumerate(indexes):
        print(path)
        image = load_image(path[i], picked)
        lam = np.random.beta(1.0, 1.0)

        # The donor is the previous image in the list (wraps around for i == 0).
        donor = load_image(path[i - 1], i)

        # Sample the box within imsize rather than a hard-coded 224.
        # Assumes both images cover at least imsize x imsize pixels and share
        # the same shape; TODO confirm for the dataset in use.
        bbx1, bby1, bbx2, bby2 = rand_bbox([0, 0, imsize, imsize], lam)
        image[bbx1:bbx2, bby1:bby2] = donor[bbx1:bbx2, bby1:bby2]

        result_imgs.append(image)

    return result_imgs



# Run CutMix over the sample images and display the first mixed result.
test = cutmix(path=image_path, index=3, imsize=224)
plt.imshow(test[0])
plt.show()

['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg']
['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg']
['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg']
['./utils/data/dataset/0/0.jpg', './utils/data/dataset/1/0.jpg', './utils/data/dataset/2/0.jpg', './utils/data/dataset/3/0.jpg']

모자이크 믹스

In [ ]:

import os
import numpy as np
import random
import cv2
import matplotlib.pyplot as plt

# Aliases of the shared sample path list used by the mosaic demo below.
image_list = image_path = img_paths
# Number of sample images available.
index_len = len(image_path)

def load_image(path, index):
    """Print the path, read the image and return it as float32 RGB divided by 255.

    Args:
        path: Location of the image file on disk.
        index: Not used by this function; kept so existing callers keep working.

    Returns:
        A numpy float32 array in RGB channel order whose values were divided
        by 255.

    Raises:
        FileNotFoundError: If OpenCV could not read an image from ``path``.
    """
    print(path)
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {path}")

    # OpenCV loads channels as BGR; convert to RGB before scaling.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0

    return image
# Mosaic mix
def mozamix(path, index, imsize):
    """Tile four images around a random centre into one ``imsize`` x ``imsize`` mosaic.

    The images at ``path[0]`` .. ``path[3]`` are placed in the top-left,
    top-right, bottom-left and bottom-right quadrants that meet at a randomly
    chosen centre point. Each image is cropped from the corner that touches
    the centre so that it fits its quadrant.

    Args:
        path: Sequence of image file paths; entries 0..3 are read.
        index: First entry of the drawn index list and inclusive upper bound
            for the three random entries.
        imsize: Side length of the square output canvas.

    Returns:
        A float32 array of shape ``(imsize, imsize, 3)``; pixels not covered
        by any image keep the fill value 1.0.
    """
    # Choose the mosaic centre at random inside the middle half of the canvas.
    xc, yc = [int(random.uniform(imsize * 0.25, imsize * 0.75)) for _ in range(2)]
    # NOTE(review): these indices are only forwarded to load_image, which in
    # this file ignores them, so images are always taken in path order.
    # Confirm whether path[picked] was intended.
    indexes = [index] + [random.randint(0, index) for _ in range(3)]

    # Canvas the four images are pasted onto, pre-filled with ones.
    result_img = np.full((imsize, imsize, 3), 1, dtype=np.float32)

    for i, picked in enumerate(indexes):

        image = load_image(path[i], picked)
        # Use the real image size so inputs that are not exactly
        # imsize x imsize are cropped correctly instead of mis-sliced.
        h, w = image.shape[:2]

        # "a" coordinates address the canvas, "b" coordinates the source image.
        if i == 0:  # top left
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, imsize), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(imsize, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, imsize), min(imsize, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        result_img[y1a:y2a, x1a:x2a] = image[y1b:y2b, x1b:x2b]

    return result_img



# Build one mosaic from the sample images and display it.
test = mozamix(path=image_path, index=3, imsize=224)
plt.imshow(test)
plt.show()

./utils/data/dataset/0/0.jpg
./utils/data/dataset/1/0.jpg
./utils/data/dataset/2/0.jpg
./utils/data/dataset/3/0.jpg

augmentation¶

In [ ]:

def _get_augmentations(use_augmentation: bool, img_size: int) -> Compose:
    """Assemble the full preprocessing pipeline, optionally with random augmentations.

    Both variants resize to ``img_size`` x ``img_size``, normalise with the
    fixed per-channel mean/std below and finish with ``ToTensorV2``. When
    ``use_augmentation`` is true, a random 90-degree rotation runs before the
    resize and horizontal/vertical flips run after it, each with p=0.5.
    """
    steps = []
    if use_augmentation:
        steps.append(RandomRotate90(p=0.5))
    steps.append(Resize(img_size, img_size))
    if use_augmentation:
        steps.extend([HorizontalFlip(p=0.5), VerticalFlip(p=0.5)])
    steps.extend(
        [
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2(),
        ]
    )
    return Compose(steps)

In [ ]:

# Random rotation
def compose1(use_augmentation: bool, img_size: int) -> Compose:
    """Build the pipeline that demonstrates random 90-degree rotation on its own.

    With ``use_augmentation`` the steps are ``RandomRotate90(p=1)``, a resize
    to ``img_size`` x ``img_size`` and ``ToTensorV2``; normalisation is left
    out. Otherwise the steps are resize, normalise and ``ToTensorV2``.
    """
    if not use_augmentation:
        return Compose(
            [
                Resize(img_size, img_size),
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2(),
            ]
        )

    return Compose(
        [
            RandomRotate90(p=1),  # rotation applied with probability 1
            Resize(img_size, img_size),
            ToTensorV2(),
        ]
    )
# Horizontal flip
def compose2(use_augmentation: bool, img_size: int) -> Compose:
    """Build the pipeline that demonstrates the horizontal flip on its own.

    With ``use_augmentation`` the steps are a resize to ``img_size`` x
    ``img_size``, ``HorizontalFlip(p=1)`` and ``ToTensorV2``; normalisation is
    left out. Otherwise the steps are resize, normalise and ``ToTensorV2``.
    """
    resize = Resize(img_size, img_size)
    if use_augmentation:
        # The flip is applied with probability 1.
        steps = [resize, HorizontalFlip(p=1), ToTensorV2()]
    else:
        steps = [
            resize,
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2(),
        ]
    return Compose(steps)
# Vertical flip
def compose3(use_augmentation: bool, img_size: int) -> Compose:
    """Build the pipeline that demonstrates the vertical flip on its own.

    With ``use_augmentation`` the steps are a resize to ``img_size`` x
    ``img_size``, ``VerticalFlip(p=1)`` and ``ToTensorV2``; normalisation is
    left out. Otherwise the steps are resize, normalise and ``ToTensorV2``.
    """
    resize = Resize(img_size, img_size)
    if use_augmentation:
        # The flip is applied with probability 1.
        steps = [resize, VerticalFlip(p=1), ToTensorV2()]
    else:
        steps = [
            resize,
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2(),
        ]
    return Compose(steps)
# Normalisation only
def compose4(use_augmentation: bool, img_size: int) -> Compose:
    """Build the pipeline that applies normalisation without any resize or flip.

    With ``use_augmentation`` the steps are only normalise and ``ToTensorV2``,
    so ``img_size`` is not used. Otherwise a resize to ``img_size`` x
    ``img_size`` runs first.
    """
    steps = [
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    if not use_augmentation:
        # The plain branch still resizes before normalising.
        steps.insert(0, Resize(img_size, img_size))
    return Compose(steps)

원본¶

In [ ]:

# Display the four unmodified sample images on the top row of a 2x4 subplot grid.
fig = plt.figure(figsize=(14, 10))
for i in range(1, 5):
    # Subplot positions are 1-based, while origin_images is indexed from 0.
    ax = plt.subplot(2, 4, i)
    # Left disabled: enabling these two lines would hide the axis ticks.
    # ax.axes.xaxis.set_visible(False)
    # ax.axes.yaxis.set_visible(False)
    plt.imshow(origin_images[i-1])

무작위 회전¶

In [ ]:

# Show the random-rotation pipeline applied to each sample image
# (top row of a 2x4 subplot grid).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once outside the loop.
rotate_pipeline = compose1(use_augmentation=True, img_size=img_size)
for i in range(1, 5):
    ax = plt.subplot(2, 4, i)
    # Avoid the name `img` here: it is the matplotlib.image module alias
    # imported at the top of the notebook, and rebinding it would break any
    # later img.imread call.
    bgr_image = cv2.imread(img_paths[i - 1])
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    augmented = rotate_pipeline(image=rgb_image)
    # Move the leading axis last for imshow; presumably the pipeline output is
    # channel-first (C, H, W).
    plt.imshow(np.transpose(augmented["image"], (1, 2, 0)))

수평 반전¶

In [ ]:

# Show the horizontal-flip pipeline applied to each sample image
# (top row of a 2x4 subplot grid).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once outside the loop.
hflip_pipeline = compose2(use_augmentation=True, img_size=img_size)
for i in range(1, 5):
    ax = plt.subplot(2, 4, i)
    # Avoid the name `img` here: it is the matplotlib.image module alias
    # imported at the top of the notebook, and rebinding it would break any
    # later img.imread call.
    bgr_image = cv2.imread(img_paths[i - 1])
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    augmented = hflip_pipeline(image=rgb_image)
    # Move the leading axis last for imshow; presumably the pipeline output is
    # channel-first (C, H, W).
    plt.imshow(np.transpose(augmented["image"], (1, 2, 0)))

수직 반전¶

In [ ]:

# Show the vertical-flip pipeline applied to each sample image
# (top row of a 2x4 subplot grid).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once outside the loop.
vflip_pipeline = compose3(use_augmentation=True, img_size=img_size)
for i in range(1, 5):
    ax = plt.subplot(2, 4, i)
    # Avoid the name `img` here: it is the matplotlib.image module alias
    # imported at the top of the notebook, and rebinding it would break any
    # later img.imread call.
    bgr_image = cv2.imread(img_paths[i - 1])
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    augmented = vflip_pipeline(image=rgb_image)
    # Move the leading axis last for imshow; presumably the pipeline output is
    # channel-first (C, H, W).
    plt.imshow(np.transpose(augmented["image"], (1, 2, 0)))

무작위 회전 + 수평 반전 + 수직 반전 + 정규화

In [ ]:

# Show the complete augmentation pipeline (rotation, flips, normalisation)
# applied to each sample image (top row of a 2x4 subplot grid).
fig = plt.figure(figsize=(14, 10))
# The pipeline does not depend on the image, so build it once outside the loop.
full_pipeline = _get_augmentations(use_augmentation=True, img_size=img_size)
for i in range(1, 5):
    ax = plt.subplot(2, 4, i)
    # Avoid the name `img` here: it is the matplotlib.image module alias
    # imported at the top of the notebook, and rebinding it would break any
    # later img.imread call.
    bgr_image = cv2.imread(img_paths[i - 1])
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    augmented = full_pipeline(image=rgb_image)
    # Move the leading axis last for imshow; presumably the pipeline output is
    # channel-first (C, H, W). The values are normalised, so imshow clips them
    # to its valid range and logs a message for each image.
    plt.imshow(np.transpose(augmented["image"], (1, 2, 0)))

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

In [ ]:

자세한 코드는 여기에..

728x90

'공부정리 > Computer Vision' 카테고리의 다른 글

[GAN] GAN 모델 안정화를 위한 기법 - LSGAN (0)	2024.01.08
[GAN] GAN이란? (0)	2024.01.08
[GAN] VAE(Variational Auto-Encoder) (0)	2024.01.05
[GAN] 확률 밀도의 추정과 샘플링 (1)	2024.01.05
[Vision] timm 으로 이미지 사전학습 모델 (ImageNet) 불러오기 / Python 파이썬 (0)	2023.02.14

[Vision] albumentations라이브러리와 CutMix를 통한 이미지 증강

albumentations라이브러리와 CutMix를 통한 이미지 증강¶

CutMix¶

augmentation¶

원본¶

무작위 회전¶

수평 반전¶

수직 반전¶

'공부정리 > Computer Vision' 카테고리의 다른 글

티스토리툴바