# RGB and RGB-D Fusion
# Code borrowed from https://github.com/jiangyao-scu/JL-DCF-pytorch, with some added tips.
import mmcv
import matplotlib.pyplot as plt
from fastcore.basics import *
from fastai.vision.all import *
from fastai.torch_basics import *
import warnings
warnings.filterwarnings("ignore")
import kornia
from kornia.constants import Resample
from kornia.color import *
from kornia import augmentation as K
import kornia.augmentation as F
import kornia.augmentation.random_generator as rg
from torchvision.transforms import functional as tvF
from torchvision.transforms import transforms
from torchvision.transforms import PILToTensor
from functools import partial
from timm.models.layers import trunc_normal_, DropPath
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.models.vision_transformer import _cfg
from einops import rearrange
from timm.models.registry import register_model
# Seed the random number generators via fastai's `set_seed`.
set_seed(105)

# Dataset locations: the collection root plus its LR (assigned to rgbPath),
# depth and GT sub-folders.
rootPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/')
rgbPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/LR/')
depPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/')
gtPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/GT/')

# List the image files found under each folder.
rgbFiles = get_image_files(rgbPath)
depFiles = get_image_files(depPath)
lblFiles = get_image_files(gtPath)

# Notebook-style peek at the first entry of each listing.
rgbFiles[0]
depFiles[0]
lblFiles[0]

# torchvision converters between PIL images and tensors.
to_tensor = transforms.ToTensor()
to_pil = transforms.ToPILImage()

# Open the first file of each listing with PIL.
# NOTE(review): index 0 of the three listings is assumed to be the same sample —
# verify that the listings are aligned.
rgbImage = Image.open(rgbFiles[0])
lblImage = Image.open(lblFiles[0])
depImage = Image.open(depFiles[0])

# Tensor versions of the three PIL images.
rgbTensor = image2tensor(rgbImage)
lblTensor = image2tensor(lblImage)
depTensor = image2tensor(depImage)

# Display the RGB and depth images.
rgbImage
depImage

# Compare the sizes reported for the PIL depth image and for its tensor.
# NOTE(review): `.shape` on a PIL image presumably relies on fastai's patch — confirm.
depImage.shape
depTensor.shape

# Distinct depth values present in the tensor, and how many there are.
torch.unique(depTensor)
len(torch.unique(depTensor))
depFiles[0]

# Read one depth PNG with OpenCV instead of PIL; flag -1 is cv2.IMREAD_UNCHANGED.
import cv2
im = cv2.imread('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png',-1)
# cv2.imshow("test",im)
# Attention: reading the depth image with OpenCV gives 3 dimensions, but reading it with PIL gives only 1.
# Convert the OpenCV result to a float32 array and display it.
in_ = np.array(im, dtype=np.float32)
in_
# Attention: reading the depth image with OpenCV gives 3 dimensions, but reading it with PIL gives only 1.
in_.shape
# Number of distinct values in the OpenCV-loaded depth array.
len(np.unique(in_))
def Normalization(image):
    """Reverse the channel axis, scale by 1/255 and standardise per channel.

    Args:
        image: Array indexed as ``[row, column, channel]`` with three channels.
            Callers in this file pass OpenCV images, which are in BGR order.

    Returns:
        A new array of the same shape with the last axis reversed (BGR -> RGB),
        divided by 255, then shifted by the per-channel mean and divided by
        the per-channel standard deviation. The input is not modified.
    """
    channel_mean = np.array((0.485, 0.456, 0.406))
    channel_std = np.array((0.229, 0.224, 0.225))

    # Reading the last axis backwards turns OpenCV's BGR order into RGB;
    # the division allocates a fresh array, so `image` itself stays untouched.
    scaled = image[:, :, ::-1] / 255.0

    # In-place arithmetic keeps the dtype produced by the division above.
    scaled -= channel_mean
    scaled /= channel_std
    return scaled
# Scratch check of the in-place `-=` and `/=` operators on float32 arrays,
# the same operators Normalization applies: (1 - 2) / 2 per element.
temp = np.array((1,1,1),dtype=np.float32)
temp2 = np.array((2,2,2),dtype=np.float32)
temp-=temp2
temp/=temp2
temp
# The original author uses this helper to read both the RGB image and the depth image.
def load_image(path, image_size):
    """Read an image with OpenCV, resize it to a square and normalise it.

    Args:
        path: Location of the image; a ``str`` or a path-like object such as
            ``pathlib.Path``.
        image_size: Side length, in pixels, of the square output.

    Returns:
        The array returned by ``Normalization`` for the resized float32 image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path`` (missing or
            unreadable file).
    """
    # str() lets pathlib.Path objects through on OpenCV builds that only accept str.
    im = cv2.imread(str(path))
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cv2.imread could not read image: {path}")
    in_ = np.array(im, dtype=np.float32)
    in_ = cv2.resize(in_, (image_size, image_size))
    return Normalization(in_)
def load_sal_label(path, image_size):
    """Read a saliency label as grayscale, resize it and scale it by 1/255.

    Args:
        path: Location of the label image; a ``str`` or a path-like object
            such as ``pathlib.Path``.
        image_size: Side length, in pixels, of the square output.

    Returns:
        Float32 array of shape ``(image_size, image_size, 1)`` holding the
        grayscale values divided by 255.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path`` (missing or
            unreadable file).
    """
    # str() lets pathlib.Path objects through on OpenCV builds that only accept str.
    im = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cv2.imread could not read label: {path}")
    label = np.array(im, dtype=np.float32)
    label = cv2.resize(label, (image_size, image_size))
    label = label / 255.0
    # Append a trailing channel axis so the label is (H, W, 1).
    return label[..., np.newaxis]
# Notebook-style display of the first RGB file path.
rgbFiles[0]
import numpy as np


def _show(label, view):
    """Print a slicing expression's label, its result and the result's shape."""
    print(label, view, view.shape)


# 3 x 3 x 4 integer cube holding 1..36, used to illustrate NumPy slicing.
b = np.array([
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
    [[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]],
    [[25, 26, 27, 28], [29, 30, 31, 32], [33, 34, 35, 36]],
])
print(f'b is {b}')
print(b.shape)

# Reversing the last axis (the trick used for BGR -> RGB).
_show("b[:, :, ::-1]", b[:, :, ::-1])

# Selecting along the first axis.
print("b[0, ::],b[1, ::],b[-1, ::],b[0:2, ::]")
_show("b[0, ::]", b[0, ::])
_show("b[1, ::]", b[1, ::])
_show("b[-1, ::]", b[-1, ::])
_show("b[0:2, ::]", b[0:2, ::])

# Slicing (not indexing) the second axis keeps that axis in the result.
print("b[:, 0:],b[:, 1:],b[:, -1:],b[:, 0:2:]")
_show("b[:, 0:]", b[:, 0:])
_show("b[:, 1:]", b[:, 1:])
_show("b[:, -1:]", b[:, -1:])
_show("b[:, 0:2:]", b[:, 0:2:])

# Integer-indexing the second axis drops that axis.
print("b[::, 0],b[::, 1],b[::, -1],b[::, 0:2:]")
_show("b[::, 0]", b[::, 0])
_show("b[::, 1]", b[::, 1])
_show("b[::, -1]", b[::, -1])
_show("b[::, 0:2:]", b[::, 0:2:])

# Integer-indexing the last axis.
print("b[:,:, 0],b[:,:, 1],b[:,:, -1],b[:,:, 0:2:]")
_show("b[:, :, 0]", b[:, :, 0])
_show("b[:, :, 1]", b[:, :, 1])
_show("b[:, :, -1]", b[:, :, -1])
# Run load_image on one depth PNG at size 512 and display the resulting array shape.
load_image('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png',512).shape
# modification of https://github.com/FLHerne/mapgen/blob/master/diamondsquare.py
def plasma_fractal(mapsize=256, wibbledecay=3):
    """Generate a heightmap using the diamond-square algorithm.

    Args:
        mapsize: Side length of the square map; must be a positive power of two.
        wibbledecay: Factor the noise amplitude ``wibble`` is divided by after
            each refinement pass.

    Returns:
        Square 2-D float64 array of side ``mapsize``, shifted and scaled so
        its minimum is 0 and its maximum is 1. A map whose values are all
        equal (e.g. ``mapsize=1``) is returned as all zeros.

    Raises:
        ValueError: If ``mapsize`` is not a positive power of two.
    """
    # Explicit check instead of `assert`: asserts vanish under `python -O`,
    # and the bit trick alone lets mapsize=0 through.
    if mapsize < 1 or mapsize & (mapsize - 1):
        raise ValueError(f"mapsize must be a positive power of two, got {mapsize}")

    # np.float_ was removed in NumPy 2.0; np.float64 is the same dtype everywhere.
    maparray = np.empty((mapsize, mapsize), dtype=np.float64)
    maparray[0, 0] = 0
    stepsize = mapsize
    wibble = 100

    def wibbledmean(array):
        # `array` holds the sum of four neighbours; add uniform noise scaled by wibble.
        return array / 4 + wibble * np.random.uniform(-wibble, wibble, array.shape)

    def fillsquares():
        """For each square of points stepsize apart,
        calculate middle value as mean of points + wibble"""
        cornerref = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
        # np.roll wraps around, so the map edges use the opposite edge as neighbour.
        squareaccum = cornerref + np.roll(cornerref, shift=-1, axis=0)
        squareaccum += np.roll(squareaccum, shift=-1, axis=1)
        maparray[stepsize // 2:mapsize:stepsize,
                 stepsize // 2:mapsize:stepsize] = wibbledmean(squareaccum)

    def filldiamonds():
        """For each diamond of points stepsize apart,
        calculate middle value as mean of points + wibble"""
        drgrid = maparray[stepsize // 2:mapsize:stepsize, stepsize // 2:mapsize:stepsize]
        ulgrid = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
        ldrsum = drgrid + np.roll(drgrid, 1, axis=0)
        lulsum = ulgrid + np.roll(ulgrid, -1, axis=1)
        ltsum = ldrsum + lulsum
        maparray[0:mapsize:stepsize, stepsize // 2:mapsize:stepsize] = wibbledmean(ltsum)
        tdrsum = drgrid + np.roll(drgrid, 1, axis=1)
        tulsum = ulgrid + np.roll(ulgrid, -1, axis=0)
        ttsum = tdrsum + tulsum
        maparray[stepsize // 2:mapsize:stepsize, 0:mapsize:stepsize] = wibbledmean(ttsum)

    # Halve the grid spacing each pass while shrinking the noise amplitude.
    while stepsize >= 2:
        fillsquares()
        filldiamonds()
        stepsize //= 2
        wibble /= wibbledecay

    maparray -= maparray.min()
    peak = maparray.max()
    # Skip the division for a flat map to avoid 0/0 producing NaN.
    return maparray / peak if peak > 0 else maparray
def fog(x, severity=1):
    """Blend a plasma-fractal noise map into an image to imitate fog.

    Args:
        x: Image-like data with values on a 0-255 scale. The noise map is
            cropped to 224 x 224 and given a trailing singleton axis, so ``x``
            must broadcast against shape ``(224, 224, 1)``.
        severity: Corruption level from 1 to 5; selects the noise strength
            and the ``wibbledecay`` passed to ``plasma_fractal``.

    Returns:
        Float array with the shape of ``x``, with values clipped to 0-255.

    Raises:
        ValueError: If ``severity`` is outside 1-5.
    """
    # (noise strength, wibbledecay) per severity level.
    levels = [(1.5, 2), (2, 2), (2.5, 1.7), (2.5, 1.5), (3, 1.4)]
    if not 1 <= severity <= len(levels):
        # Without this check severity=0 would silently pick the last level
        # through negative indexing.
        raise ValueError(f"severity must be between 1 and {len(levels)}, got {severity}")
    c = levels[severity - 1]

    x = np.array(x) / 255.
    max_val = x.max()
    x += c[0] * plasma_fractal(wibbledecay=c[1])[:224, :224][..., np.newaxis]
    # Rescale so the added noise does not push the original peak above 1, then clip.
    return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255
# Inspect the tensor's shape before applying the fog corruption.
rgbTensor.shape
# NOTE(review): fog adds a 224 x 224 noise map with a trailing singleton axis, so it
# presumably expects a channels-last 224 x 224 array — confirm rgbTensor's layout.
fog(rgbTensor,severity=3)