Exploratory data analysis of images,
based on the ISPRS and DroneDeploy datasets
from operator import invert
from fastcore.basics import *
from fastai.vision.all import *
from fastai.torch_basics import *
from torch._C import dtype
import libs.images2chips
import sys
import os
from skimage import io
from glob import glob
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import confusion_matrix
import random
import itertools
# Matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Seed the random number generators through fastai's set_seed.
set_seed(105)
# Folders holding the DroneDeploy "dataset-medium" images, label masks and
# elevation rasters.
train_a_path = Path('/home/ubuntu/data/dronedeploy/dataset-medium/image')
label_a_path = Path('/home/ubuntu/data/dronedeploy/dataset-medium/labels')
elev_path = Path('/home/ubuntu/data/dronedeploy/dataset-medium/elevations')
# Collect the image files found under each folder.
imgNames = get_image_files(train_a_path)
lblNames = get_image_files(label_a_path)
eleNames = get_image_files(elev_path)
# Open the first elevation file as a mask and compute its distinct values.
# The bare np.unique(...) expression discards its result; it is only displayed
# when the statement is run interactively (e.g. as the last line of a cell).
eleFileNameA = eleNames[0]
eleFile = PILMask.create(eleFileNameA)
# eleFile.show(cmap='tab20')
np.unique(eleFile)
# Open one randomly chosen label file, show it and compute its distinct values.
# random.randint is inclusive at both ends, so the index ranges over 1..19 and
# lblNames[0] is never picked.
lblFileNameA = lblNames[random.randint(1, 19)]
lblFile = PILMask.create(lblFileNameA)
lblFile.show(cmap='tab20')
np.unique(lblFile)
def imageChipGet(dataset):
    """Create the image/label chips for ``dataset`` unless a chip folder exists.

    Chips are generated with ``libs.images2chips.run(dataset)`` only when
    neither ``<dataset>/image-chips`` nor ``<dataset>/label-chips`` is
    present.  If at least one of the two folders exists, nothing is generated
    and a message asking to remove the folders is printed instead.

    Args:
        dataset: path (or path-like string) of the dataset root folder.
    """
    image_chips = f'{dataset}/image-chips'
    label_chips = f'{dataset}/label-chips'

    # Guard clause: any existing chip folder means "do not regenerate".
    if os.path.exists(image_chips) or os.path.exists(label_chips):
        print(
            f'chip folders "{image_chips}" and "{label_chips}" already exist, remove them to recreate chips.')
        return

    print("creating chips")
    libs.images2chips.run(dataset)
# imageChipGet('dataset-medium')
# RGB colour assigned to each ISPRS class index; the position in the list is
# the class index (0..6).
paletteISPRS = dict(enumerate([
    (255, 255, 255),  # 0: Impervious surfaces (white)
    (0, 0, 255),      # 1: Buildings (blue)
    (0, 255, 255),    # 2: Low vegetation (cyan)
    (0, 255, 0),      # 3: Trees (green)
    (255, 255, 0),    # 4: Cars (yellow)
    (255, 0, 0),      # 5: Clutter (red)
    (0, 0, 0),        # 6: Undefined (black)
]))
# RGB colour assigned to each DroneDeploy class index; the position in the
# list is the class index (0..7).
paletteDDSG = dict(enumerate([
    (230, 25, 75),    # 0: BUILDING
    (145, 30, 180),   # 1: CLUTTER
    (60, 180, 75),    # 2: VEGETATION
    (245, 130, 48),   # 3: WATER
    (255, 255, 255),  # 4: GROUND
    (0, 130, 200),    # 5: CAR
    (255, 0, 255),    # 6: IGNORE
    (0, 0, 0),        # 7: Undefined (black)
]))
# Same classes as paletteDDSG, but every colour has its three channels in the
# reverse order (blue, green, red).
paletteDDSG2 = dict(enumerate([
    (75, 25, 230),    # 0: BUILDING
    (180, 30, 145),   # 1: CLUTTER
    (75, 180, 60),    # 2: VEGETATION
    (48, 130, 245),   # 3: WATER
    (255, 255, 255),  # 4: GROUND
    (200, 130, 0),    # 5: CAR
    (255, 0, 255),    # 6: IGNORE
    (0, 0, 0),        # 7: Undefined (black)
]))
# convert to gray scale labels
def getGrayScaleValue(palette):
    """Map every palette index to the grey level of its RGB colour.

    The grey level is the weighted sum ``0.299*R + 0.587*G + 0.114*B``
    (computed as ``R*299/1000 + G*587/1000 + B*114/1000``), rounded with the
    built-in ``round``.

    Args:
        palette: dict mapping a class index to a colour whose first three
            items are the red, green and blue components.

    Returns:
        dict with the same keys as ``palette`` and the rounded grey level as
        value.
    """
    def _luma(colour):
        # Only the first three components are read, exactly as indexed here.
        red, green, blue = colour[0], colour[1], colour[2]
        return red * 299 / 1000 + green * 587 / 1000 + blue * 114 / 1000

    return {index: round(_luma(colour)) for index, colour in palette.items()}
# Grey level of every DroneDeploy class colour.  The return value is not
# stored, so it is only visible when this line is run interactively.
getGrayScaleValue(paletteDDSG)
# Conversion starting from the grey-scale values
def converFromGray(lblname, palette):
    """Convert a colour-coded label file into a mask of class indices.

    The file is opened with ``PILMask.create`` (so the mask values are read,
    not the RGB triples) and every pixel whose value equals the grey level of
    a palette colour, as computed by ``getGrayScaleValue``, receives that
    colour's class index.  Pixels matching no palette entry keep the value 0.

    Args:
        lblname: label file, forwarded to ``PILMask.create``.
        palette: dict mapping a class index to an RGB colour.

    Returns:
        ``PILMask`` built from a ``uint8`` array of class indices.
    """
    gray_pixels = np.array(PILMask.create(lblname))
    gray_of_class = getGrayScaleValue(palette)

    # Write into a fresh all-zero array instead of relabelling gray_pixels in
    # place (i.e. avoid ``gray_pixels[gray_pixels == g] = idx``): with black
    # (0, 0, 0) in the palette it would otherwise be impossible to tell a 0
    # produced by the mapping from a 0 already present in the image.
    n_rows, n_cols = gray_pixels.shape[0], gray_pixels.shape[1]
    class_map = np.zeros((n_rows, n_cols), dtype=np.uint8)
    for class_idx, gray_level in gray_of_class.items():
        class_map[gray_pixels == gray_level] = class_idx

    return PILMask.create(class_map)
# Try the grey-level conversion on one DroneDeploy label file and compute the
# class indices it contains.
temp = converFromGray(lblNames[3], paletteDDSG)
np.unique(temp)
# There is also a second way to write this conversion:
# https://github.com/damminhtien/deepnet-for-semantic-labeling-photogrammetry/blob/master/Insight-data-potsdam.ipynb
# Conversion starting from the RGB values
# First build the inverted colour palette (colour -> class index)
def getInverPalette(palette):
    """Return ``palette`` with keys and values swapped (colour -> class index).

    When several keys share the same value, the key that comes last in
    iteration order is the one kept.

    Args:
        palette: dict whose values are hashable (e.g. RGB tuples).

    Returns:
        New dict mapping each value of ``palette`` to its key.
    """
    return {colour: index for index, colour in palette.items()}
# getInverPalette(paletteDDSG)
def converFromRGB(lblname, palette):
    """Convert a colour-coded label file into a mask of class indices via RGB.

    The file is opened with ``PILImage.create`` and each pixel whose three
    channel values equal a palette colour receives that colour's class index.
    Pixels matching no palette colour keep the value 0.  The distinct values
    found in the opened image are printed as a side effect.

    Args:
        lblname: label file, forwarded to ``PILImage.create``.
        palette: dict mapping a class index to an RGB tuple.

    Returns:
        ``PILMask`` built from a ``uint8`` array of class indices.
    """
    rgb_pixels = np.array(PILImage.create(lblname))
    class_of_colour = getInverPalette(palette)
    print(np.unique(rgb_pixels))

    n_rows, n_cols = rgb_pixels.shape[0], rgb_pixels.shape[1]
    class_map = np.zeros((n_rows, n_cols), dtype=np.uint8)
    for colour, class_idx in class_of_colour.items():
        # Reshape the colour from (3,) to (1, 1, 3) for the element-wise
        # comparison, then reduce over axis 2 so a pixel is selected only
        # when all three channel values are equal.
        target = np.array(colour).reshape(1, 1, 3)
        same_colour = np.all(rgb_pixels == target, axis=2)
        class_map[same_colour] = class_idx

    return PILMask.create(class_map)
# Try the RGB conversion on one DroneDeploy label file and compute the class
# indices it contains.
temp = converFromRGB(lblNames[2], paletteDDSG)
np.unique(temp)
# Start drawing the pie charts
# DroneDeploy class names, in class-index order (0..6).
labels = ['BUILDING', 'CLUTTER', 'VEGETATION',
'WATER', 'GROUND', 'CAR', 'IGNORE']
# Pie-slice colours for the DroneDeploy classes: the hex form of the
# paletteDDSG colours 0..6, with 'whitesmoke' in place of pure white.
colrDDSG = ['#e6194b', '#911eb4', '#3cb44b',
'#f58230', 'whitesmoke', '#0082c8', '#ff00ff']
# Pie-slice colours for the ISPRS classes: the hex form of the paletteISPRS
# colours 0..5, with 'whitesmoke' in place of pure white.
colrISPRS = ['whitesmoke', '#0000ff', '#00FFFF',
'#00FF00', '#FFFF00', '#FF0000']
# Assemble a pie-chart function
def plotPieChart(lblname, palette, title='dataset', draw=True):
    """Compute, and optionally plot, the class composition of one label image.

    The label file is converted to class indices with ``converFromGray`` and
    the pixels of every class are counted.

    Args:
        lblname: label file, forwarded to ``converFromGray``.
        palette: dict mapping a class index to an RGB colour.  A palette equal
            to ``paletteDDSG`` selects the seven DroneDeploy classes; any
            other palette is treated as ISPRS (six classes).
        title: dataset name inserted into the chart title.
        draw: the pie chart is drawn only when this is exactly ``True``.

    Returns:
        Tuple ``(n_pixel, pixelCount)``.  ``n_pixel`` is a NumPy array holding
        each class's share of the counted pixels, rounded to 8 decimals;
        ``pixelCount`` is a list with the raw pixel count of each class.
        Both have one entry per chart label.
    """
    if palette == paletteDDSG:
        colors = colrDDSG
        label = ['BUILDING', 'CLUTTER', 'VEGETATION',
                 'WATER', 'GROUND', 'CAR', 'IGNORE']
    else:
        colors = colrISPRS
        label = ['Impervious Surface', 'Buildings', 'VEGETATION',
                 'Tree', 'Cars', 'Clutter']
    n_classes = len(label)

    _temp = converFromGray(lblname=lblname, palette=palette)
    imageArray = image2tensor(_temp).squeeze(0)
    num, counts = np.unique(imageArray, return_counts=True)

    pixelCount = [0] * n_classes
    for class_idx, count in zip(num, counts):
        class_idx = int(class_idx)
        # Both palettes end with an "Undefined" entry that has no chart label.
        # Such pixels are left out of the statistics instead of indexing past
        # the end of the count list.
        if class_idx < n_classes:
            pixelCount[class_idx] = int(count)

    total = sum(pixelCount)
    if total > 0:
        n_pixel = np.round(np.asarray(pixelCount) / total, 8)
    else:
        # Only reachable when no pixel belongs to a labelled class; report
        # zero shares rather than dividing by zero.
        n_pixel = np.zeros(n_classes)

    # A pie chart of all-zero shares is meaningless, so it is skipped.
    if draw is True and total > 0:
        _, ax = plt.subplots(figsize=(6, 6))
        ax.pie(n_pixel.tolist(), labels=label,
               autopct='%1.2f%%', colors=colors)
        ax.set_title(f"class components in {title} dataset")
    return n_pixel, pixelCount
# Draw the class pie chart of one randomly chosen DroneDeploy label file and
# keep only the per-class shares.
# NOTE(review): random.randint(1, 20) is inclusive at both ends, so this needs
# lblNames to hold at least 21 entries and never picks lblNames[0] — confirm
# against the dataset size.
aa, _ = plotPieChart(lblNames[random.randint(1, 20)],
paletteDDSG, 'dronedeploy')
# Gather the statistics over all label files
def get_all_piestatics(palette, lblNames, title='dataset'):
    """Aggregate the class composition over many label files and plot it.

    ``plotPieChart`` is called (without drawing) for every file.  The
    per-image class shares are summed and then renormalised, so every image
    carries the same weight whatever its size; the raw pixel counts are summed
    separately.  One pie chart of the aggregated shares is drawn.

    Args:
        palette: dict mapping a class index to an RGB colour.  A palette equal
            to ``paletteDDSG`` selects the seven DroneDeploy classes; any
            other palette is treated as ISPRS (six classes).
        lblNames: iterable of label files.
        title: dataset name inserted into the chart title.

    Returns:
        Tuple ``(n_all_pixel, pixelAllCount)``: a NumPy array with the
        aggregated class shares rounded to 8 decimals, and a list with the
        total pixel count of each class.
    """
    if palette == paletteDDSG:
        label = ['BUILDING', 'CLUTTER', 'VEGETATION',
                 'WATER', 'GROUND', 'CAR', 'IGNORE']
        colors = colrDDSG
    else:
        label = ['Impervious Surface', 'Buildings', 'VEGETATION',
                 'Tree', 'Cars', 'Clutter']
        colors = colrISPRS

    # One accumulator slot per chart label.
    n_all_pixel = [0] * len(label)
    pixelAllCount = [0] * len(label)

    for lbl_file in lblNames:
        shares, counts = plotPieChart(lbl_file, palette, draw=False)
        for slot, share in enumerate(shares):
            n_all_pixel[slot] += share
            pixelAllCount[slot] += counts[slot]

    n_all_pixel = np.round(n_all_pixel / np.sum(n_all_pixel), 8)

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(n_all_pixel.tolist(), labels=label,
           autopct='%1.2f%%', colors=colors)
    ax.set_title(f"Composition of Each Class in {title} Dataset")
    return n_all_pixel, pixelAllCount
# Aggregate the class statistics over every DroneDeploy label file
# (cc: class shares, dd: pixel counts).
cc, dd = get_all_piestatics(paletteDDSG, lblNames, 'DroneDeploy')
# Get the statistics for the Potsdam dataset
data_path = Path('/home/ubuntu/.fastai/data/isprs/')
path_img = data_path / 'Potsdam/2_Ortho_RGB/train_pick'
path_lbl = data_path / 'Potsdam/5_Labels_for_participants'
# NOTE: this rebinds imgNames, which previously held the DroneDeploy images.
imgNames = get_image_files(path_img)
lbl_names = get_image_files(path_lbl)
# aa: class shares, bb: pixel counts (aa is rebound here).
aa, bb = get_all_piestatics(paletteISPRS, lbl_names, 'Potsdam')
aa = array([0.28464172, 0.26721742, 0.23536882, 0.14624186, 0.01689545, 0.04963473])
bb = [245930445, 230875852, 203358663, 126352970, 14597667, 42884403]
cc = array([0.05577413, 0.01997206, 0.10434894, 0.01207262, 0.37689098, 0.00380131, 0.42713996])
dd = [134082091, 30949408, 277077545, 35463163, 860128086, 8950954, 1088831663]
ee = array([0.27606349, 0.26086128, 0.21335261, 0.231575, 0.01192941, 0.00621821])
ff = [21815349, 20417332, 16272917, 18110438, 945687, 526083]
# Get the statistics for the Vaihingen dataset
data_path = Path('/home/ubuntu/.fastai/data/isprs/')
path_img = data_path / 'Vaihingen/images'
path_lbl = data_path / 'Vaihingen/label'
# imgNames and lbl_names are rebound to the Vaihingen files here.
imgNames = get_image_files(path_img)
lbl_names = get_image_files(path_lbl)
# The aggregation call itself is left disabled:
# ee,ff = get_all_piestatics(paletteISPRS, lbl_names, 'Vaihingen')