import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
import sys
from skimage import io
from skimage.util import random_noise
import modules.MNIST
from modules.MNIST import MNIST
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3AE1')
FIDLE - Environment initialization
Version : 2.3.0 Run id : K3AE1 Run dir : ./run/K3AE1 Datasets dir : /gpfswork/rech/mlh/uja62cb/fidle-project/datasets-fidle Start time : 03/03/24 21:07:48 Hostname : r6i1n1 (Linux) Tensorflow log level : Warning + Error (=1) Update keras cache : False Update torch cache : False Save figs : ./run/K3AE1/figs (True) keras : 3.0.4 numpy : 1.24.4 sklearn : 1.3.2 yaml : 6.0.1 skimage : 0.22.0 matplotlib : 3.8.2 pandas : 2.1.3 torch : 2.1.1
1.2 - Parameters¶
prepared_dataset
: Filename of the future prepared dataset (example : ./data/mnist-noisy.h5)
scale
: Dataset scale. 1 means 100% of the dataset - set 0.1 for tests
progress_verbosity
: Verbosity of progress bar: 0=silent, 1=progress bar, 2=One line
# ---- Notebook parameters (may be overridden in batch mode, see fidle.override below)
prepared_dataset = './data/mnist-noisy.h5'  # where the prepared (clean+noisy) dataset is saved
scale = 1                                   # fraction of the dataset to use (1 = 100%)
progress_verbosity = 1                      # 0=silent, 1=progress bar, 2=one line
Override parameters (batch mode) - Just forget this cell
fidle.override('prepared_dataset', 'scale', 'progress_verbosity')
** Overrided parameters : ** scale : 1 progress_verbosity : 2
Step 2 - Get original dataset¶
We load :
clean_data
: Original and clean images - This is what we will want to obtain at the output of the AE
class_data
: Image classes - Useless, because the training will be unsupervised
We'll build :
noisy_data
: Noisy images - These are the images that we will give as input to our AE
clean_data, class_data = MNIST.get_origine(scale=scale)
Dataset loaded. Normalized. Reshaped. Concatenate. x shape : (70000, 28, 28, 1) y shape : (70000,)
Step 3 - Add noise¶
We add noise to the original images (clean_data) to obtain noisy images (noisy_data)
Need 30-40 seconds
def noise_it(data):
    """Return a noisy copy of *data*, leaving the input untouched.

    Four kinds of noise are stacked on every image, in order:
    gaussian, salt & pepper, poisson, speckle.

    Args:
        data : array of images, shape (n, h, w, c) — values assumed in [0,1]
               (required by skimage.random_noise) — TODO confirm.

    Returns:
        A new array with the same shape and dtype as *data*.
    """
    out = np.empty_like(data)
    n = len(data)
    for idx in range(n):
        fidle.utils.update_progress('Add noise : ', idx + 1, n, verbosity=progress_verbosity)
        # random_noise returns a new float array at each step; the input is never mutated
        img = random_noise(data[idx], mode='gaussian', mean=0, var=0.3)
        img = random_noise(img, mode='s&p', amount=0.2, salt_vs_pepper=0.5)
        img = random_noise(img, mode='poisson')
        img = random_noise(img, mode='speckle', mean=0, var=0.1)
        out[idx] = img
    print('Done.')
    return out
# ---- Add noise to input data : x_data
#      noise_it() returns a new, noisy copy; clean_data is left unchanged
noisy_data = noise_it(clean_data)
Add noise : [########################################] 100.0% of 70000 Done.
Step 4 - Have a look¶
# ---- Sanity check : shapes must match, then preview a few image pairs
print('Clean dataset (clean_data) : ',clean_data.shape)
print('Noisy dataset (noisy_data) : ',noisy_data.shape)
fidle.utils.subtitle("Noisy images we'll have in input (or x)")
fidle.scrawler.images(noisy_data[:5], None, indices='all', columns=5, x_size=3,y_size=3, interpolation=None, save_as='01-noisy')
fidle.utils.subtitle('Clean images we want to obtain (or y)')
fidle.scrawler.images(clean_data[:5], None, indices='all', columns=5, x_size=3,y_size=3, interpolation=None, save_as='02-original')
Clean dataset (clean_data) : (70000, 28, 28, 1) Noisy dataset (noisy_data) : (70000, 28, 28, 1)
Noisy images we'll have in input (or x)
Clean images we want to obtain (or y)
Step 5 - Shuffle dataset¶
# ---- Shuffle the three arrays with a single shared permutation,
#      so (clean, noisy, class) triplets stay aligned
p = np.random.permutation(len(clean_data))
clean_data = clean_data[p]
noisy_data = noisy_data[p]
class_data = class_data[p]
print('Shuffled.')
Shuffled.
Step 6 - Save our prepared dataset¶
MNIST.save_prepared_dataset( clean_data, noisy_data, class_data, filename=prepared_dataset )
Saved. clean_data shape is : (70000, 28, 28, 1) noisy_data shape is : (70000, 28, 28, 1) class_data shape is : (70000,)
fidle.end()
End time : 03/03/24 21:08:44
Duration : 00:00:56 851ms
This notebook ends here :-)
https://fidle.cnrs.fr