!pip install easyimages
!pip install torchmetrics
!pip install sentence-transformers
!pip install lpips
!pip install tabulate
!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cu113.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-1.11.0+cu113.html
!pip install torch-geometric


# functional
import os
import sys
import time
import glob
import zipfile
import pickle

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
import torchvision.utils as vutils

import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from natsort import natsorted
from numba import prange

# visualization
import matplotlib.pyplot as plt
%matplotlib inline
from tabulate import tabulate

# images
from skimage import io
from skimage import transform
from PIL import Image

# metrics
import torchmetrics
import lpips


# Directory layout used by the rest of the notebook.
DATA_ROOT = 'data'
DATA_FOLDER = 'frog_images'
DATASET_PATH = os.path.join(DATA_ROOT, DATA_FOLDER)
PREPROCESSED_FOLDER = os.path.join(DATA_ROOT, "preprocessed")
MODEL_WEIGHTS_PATH = 'model_weights'

# Create every working directory up front; directories that already exist are left alone.
for _directory in (DATASET_PATH, PREPROCESSED_FOLDER, MODEL_WEIGHTS_PATH):
    os.makedirs(_directory, exist_ok=True)


# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# gdrive link: https://drive.google.com/file/d/1V_UP_MQfTVaiKS0h48QNUiNOv8VgIR-w/view?usp=sharing

# Google Drive file id and the local file name the download is written to.
file_id = '1V_UP_MQfTVaiKS0h48QNUiNOv8VgIR-w'
file_name = 'frogs_metadata.csv'
# The inner wget requests the download page (saving cookies to /tmp/cookies.txt) and sed extracts
# the "confirm" token from it; the outer wget reuses those cookies to download the file into
# {file_name}. The cookie file is removed afterwards when the download succeeds.
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={file_id}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={file_id}" -O {file_name} && rm -rf /tmp/cookies.txt

--2022-04-27 15:18:11--  https://docs.google.com/uc?export=download&confirm=&id=1V_UP_MQfTVaiKS0h48QNUiNOv8VgIR-w
Resolving docs.google.com (docs.google.com)... 142.251.8.100, 142.251.8.139, 142.251.8.138, ...
Connecting to docs.google.com (docs.google.com)|142.251.8.100|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-0g-54-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/422v9p1em0h5p97rcqjd0ke0uqsagvr9/1651072650000/04490789293409028596/*/1V_UP_MQfTVaiKS0h48QNUiNOv8VgIR-w?e=download [following]
Warning: wildcards not supported in HTTP.
--2022-04-27 15:18:15--  https://doc-0g-54-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/422v9p1em0h5p97rcqjd0ke0uqsagvr9/1651072650000/04490789293409028596/*/1V_UP_MQfTVaiKS0h48QNUiNOv8VgIR-w?e=download
Resolving doc-0g-54-docs.googleusercontent.com (doc-0g-54-docs.googleusercontent.com)... 108.177.125.132, 2404:6800:4008:c01::84
Connecting to doc-0g-54-docs.googleusercontent.com (doc-0g-54-docs.googleusercontent.com)|108.177.125.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 90906708 (87M) [text/csv]
Saving to: ‘frogs_metadata.csv’

frogs_metadata.csv  100%[===================>]  86.70M   214MB/s    in 0.4s    

2022-04-27 15:18:16 (214 MB/s) - ‘frogs_metadata.csv’ saved [90906708/90906708]


frogs_df = pd.read_csv("frogs_metadata.csv", sep=",")

# remove invalid and duplicate urls
frogs_df = frogs_df.dropna(subset=['image_url']).drop_duplicates(subset=['image_url'])

# remove non-image types e.g. .gif files
# The extension of each url is computed once and reused for both reporting and filtering.
url_extensions = frogs_df['image_url'].apply(lambda x: os.path.splitext(x)[1])
frogs_filetype = url_extensions.unique()
print(f"Image file extensions are {frogs_filetype}")
print("Removing .gif files...")
is_image = ~url_extensions.isin(['.gif'])
frogs_df = frogs_df[is_image]
frogs_filetype = url_extensions[is_image].unique()
print(f"Done. Image file extensions are {frogs_filetype}")

print(f"Total frogs instances: {len(frogs_df)}")
frogs_df.head()

Image file extensions are ['.jpg' '.jpeg' '.png' '.gif']
Removing .fig files...
Done. Image file extensions are ['.jpg' '.jpeg' '.png']
Total frogs instances: 186902


from easyimages import EasyImageList

def visualize_images_from_df(df, samples = 128):
    """
    This method takes in a dataframe containing links to the image
    and the source and visualizes them. The required keys are:
        * "image_url" -> url where image can be pulled from
        * "url" -> url of the sample

    At most `samples` images are drawn at random (without replacement)
    and rendered as an HTML grid.
    """
    urls = df['image_url'].tolist()
    observation_urls = df['url'].tolist()
    easy_list = EasyImageList.from_list_of_urls(urls, lazy = True)

    # Add observation url into label, which is displayed as alt text.
    # A plain range is used: numba's prange is only meaningful inside jitted code.
    for i in range(len(easy_list)):
        easy_list[i].label = observation_urls[i]

    _ = easy_list.visualize_grid_html(np.random.choice(easy_list.images, min(len(easy_list), samples), replace = False))


visualize_images_from_df(frogs_df, samples=32)


print("Columns are: \n", frogs_df.columns)

Columns are: 
 Index(['id', 'observed_on_string', 'observed_on', 'time_observed_at',
       'time_zone', 'user_id', 'user_login', 'created_at', 'updated_at',
       'quality_grade', 'license', 'url', 'image_url', 'sound_url', 'tag_list',
       'description', 'num_identification_agreements',
       'num_identification_disagreements', 'captive_cultivated',
       'oauth_application_id', 'place_guess', 'latitude', 'longitude',
       'positional_accuracy', 'private_place_guess', 'private_latitude',
       'private_longitude', 'public_positional_accuracy', 'geoprivacy',
       'taxon_geoprivacy', 'coordinates_obscured', 'positioning_method',
       'positioning_device', 'species_guess', 'scientific_name', 'common_name',
       'iconic_taxon_name', 'taxon_id'],
      dtype='object')


frogs_df = frogs_df.dropna(subset=['description'])
print(f"Total frogs instances: {len(frogs_df)}")

Total frogs instances: 20964


# Words whose close neighbours (by embedding similarity) are used to build the blacklist.
seed_words = ['dead','spawn','egg','tadpole','nest','brood']

# Split every lower-cased description into word tokens. The pattern is a raw
# string so that "\w" is not treated as an (invalid) string escape sequence.
all_words = frogs_df['description'].str.lower().str.findall(r"\w+")
word_pool = set()
for description_words in all_words:
    word_pool.update(description_words)
word_pool = list(word_pool)
print("Number of unique words:", len(word_pool))

Number of unique words: 19390


from sentence_transformers import SentenceTransformer

def retrieve_word_embeddings(words: list):
    """
    Encodes `words` with the 'all-MiniLM-L6-v2' SentenceTransformer model and
    returns the result of `encode(..., convert_to_tensor = True)`.
    A new model instance is created on every call.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    return encoder.encode(words, convert_to_tensor = True)


pool_embeddings = retrieve_word_embeddings(word_pool)
seed_embeddings = retrieve_word_embeddings(seed_words)

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/10.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]


from sklearn.metrics.pairwise import cosine_similarity

def compute_similarity_score(seed_embeddings: list, all_embeddings: list) -> list:
    """
    Computes the cosine similarity between every seed embedding [n x d] and
    every embedding in all_embeddings [w x d], giving an [n x w] score matrix.
    Both arguments must provide a `.cpu()` method; it is called on each of
    them before they are handed to `cosine_similarity`.
    """
    seeds_on_cpu = seed_embeddings.cpu()
    pool_on_cpu = all_embeddings.cpu()
    return cosine_similarity(seeds_on_cpu, pool_on_cpu)

def print_topk_words(query_words: list, scores: list, key_words: list, k = 10, threshold = 0.5):
    """
    Takes in a n x w list of scores, and a list of w words, and finds the top k 
    similar words for each of the n words.

    Args:
        query_words: the n words the rows of `scores` belong to
        scores:      n x w matrix of similarity scores
        key_words:   the w candidate words the columns of `scores` belong to
        k:           number of candidates printed per query word; values larger
                     than w are clamped to w
        threshold:   only (word, score) pairs with score > threshold are returned

    Returns:
        A flat list of (word, score) tuples, grouped by query word and sorted
        by descending score within each group.
    """
    scores = np.asarray(scores)

    # np.argpartition raises if k exceeds the number of candidates, so clamp it
    k = min(k, scores.shape[1])
    if k <= 0:
        return []

    indices = np.argpartition(scores, -k)[:,-k:] # unordered top k columns, for each word
    topk_words = np.take(np.array(key_words), indices)
    topk_scores = scores[np.arange(scores.shape[0])[:, None], indices]

    # print top k word, score for each word
    out_list = []
    for i, (words, word_scores) in enumerate(zip(topk_words, topk_scores)):
        print(f"Word {i+1}: {query_words[i]}")
        sorted_list = sorted(zip(words, word_scores), key=lambda t: t[1], reverse=True)
        for word, score in sorted_list:
            print("[{:.5f}] - {}".format(score, word))
        print()

        out_list.extend(wordscore for wordscore in sorted_list if wordscore[1] > threshold)

    return out_list


scores = compute_similarity_score(seed_embeddings, pool_embeddings)
sorted_word_list = print_topk_words(
    query_words=seed_words, 
    scores=scores, 
    key_words=word_pool, 
    k=20, 
    threshold=0.75
)

Word 1: dead
[1.00000] - dead
[0.80403] - deceased
[0.77241] - died
[0.76030] - killed
[0.73658] - alive
[0.73513] - dies
[0.71715] - death
[0.70166] - lifeless
[0.68499] - die
[0.66718] - dying
[0.61315] - killing
[0.60743] - lives
[0.60378] - buried
[0.59865] - demise
[0.59791] - revive
[0.59344] - kill
[0.58953] - funeral
[0.58488] - lived
[0.56652] - living
[0.55951] - live

Word 2: spawn
[1.00000] - spawn
[0.85648] - spawning
[0.85346] - spawned
[0.53511] - create
[0.53016] - emerge
[0.50725] - nests
[0.50455] - brood
[0.49718] - frogspawn
[0.49277] - reproduce
[0.48962] - nest
[0.48770] - populated
[0.48687] - creating
[0.47871] - emergent
[0.47714] - generated
[0.47676] - reproduction
[0.47652] - emergents
[0.47547] - swarming
[0.47353] - swarm
[0.47271] - feeder
[0.47156] - generates

Word 3: egg
[1.00000] - egg
[0.91208] - eggs
[0.59261] - chicken
[0.56301] - chickens
[0.55842] - breeding
[0.55607] - hatchling
[0.55018] - duck
[0.54832] - feathers
[0.54711] - hatching
[0.54572] - hatches
[0.53519] - hatchlings
[0.53467] - bird
[0.53391] - scrambled
[0.52584] - breed
[0.52514] - ovum
[0.52442] - milk
[0.51899] - hatchery
[0.51773] - ovular
[0.51058] - cooked
[0.50778] - food

Word 4: tadpole
[1.00000] - tadpole
[0.94692] - tadpolee
[0.91085] - tadpoles
[0.68520] - taddies
[0.67378] - taddie
[0.66052] - frogget
[0.65100] - frog
[0.65100] - frög
[0.61493] - toad
[0.60707] - frogle
[0.60536] - frogsand
[0.60484] - frogamet
[0.60146] - frogid
[0.60076] - frogstoads
[0.59992] - froglet
[0.59320] - frogsandtoads
[0.59014] - toadlet
[0.58947] - froge
[0.58681] - frogge
[0.58627] - tads

Word 5: nest
[1.00000] - nest
[0.89375] - neste
[0.88684] - nesting
[0.88110] - nesters
[0.80639] - nests
[0.66616] - brood
[0.52981] - hatchlings
[0.52541] - roof
[0.52520] - spawned
[0.52234] - offspring
[0.51910] - tree
[0.51226] - tent
[0.50845] - swarming
[0.50548] - swarm
[0.49553] - warbler
[0.49484] - hatchling
[0.49325] - spawning
[0.49097] - hatchery
[0.49064] - breeding
[0.49027] - flock

Word 6: brood
[1.00000] - brood
[0.69717] - nesters
[0.68656] - nests
[0.66616] - nest
[0.65030] - breeding
[0.62823] - nesting
[0.62466] - flock
[0.61566] - neste
[0.59993] - offspring
[0.59702] - swarming
[0.58434] - birds
[0.57624] - hatchlings
[0.57623] - swarm
[0.57602] - birdbaths
[0.56570] - farm
[0.56547] - spawning
[0.56077] - hatchling
[0.55523] - birding
[0.55221] - feeder
[0.54155] - progeny


# Keep only the distinct words of the (word, score) pairs that passed the threshold.
blacklist = {word for word, _score in sorted_word_list}
print("Blacklisted words are:\n\t{}".format('\n\t'.join(blacklist)))
print("No. words:", len(blacklist))

Blacklisted words are:
	spawned
	eggs
	neste
	dead
	tadpolee
	nests
	killed
	nest
	nesters
	died
	brood
	deceased
	nesting
	tadpoles
	spawning
	tadpole
	egg
	spawn
No. words: 18


# Lower-case the descriptions once; the result does not depend on the word being inspected.
lowered_descriptions = frogs_df['description'].str.lower()
for word in blacklist:
    # show how many descriptions contain the word, together with a few of their images
    contains_word = lowered_descriptions.str.contains(word)
    df_word = frogs_df[contains_word]
    print(f"For {word}, {len(df_word)} samples.")
    visualize_images_from_df(df_word, 16)
    print('\n')

For eggs, 165 samples.

For neste, 2 samples.

For tadpole, 325 samples.

For nest, 37 samples.

For dead, 301 samples.

For nesters, 1 samples.

For nesting, 2 samples.

For spawn, 65 samples.

For egg, 278 samples.

For deceased, 25 samples.


# Drop every sample whose description mentions a blacklisted word.
# `isin` would only match descriptions that are *exactly* one blacklisted word,
# so the descriptions are tokenised and each token is checked against the
# blacklist. NOTE: this removes more rows than the exact-match filter did, so
# the counts recorded further down were produced with fewer rows removed.
description_tokens = frogs_df['description'].str.lower().str.findall(r"\w+")
has_blacklisted_word = description_tokens.apply(
    lambda tokens: isinstance(tokens, list) and not blacklist.isdisjoint(tokens)
)
frogs_df = frogs_df[~has_blacklisted_word]
print("Number after removing:", len(frogs_df))

Number after removing: 20898


# Show species histogram: distribution of how many images each species has
freq = frogs_df["common_name"].value_counts()
counts, edges, _ = plt.hist(freq, bins=10)
plt.title("Histogram of species counts")
plt.xlabel('Number of images in species (counts)')
plt.ylabel('Frequency')
plt.show()

# summarise the first (lowest-count) bin
first_bin_upper_edge = round(edges[1])
first_bin_count = int(counts[0])
first_bin_percentage = round(counts[0] / counts.sum(),2)*100
print(f"Number of species in first bin (with count < {first_bin_upper_edge}): {first_bin_count} ({first_bin_percentage}%)")

Number of species in first bin (with count < 134): 858 (96.0%)


def groupby_plot_hist(df, column_name, count_threshold = 100):
    """
    Plots a bar chart of the values of `column_name` that occur more than
    `count_threshold` times in `df`, and prints those counts together with
    a one-line summary.
    """
    frequent = df[column_name].value_counts()
    thres_counts = frequent[frequent > count_threshold]
    thres_counts.plot(kind = "bar")
    print(thres_counts)
    # name the grouped column in the summary instead of always calling the groups "species"
    print(len(thres_counts), f"{column_name} values above threshold of {count_threshold}. Total is", thres_counts.sum())
    plt.show()


count_threshold = 150
column_name = "common_name"
groupby_plot_hist(frogs_df, column_name, count_threshold = count_threshold)

American Toad                 1328
American Bullfrog             1189
Green Frog                    1097
Gulf Coast Toad                897
Green Treefrog                 649
European Common Frog           559
European Toad                  542
Wood Frog                      529
Western Leopard Toad           496
Northern Leopard Frog          463
Spring Peeper                  390
Cuban Tree Frog                379
Gray Treefrog                  371
Northern Pacific Tree Frog     358
Western Toad                   355
Gray Treefrog Complex          290
Southern Toad                  277
Cane Toad                      265
Asian Common Toad              260
Southern Leopard Frog          248
Pickerel Frog                  242
Blanchard's Cricket Frog       239
Fowler's Toad                  195
Squirrel Tree Frog             194
Sierran Tree Frog              194
Giant Toad                     192
Clicking Stream Frog           190
Woodhouse's Toad               189
Australian Green Tree Frog     187
Cope's Gray Treefrog           184
Southern Cricket Frog          174
Name: common_name, dtype: int64
31 species above threshold of 150. Total is 13122


# Show timezone histogram: distribution of how many images each time zone has
freq = frogs_df["time_zone"].value_counts()
counts, edges, _ = plt.hist(freq, bins=10)
plt.title("Histogram of time zones")
plt.xlabel('Number of images in time zone (counts)')
plt.ylabel('Frequency')
plt.show()

# summarise the first (lowest-count) bin
first_bin_upper_edge = round(edges[1])
first_bin_count = int(counts[0])
first_bin_percentage = round(counts[0] / counts.sum(),2)*100
print(f"Number of time zones in first bin (with count < {first_bin_upper_edge}): {first_bin_count} ({first_bin_percentage}%)")

Number of time zones in first bin (with count < 596): 115 (94.0%)


count_threshold = 150
column_name = "time_zone"
groupby_plot_hist(frogs_df, column_name, count_threshold = count_threshold)

Eastern Time (US & Canada)     5956
Central Time (US & Canada)     2891
UTC                            2688
Pacific Time (US & Canada)     1911
Pretoria                       1338
Brisbane                        617
Mountain Time (US & Canada)     607
Sydney                          399
Rome                            340
Paris                           333
Hawaii                          327
Atlantic Time (Canada)          234
Jakarta                         219
Mexico City                     208
Taipei                          190
Bogota                          188
Brasilia                        159
Name: time_zone, dtype: int64
17 species above threshold of 150. Total is 18605


def filter_by_threshold_counts(df, column_names: list, count_threshold = 100):
    """
    Keeps only the rows whose value in every column of `column_names` occurs
    more than `count_threshold` times in the filtered result.

    Removing rows for one column can push a value of another column below the
    threshold, so full passes over all columns are repeated until one pass
    removes nothing. An empty `column_names` returns `df` unchanged.

    Args:
        df:              dataframe to filter (not modified)
        column_names:    columns whose value counts must exceed the threshold
        count_threshold: a value is kept only if it occurs more than this often

    Returns:
        The filtered dataframe.
    """
    out_df = df
    changed = True
    while changed:
        changed = False
        for column in column_names:
            keep = out_df.groupby(column)[column].transform("size") > count_threshold
            if not keep.all():
                # rows were removed: every column has to be rechecked in another pass
                out_df = out_df[keep]
                changed = True

    print("After filtering, left with", len(out_df), "samples.")
    return out_df


count_threshold = 150
column_names = ['common_name', 'time_zone']
frogs_df = filter_by_threshold_counts(frogs_df, column_names, count_threshold)

After filtering, left with 11517 samples.


visualize_images_from_df(frogs_df, samples=32)


import ntpath
import requests
from multiprocessing import Pool
from functools import partial

def download_image(id_url: tuple, save_dir: str):
    """
    Downloads one image into `save_dir` as "frogs_<id><extension>".

    The download is skipped when a file for this id already exists under any
    of the known extensions. Failures are reported with a printed message and
    never raised, so one bad url does not abort a whole batch of downloads.

    Args:
        id_url:   (id, image url) pair
        save_dir: directory the image is written to
    """
    from urllib.parse import urlparse

    EXTENSIONS = [".jpeg", ".png", ".jpg"]

    idx = None
    try:
        idx, img_url = id_url

        # check if exists
        candidates = (os.path.join(save_dir, "frogs_" + str(idx) + ext) for ext in EXTENSIONS)
        if any(os.path.exists(candidate) for candidate in candidates):
            return

        # get file; fail on HTTP errors instead of saving an error page as an image,
        # and do not let a stalled connection block the worker forever
        response = requests.get(img_url, timeout=30)
        response.raise_for_status()
        img_bytes = response.content

        # take the extension from the url path only, ignoring any query string
        _, file_ext = os.path.splitext(urlparse(img_url).path)

        # name
        FILENAME = "frogs_" + str(idx) + file_ext
        SAVE_PATH = os.path.join(save_dir, FILENAME)

        # save
        with open(SAVE_PATH, 'wb') as img_file:
            img_file.write(img_bytes)
        print(f'\r{FILENAME} was downloaded...', end = '', flush = True)
    except Exception as e:
        # best effort: report which sample failed and carry on
        print(f"Failed to download image {idx}: {e}")

def download_dataset_from_df(df, save_path):
    """
    Downloads the image of every row of `df` (columns "id" and "image_url")
    into `save_path`, using a pool of 10 worker processes.
    """
    id_url_list = df[['id', 'image_url']].apply(
        lambda row : (row["id"], row["image_url"]), axis = 1
    ).values

    # bind the target directory so that each worker only receives the (id, url) pair
    worker = partial(download_image, save_dir=save_path)
    with Pool(10) as pool:
        pool.map(worker, id_url_list)

    print(f"Finished downloading {len(id_url_list)} images.")


# download_dataset_from_df(frogs_df, DATASET_PATH)


# Gdrive link: https://drive.google.com/file/d/16sdPg_iXsDAWR_mPcpodhaxu9hRdEaXT/view?usp=sharing

# Google Drive file id and the local file name the archive is written to.
file_id = '16sdPg_iXsDAWR_mPcpodhaxu9hRdEaXT'
file_name = "frogs_dataset.zip"
# The inner wget requests the download page (saving cookies to /tmp/cookies.txt) and sed extracts
# the "confirm" token from it; the outer wget reuses those cookies to download the file into
# {file_name}. The cookie file is removed afterwards when the download succeeds.
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={file_id}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={file_id}" -O {file_name} && rm -rf /tmp/cookies.txt

# Unzip dataset into DATA_ROOT and report how many entries ended up in DATASET_PATH
if not os.path.exists(file_name):
    print("Zip file was not downloaded successfully! Please run the code block again.")
else:
    with zipfile.ZipFile(file_name, 'r') as ziphandler:
        ziphandler.extractall(DATA_ROOT)
    extracted_entries = glob.glob(os.path.join(DATASET_PATH, '*'))
    data_len = len(extracted_entries)
    print(f"Successfully downloaded and unzipped {data_len} images.")

--2022-04-27 15:21:24--  https://docs.google.com/uc?export=download&confirm=t&id=16sdPg_iXsDAWR_mPcpodhaxu9hRdEaXT
Resolving docs.google.com (docs.google.com)... 142.251.8.100, 142.251.8.101, 142.251.8.113, ...
Connecting to docs.google.com (docs.google.com)|142.251.8.100|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-10-54-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/he4pffj9a6v0li1jma4k8qit4a1l4e5a/1651072875000/04490789293409028596/*/16sdPg_iXsDAWR_mPcpodhaxu9hRdEaXT?e=download [following]
Warning: wildcards not supported in HTTP.
--2022-04-27 15:21:24--  https://doc-10-54-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/he4pffj9a6v0li1jma4k8qit4a1l4e5a/1651072875000/04490789293409028596/*/16sdPg_iXsDAWR_mPcpodhaxu9hRdEaXT?e=download
Resolving doc-10-54-docs.googleusercontent.com (doc-10-54-docs.googleusercontent.com)... 108.177.125.132, 2404:6800:4008:c01::84
Connecting to doc-10-54-docs.googleusercontent.com (doc-10-54-docs.googleusercontent.com)|108.177.125.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1317515380 (1.2G) [application/zip]
Saving to: ‘frogs_dataset.zip’

frogs_dataset.zip   100%[===================>]   1.23G   125MB/s    in 8.2s    

2022-04-27 15:21:33 (153 MB/s) - ‘frogs_dataset.zip’ saved [1317515380/1317515380]

Successfully downloaded and unzipped 11517 images.


from sklearn.model_selection import train_test_split

frogs_ids = frogs_df['id'].tolist()

# hold out 517 ids for testing first, then 1000 of the remaining ids for validation
train_val, test = train_test_split(frogs_ids, test_size=517)
train, val = train_test_split(train_val, test_size=1000)

for split_name, split_ids in (("train", train), ("validation", val), ("test", test)):
    print(f"Length of {split_name} split: {len(split_ids)}")

Length of train split: 10000
Length of validation split: 1000
Length of test split: 517


# with open("frogs_train.txt", "w") as f:
#     for i in train:
#         f.write(f"{i}\n")
# with open("frogs_val.txt", "w") as f:
#     for i in val:
#         f.write(f"{i}\n")
# with open("frogs_test.txt", "w") as f:
#     for i in test:
#         f.write(f"{i}\n")


# functional
import cv2
import pandas as pd
import PIL.Image
import numpy as np
import os
import time
import torch

from numba import prange
from skimage import io
from skimage import transform
from torch.utils.data import Dataset


# pickle
import pickle

class AnimalDataset(Dataset):
    """
    Custom Animal Dataset.

    Every sample is a dictionary with the keys:
        * "image"         -> damaged image (a box is zeroed out); the mask is
                             appended as an extra channel when concat_mask is set
        * "reconstructed" -> undamaged, aspect-ratio corrected image
        * "mask"          -> H x W x 1 tensor, 0 inside the removed box, 1 elsewhere
        * "gray"          -> grayscale version of the undamaged image

    Images are handled channel-last (H x W x C). When `local_dir_path` is
    given, preprocessed images are cached there as pickle files and reused.
    """

    # Class-level defaults. Every instance stores its own values in __init__,
    # so datasets created with different options do not affect each other.
    CROP_BOX_SIZES_DIVIDE_RANGE = (4, 6)
    FILE_POSTFIX = (".png", ".jpg", ".jpeg")

    def __init__(self, 
                 index_file_path: str, 
                 root_dir_path: str, 
                 file_prefix : str,
                 image_dimension: int, 
                 local_dir_path: str = None, 
                 transform = None,
                 concat_mask = True,
                 random_noise = True,
                 require_init = True,
                 drops = None,
                 center_mask = False,
                 divide_range = (4,6), 
                 file_postfix = (".png", ".jpg", ".jpeg")):
        """
        Args:
            index_file_path: Path to the file with indices. It is read with
                             header = 0, i.e. the first line is treated as a
                             column name and not as a sample.
            root_dir_path:   Directory with the images
            file_prefix:     Prefix put in front of each index to form a file name
            image_dimension: Side length of the square images that are produced
            local_dir_path:  Optional directory used to cache preprocessed images
            transform:       Callable that transforms sample
            concat_mask:     Append the mask to the damaged image as a channel
            random_noise:    Forwarded to the damaging step (currently unused there)
            require_init:    Require initialize() to be called before samples are read
            drops:           Optional row labels to drop from the index file
            center_mask:     Remove a fixed 10 x 10 box at the image centre
                             instead of a randomly sized and placed box
            divide_range:    (low, high); the random box side is the image
                             dimension divided by a value drawn from this range
            file_postfix:    File extensions that are tried for every index
        """

        # save other attributes
        self.root_dir_path = root_dir_path
        self.transform = transform
        self.local_dir_path = local_dir_path
        self.IM_DIMENSIONS = image_dimension
        self.CONCAT_MASK = concat_mask
        self.FILE_PREFIX = file_prefix
        self.INITIALIZED = False
        self.REQUIRE_INIT = require_init
        self.RANDOM_NOISE = random_noise
        self.RNG = np.random.RandomState(10)
        self.CENTER_MASK = center_mask

        # per-instance settings (stored on the instance, not on the class)
        self.CROP_BOX_SIZES_DIVIDE_RANGE = divide_range
        self.FILE_POSTFIX = tuple(file_postfix)

        # load file indices, then transform each index to one possibility for each postfix
        self.df_indices = pd.read_csv(index_file_path, usecols = [0], header = 0) # select first column, which should be index
        if drops is not None:
            self.df_indices = self.df_indices.drop(drops, inplace = False)

        # get file names
        self.df_filenames = self._build_filenames()

    def _build_filenames(self):
        """
        Returns a Series that holds, for every index, the list of candidate
        file names (one per postfix).
        """
        sample_ids = self.df_indices.iloc[:, 0]
        return sample_ids.apply(
            lambda sample_id : [self.FILE_PREFIX + str(int(sample_id)).strip() + postfix
                                for postfix in self.FILE_POSTFIX])

    def _sample_id(self, idx):
        """
        Returns the index value at position idx as a string.
        """
        return str(int(self.df_indices.iloc[idx, 0])).strip()

    def _clean(self, verbose = True):
        """
        This is a helper method that removes corrupted files from the index at startup. 
        Used as a defensive method of ensuring dataset is clean.
        A file counts as corrupted when it exists but cannot be read as a
        three channel image. Missing files are skipped.
        """

        corrupted = []
        start = time.time()
        print("Cleaning ...")

        for idx in range(len(self.df_filenames)):
            filenames = self.df_filenames.iloc[idx]
            # rows are dropped by label further down, so remember the label, not the position
            row_label = self.df_indices.index[idx]
            for f in filenames:
                image_name = os.path.join(self.root_dir_path, f)
                try: 
                    image = io.imread(image_name)   

                    # if 4 channels and png, then RGBA -> convert
                    if image.shape[-1] == 4:
                        rgba_image = PIL.Image.open(image_name)
                        image = np.array(rgba_image.convert('RGB'))

                    # sanity check that is an RGB image
                    h, w, c = image.shape
                    if c != 3:
                        raise ValueError(f"expected 3 channels, got {c}")

                except FileNotFoundError:
                    continue

                except Exception: # unreadable for whatever reason -> treat as corrupted
                    if row_label not in corrupted:
                        corrupted.append(row_label)

        # at the end, drop all corrupted rows from df_indices 
        self.df_indices = self.df_indices.drop(corrupted, inplace = False)

        # reload filenames (same naming scheme as in __init__)
        self.df_filenames = self._build_filenames()

        print(f"Cleaning completed. Dropped {len(corrupted)} files. Took {(time.time() - start)/60} min")
        if verbose:
            print(f"The row indices dropped were {corrupted}")

    def __len__(self):
        return len(self.df_indices)

    def __getitem__(self, idx):
        """
        Gets a sample. The cached version is used when it exists, otherwise the
        image is loaded from root_dir_path, preprocessed and (if local_dir_path
        is set) cached.
        """
        if not self.INITIALIZED and self.REQUIRE_INIT:
            print("Not initialized! Please call AnimalDataset::initialize()!")

        assert(self.INITIALIZED or not self.REQUIRE_INIT)

        if torch.is_tensor(idx):
            idx = idx.tolist()

        # retrieve preloaded
        if self.local_dir_path is not None:
            try: 
                # load from cache
                sample = self._getsample_local(idx, self.CONCAT_MASK, self.RANDOM_NOISE)
                return sample

            except FileNotFoundError:
                pass

        # no preloaded, preprocess and save
        return self._getsample_loadsave(idx, self.CONCAT_MASK, self.RANDOM_NOISE)

    def initialize(self):
        """
        Cleans the index of corrupted files and marks the dataset as initialized.
        """
        self._clean()
        self.INITIALIZED = True

    def _getsample_local(self, idx, concat_mask = False, random_noise = False):
        """
        Gets an image stored locally and then processes it into 
        a sample.

        Raises:
            FileNotFoundError: when no cached file exists for this index
        """

        if torch.is_tensor(idx):
            idx = idx.tolist()

        # load image from local
        # NOTE: pickle must only be used on cache files written by this class;
        # unpickling files from an untrusted source can execute arbitrary code.
        filename = os.path.join(self.local_dir_path, self._sample_id(idx))
        with open(f"{filename}.pickle", "rb") as p:
            image = pickle.load(p)

        # dynamically damage; _damage and _gray both return new objects, so the
        # loaded image can be shared between them
        damaged_image, mask = self._damage(image, random_noise)

        # optionally append mask to damaged image
        if concat_mask:
            damaged_image = torch.cat([damaged_image, mask], dim = -1)

        # grayscale
        gray = self._gray(image)

        # return sample as dictionary
        sample = {"image": damaged_image, "reconstructed" : image, "mask" : mask, "gray" : gray}

        # transform if defined as in normal Dataset class
        if self.transform:
            sample = self.transform(sample)

        return sample

    def _getsample_loadsave(self, idx, concat_mask = False, random_noise = False):
        """
        Loads an image from root_dir_path, preprocesses it for aspect ratio, then 
        processes it into a sample. The preprocessed image is cached when
        local_dir_path is set.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # load pair
        damaged_image, image = self._load_image_pair(self.df_filenames.iloc[idx])
        damaged_image, mask = self._damage(damaged_image, random_noise)
        gray = self._gray(image)

        # optionally append mask to damaged image
        if concat_mask:
            damaged_image = torch.cat([damaged_image, mask], dim = -1)

        # return sample as dictionaries
        sample = {"image": damaged_image, "reconstructed" : image, "mask" : mask, "gray" : gray}

        # save image as pickle
        if self.local_dir_path is not None:
            filename = os.path.join(self.local_dir_path, self._sample_id(idx))
            with open(f"{filename}.pickle", "wb") as p:
                pickle.dump(image, p, protocol = pickle.HIGHEST_PROTOCOL)

        # transform if defined as in normal Dataset class
        if self.transform:
            sample = self.transform(sample)

        return sample

    def _load_image_pair(self, filenames: list):
        """
        This is a helper method that loads images from our dataset. Given that 
        the images are saved in different file types, this method tries each
        candidate file name in turn and does a sanity check to make sure the
        image is RGB. Two separate float tensors of the same image are returned.

        Raises:
            Exception: when none of the candidate files could be loaded
        """
        for i in filenames:
            try: 
                image_name = os.path.join(self.root_dir_path, i)
                image = io.imread(image_name)   

                # if 4 channels and png, then RGBA -> convert
                if image.shape[-1] == 4:
                    rgba_image = PIL.Image.open(image_name)
                    image = np.array(rgba_image.convert('RGB'))

                # aspect ratio preprocessing
                image = self._preprocess_aspectratio(image)

                # sanity check that is an RGB image
                h, w, c = image.shape
                assert(c == 3)

                return torch.tensor(image).float(), torch.tensor(image).float()

            except FileNotFoundError:
                continue

            except AttributeError:
                print("AttributeError!")
                print(image_name)

        raise Exception("Unable to load image! File names are: ", filenames)

    def _preprocess_aspectratio(self, image):
        """
        Corrects aspect ratio by resizing according to the smallest dimension
        (by n-dimensional interpolation), followed by cropping out the 
        centre portion of the image.
        """

        # ===== ASPECT RATIO CORRECTION =====
        h, w = image.shape[:2] # first two dimensions

        # interpolate by shorter side
        rotate = False
        if w > h: 
            rotate = True
            image = image.transpose(1, 0, 2)
            h, w = image.shape[:2]

        # width always shorter 
        ratio = h/w
        dim =  self.IM_DIMENSIONS
        h = int(dim * ratio)
        image = transform.resize(image, (h, dim))

        # crop exactly dim rows around the centre (also correct for odd dim)
        start = (h - dim)//2
        image = image[start:start + dim,:,:]

        # rotate back when done
        if rotate:
            image = image.transpose(1, 0, 2)

        return image

    def _damage(self, image, seed, random_noise = True):
        """
        This helper method damages the image for later reconstruction by
        zeroing out one rectangular box.

        With CENTER_MASK set, a 10 x 10 box at the image centre is removed.
        Otherwise the box size and position are drawn from self.RNG.

        Args:
            image:        H x W x C tensor
            seed:         unused; the callers in this class pass their
                          random_noise flag in this position
            random_noise: unused; filling the box with random noise is
                          currently disabled

        Returns:
            (damaged image, mask), the mask being 0 inside the box and 1 elsewhere
        """
        height, width, c = image.shape
        mask = torch.ones(height, width, 1)

        if self.CENTER_MASK:
            crop_size = 10
            mask[height//2 - crop_size //2:height//2 + crop_size // 2,
                 width//2 - crop_size // 2: width//2 + crop_size // 2,:] = 0

        else:
            # randomly choose size of crop
            low, high = self.CROP_BOX_SIZES_DIVIDE_RANGE
            h, w = self.RNG.random(size = 2) * (high - low) + low
            h, w = int(self.IM_DIMENSIONS//h), int(self.IM_DIMENSIONS//w)

            # randomly choose location of crop so that the box stays inside the image
            h_lower, h_higher = 0 + h//2, self.IM_DIMENSIONS - h//2
            w_lower, w_higher = 0 + w//2, self.IM_DIMENSIONS - w//2
            h_centre = self.RNG.randint(h_lower, h_higher + 1)
            w_centre = self.RNG.randint(w_lower, w_higher + 1)

            mask[h_centre - h//2:h_centre + h//2,
                 w_centre - w//2:w_centre + w//2,:] = 0

        # crop
        image = torch.mul(mask, image)

        return image, mask

    def _reshape_channelFirst(self, image):
        """
        Reshapes an H x W x C tensor to C x H x W.

        NOTE(review): reshape only reinterprets the element order, it does not
        move the channel axis; image.permute(2, 0, 1) would do that. Confirm
        what the callers expect before changing it.
        """
        h, w, c = image.size()
        return image.reshape(c, h, w)

    def _gray(self, image):
        """
        Converts an image tensor (scaled by 255 and cast to uint8 first) to a
        grayscale numpy array.

        NOTE(review): the conversion uses COLOR_BGR2GRAY; confirm that the
        channel order of the images loaded by this class really is BGR.
        """
        image = (image.numpy()*255).astype(np.uint8) # to numpy 255
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return gray

    def _edge(self, image):
        """
        Returns the "edge" representation of an image, which currently is its
        grayscale version. The canny helper is defined but not used.
        """

        def canny(image):
            image = (image.numpy()*255).astype(np.uint8) # to numpy 255
            high = np.percentile(image, 99)
            low = np.percentile(image, 97.5)
            canny = torch.from_numpy(cv2.Canny(image, threshold1 = low, threshold2 = high)/255)
            return canny

        edge = self._gray(image)

        return edge


# sample indices for visualization
sample_indices = [
    "100007643",
    "100007683",
    "100017041",
    "100017050",
    "100017100",
    "100019721",
    "100019722",
    "100019723",
]
with open("sample_indices.txt", "w") as f:
    # AnimalDataset reads its index file with header = 0, so the first line is
    # taken as the column name. Without a header line the first id is lost.
    f.write("id\n")
    f.writelines(f"{i}\n" for i in sample_indices)


sample_dataset = AnimalDataset(index_file_path = "sample_indices.txt",
    root_dir_path = DATASET_PATH,
    local_dir_path = PREPROCESSED_FOLDER,
    file_prefix = "frogs_",
    image_dimension = 64,
    concat_mask = True,
    random_noise = False,
    require_init = False,
    drops = [])
print(f"Sample dataset length: {len(sample_dataset)}")

Sample dataset length: 7


%matplotlib inline

def sample_batch(dataset, sample_size = 4, show_gray = False):
    """
    Plot one randomly drawn batch of ``dataset`` as a grid of images.

    One column is drawn per sample. Rows, from top to bottom: the
    ``"image"`` entry (only its first three channels), the
    ``"reconstructed"`` entry, the ``"mask"`` entry and, when ``show_gray``
    is set, the ``"gray"`` entry.

    Args:
        dataset: dataset whose items are dicts holding the keys above
            (``"gray"`` is only read when ``show_gray`` is True).
        sample_size: number of samples requested. Fewer columns are drawn
            when the loader returns a smaller batch.
        show_gray: add a fourth row with the grayscale image.
    """

    loader = DataLoader(dataset, batch_size = sample_size, shuffle = True)
    batch = next(iter(loader))

    # the dataset may hold fewer items than requested
    columns = min(sample_size, len(batch["image"]))
    rows = 4 if show_gray else 3

    # squeeze=False keeps `ax` two-dimensional even for a single column
    fig, ax = plt.subplots(rows, columns, figsize = (columns * 5, rows * 5, ), squeeze = False)
    for i in range(columns):
        image = batch["image"][i]
        if image.shape[-1] > 3: # take first three channels, rgb
            image = image[:, :, 0:3]

        ax[0][i].imshow(image)
        ax[1][i].imshow(batch["reconstructed"][i])
        ax[2][i].matshow(batch["mask"][i].squeeze())

        if show_gray:
            ax[3][i].matshow(batch["gray"][i])

    plt.show()
    plt.close()


# Visual check of the hand-picked samples.
sample_batch(
    sample_dataset,
    sample_size = 7,
)


# Settings shared by the train / validation / test splits.
_split_settings = dict(
    root_dir_path = DATASET_PATH,
    local_dir_path = PREPROCESSED_FOLDER,
    file_prefix = "frogs_",
    image_dimension = 64,
    concat_mask = True,
    random_noise = False,
    require_init = False,
)

# `drops` is passed explicitly so that every split receives its own list.
train_dataset = AnimalDataset(
    index_file_path = os.path.join(DATA_ROOT, "frogs_train.txt"),
    drops = [],
    **_split_settings)

valid_dataset = AnimalDataset(
    index_file_path = os.path.join(DATA_ROOT, "frogs_val.txt"),
    drops = [],
    **_split_settings)

# The test split is the only one created with center_mask = True.
test_dataset = AnimalDataset(
    index_file_path = os.path.join(DATA_ROOT, "frogs_test.txt"),
    center_mask = True,
    drops = [],
    **_split_settings)


# Sanity check: plot eight random samples from every split.
for split_title, split_dataset in (
    ("Train dataset:", train_dataset),
    ("Validation dataset:", valid_dataset),
    ("Test dataset:", test_dataset),
):
    print(split_title)
    sample_batch(split_dataset, sample_size = 8)

Train dataset:

Validation dataset:

Test dataset:


# Perceptual metric backed by a VGG trunk.
VGG_LPIPS = lpips.LPIPS(net = 'vgg')


def _blend(img, gt, mask):
    """Take the prediction where mask == 0 and the ground truth where mask == 1."""
    return img * (1-mask) + gt * mask


def _hole_average(loss_fn, img, gt, mask):
    """Sum ``loss_fn`` over the mask == 0 region and divide by the sum of (1 - mask)."""
    # NOTE(review): if `mask` has fewer channels than `img`, the numerator sums
    # over every channel while the denominator counts mask entries only, so the
    # value is per hole pixel rather than per element - confirm this is intended.
    return loss_fn(img * (1-mask), gt * (1-mask), reduction = 'sum')/(1-mask).sum()


# Every metric is called as metric(prediction, ground_truth, mask).
# "(Whole)" metrics compare the blended image with the ground truth;
# "(Mask)" metrics only look at the region where mask == 0.
METRICS = {
    "Peak SnR (Whole)" : lambda img, gt, mask : torchmetrics.functional.peak_signal_noise_ratio(_blend(img, gt, mask), gt),
    "L2 loss (Whole)" : lambda img, gt, mask : nn.functional.mse_loss(_blend(img, gt, mask), gt),
    "L2 loss (Mask)" : lambda img, gt, mask : _hole_average(nn.functional.mse_loss, img, gt, mask),
    "L1 loss (Whole)" : lambda img, gt, mask : nn.functional.l1_loss(_blend(img, gt, mask), gt),
    "L1 loss (Mask)" : lambda img, gt, mask : _hole_average(nn.functional.l1_loss, img, gt, mask),
    "LPIPS (Whole)" : (lambda img, gt, mask : VGG_LPIPS(_blend(img, gt, mask), gt).mean()),
}

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth

  0%|          | 0.00/528M [00:00<?, ?B/s]

Loading model from: /usr/local/lib/python3.7/dist-packages/lpips/weights/v0.1/vgg.pth


# Gdrive link: https://drive.google.com/file/d/1p64ojuWJPrnQBzNQeSGo-5ukrpPXg72o/view?usp=sharing

# Google Drive id of the archive and the local file name it is saved under.
file_id = '1p64ojuWJPrnQBzNQeSGo-5ukrpPXg72o'
file_name = "model_weights.zip"
# The inner wget requests the download page while saving session cookies and
# sed extracts the "confirm" token from it; the outer wget then downloads the
# file to {file_name} with that token and the cookies. The cookie file is
# removed only when the outer wget succeeds (&&).
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={file_id}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={file_id}" -O {file_name} && rm -rf /tmp/cookies.txt

# Unzip model weights
# `wget -O` leaves a file behind even when the download failed or returned an
# HTML page, so existence alone does not prove success: also check that the
# file really is a zip archive before extracting it.
if os.path.isfile(file_name) and zipfile.is_zipfile(file_name):
    with zipfile.ZipFile(file_name, 'r') as ziphandler:
        ziphandler.extractall()
    models_len = len(glob.glob(os.path.join(MODEL_WEIGHTS_PATH, '*')))
    print(f"Successfully downloaded and unzipped {models_len} model weights.")
else:
    print("Zip file was not downloaded successfully! Please run the code block again.")

--2022-04-27 15:34:24--  https://docs.google.com/uc?export=download&confirm=t&id=1p64ojuWJPrnQBzNQeSGo-5ukrpPXg72o
Resolving docs.google.com (docs.google.com)... 142.251.8.113, 142.251.8.139, 142.251.8.100, ...
Connecting to docs.google.com (docs.google.com)|142.251.8.113|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-0o-30-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/5k9gm23bm8h0ltfap69760uvd735m3al/1651073625000/14966598133122679211/*/1p64ojuWJPrnQBzNQeSGo-5ukrpPXg72o?e=download [following]
Warning: wildcards not supported in HTTP.
--2022-04-27 15:34:25--  https://doc-0o-30-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/5k9gm23bm8h0ltfap69760uvd735m3al/1651073625000/14966598133122679211/*/1p64ojuWJPrnQBzNQeSGo-5ukrpPXg72o?e=download
Resolving doc-0o-30-docs.googleusercontent.com (doc-0o-30-docs.googleusercontent.com)... 108.177.125.132, 2404:6800:4008:c01::84
Connecting to doc-0o-30-docs.googleusercontent.com (doc-0o-30-docs.googleusercontent.com)|108.177.125.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 50436150 (48M) [application/zip]
Saving to: ‘model_weights.zip’

model_weights.zip   100%[===================>]  48.10M   120MB/s    in 0.4s    

2022-04-27 15:34:25 (120 MB/s) - ‘model_weights.zip’ saved [50436150/50436150]

Successfully downloaded and unzipped 13 model weights.


def summary(model: nn.Module, verbose = False):
    """
    Print the total number of parameters of ``model``, in millions.

    With ``verbose`` set, the model itself is printed first, followed by the
    name, element count and shape of every parameter tensor.
    """
    if verbose:
        print(model)

    total = 0
    for layer_name, tensor in model.named_parameters():
        n_elements = tensor.numel()
        total += n_elements
        if not verbose:
            continue
        print(f"\nlayer: {layer_name}")
        print(f"number of params: {n_elements}")
        print(f"params shape: {tensor.size()}")

    print(f"model has {total/1e6} million parameters")


def _plot_inference_samples(inputs, ground_truth, predictions, sample_size):
    """Plot input / ground truth / prediction rows for up to ``sample_size`` channel-first images."""
    columns = min(sample_size, inputs.shape[0])
    if columns <= 0:
        return

    # squeeze=False keeps `ax` two-dimensional even when a single column is drawn
    fig, ax = plt.subplots(3, columns, figsize = (columns * 5, 15, ), squeeze = False)
    for i in range(columns):
        image = inputs[i].permute(1,2,0)
        if image.shape[-1] > 3:
            image = image[:, :, 0:3] # take rgb if more than 3 channels

        ax[0][i].imshow(image)
        ax[1][i].imshow(ground_truth[i].permute(1,2,0))
        ax[2][i].imshow(predictions[i].permute(1,2,0))


def run_inference(model_g: nn.Module, test_dataset: torch.utils.data.Dataset, sample_size: int, seed: int, mode: int = 0):
    """
    Evaluate a generator on ``test_dataset``, print a metric table and plot
    a few predictions from the first batch.

    Every batch (batch size 32, no shuffling) is passed through ``model_g``
    in eval mode without gradients and scored with every entry of
    ``METRICS``. The reported value of a metric is the mean of its per-batch
    values, so a smaller final batch carries the same weight as a full one.

    Args:
        model_g: generator to evaluate; it is switched to eval mode.
        test_dataset: dataset whose items are dicts with ``"image"``,
            ``"reconstructed"`` and ``"mask"`` entries. Batches are permuted
            from (B, H, W, C) to (B, C, H, W) before the forward pass.
        sample_size: number of columns plotted from the first batch (capped
            at the size of that batch).
        seed: not used by this function; kept so existing callers keep working.
        mode: 0 when the model returns the predicted image tensor, 1 when it
            returns a sequence whose first element is the predicted image.

    Returns:
        Dict mapping each metric name to its mean over all batches.

    Raises:
        ValueError: if ``mode`` is neither 0 nor 1, or if ``test_dataset``
            yields no batches.
    """
    if mode not in (0, 1):
        raise ValueError(f"Unsupported mode {mode!r}; expected 0 or 1.")

    ## Dataloader for test set
    test_dataloader = DataLoader(test_dataset, batch_size = 32, shuffle = False, num_workers = 2)

    generator = model_g.eval()
    metrics = METRICS
    # running sum of every metric over the batches seen so far
    running_results = dict.fromkeys(metrics, 0.0)

    batches = 0
    plotted = False

    # ===== EVALUATE EPOCH =====

    with torch.no_grad():
        for batch in test_dataloader:

            batches += 1

            # 1. input, ground truth and mask, permuted to channel first
            input_batched = batch["image"].permute(0, 3, 1, 2)
            ground_truth_batched = batch["reconstructed"].permute(0, 3, 1, 2)
            mask_batched = batch["mask"].permute(0, 3, 1, 2)

            # 2. predict
            output_batched = generator(input_batched)
            if mode == 1:
                # the predicted image is the first element of the model output
                output_batched = output_batched[0]

            # 3. evaluate
            for key, func in metrics.items():
                running_results[key] += func(output_batched, ground_truth_batched, mask_batched).detach().item()

            # 4. plot the first batch only
            if not plotted:
                plotted = True
                # prediction where mask == 0, ground truth where mask == 1
                spliced_batched = ((1-mask_batched) * output_batched) + (mask_batched * ground_truth_batched)
                _plot_inference_samples(input_batched, ground_truth_batched, spliced_batched, sample_size)

    if batches == 0:
        raise ValueError("test_dataset produced no batches; nothing to evaluate.")

    # normalise numbers by batch
    for key in running_results:
        running_results[key] /= batches

    # pretty print metrics
    df = pd.DataFrame(running_results, index=[0])
    print(tabulate(df, headers='keys', tablefmt='psql', showindex=False))

    return running_results


class LinearBlock(nn.Module):
    """
    Fully connected building block: a linear projection, then batch
    normalisation, then a configurable activation.
    """

    def __init__(self, input_dim, output_dim, activation = nn.ReLU):
        super().__init__()

        # `activation` is a module class; it is instantiated without arguments
        self.linear = nn.Linear(input_dim, output_dim)
        self.activation = activation()
        self.bn = nn.BatchNorm1d(output_dim)

    def forward(self, input_tensor):
        projected = self.linear(input_tensor)
        normalised = self.bn(projected)
        return self.activation(normalised)


class MLP(nn.Module):
    """
    Fully connected baseline generator operating on flattened images.

    The input of shape (B, input_dim, image_shape, image_shape) is flattened,
    passed through three ``LinearBlock`` layers of width ``hidden_dim`` and a
    final linear layer, and reshaped back to an image with ``output_dim``
    channels and the same height and width as the input.

    Args:
        input_dim: number of input channels.
        hidden_dim: width of the hidden layers.
        output_dim: number of output channels.
        activation: activation module class used by the hidden blocks.
        image_shape: side length of the (square) images.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation, image_shape):
        super(MLP, self).__init__()

        INPUT_DIM = image_shape * image_shape * input_dim
        OUTPUT_DIM = image_shape * image_shape * output_dim
        self.linear0 = LinearBlock(INPUT_DIM, hidden_dim, activation)
        self.linear1 = LinearBlock(hidden_dim, hidden_dim, activation)
        self.linear2 = LinearBlock(hidden_dim, hidden_dim, activation)
        self.final = nn.Linear(hidden_dim, OUTPUT_DIM)

    def forward(self, input_tensor):
        b, c, h, w = input_tensor.shape
        x = input_tensor.reshape(b, c*h*w)
        x = self.linear0(x)
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.final(x)

        # recover the channel count from the width of the last layer instead of
        # hard-coding 3, so that any output_dim given to __init__ is honoured
        out_channels = self.final.out_features // (h * w)
        x = x.reshape(b, out_channels, h, w)

        return x


# MLP baseline: build the architecture, then restore its epoch-20 checkpoint.
mlp = MLP(
    input_dim = 4,
    hidden_dim = 64,
    output_dim = 3,
    activation = nn.Mish,
    image_shape = 64,
)
mlp_weights = os.path.join(MODEL_WEIGHTS_PATH, "mlp_generator_epoch20.pt")
mlp.load_state_dict(torch.load(mlp_weights, map_location=device))
print(f"Loaded generator weights from {mlp_weights}.")

summary(mlp)

Loaded generator weights from model_weights/mlp_generator_epoch20.pt.
model has 1.856064 million parameters


# Score the MLP baseline on the test split and plot ten samples.
mlp_results = run_inference(
    mlp,
    test_dataset=test_dataset,
    sample_size=10,
    seed=None,
)

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            32.3722 |       0.000582524 |        0.0715806 |        0.00296437 |         0.364262 |       0.0310939 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class Conv2dBlock(nn.Module):
    """
    Standard convolution building block: a 2-D convolution, then batch
    normalisation, then a configurable activation.
    """

    def __init__(self, input_dim, output_dim,
        kernel_size = 3, stride = 1, padding = 'same', dilation = 1,
        activation = nn.ReLU):

        super().__init__()

        self.conv = nn.Conv2d(
            input_dim,
            output_dim,
            kernel_size = kernel_size,
            stride = stride,
            padding = padding,
            dilation = dilation,
        )
        self.bn = nn.BatchNorm2d(output_dim)
        # `activation` is a module class; it is instantiated without arguments
        self.activation = activation()

    def forward(self, input_tensor):
        features = self.conv(input_tensor)
        features = self.bn(features)
        return self.activation(features)


class CNN(nn.Module):
    """
    Fully convolutional generator working at constant resolution.

    Sixteen ``Conv2dBlock`` layers (3x3 kernel, stride 1, padding 1) named
    ``conv0`` .. ``conv15`` are applied in sequence; every block except
    ``conv0`` has its input added to its output. A final 3x3 convolution
    maps the features to ``output_dim`` channels.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        super().__init__()

        # conv0 lifts the input to hidden_dim channels; conv1 .. conv15 keep it
        for index in range(16):
            in_channels = input_dim if index == 0 else hidden_dim
            block = Conv2dBlock(in_channels, hidden_dim, kernel_size = 3, stride = 1, padding = 1, dilation = 1, activation = activation)
            setattr(self, f"conv{index}", block)

        # final
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size = 3, stride = 1, padding = 'same')

    def forward(self, input_tensor):

        x = self.conv0(input_tensor)

        # residual stack: each block's input is added to its output
        for index in range(1, 16):
            block = getattr(self, f"conv{index}")
            x = block(x) + x

        return self.final(x)


# CNN baseline: build the architecture, then restore its epoch-20 checkpoint.
cnn = CNN(
    input_dim = 4,
    hidden_dim = 64,
    output_dim = 3,
    activation = nn.Mish,
)
cnn_weights = os.path.join(MODEL_WEIGHTS_PATH, "cnn_generator_epoch20.pt")
cnn.load_state_dict(torch.load(cnn_weights, map_location=device))
print(f"Loaded generator weights from {cnn_weights}.")

summary(cnn)

Loaded generator weights from model_weights/cnn_generator_epoch20.pt.
model has 0.560067 million parameters


# Score the CNN baseline on the test split and plot ten samples.
cnn_results = run_inference(
    cnn,
    test_dataset=test_dataset,
    sample_size=10,
    seed=None,
)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            35.1031 |       0.000314723 |        0.0386731 |        0.00204271 |         0.251009 |       0.0175084 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class UpConv2dBlock(nn.Module):
    """
    Upsampling block: the input is first resized with ``nn.Upsample`` and
    then passed through convolution, batch normalisation and activation.
    Reference comparing this technique with transposed convolutions:
    Odena, et al., "Deconvolution and Checkerboard Artifacts", Distill, 2016. http://doi.org/10.23915/distill.00003
    """

    def __init__(self, input_dim, output_dim,
        kernel_size = 3, stride = 1, padding = 'same', dilation = 1,
        activation = nn.ReLU,
        scale_factor = (2,2), mode = 'nearest'):

        super().__init__()

        self.upsample = nn.Upsample(scale_factor = scale_factor, mode = mode)
        self.conv = nn.Conv2d(
            input_dim,
            output_dim,
            kernel_size = kernel_size,
            stride = stride,
            padding = padding,
            dilation = dilation,
        )
        # `activation` is a module class; it is instantiated without arguments
        self.activation = activation()
        self.bn = nn.BatchNorm2d(output_dim)

    def forward(self, input_tensor):
        enlarged = self.upsample(input_tensor)
        features = self.bn(self.conv(enlarged))
        return self.activation(features)


class AutoEncoder(nn.Module):
    """
    Convolutional encoder / decoder generator.

    ``conv0`` .. ``conv3`` alternate stride-1 and stride-2 blocks (two
    stride-2 steps in total), ``conv4`` .. ``conv11`` are stride-1 blocks
    whose input is added to their output, ``conv12`` .. ``conv15`` alternate
    upsampling and stride-1 blocks, and a final 3x3 convolution maps the
    features to ``output_dim`` channels.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        super().__init__()

        def same(in_channels = hidden_dim):
            # 3x3 block that keeps the spatial size
            return Conv2dBlock(in_channels, hidden_dim, kernel_size = 3, stride = 1, padding = 1, dilation = 1, activation = activation)

        def down():
            # 3x3 block with stride 2
            return Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)

        def up():
            # upsample (default scale factor), then 3x3 convolution
            return UpConv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 1, padding = 1, dilation = 1, activation = activation)

        # same -> down -> same -> down
        self.conv0 = same(input_dim)
        self.conv1 = down()
        self.conv2 = same()
        self.conv3 = down()

        # 8 x same: conv4 .. conv11
        for index in range(4, 12):
            setattr(self, f"conv{index}", same())

        # upsample -> same -> upsample -> same
        self.conv12 = up()
        self.conv13 = same()
        self.conv14 = up()
        self.conv15 = same()

        # final
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size = 3, stride = 1, padding = 'same')

    def forward(self, input_tensor):

        x = input_tensor
        for encoder in (self.conv0, self.conv1, self.conv2, self.conv3):
            x = encoder(x)

        # middle layers: the input of every block is added to its output
        middle = (self.conv4, self.conv5, self.conv6, self.conv7,
                  self.conv8, self.conv9, self.conv10, self.conv11)
        for block in middle:
            x = block(x) + x

        # up sample
        for decoder in (self.conv12, self.conv13, self.conv14, self.conv15):
            x = decoder(x)

        return self.final(x)


# Autoencoder: build the architecture, then restore its epoch-20 checkpoint.
autoencoder = AutoEncoder(
    input_dim = 4,
    hidden_dim = 64,
    output_dim = 3,
    activation = nn.Mish,
)
autoencoder_weights = os.path.join(MODEL_WEIGHTS_PATH, "autoencoder_generator_epoch20.pt")
autoencoder.load_state_dict(torch.load(autoencoder_weights, map_location=device))
print(f"Loaded generator weights from {autoencoder_weights}.")

summary(autoencoder)

Loaded generator weights from model_weights/autoencoder_generator_epoch20.pt.
model has 0.560067 million parameters


# Score the autoencoder on the test split and plot ten samples.
autoencoder_results = run_inference(
    autoencoder,
    test_dataset=test_dataset,
    sample_size=10,
    seed=None,
)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            35.5781 |        0.00027985 |         0.034388 |        0.00192118 |         0.236075 |       0.0211599 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class Generator(nn.Module):
    """
    Generator network of the GAN experiments.

    Layers ``conv0`` .. ``conv15`` are applied in order: ``conv0`` ..
    ``conv3`` alternate stride-1 and stride-2 blocks, ``conv4`` ..
    ``conv11`` are stride-1 blocks with their input added to their output,
    and ``conv12`` .. ``conv15`` alternate 2x upsampling blocks with
    stride-1 blocks. A final 3x3 convolution maps the features to
    ``output_dim`` channels.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        super().__init__()

        # settings common to every block of the network
        shared = dict(kernel_size = 3, padding = 1, dilation = 1, activation = activation)

        # same -> down -> same -> down
        self.conv0 = Conv2dBlock(input_dim, hidden_dim, stride = 1, **shared)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, stride = 2, **shared)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, stride = 1, **shared)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, stride = 2, **shared)

        # 8 x same: conv4 .. conv11
        for index in range(4, 12):
            setattr(self, f"conv{index}", Conv2dBlock(hidden_dim, hidden_dim, stride = 1, **shared))

        # upsample -> same -> upsample -> same
        self.conv12 = UpConv2dBlock(hidden_dim, hidden_dim, stride = 1, scale_factor = (2,2), **shared)
        self.conv13 = Conv2dBlock(hidden_dim, hidden_dim, stride = 1, **shared)
        self.conv14 = UpConv2dBlock(hidden_dim, hidden_dim, stride = 1, scale_factor = (2,2), **shared)
        self.conv15 = Conv2dBlock(hidden_dim, hidden_dim, stride = 1, **shared)

        # final
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size = 3, stride = 1, padding = 'same')

    def forward(self, input_tensor):

        x = input_tensor

        # down-sampling path
        for index in range(0, 4):
            x = getattr(self, f"conv{index}")(x)

        # middle layers: the input of every block is added to its output
        for index in range(4, 12):
            layer = getattr(self, f"conv{index}")
            x = layer(x) + x

        # up-sampling path
        for index in range(12, 16):
            x = getattr(self, f"conv{index}")(x)

        return self.final(x)


class Discriminator(nn.Module):
    """
    Convolutional discriminator producing one score per input image.

    Five ``Conv2dBlock`` layers with stride 2 reduce the resolution; the
    resulting feature map is flattened and mapped to a single value by a
    linear layer.

    Args:
        input_dim: number of input channels.
        hidden_dim: number of channels of every convolution block.
        input_shape: side length of the (square) input images.
        activation: activation module class used by the convolution blocks.
    """

    def __init__(self, input_dim, hidden_dim, input_shape, activation):

        super(Discriminator, self).__init__()

        # 5 layers down
        self.conv0 = Conv2dBlock(input_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv4 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)

        # linear to predict classes
        # A 3x3 / stride 2 / padding 1 convolution maps a side of H to
        # (H - 1) // 2 + 1. Applying that five times equals H // 2**5 only when
        # H is a multiple of 32, so the size is derived step by step to keep
        # the linear layer in sync with the feature map for any input_shape.
        latent_h = input_shape
        for _ in range(5):
            latent_h = (latent_h - 1) // 2 + 1
        self.linear = nn.Linear(latent_h**2 * hidden_dim, 1)


    def forward(self, input_tensor):

        x = self.conv0(input_tensor)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        # scores
        x = torch.flatten(x, start_dim = 1)
        x = self.linear(x)

        return x


# GAN generator from the "gan_1" checkpoint (epoch 20).
gan_1_generator = Generator(
    input_dim = 4,
    hidden_dim = 64,
    output_dim = 3,
    activation = nn.Mish,
)
gan_1_weights = os.path.join(MODEL_WEIGHTS_PATH, "gan_1_generator_epoch20.pt")
gan_1_generator.load_state_dict(torch.load(gan_1_weights, map_location=device))
print(f"Loaded generator weights from {gan_1_weights}.")

summary(gan_1_generator)

Loaded generator weights from model_weights/gan_1_generator_epoch20.pt.
model has 0.560067 million parameters


# Score the "gan_1" generator on the test split and plot ten samples.
gan_1_results = run_inference(
    gan_1_generator,
    test_dataset=test_dataset,
    sample_size=10,
    seed=None,
)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            34.4302 |       0.000362975 |        0.0446024 |        0.00223769 |         0.274967 |       0.0188187 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


# GAN generator from the "gan_0.1" checkpoint (epoch 20).
gan_01_generator = Generator(
    input_dim = 4,
    hidden_dim = 64,
    output_dim = 3,
    activation = nn.Mish,
)
gan_01_weights = os.path.join(MODEL_WEIGHTS_PATH, "gan_0.1_generator_epoch20.pt")
gan_01_generator.load_state_dict(torch.load(gan_01_weights, map_location=device))
print(f"Loaded generator weights from {gan_01_weights}.")

summary(gan_01_generator)

Loaded generator weights from model_weights/gan_01_generator_epoch20.pt.
model has 0.560067 million parameters


# Score the "gan_0.1" generator on the test split and plot ten samples.
gan_01_results = run_inference(
    gan_01_generator,
    test_dataset=test_dataset,
    sample_size=10,
    seed=None,
)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            34.2563 |       0.000379741 |        0.0466626 |        0.00227141 |         0.279111 |       0.0194531 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


# GAN generator from the "gan_0.01" checkpoint (epoch 20).
gan_001_generator = Generator(
    input_dim = 4,
    hidden_dim = 64,
    output_dim = 3,
    activation = nn.Mish,
)
gan_001_weights = os.path.join(MODEL_WEIGHTS_PATH, "gan_0.01_generator_epoch20.pt")
gan_001_generator.load_state_dict(torch.load(gan_001_weights, map_location=device))
print(f"Loaded generator weights from {gan_001_weights}.")

summary(gan_001_generator)

Loaded generator weights from model_weights/gan_0.01_generator_epoch20.pt.
model has 0.560067 million parameters


# Score the "gan_0.01" generator on the test split and plot ten samples.
gan_001_results = run_inference(
    gan_001_generator,
    test_dataset=test_dataset,
    sample_size=10,
    seed=None,
)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            35.1391 |       0.000309324 |        0.0380097 |        0.00203009 |         0.249458 |       0.0213654 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


# GAN generator from the "gan_0.001" checkpoint (epoch 20).
gan_0001_generator = Generator(
    input_dim = 4,
    hidden_dim = 64,
    output_dim = 3,
    activation = nn.Mish,
)
gan_0001_weights = os.path.join(MODEL_WEIGHTS_PATH, "gan_0.001_generator_epoch20.pt")
gan_0001_generator.load_state_dict(torch.load(gan_0001_weights, map_location=device))
print(f"Loaded generator weights from {gan_0001_weights}.")

summary(gan_0001_generator)

Loaded generator weights from model_weights/gan_0.001_generator_epoch20.pt.
model has 0.560067 million parameters


# Score the "gan_0.001" generator on the test split and plot ten samples.
gan_0001_results = run_inference(
    gan_0001_generator,
    test_dataset=test_dataset,
    sample_size=10,
    seed=None,
)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            34.4302 |       0.000362975 |        0.0446024 |        0.00223769 |         0.274967 |       0.0188187 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class GatedConv2d(nn.Module):

    """
    Gated 2-D convolution.

    Two convolutions with identical hyper-parameters read the same input: one
    produces the features, the other produces a gate that a sigmoid squashes
    into (0, 1). The layer returns their element-wise product.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation):
        super().__init__()

        conv_kwargs = dict(kernel_size = kernel_size, stride = stride, padding = padding, dilation = dilation)

        # feature branch is created before the gate branch (keeps parameter order stable)
        self.image_conv = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        self.gate_conv = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_tensor, return_mask = False):
        """
        Returns the gated features, or the tuple (gated features, gate) when
        `return_mask` is true.
        """

        features = self.image_conv(input_tensor)
        gate = self.sigmoid(self.gate_conv(input_tensor))
        gated = features * gate # soft mask applied per element

        return (gated, gate) if return_mask else gated

class GatedUpConv2dBlock(nn.Module):
    """
    Up-sampling counterpart of the gated conv block.
    Upsample -> gated conv -> BN -> activation

    `activation` is a module class (not an instance); it is instantiated here
    with no arguments. `scale_factor` and `mode` are passed to nn.Upsample.
    """

    def __init__(self, input_dim, output_dim, 
        kernel_size = 3, stride = 1, padding = 'same', dilation = 1, 
        activation = nn.ReLU, 
        scale_factor = (2,2), mode = 'nearest'):

        super().__init__()

        # sub-modules are registered in the same order as before: upsample, conv, activation, bn
        self.upsample = nn.Upsample(scale_factor = scale_factor, mode = mode)
        self.conv = GatedConv2d(
            in_channels = input_dim,
            out_channels = output_dim,
            kernel_size = kernel_size,
            stride = stride,
            padding = padding,
            dilation = dilation,
        )
        self.activation = activation()
        self.bn = nn.BatchNorm2d(output_dim)

    def forward(self, input_tensor):

        enlarged = self.upsample(input_tensor)
        normalised = self.bn(self.conv(enlarged))

        return self.activation(normalised)

class GatedConv2dBlock(nn.Module):
    """
    This class encapsulates a gated convolution block.
    GatedConv2d -> BN -> activation

    `activation` is a module class (not an instance); it is instantiated here
    with no arguments.
    """

    def __init__(self, input_dim, output_dim, 
        kernel_size = 3, stride = 1, padding = 'same', dilation = 1, 
        activation = nn.ReLU):

        super().__init__()

        # registration order (conv, bn, activation) is kept as in the original
        self.conv = GatedConv2d(
            in_channels = input_dim,
            out_channels = output_dim,
            kernel_size = kernel_size,
            stride = stride,
            padding = padding,
            dilation = dilation,
        )
        self.bn = nn.BatchNorm2d(output_dim)
        self.activation = activation()

    def forward(self, input_tensor):

        normalised = self.bn(self.conv(input_tensor))

        return self.activation(normalised)


class Generator(nn.Module):
    """
    Generator built from gated convolutions with a dilated residual bottleneck.

    Layout (all kernels are 3x3, every block has `hidden_dim` output channels):
      conv0-conv3   : same -> stride-2 -> same -> stride-2
      conv4-conv11  : eight residual blocks; conv6-conv9 use dilation 2, 4, 8, 16
      conv12-conv15 : 2x upsample -> same -> 2x upsample -> same
      final         : plain nn.Conv2d projecting to `output_dim` channels (no activation)

    Attribute names conv0..conv15 / final are the state-dict keys of the saved
    checkpoints and are therefore kept unchanged.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        super().__init__()

        def gated(in_channels, stride = 1, rate = 1):
            # 3x3 gated block; every layer of this network uses padding == dilation
            return GatedConv2dBlock(in_channels, hidden_dim, kernel_size = 3, stride = stride,
                                    padding = rate, dilation = rate, activation = activation)

        def gated_up():
            # nearest-neighbour 2x upsample followed by a 3x3 gated block
            return GatedUpConv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 1, padding = 1,
                                      dilation = 1, activation = activation, scale_factor = (2,2))

        # encoder: same -> down -> same -> down
        self.conv0 = gated(input_dim)
        self.conv1 = gated(hidden_dim, stride = 2)
        self.conv2 = gated(hidden_dim)
        self.conv3 = gated(hidden_dim, stride = 2)

        # bottleneck: 2 x same, 4 x dilated, 2 x same
        self.conv4 = gated(hidden_dim)
        self.conv5 = gated(hidden_dim)
        self.conv6 = gated(hidden_dim, rate = 2)
        self.conv7 = gated(hidden_dim, rate = 4)
        self.conv8 = gated(hidden_dim, rate = 8)
        self.conv9 = gated(hidden_dim, rate = 16)
        self.conv10 = gated(hidden_dim)
        self.conv11 = gated(hidden_dim)

        # decoder: upsample -> same -> upsample -> same
        self.conv12 = gated_up()
        self.conv13 = gated(hidden_dim)
        self.conv14 = gated_up()
        self.conv15 = gated(hidden_dim)

        # output projection
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size = 3, stride = 1, padding = 'same')

    def forward(self, input_tensor):

        x = input_tensor

        # encoder
        for encoder_layer in (self.conv0, self.conv1, self.conv2, self.conv3):
            x = encoder_layer(x)

        # bottleneck with an additive skip around every block
        for residual_layer in (self.conv4, self.conv5, self.conv6, self.conv7,
                               self.conv8, self.conv9, self.conv10, self.conv11):
            x = residual_layer(x) + x

        # decoder
        for decoder_layer in (self.conv12, self.conv13, self.conv14, self.conv15):
            x = decoder_layer(x)

        return self.final(x)


class Discriminator(nn.Module):
    """
    Discriminator made of five stride-2 Conv2dBlocks followed by a linear layer
    that emits one raw (un-squashed) score per sample.

    Args:
        input_dim: number of channels of the input tensor.
        hidden_dim: number of channels produced by every conv block.
        input_shape: side length (int) of the input; the flattened feature size
            is computed as latent_h**2, i.e. inputs are treated as square.
        activation: activation module class forwarded to each Conv2dBlock.
    """

    def __init__(self, input_dim, hidden_dim, input_shape, activation):

        super(Discriminator, self).__init__()

        # 5 layers down
        self.conv0 = Conv2dBlock(input_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv4 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)

        # linear to predict the score
        # A 3x3 / stride-2 / padding-1 convolution maps a side length h to ceil(h / 2)
        # (assuming Conv2dBlock hands these arguments to nn.Conv2d unchanged — TODO confirm).
        # Floor-dividing by 2**5 only agrees with that for multiples of 32, so the
        # latent side length is derived one layer at a time instead.
        latent_h = input_shape
        for _ in range(5):
            latent_h = (latent_h + 1) // 2
        self.linear = nn.Linear(latent_h**2 * hidden_dim, 1)


    def forward(self, input_tensor):
        """Return a (batch, 1) tensor of raw scores."""

        x = self.conv0(input_tensor)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        # scores
        x = torch.flatten(x, start_dim = 1)
        x = self.linear(x)

        return x


# Restore the epoch-20 generator checkpoint of the "dilatedgatedgan" run.
# NOTE: `Generator` is redefined several times in this notebook; the name resolves
# to whichever definition was executed most recently before this cell.
dilatedgatedgan_weights = os.path.join(MODEL_WEIGHTS_PATH, "dilatedgatedgan_generator_epoch20.pt")
dilatedgatedgan_generator = Generator(input_dim = 4, hidden_dim = 64, output_dim = 3, activation = nn.Mish)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on GPU loads on CPU too
dilatedgatedgan_generator.load_state_dict(torch.load(dilatedgatedgan_weights, map_location=device))
print(f"Loaded generator weights from {dilatedgatedgan_weights}.")

# `summary` is defined elsewhere in the notebook; the cell output shows it prints the parameter count
summary(dilatedgatedgan_generator)

Loaded generator weights from model_weights/dilatedgatedgan_generator_epoch20.pt.
model has 1.116355 million parameters


# Evaluate the restored generator; `run_inference` is defined elsewhere in the notebook.
# NOTE(review): presumably sample_size=10 draws ten test items and seed=None leaves the draw unseeded — confirm.
dilatedgatedgan_results = run_inference(dilatedgatedgan_generator, test_dataset=test_dataset, sample_size=10, seed=None)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            34.4379 |       0.000362805 |        0.0445815 |        0.00221354 |            0.272 |       0.0169406 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class Generator(nn.Module):
    """
    Gated-convolution generator used with the two-branch discriminator below.

    Layout (all kernels are 3x3, every block has `hidden_dim` output channels):
      conv0-conv3   : same -> stride-2 -> same -> stride-2
      conv4-conv11  : eight residual blocks; conv6-conv9 use dilation 2, 4, 8, 16
      conv12-conv15 : 2x upsample -> same -> 2x upsample -> same
      final         : plain nn.Conv2d projecting to `output_dim` channels (no activation)

    Attribute names conv0..conv15 / final are the state-dict keys of the saved
    checkpoints and are therefore kept unchanged.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        super().__init__()

        def gated(in_channels, stride = 1, rate = 1):
            # 3x3 gated block; every layer of this network uses padding == dilation
            return GatedConv2dBlock(in_channels, hidden_dim, kernel_size = 3, stride = stride,
                                    padding = rate, dilation = rate, activation = activation)

        def gated_up():
            # nearest-neighbour 2x upsample followed by a 3x3 gated block
            return GatedUpConv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 1, padding = 1,
                                      dilation = 1, activation = activation, scale_factor = (2,2))

        # encoder: same -> down -> same -> down
        self.conv0 = gated(input_dim)
        self.conv1 = gated(hidden_dim, stride = 2)
        self.conv2 = gated(hidden_dim)
        self.conv3 = gated(hidden_dim, stride = 2)

        # bottleneck: 2 x same, 4 x dilated conv, 2 x same
        self.conv4 = gated(hidden_dim)
        self.conv5 = gated(hidden_dim)
        self.conv6 = gated(hidden_dim, rate = 2)
        self.conv7 = gated(hidden_dim, rate = 4)
        self.conv8 = gated(hidden_dim, rate = 8)
        self.conv9 = gated(hidden_dim, rate = 16)
        self.conv10 = gated(hidden_dim)
        self.conv11 = gated(hidden_dim)

        # decoder: upsample -> same -> upsample -> same
        self.conv12 = gated_up()
        self.conv13 = gated(hidden_dim)
        self.conv14 = gated_up()
        self.conv15 = gated(hidden_dim)

        # output projection
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size = 3, stride = 1, padding = 'same')

    def forward(self, input_tensor):

        x = input_tensor

        # encoder
        for encoder_layer in (self.conv0, self.conv1, self.conv2, self.conv3):
            x = encoder_layer(x)

        # bottleneck with an additive skip around every block
        for residual_layer in (self.conv4, self.conv5, self.conv6, self.conv7,
                               self.conv8, self.conv9, self.conv10, self.conv11):
            x = residual_layer(x) + x

        # decoder
        for decoder_layer in (self.conv12, self.conv13, self.conv14, self.conv15):
            x = decoder_layer(x)

        return self.final(x)


class Discriminator(nn.Module):
    """
    Feature-extracting discriminator branch: five stride-2 Conv2dBlocks followed
    by a linear layer that emits a `hidden_dim`-sized feature vector per sample
    (not a single score).

    Args:
        input_dim: number of channels of the input tensor.
        hidden_dim: number of channels of every conv block and size of the output vector.
        input_shape: side length (int) of the input; the flattened feature size
            is computed as latent_h**2, i.e. inputs are treated as square.
        activation: activation module class forwarded to each Conv2dBlock.
    """

    def __init__(self, input_dim, hidden_dim, input_shape, activation):

        super(Discriminator, self).__init__()

        # 5 layers down
        self.conv0 = Conv2dBlock(input_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv4 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)

        # linear producing the feature vector
        # A 3x3 / stride-2 / padding-1 convolution maps a side length h to ceil(h / 2)
        # (assuming Conv2dBlock hands these arguments to nn.Conv2d unchanged — TODO confirm).
        # Floor-dividing by 2**5 only agrees with that for multiples of 32, so the
        # latent side length is derived one layer at a time instead.
        latent_h = input_shape
        for _ in range(5):
            latent_h = (latent_h + 1) // 2
        self.linear = nn.Linear(latent_h**2 * hidden_dim, hidden_dim)

    def forward(self, input_tensor):
        """Return a (batch, hidden_dim) feature tensor."""

        x = self.conv0(input_tensor)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        # features
        x = torch.flatten(x, start_dim = 1)
        x = self.linear(x)

        return x

class DoubleDiscriminator(nn.Module):
    """
    Two independent Discriminator branches whose outputs are concatenated and
    reduced to one raw score by a linear layer.

    Both branches are built with the same arguments. The final layer takes
    2 * hidden_dim inputs, so each branch must output `hidden_dim` features.
    NOTE(review): the checkpoint name suggests one branch sees a local crop and
    the other the whole image — confirm against the training loop.
    """

    def __init__(self, input_dim, hidden_dim, input_shape, activation):

        super().__init__()

        # one branch per input; weights are not shared
        self.d1 = Discriminator(input_dim, hidden_dim, input_shape, activation)
        self.d2 = Discriminator(input_dim, hidden_dim, input_shape, activation)

        # joint score from both feature vectors
        self.linear = nn.Linear(hidden_dim*2, 1)

    def forward(self, input_tensor1, input_tensor2):
        branch_features = (self.d1(input_tensor1), self.d2(input_tensor2))
        joint = torch.cat(branch_features, dim = 1)

        return self.linear(joint)


# Restore the epoch-20 generator checkpoint of the "localglobalgan" run.
# NOTE: `Generator` is redefined several times in this notebook; the name resolves
# to whichever definition was executed most recently before this cell.
localglobalgan_weights = os.path.join(MODEL_WEIGHTS_PATH, "localglobalgan_generator_epoch20.pt")
localglobalgan_generator = Generator(input_dim = 4, hidden_dim = 64, output_dim = 3, activation = nn.Mish)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on GPU loads on CPU too
localglobalgan_generator.load_state_dict(torch.load(localglobalgan_weights, map_location=device))
print(f"Loaded generator weights from {localglobalgan_weights}.")

# `summary` is defined elsewhere in the notebook; the cell output shows it prints the parameter count
summary(localglobalgan_generator)

Loaded generator weights from model_weights/localglobalgan_generator_epoch20.pt.
model has 1.116355 million parameters


# Evaluate the restored generator; `run_inference` is defined elsewhere in the notebook.
# NOTE(review): presumably sample_size=10 draws ten test items and seed=None leaves the draw unseeded — confirm.
localglobalgan_results = run_inference(localglobalgan_generator, test_dataset=test_dataset, sample_size=10, seed=None)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            33.4518 |       0.000453681 |        0.0557483 |         0.0025013 |          0.30736 |         0.02004 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class Generator(nn.Module):
    """
    Gated-convolution generator used with the patch discriminator below.

    Layout (all kernels are 3x3, every block has `hidden_dim` output channels):
      conv0-conv3   : same -> stride-2 -> same -> stride-2
      conv4-conv11  : eight residual blocks; conv6-conv9 use dilation 2, 4, 8, 16
      conv12-conv15 : 2x upsample -> same -> 2x upsample -> same
      final         : plain nn.Conv2d projecting to `output_dim` channels (no activation)

    Attribute names conv0..conv15 / final are the state-dict keys of the saved
    checkpoints and are therefore kept unchanged.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        super().__init__()

        def gated(in_channels, stride = 1, rate = 1):
            # 3x3 gated block; every layer of this network uses padding == dilation
            return GatedConv2dBlock(in_channels, hidden_dim, kernel_size = 3, stride = stride,
                                    padding = rate, dilation = rate, activation = activation)

        def gated_up():
            # nearest-neighbour 2x upsample followed by a 3x3 gated block
            return GatedUpConv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 1, padding = 1,
                                      dilation = 1, activation = activation, scale_factor = (2,2))

        # encoder: same -> down -> same -> down
        self.conv0 = gated(input_dim)
        self.conv1 = gated(hidden_dim, stride = 2)
        self.conv2 = gated(hidden_dim)
        self.conv3 = gated(hidden_dim, stride = 2)

        # bottleneck: 2 x same, 4 x dilated conv, 2 x same
        self.conv4 = gated(hidden_dim)
        self.conv5 = gated(hidden_dim)
        self.conv6 = gated(hidden_dim, rate = 2)
        self.conv7 = gated(hidden_dim, rate = 4)
        self.conv8 = gated(hidden_dim, rate = 8)
        self.conv9 = gated(hidden_dim, rate = 16)
        self.conv10 = gated(hidden_dim)
        self.conv11 = gated(hidden_dim)

        # decoder: upsample -> same -> upsample -> same
        self.conv12 = gated_up()
        self.conv13 = gated(hidden_dim)
        self.conv14 = gated_up()
        self.conv15 = gated(hidden_dim)

        # output projection
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size = 3, stride = 1, padding = 'same')

    def forward(self, input_tensor):

        x = input_tensor

        # encoder
        for encoder_layer in (self.conv0, self.conv1, self.conv2, self.conv3):
            x = encoder_layer(x)

        # bottleneck with an additive skip around every block
        for residual_layer in (self.conv4, self.conv5, self.conv6, self.conv7,
                               self.conv8, self.conv9, self.conv10, self.conv11):
            x = residual_layer(x) + x

        # decoder
        for decoder_layer in (self.conv12, self.conv13, self.conv14, self.conv15):
            x = decoder_layer(x)

        return self.final(x)

class Discriminator(nn.Module):
    """
    Patch-style discriminator: five stride-1 Conv2dBlocks (the last four wrapped
    in additive skips) followed by a 3x3 convolution down to one channel and a
    sigmoid, so the output is a single-channel map of values in (0, 1) rather
    than one score per sample.

    Unlike the other discriminators in this notebook, no layer here uses
    stride 2 and no `input_shape` argument is needed.
    """

    def __init__(self, input_dim, hidden_dim, activation):

        super().__init__()

        # shared hyper-parameters of the five conv blocks (all stride 1)
        block_kwargs = dict(kernel_size = 3, stride = 1, padding = 1, dilation = 1, activation = activation)

        self.conv0 = Conv2dBlock(input_dim, hidden_dim, **block_kwargs)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, **block_kwargs)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, **block_kwargs)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, **block_kwargs)
        self.conv4 = Conv2dBlock(hidden_dim, hidden_dim, **block_kwargs)

        # convolutional head producing one score channel
        self.final = nn.Conv2d(hidden_dim, 1, kernel_size = 3, stride = 1, padding = 'same')

    def forward(self, input_tensor):

        x = self.conv0(input_tensor)

        # residual stack
        for residual_block in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = residual_block(x) + x

        # per-position scores squashed to (0, 1)
        return torch.sigmoid(self.final(x))


# Restore the epoch-20 generator checkpoint of the "patchgan" run.
# NOTE: `Generator` is redefined several times in this notebook; the name resolves
# to whichever definition was executed most recently before this cell.
patchgan_weights = os.path.join(MODEL_WEIGHTS_PATH, "patchgan_generator_epoch20.pt")
patchgan_generator = Generator(input_dim = 4, hidden_dim = 64, output_dim = 3, activation = nn.Mish)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on GPU loads on CPU too
patchgan_generator.load_state_dict(torch.load(patchgan_weights, map_location=device))
print(f"Loaded generator weights from {patchgan_weights}.")

# `summary` is defined elsewhere in the notebook; the cell output shows it prints the parameter count
summary(patchgan_generator)

Loaded generator weights from model_weights/patchgan_generator_epoch20.pt.
model has 1.116355 million parameters


# Evaluate the restored generator; `run_inference` is defined elsewhere in the notebook.
# NOTE(review): presumably sample_size=10 draws ten test items and seed=None leaves the draw unseeded — confirm.
patchgan_results = run_inference(patchgan_generator, test_dataset=test_dataset, sample_size=10, seed=None)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            35.0264 |       0.000316774 |        0.0389252 |        0.00205053 |         0.251969 |       0.0165137 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class Generator(nn.Module):
    """
    Gated-convolution generator used as each stage of DoubleGenerator.

    Layout (all kernels are 3x3, every block has `hidden_dim` output channels):
      conv0-conv3   : same -> stride-2 -> same -> stride-2
      conv4-conv11  : eight residual blocks; conv6-conv9 use dilation 2, 4, 8, 16
      conv12-conv15 : 2x upsample -> same -> 2x upsample -> same
      final         : plain nn.Conv2d projecting to `output_dim` channels (no activation)

    Two things differ from the other Generator variants in this notebook:
      * convgray1 / convgray2 are created but never called in forward(); they
        are kept because they contribute parameters to the state dict.
      * the up-sampling blocks are built without forwarding `activation`, so
        they use GatedUpConv2dBlock's default activation.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        super().__init__()

        def gated(in_channels, stride = 1, rate = 1):
            # 3x3 gated block; every layer of this network uses padding == dilation
            return GatedConv2dBlock(in_channels, hidden_dim, kernel_size = 3, stride = stride,
                                    padding = rate, dilation = rate, activation = activation)

        def gated_up():
            # NOTE: `activation` is deliberately not passed on, exactly as in the original code
            return GatedUpConv2dBlock(hidden_dim, hidden_dim, scale_factor = (2,2), kernel_size = 3,
                                      stride = 1, padding = 1, dilation = 1, mode = 'nearest')

        # encoder: same -> downsample -> same -> downsample
        self.conv0 = gated(input_dim)
        self.conv1 = gated(hidden_dim, stride = 2)
        self.conv2 = gated(hidden_dim)
        self.conv3 = gated(hidden_dim, stride = 2)

        # bottleneck: 2 x same conv, 4 x dilated same conv, 2 x same conv
        self.conv4 = gated(hidden_dim)
        self.conv5 = gated(hidden_dim)
        self.conv6 = gated(hidden_dim, rate = 2)
        self.conv7 = gated(hidden_dim, rate = 4)
        self.conv8 = gated(hidden_dim, rate = 8)
        self.conv9 = gated(hidden_dim, rate = 16)
        self.conv10 = gated(hidden_dim)
        self.conv11 = gated(hidden_dim)

        # 2 x up conv for gray (NOT USED TODO) - registered between conv11 and conv12 as before
        self.convgray1 = gated_up()
        self.convgray2 = gated_up()

        # decoder: upsample -> same -> upsample -> same
        self.conv12 = gated_up()
        self.conv13 = gated(hidden_dim)
        self.conv14 = gated_up()
        self.conv15 = gated(hidden_dim)

        # output projection
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size = 3, stride = 1, padding = 'same')


    def forward(self, input_tensor):

        x = input_tensor

        # encoder (includes both downsampling steps)
        for encoder_layer in (self.conv0, self.conv1, self.conv2, self.conv3):
            x = encoder_layer(x)

        # middle and dilated layers, each with a residual skip
        for residual_layer in (self.conv4, self.conv5, self.conv6, self.conv7,
                               self.conv8, self.conv9, self.conv10, self.conv11):
            x = residual_layer(x) + x

        # decoder (the convgray blocks are not part of this path)
        for decoder_layer in (self.conv12, self.conv13, self.conv14, self.conv15):
            x = decoder_layer(x)

        final = self.final(x)

        return final

class DoubleGenerator(nn.Module):
    """
    Two-stage generator.

    g1 maps the input to a single-channel map (called `gray` in the original
    code). g2 receives that map concatenated in front of the original input
    channels and produces the 3-channel output.

    forward() returns (colour output, sigmoid of the single-channel map). Note
    that g2 is fed the raw, pre-sigmoid map.
    """

    def __init__(self, input_dim, hidden_dim, activation):
        super().__init__()

        # stage 1: input -> 1 channel; stage 2: (1 + input_dim) channels -> 3 channels
        self.g1 = Generator(input_dim, hidden_dim, output_dim = 1, activation = activation)
        self.g2 = Generator(input_dim + 1, hidden_dim, output_dim = 3, activation = activation)

    def forward(self, input_tensor):
        gray_logits = self.g1(input_tensor)
        stacked = torch.cat((gray_logits, input_tensor), dim = 1)

        return self.g2(stacked), torch.sigmoid(gray_logits)

class Discriminator(nn.Module):
    """
    Discriminator made of five stride-2 Conv2dBlocks followed by a linear layer
    that emits one raw (un-squashed) score per sample.

    Args:
        input_dim: number of channels of the input tensor.
        hidden_dim: number of channels produced by every conv block.
        input_shape: side length (int) of the input; the flattened feature size
            is computed as latent_h**2, i.e. inputs are treated as square.
        activation: activation module class forwarded to each Conv2dBlock.
    """

    def __init__(self, input_dim, hidden_dim, input_shape, activation):

        super(Discriminator, self).__init__()

        # 5 layers down
        self.conv0 = Conv2dBlock(input_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv4 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)

        # linear to predict the score
        # A 3x3 / stride-2 / padding-1 convolution maps a side length h to ceil(h / 2)
        # (assuming Conv2dBlock hands these arguments to nn.Conv2d unchanged — TODO confirm).
        # Floor-dividing by 2**5 only agrees with that for multiples of 32, so the
        # latent side length is derived one layer at a time instead.
        latent_h = input_shape
        for _ in range(5):
            latent_h = (latent_h + 1) // 2
        self.linear = nn.Linear(latent_h**2 * hidden_dim, 1)


    def forward(self, input_tensor):
        """Return a (batch, 1) tensor of raw scores."""

        x = self.conv0(input_tensor)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        # scores
        x = torch.flatten(x, start_dim = 1)
        x = self.linear(x)

        return x


# Restore the epoch-20 generator checkpoint of the "edgegan" run.
# This run uses the two-stage DoubleGenerator, which takes no `output_dim` argument.
edgegan_weights = os.path.join(MODEL_WEIGHTS_PATH, "edgegan_generator_epoch20.pt")
edgegan_generator = DoubleGenerator(input_dim = 4, hidden_dim = 64, activation = nn.Mish)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on GPU loads on CPU too
edgegan_generator.load_state_dict(torch.load(edgegan_weights, map_location=device))
print(f"Loaded generator weights from {edgegan_weights}.")

# `summary` is defined elsewhere in the notebook; the cell output shows it prints the parameter count
summary(edgegan_generator)

Loaded generator weights from model_weights/edgegan_generator_epoch20.pt.
model has 2.528644 million parameters


# Evaluate the restored generator; `run_inference` is defined elsewhere in the notebook.
# NOTE(review): mode=1 presumably tells run_inference to handle DoubleGenerator's tuple output — confirm.
edgegan_results = run_inference(edgegan_generator, test_dataset=test_dataset, sample_size=10, seed=None, mode=1)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            33.9808 |       0.000402019 |        0.0494001 |         0.0023456 |         0.288227 |       0.0187774 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


import torch_geometric as torch_g
import torch_geometric.nn as gnn


class GNNBlock(nn.Module):
    """
    One GIN graph convolution followed by two Linear + activation layers.

    Args:
        in_channels: feature size of each incoming node.
        out_channels: feature size of each outgoing node.
        activation: activation module class, instantiated with no arguments.
    """

    def __init__(self, in_channels, out_channels, activation = nn.ReLU):

        super(GNNBlock, self).__init__()
        self.conv = gnn.GINConv(nn.Sequential(nn.Linear(in_channels, out_channels), activation()))
        self.post1 = nn.Sequential(nn.Linear(out_channels, out_channels), activation())
        self.post2 = nn.Sequential(nn.Linear(out_channels, out_channels), activation())

    def forward(self, input_tensor, adj):
        """
        Args:
            input_tensor: node features of shape (b, hw, c).
            adj: dense adjacency; every non-zero entry becomes an edge.
                NOTE(review): expected to be (b, hw, hw) — confirm against callers.

        Returns:
            Tensor of shape (b, hw, out_channels).
        """
        b, hw, _ = input_tensor.shape

        # convert adj to sparse; only the connectivity is used, the edge values are dropped.
        # Relies on dense_to_sparse offsetting node indices per batch element for a
        # batched adjacency, so they line up with the flattened (b * hw) node list below.
        edge_index, _ = torch_g.utils.dense_to_sparse(adj)
        edge_index = edge_index.long().to(input_tensor.device)

        x = input_tensor.reshape(b * hw, -1) # (b x hw x c) -> (bhw x c)

        # forward
        x = self.conv(x, edge_index)
        x = self.post1(x)
        x = self.post2(x)

        # reshape back; -1 picks up out_channels, so in_channels != out_channels works too
        x = x.reshape(b, hw, -1)

        return x


class GatedGraphConvModule(nn.Module):
    """
    Graph convolution over the pixels of a feature map.

    Every spatial position is a node. A dense (b x hw x hw) adjacency tensor is
    predicted from the input itself, dampened by a learned per-node term, and
    handed to a GNNBlock whose output is added back to the node features.

    Note that `feature_conv` is created but not called in forward().
    """

    def __init__(self, channels, kernel_size, stride, padding, dilation, activation = nn.ReLU):

        super().__init__()

        # hyper-parameters shared by the two gated conv blocks
        block_kwargs = dict(kernel_size = kernel_size, stride = stride, padding = padding,
                            dilation = dilation, activation = activation)

        # adjacency prediction (registration order kept: feature, edge, scale, offset)
        self.feature_conv = GatedConv2dBlock(channels, channels, **block_kwargs)
        self.edge_conv = GatedConv2dBlock(channels, channels, **block_kwargs)
        self.scaleconv = GatedConv2d(channels, 1, kernel_size = 1, stride = 1, padding = 0, dilation = 1)
        self.offsetconv = GatedConv2d(channels, 1, kernel_size = 1, stride = 1, padding = 0, dilation = 1)

        # graph conv
        self.gnn1 = GNNBlock(channels, channels, activation = activation)


    def forward(self, input_tensor, return_adj = False):
        """
        Returns a tensor with the input's (b, c, h, w) shape, plus the
        adjacency tensor when `return_adj` is true.
        """

        batch, channels, height, width = input_tensor.shape
        n_nodes = height * width

        # 1. features used for edge prediction
        edge_feats = self.edge_conv(input_tensor)

        # 2. pairwise dot products between node vectors -> (b x hw x hw);
        #    the L2 normalisation runs along the last (h*w) axis
        flat = nn.functional.normalize(edge_feats.view(batch, channels, n_nodes), p = 2, dim = 2)
        similarity = torch.bmm(flat.permute(0, 2, 1), flat)

        # 3. dampening term: per-node affine transform of the column-mean similarity,
        #    passed through relu so it is never negative
        scale = self.scaleconv(edge_feats).view(batch, 1, n_nodes)
        offset = self.offsetconv(edge_feats).view(batch, 1, n_nodes)
        column_mean = similarity.mean(dim = 1, keepdim = True) # (b x 1 x hw)
        damping = torch.relu(scale * column_mean + offset)

        # (b x 1 x hw) broadcasts over the rows of the squashed similarity matrix
        adj_tensor = torch.sigmoid(similarity) - damping

        # 4. graph conv on node features with a residual connection
        nodes = input_tensor.view(batch, channels, n_nodes).permute(0, 2, 1) # -> b x hw x c
        nodes = self.gnn1(nodes, adj_tensor) + nodes

        # 5. back to image layout
        x = nodes.permute(0, 2, 1).view(batch, channels, height, width)

        return (x, adj_tensor) if return_adj else x


class Generator(nn.Module):
    """Gated-convolution generator with a graph branch next to a dilated branch.

    The network downsamples twice with stride-2 blocks, applies two residual
    3x3 blocks, and then runs two branches on the same features: a
    ``GatedGraphConvModule`` and a stack of four dilated residual blocks
    (dilation 2, 4, 8, 16). Both branch outputs are concatenated on the
    channel axis, fused, passed through two upsampling stages and projected to
    ``output_dim`` channels by a plain 3x3 convolution.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        """Build all layers.

        Args:
            input_dim: Channel count of the input tensor.
            hidden_dim: Channel count used by every intermediate layer.
            output_dim: Channel count of the generated output.
            activation: Forwarded unchanged to ``GatedConv2dBlock`` and
                ``GatedGraphConvModule``.
        """
        super().__init__()

        def gated(in_channels, stride=1, dilation=1):
            # 3x3 gated block producing hidden_dim channels. Every layer in
            # this network uses padding equal to its dilation.
            return GatedConv2dBlock(
                in_channels,
                hidden_dim,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                activation=activation,
            )

        def upsample():
            # Gated upsampling block with scale factor (2, 2), nearest mode.
            return GatedUpConv2dBlock(
                hidden_dim,
                hidden_dim,
                scale_factor=(2, 2),
                kernel_size=3,
                stride=1,
                padding=1,
                dilation=1,
                mode='nearest',
            )

        # NOTE: attribute names and registration order are kept exactly as in
        # the saved checkpoints so that load_state_dict keeps working.

        # encoder: same -> downsample -> same -> downsample
        self.conv0 = gated(input_dim)
        self.conv1 = gated(hidden_dim, stride=2)
        self.conv2 = gated(hidden_dim)
        self.conv3 = gated(hidden_dim, stride=2)

        # two residual blocks at the lowest resolution
        self.conv4 = gated(hidden_dim)
        self.conv5 = gated(hidden_dim)

        # graph branch
        self.graphconv1 = GatedGraphConvModule(
            hidden_dim,
            kernel_size=3,
            stride=1,
            padding=1,
            dilation=1,
            activation=activation,
        )

        # dilated branch
        self.conv6 = gated(hidden_dim, dilation=2)
        self.conv7 = gated(hidden_dim, dilation=4)
        self.conv8 = gated(hidden_dim, dilation=8)
        self.conv9 = gated(hidden_dim, dilation=16)

        # fusion: conv10 consumes the concatenation of both branches
        self.conv10 = gated(2 * hidden_dim)
        self.conv11 = gated(hidden_dim)

        # decoder: upsample -> same -> upsample -> same
        self.conv12 = upsample()
        self.conv13 = gated(hidden_dim)
        self.conv14 = upsample()
        self.conv15 = gated(hidden_dim)

        # output projection
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size=3, stride=1, padding='same')

        # 1x1 conv compressing a feature map into one channel (contrastive
        # learning). It is registered here but not called by forward() in this
        # variant.
        self.conv_feature = nn.Conv2d(hidden_dim, 1, kernel_size=1, stride=1, padding='same')

    def forward(self, input_tensor, return_adj=False):
        """Run the generator.

        Args:
            input_tensor: Input batch with ``input_dim`` channels.
            return_adj: When True, also return the adjacency tensor produced
                by the graph branch.

        Returns:
            The generated tensor, or ``(generated, adj)`` when ``return_adj``
            is True.
        """
        # encoder
        feats = input_tensor
        for stage in (self.conv0, self.conv1, self.conv2, self.conv3):
            feats = stage(feats)

        # residual preprocessing at the lowest resolution
        for block in (self.conv4, self.conv5):
            feats = block(feats) + feats

        # graph branch (the adjacency is always requested from the module)
        graph_out, adj = self.graphconv1(feats, return_adj=True)
        graph_out = graph_out + feats

        # dilated branch with residual skips
        dilated = feats
        for block in (self.conv6, self.conv7, self.conv8, self.conv9):
            dilated = block(dilated) + dilated

        # fuse both branches, with a skip from the pre-branch features
        fused = self.conv10(torch.cat([dilated, graph_out], dim=1)) + feats
        fused = self.conv11(fused) + fused

        # decoder
        for stage in (self.conv12, self.conv13, self.conv14, self.conv15):
            fused = stage(fused)

        output = self.final(fused)

        return (output, adj) if return_adj else output


class Discriminator(nn.Module):
    """Convolutional discriminator: five stride-2 blocks and a linear scorer.

    The input is reduced by five ``Conv2dBlock`` layers (3x3 kernel, stride 2,
    padding 1), flattened, and mapped to a single raw score per sample. No
    sigmoid is applied here.
    """

    def __init__(self, input_dim, hidden_dim, input_shape, activation):
        """Build the layers.

        Args:
            input_dim: Channel count of the input images.
            hidden_dim: Channel count of every convolutional block.
            input_shape: Side length (int) of the square input images; used
                to size the final linear layer.
            activation: Forwarded unchanged to ``Conv2dBlock``.
        """

        super(Discriminator, self).__init__()

        # 5 layers down
        self.conv0 = Conv2dBlock(input_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv4 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)

        # linear to predict classes.
        # A 3x3 / stride-2 / padding-1 convolution maps a side length n to
        # floor((n - 1) / 2) + 1, i.e. ceil(n / 2). Plain `input_shape // 2**5`
        # only matches that when input_shape is a multiple of 32 and otherwise
        # under-sizes the linear layer, so the size is derived layer by layer.
        # (Assumes Conv2dBlock applies a standard convolution with the
        # arguments given above - TODO confirm.)
        latent_h = input_shape
        for _ in range(5):
            latent_h = (latent_h - 1) // 2 + 1
        self.linear = nn.Linear(latent_h**2 * hidden_dim, 1)


    def forward(self, input_tensor):
        """Return one raw score per sample, shaped ``(batch, 1)``."""

        x = self.conv0(input_tensor)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        # scores
        x = torch.flatten(x, start_dim = 1)
        x = self.linear(x)

        return x


# Restore the trained GraphGAN generator from the local weights folder.
graphgan_weights = os.path.join(MODEL_WEIGHTS_PATH, "graphgan_generator_epoch20.pt")
# input_dim = 4 is presumably RGB + mask channel - TODO confirm against the dataset class.
graphgan_generator = Generator(input_dim = 4, hidden_dim = 64, output_dim = 3, activation = nn.Mish)
# map_location lets a checkpoint saved on another device load onto `device`.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
graphgan_generator.load_state_dict(torch.load(graphgan_weights, map_location=device))
print(f"Loaded generator weights from {graphgan_weights}.")

# Report the model size via the notebook's `summary` helper.
summary(graphgan_generator)

Loaded generator weights from model_weights/graphgan_generator_epoch20.pt.
model has 1.350856 million parameters


# Run the GraphGAN generator on 10 test samples and keep the returned results.
# seed=None presumably means the sample is not fixed between runs - verify in run_inference.
graphgan_results = run_inference(graphgan_generator, test_dataset=test_dataset, sample_size=10, seed=None)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            34.3257 |       0.000375406 |        0.0461299 |        0.00225082 |         0.276581 |       0.0184011 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+


class Generator(nn.Module):
    """Gated-convolution generator that also emits a one-channel feature map.

    The network downsamples twice with stride-2 blocks, applies two residual
    3x3 blocks and four dilated residual blocks (dilation 2, 4, 8, 16), then
    two more residual blocks, two upsampling stages and a plain 3x3 output
    convolution. The features after the dilated stack are additionally
    compressed to one channel by a 1x1 convolution for the contrastive loss.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation):
        """Build all layers.

        Args:
            input_dim: Channel count of the input tensor.
            hidden_dim: Channel count used by every intermediate layer.
            output_dim: Channel count of the generated output.
            activation: Forwarded unchanged to ``GatedConv2dBlock``.
        """
        super().__init__()

        def gated(in_channels, stride=1, dilation=1):
            # 3x3 gated block producing hidden_dim channels. Every layer in
            # this network uses padding equal to its dilation.
            return GatedConv2dBlock(
                in_channels,
                hidden_dim,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                activation=activation,
            )

        def upsample():
            # Gated upsampling block with scale factor (2, 2), nearest mode.
            return GatedUpConv2dBlock(
                hidden_dim,
                hidden_dim,
                scale_factor=(2, 2),
                kernel_size=3,
                stride=1,
                padding=1,
                dilation=1,
                mode='nearest',
            )

        # NOTE: attribute names and registration order are kept exactly as in
        # the saved checkpoints so that load_state_dict keeps working.

        # encoder: same -> downsample -> same -> downsample
        self.conv0 = gated(input_dim)
        self.conv1 = gated(hidden_dim, stride=2)
        self.conv2 = gated(hidden_dim)
        self.conv3 = gated(hidden_dim, stride=2)

        # two residual blocks at the lowest resolution
        self.conv4 = gated(hidden_dim)
        self.conv5 = gated(hidden_dim)

        # four dilated residual blocks
        self.conv6 = gated(hidden_dim, dilation=2)
        self.conv7 = gated(hidden_dim, dilation=4)
        self.conv8 = gated(hidden_dim, dilation=8)
        self.conv9 = gated(hidden_dim, dilation=16)

        # two more residual blocks
        self.conv10 = gated(hidden_dim)
        self.conv11 = gated(hidden_dim)

        # decoder: upsample -> same -> upsample -> same
        self.conv12 = upsample()
        self.conv13 = gated(hidden_dim)
        self.conv14 = upsample()
        self.conv15 = gated(hidden_dim)

        # output projection
        self.final = nn.Conv2d(hidden_dim, output_dim, kernel_size=3, stride=1, padding='same')

        # for contrastive learning: 1x1 conv compressing the feature map into 1 channel
        self.conv_feature = nn.Conv2d(hidden_dim, 1, kernel_size=1, stride=1, padding='same')

    def forward(self, input_tensor):
        """Run the generator.

        Args:
            input_tensor: Input batch with ``input_dim`` channels.

        Returns:
            A tuple ``(output, x_feature)``: the generated tensor and the
            one-channel feature map taken after the dilated blocks.
        """
        # encoder
        feats = input_tensor
        for stage in (self.conv0, self.conv1, self.conv2, self.conv3):
            feats = stage(feats)

        # middle layers followed by dilated convs, all with residual skips
        residual_blocks = (
            self.conv4, self.conv5,
            self.conv6, self.conv7, self.conv8, self.conv9,
        )
        for block in residual_blocks:
            feats = block(feats) + feats

        # extract for contrastive loss
        x_feature = self.conv_feature(feats)

        # middle layers
        for block in (self.conv10, self.conv11):
            feats = block(feats) + feats

        # decoder
        for stage in (self.conv12, self.conv13, self.conv14, self.conv15):
            feats = stage(feats)

        return self.final(feats), x_feature


class Discriminator(nn.Module):
    """Convolutional discriminator: five stride-2 blocks and a linear scorer.

    The input is reduced by five ``Conv2dBlock`` layers (3x3 kernel, stride 2,
    padding 1), flattened, and mapped to a single raw score per sample. No
    sigmoid is applied here.
    """

    def __init__(self, input_dim, hidden_dim, input_shape, activation):
        """Build the layers.

        Args:
            input_dim: Channel count of the input images.
            hidden_dim: Channel count of every convolutional block.
            input_shape: Side length (int) of the square input images; used
                to size the final linear layer.
            activation: Forwarded unchanged to ``Conv2dBlock``.
        """

        super(Discriminator, self).__init__()

        # 5 layers down
        self.conv0 = Conv2dBlock(input_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv1 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv2 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv3 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)
        self.conv4 = Conv2dBlock(hidden_dim, hidden_dim, kernel_size = 3, stride = 2, padding = 1, dilation = 1, activation = activation)

        # linear to predict classes.
        # A 3x3 / stride-2 / padding-1 convolution maps a side length n to
        # floor((n - 1) / 2) + 1, i.e. ceil(n / 2). Plain `input_shape // 2**5`
        # only matches that when input_shape is a multiple of 32 and otherwise
        # under-sizes the linear layer, so the size is derived layer by layer.
        # (Assumes Conv2dBlock applies a standard convolution with the
        # arguments given above - TODO confirm.)
        latent_h = input_shape
        for _ in range(5):
            latent_h = (latent_h - 1) // 2 + 1
        self.linear = nn.Linear(latent_h**2 * hidden_dim, 1)


    def forward(self, input_tensor):
        """Return one raw score per sample, shaped ``(batch, 1)``."""

        x = self.conv0(input_tensor)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        # scores
        x = torch.flatten(x, start_dim = 1)
        x = self.linear(x)

        return x


# Restore the trained ContrastiveGAN generator from the local weights folder.
contrastivegan_weights = os.path.join(MODEL_WEIGHTS_PATH, "contrastivegan_generator1_epoch20.pt")
contrastivegan_generator = Generator(input_dim = 4, hidden_dim = 64, output_dim = 3, activation = nn.Mish)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
state_dict = torch.load(contrastivegan_weights, map_location=device)

# Build a new OrderedDict whose keys do not carry the leading "module." that
# this checkpoint's keys have (presumably it was saved from a wrapped model
# such as nn.DataParallel - confirm). The prefix is stripped only when it is
# actually present, so a checkpoint saved without it still loads instead of
# having the first seven characters of every key cut off.
from collections import OrderedDict
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[len("module."):] if k.startswith("module.") else k
    new_state_dict[name] = v
# load params
contrastivegan_generator.load_state_dict(new_state_dict)

print(f"Loaded generator weights from {contrastivegan_weights}.")
summary(contrastivegan_generator)

Loaded generator weights from model_weights/contrastivegan_generator1_epoch20.pt.
model has 1.11642 million parameters


# Run the ContrastiveGAN generator on 10 test samples and keep the returned results.
# mode=1 presumably tells run_inference that this generator returns (image, feature) - verify in run_inference.
contrastivegan_results = run_inference(contrastivegan_generator, test_dataset=test_dataset, sample_size=10, seed=None, mode=1)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

+--------------------+-------------------+------------------+-------------------+------------------+-----------------+
|   Peak SnR (Whole) |   L2 loss (Whole) |   L2 loss (Mask) |   L1 loss (Whole) |   L1 loss (Mask) |   LPIPS (Whole) |
|--------------------+-------------------+------------------+-------------------+------------------+-----------------|
|            34.2538 |       0.000377194 |        0.0463496 |        0.00227762 |         0.279874 |       0.0205833 |
+--------------------+-------------------+------------------+-------------------+------------------+-----------------+

	id	observed_on_string	observed_on	time_observed_at	time_zone	user_id	user_login	created_at	updated_at	quality_grade	...	geoprivacy	taxon_geoprivacy	coordinates_obscured	positioning_method	positioning_device	species_guess	scientific_name	common_name	iconic_taxon_name	taxon_id
0	73992831	Thu Apr 15 2021 07:10:05 GMT+0900 (GMT+9)	2021-04-15	2021-04-14 22:10:05 UTC	Tokyo	460572	norio_nomura	2021-04-14 23:15:23 UTC	2021-04-28 12:19:04 UTC	research	...	NaN	open	False	NaN	NaN	ニホンアマガエル	Hyla japonica	Japanese Tree Frog	Amphibia	23951
1	73992869	Thu Apr 15 2021 07:16:24 GMT+0900 (GMT+9)	2021-04-15	2021-04-14 22:16:24 UTC	Tokyo	460572	norio_nomura	2021-04-14 23:15:42 UTC	2021-04-28 05:38:57 UTC	research	...	NaN	open	False	NaN	NaN	Japanese Tree Frog	Hyla japonica	Japanese Tree Frog	Amphibia	23951
2	73999658	2021/04/15 10:04 AM AEST	2021-04-15	2021-04-15 00:04:00 UTC	Brisbane	1771883	graham_winterflood	2021-04-15 00:34:04 UTC	2022-02-04 13:11:14 UTC	research	...	NaN	open	False	NaN	NaN	White-lipped Tree Frog	Nyctimystes infrafrenatus	White-lipped Tree Frog	Amphibia	517066
3	74005755	Thu Apr 15 2021 07:08:31 GMT+1000 (GMT+10)	2021-04-15	2021-04-14 21:08:31 UTC	Brisbane	2579853	megahertzia	2021-04-15 01:53:49 UTC	2022-01-04 06:01:13 UTC	research	...	NaN	open	False	NaN	NaN	Desert Tree Frog	Litoria rubella	Desert Tree Frog	Amphibia	23611
4	74006270	2021-04-15 10:38:32 AM GMT+10:00	2021-04-15	2021-04-15 00:38:32 UTC	Brisbane	2235434	kimradnell	2021-04-15 02:01:22 UTC	2021-04-15 13:03:12 UTC	research	...	NaN	open	False	gps	gps	Eastern Dwarf Tree Frog	Litoria fallax	Eastern Dwarf Tree Frog	Amphibia	23656

Metric	$\alpha=$ 1	$\alpha=$ 0.1	$\alpha=$ 0.01	$\alpha=$ 0.001
L1	0.2750	0.2791	0.2495	0.2459
L2	0.04460	0.04667	0.03801	0.03672
LPIPS	0.01882	0.01945	0.02137	0.02247
PSNR	34.43	34.26	35.14	35.29

Metric	GAN	DilatedGatedGan	LocalGlobalGan	PatchGAN
L1	0.2750	0.2720	0.3406	0.2520
L2	0.04460	0.04458	0.07278	0.03893
LPIPS	0.01750	0.01694	0.02063	0.01651
PSNR	34.43	34.44	32.32	35.03

Metric	GAN	EdgeGAN	GraphGAN	ContrastiveGAN
L1	0.2750	0.2882	0.2766	0.2799
L2	0.04460	0.04940	0.04613	0.04635
LPIPS	0.01750	0.01878	0.01840	0.02058
PSNR	34.43	33.98	34.33	34.25

Metric	GAN	DilatedGatedGan	LocalGlobalGan	PatchGAN	EdgeGAN	GraphGAN	ContrastiveGAN
L1	0.2750	0.2720	0.3406	0.2520	0.2882	0.2766	0.2799
L2	0.04460	0.04458	0.07278	0.03893	0.04940	0.04613	0.04635
LPIPS	0.01750	0.01694	0.02063	0.01651	0.01878	0.01840	0.02058
PSNR	34.43	34.44	32.32	35.03	33.98	34.33	34.25

Metric	MLP	CNN
L1	0.3642	0.2510
L2	0.07158	0.03867
LPIPS	0.03109	0.01882
PSNR	32.37	35.10

Metric	MLP	CNN	AutoEncoder	GAN
L1	0.3642	0.2510	0.2361	0.2750
L2	0.07158	0.03867	0.03439	0.04460
LPIPS	0.03109	0.01882	0.02116	0.01750
PSNR	32.37	35.10	35.58	34.43

Image Inpainting¶

Setup¶

Abstract¶

Overview of the notebook¶

1. Introduction: image inpainting¶

2. Data Collection, Exploration and Cleaning¶

2.1 iNaturalist¶

Choice of frogs¶

2.1.1 Visualize some images¶

2.2 Parsing csv metadata to filter irrelevant images¶

2.2.1 Filtering by description¶

2.2.1.1 Getting images with description fields¶

2.2.1.2 Generate blacklist of words using SentenceTransformers¶

2.2.2 Frog species distribution¶

2.2.3 Location distribution¶

2.2.4 Filter under-represented species and locations¶

2.3 Final visualization and download dataset¶

2.4 Train-val-test split¶

3. Data Preprocessing and Augmentation¶

Using torch.utils.data.Dataset¶

Aspect ratio of images¶

Generating masks¶

3.1 Define dataset class¶

3.2 Sample data visualization¶

4. Data Analysis with Deep Learning¶

Experiment Setup¶

Datasets¶

Evaluation metrics¶

Download model weights¶

Define helper functions¶

4.1 Baseline Benchmarks¶

Loss function¶

4.1.1 MLP¶

4.1.1.1 Model architecture¶

4.1.1.2 MLP Results visualisation¶

4.1.2 CNN¶

4.1.2.1 Model architecture¶

4.1.2.2 CNN Results visualisation¶

4.1.3 MLP & CNN results and analysis¶

4.2 Standard Approaches for Image Inpainting¶

4.2.1 AutoEncoder¶

4.2.1.1 Model Architecture¶

4.2.1.2 AutoEncoder Results visualisation¶

4.2.2 Generative Adversarial Networks (GANs)¶

What are GANs?¶

GANs and Adversarial Loss¶

GANs in Image Inpainting¶

Non-saturating loss¶

4.2.2.1 Model Architecture¶

4.2.2.2 GAN Results visualisation¶

4.2.3 All baselines results and analysis¶

4.2.4 Exploring the $\alpha$ hyper-parameter for GAN¶

4.2.4.1 GAN at $\alpha = 0.1$¶

4.2.4.2 GAN at $\alpha = 0.01$¶

4.2.4.3 GAN at $\alpha = 0.001$¶

4.2.4.4 Analysis of $\alpha$¶

4.3 Proposed Improvements to Architectures¶

4.3.1 Introducing dilated and gated convolutions¶

Dilated Convolutions¶

Gated Convolutions¶

Partial Convolutions¶

Gated Convolutions¶

4.3.1.1 Model Architecture¶

4.3.1.2 DilatedGatedGAN Results visualisation¶

4.3.1.3 Analysis of results¶

4.3.2 Local and Global Discriminators¶

4.3.2.1 Model Architecture¶

4.3.2.2 LocalGlobalGAN Results visualisation¶

4.3.2.3 Analysis of results¶

4.3.3 PatchGAN Discriminator¶

4.3.3.1 Model Architecture¶

4.3.3.2 PatchGAN Results visualisation¶

4.3.3.3 Analysis of results¶

4.3.4 Results of improvement techniques (so far)¶

4.4 Generator-focused improvements¶

4.4.1 Edge-First Generation¶

4.4.1.1 Model Architecture¶

4.4.1.2 EdgeGAN Results visualisation¶

4.4.1.3 Analysis of results¶

4.4.2 Graph-based image inpainting¶

Using `torch.utils.data.Dataset`¶