import numpy as np
import pandas as pd
from scipy.stats import kendalltau
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from collections import OrderedDict
import os
import skimage.io as skio


# Maps a metric column name of the evaluation DataFrame to the human-readable
# label used for axis labels and plot titles.
dict_metrics = {
    # Rates
    "tpr": "TPR, Recall, Sensitivity",
    "tnr": "TNR, Specificity, Selectivity",
    "fpr": "FPR",
    "fnr": "FNR, Miss rate",
    "mpr": "May positive rate (MPR)",
    "mnr": "May negative rate (MNR)",
    # Aggregate scores
    "f1": "F1",
    "precision": "Precision",
    "edge_coherence": "Edge coherence",
    "accuracy_must_may": "Accuracy (ignoring cannot)",
}

# Maps a model identifier (as stored in the `model` column) to the short label
# shown on plot ticks. Insertion order defines the plotting order.
dict_models = OrderedDict(
    [
        ("e84a8b06", "m"),
        ("b0a3ff6a", "ms"),
        ("5a53032f", "msd"),
        ("0575609e", "pseudo, msd"),
        ("90fa5734", "msd, dada, pseudo"),
        ("34e84fe9", "pseudo, msd, dada"),
        ("ae98d8ee", "dada, pseudo, msd"),
        ("23ad5382", "pseudo, dada, msd"),
        ("c4721ec4", "dada, msd"),
        ("d7118205", "msd_spade"),
        ("0da9669d", "msd_spade, pseudo"),
        ("9a8fcb11", "msd_spade, pseudo"),
        ("8051ea3a", "dada, msd_spade"),
        ("49167587", "trash, dada, msd_spade, pseudo"),
    ]
)


def boxplot_metric(df, metric, do_stripplot=False, **snskwargs):
    """Draw one box per model for a single metric on a new 300-dpi figure.

    Parameters
    ----------
    df : DataFrame with a ``model`` column and a column named ``metric``.
    metric : key of ``dict_metrics``; selects the column plotted on the Y axis
        and the Y-axis label.
    do_stripplot : when true, outlier markers of the boxplot are suppressed
        (``fliersize=0``) and the individual observations are overlaid as
        small gray points.
    **snskwargs : forwarded to ``sns.boxplot`` and, when ``do_stripplot`` is
        true, to ``sns.stripplot`` as well.

    Returns
    -------
    The axes object returned by seaborn.
    """
    plt.figure(dpi=300)

    # Arguments common to every seaborn call below.
    shared = dict(x='model', y=metric, data=df)

    if do_stripplot:
        ax = sns.boxplot(fliersize=0., **shared, **snskwargs)
        ax = sns.stripplot(size=2., color='gray', **shared, **snskwargs)
    else:
        ax = sns.boxplot(**shared, **snskwargs)

    # Axis labels
    ax.set_xlabel('Models', rotation=0, fontsize='medium')
    ax.set_ylabel(dict_metrics[metric], rotation=90, fontsize='medium')

    # Drop the left and bottom spines
    sns.despine(left=True, bottom=True)

    # Replace the model identifiers on the X ticks by their short labels
    tick_labels = [dict_models[tick.get_text()] for tick in ax.get_xticklabels()]
    ax.set_xticklabels(
        tick_labels,
        rotation=20,
        verticalalignment='top',
        horizontalalignment='right',
        fontsize='xx-small',
    )

    return ax


def heatmap_kendall(data_dict, metric, models, **snskwargs):
    """Draw the matrix stored under ``metric`` as a heatmap on a new figure.

    Parameters
    ----------
    data_dict : mapping from metric name to a 2-D matrix; ``data_dict[metric]``
        is the matrix that gets plotted.
    metric : key of both ``data_dict`` and ``dict_metrics``; the latter
        provides the plot title.
    models : iterable of labels used for both the X and the Y ticks, in the
        same order as the rows/columns of the matrix.
    **snskwargs : forwarded to ``sns.heatmap``. ``linewidths`` defaults to
        ``.5`` unless given explicitly.

    Returns
    -------
    The axes object returned by ``sns.heatmap``.
    """
    plt.figure(dpi=300)

    # Plot the matrix that was passed in rather than a module-level global,
    # and honour the caller's seaborn options.
    snskwargs.setdefault('linewidths', .5)
    ax = sns.heatmap(data_dict[metric], **snskwargs)

    # No axis labels: the tick labels already name the models
    ax.set_xlabel(None)
    ax.set_ylabel(None)

    # Materialise once so any iterable (e.g. a dict view) can be reused for
    # both axes.
    labels = list(models)
    ax.set_xticklabels(labels,
                       rotation=20,
                       verticalalignment='top',
                       horizontalalignment='right',
                       fontsize='xx-small')
    ax.set_yticklabels(labels,
                       rotation=0,
                       fontsize='xx-small')

    ax.set_title(dict_metrics[metric])

    return ax


# Every entry under ./data is expected to be a run directory whose name
# contains '--'; the component after the first '--' is used as the model
# identifier, and the directory must hold an 'eval_masker.csv' file.
models_paths = list(Path('./data').glob('*'))

models_df = {}
for run_dir in models_paths:
    model_id = run_dir.name.split('--')[1]
    frame = pd.read_csv(run_dir / 'eval_masker.csv', index_col=False)
    # Tag every row with the model it belongs to
    frame['model'] = [model_id] * len(frame)
    models_df[model_id] = frame


# Stack the per-model tables into a single DataFrame with a fresh 0..N-1 index
df = pd.concat([frame for frame in models_df.values()], ignore_index=True)


# Keep in dict_models only the models for which data was actually loaded
models = df.model.unique()
remove_keys = set(dict_models) - set(models)
for m in remove_keys:
    del dict_models[m]


# Thresholds defining a "good" prediction.
tpr_th = 0.95
fpr_th = 0.05
acc_th = 0.5
edgec_th = 0.02  # defined here but not part of the filter below


# Row-wise mask of (image, model) pairs meeting every threshold. It does not
# depend on the image index, so it is computed once.
meets_thresholds = (
    (df.tpr >= tpr_th)
    & (df.fpr <= fpr_th)
    & (df.accuracy_must_may >= acc_th)
    & df.model.isin(models)
)

# An image is "good in all" when it has one passing row per model.
idx_good_in_all = []
for idx in df.idx.unique():
    df_th = df.loc[meets_thresholds & (df.idx == idx)]
    if len(df_th) == len(models):
        idx_good_in_all.append(idx)
print(len(idx_good_in_all))

# Complement: every image index that failed for at least one model
idx_not_good_in_all = list(set(df.idx.unique()).difference(idx_good_in_all))

82


# Thresholds defining a "low metrics" prediction (note the looser edge
# coherence threshold compared with the previous cell).
tpr_th = 0.95
fpr_th = 0.05
edgec_th = 0.05


# Row-wise mask of (image, model) pairs violating at least one threshold.
# It does not depend on the image index, so it is computed once.
violates_any = (
    ((df.tpr <= tpr_th) | (df.fpr >= fpr_th) | (df.edge_coherence >= edgec_th))
    & df.model.isin(models)
)

# Collect every image with at least one violating row, in order of first
# appearance of the image index in df.
idx_not_good_in_any = []
for idx in df.idx.unique():
    df_th = df.loc[violates_any & (df.idx == idx)]
    if len(df_th) > 0:
        idx_not_good_in_any.append(idx)
print(len(idx_not_good_in_any))

25


# Location of the original evaluation images on the local machine.
data_path = '/home/alex/Dropbox/ccai/data/'
imgs_orig_path = os.path.join(data_path, 'floodmasks_eval/imgs')


# Grid of the images listed in idx_not_good_in_any, n_cols per row.
n_cols = 5
# Ceiling division so that a trailing, partially filled row is not dropped;
# at least one row so that plt.subplots always gets a valid grid.
n_rows = max(1, -(-len(idx_not_good_in_any) // n_cols))
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
f, axes = plt.subplots(n_rows, n_cols, dpi=800, squeeze=False)

# axes.ravel() walks the grid row by row, i.e. in the order the images are
# listed.
grid = axes.ravel()
for ax, img_idx in zip(grid, idx_not_good_in_any):
    filename = df.loc[df.idx == img_idx, 'filename'].values[0]
    ax.imshow(skio.imread(os.path.join(imgs_orig_path, filename)))
    ax.axis('off')
    ax.set_title(filename[:15], fontsize='xx-small')

# Hide the frames of any cells left empty in the last row
for ax in grid[len(idx_not_good_in_any):]:
    ax.axis('off')


f.savefig('images_low_metrics_any_model.png', dpi=f.dpi)


# Images to include in the boxplots. Alternatives tried in the notebook:
#   df.idx.unique()       (every image)
#   idx_not_good_in_all
filter_idx = idx_not_good_in_any

# Loop-invariant inputs of the plots
df_boxplots = df.loc[df.idx.isin(filter_idx)]
model_order = list(dict_models.keys())
# Metrics drawn with individual points overlaid instead of outlier markers
stripplot_metrics = ('mnr', 'mpr', 'accuracy_must_may')

for m in dict_metrics:
    if m in stripplot_metrics:
        plot_options = {'do_stripplot': True}
    else:
        plot_options = {'fliersize': 1.}
    boxplot_metric(df_boxplots, metric=m, order=model_order, **plot_options)


# One (n_models x n_models) matrix per metric: Kendall's tau and its p-value
# for every ordered pair of models, indexed in dict_models order.
n_models = len(dict_models)
dict_kendall_mat = {metric: np.zeros([n_models, n_models]) for metric in dict_metrics}
dict_kendallp_mat = {metric: np.zeros([n_models, n_models]) for metric in dict_metrics}


# Images to include in the correlation. Alternatives tried in the notebook:
#   df.idx.unique()       (every image)
#   idx_not_good_in_all
filter_idx = idx_not_good_in_any

# Select the images once and order the rows by image index (stable sort), so
# that position k of every model's vector refers to the same image and the
# observations passed to kendalltau are properly paired.
df_kendall = df.loc[df.idx.isin(filter_idx)].sort_values('idx', kind='mergesort')

for metric in dict_kendall_mat:
    # Raw metric values per model. kendalltau only uses the ordering of the
    # values, so they are passed as they are; argsort() output must not be
    # used here because it is a sorting permutation, not a ranking.
    values_by_model = {
        model_id: df_kendall.loc[df_kendall.model == model_id, metric].values
        for model_id in dict_models
    }
    for idx_i, m_i in enumerate(dict_models):
        for idx_j, m_j in enumerate(dict_models):
            corr, pval = kendalltau(values_by_model[m_i], values_by_model[m_j])
            dict_kendall_mat[metric][idx_i, idx_j] = corr
            dict_kendallp_mat[metric][idx_i, idx_j] = pval


# One heatmap of Kendall's tau per metric, ticks labelled with the model names
for metric in dict_kendall_mat:
    heatmap_kendall(dict_kendall_mat, metric, dict_models.values())

df

	idx	fpr	fnr	mnr	mpr	tpr	tnr	precision	f1	accuracy_must_may	edge_coherence	filename	model
0	0	0.000000	0.011309	0.834115	0.165885	0.988691	1.000000	1.000000	0.994313	0.922224	0.022213	007417_1.jpg	e84a8b06
1	1	0.008064	0.015866	0.535247	0.464753	0.984134	0.991936	0.983243	0.983688	0.866534	0.020105	007567_1.jpg	e84a8b06
2	2	0.000303	0.009482	0.577987	0.422013	0.990518	0.999697	0.999204	0.994842	0.898152	0.014964	007581_1.jpg	e84a8b06
3	3	0.005014	0.003687	0.664616	0.335384	0.996313	0.994986	0.992075	0.994190	0.955980	0.018490	007582_3.jpg	e84a8b06
4	4	0.000000	0.007156	0.508482	0.491518	0.992844	1.000000	1.000000	0.996409	0.775012	0.032354	007584_1.jpg	e84a8b06
...	...	...	...	...	...	...	...	...	...	...	...	...	...
1095	95	0.004993	0.001286	0.299337	0.700663	0.998714	0.995007	0.982357	0.990468	0.824464	0.019180	ubHSgySsaD2kcJrVYKgxew.jpg	8051ea3a
1096	96	0.003956	0.000000	0.093806	0.906194	1.000000	0.996044	0.993211	0.996594	0.875969	0.019686	xeMkVLPzp4EkpCPFrDTSfA.jpg	8051ea3a
1097	97	0.000772	0.000452	0.493276	0.506724	0.999548	0.999228	0.998702	0.999125	0.928307	0.010167	yZUEyQZuMnnrGzWbDwbpaA.jpg	8051ea3a
1098	98	0.000000	0.001572	0.819036	0.180964	0.998428	1.000000	1.000000	0.999213	0.969460	0.008581	zbUjR0xNCXstCKTILH6s_w.jpg	8051ea3a
1099	99	0.000000	0.033730	0.772260	0.227740	0.966270	1.000000	1.000000	0.982846	0.892885	0.008070	Жилой_дом_купца_Богомолова.png	8051ea3a

Imports¶

Constants¶

Functions¶

boxplot_metric¶

heatmap_kendall¶

Read data¶

Read DataFrame of each model¶

Concatenate DataFrames¶

Update dict of models¶

Filter¶

Determine images with high metrics in all models¶

Determine images with low metrics in any model¶

Plot¶

Plot¶

Boxplots¶

Kendall correlation¶

Playground¶