# Source code for acda.plot_functions

import os
import copy
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
from matplotlib.patches import Wedge
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as hierarchy
from scipy.spatial.distance import pdist

def plotDendrogramWithKnownPairs(Z, dfC, dfKS):
    """Draw a dendrogram with arcs linking known pairs below it.

    The upper panel shows the dendrogram of ``Z``. The lower panel draws
    one thin half-ring (``Wedge``) per pair listed in ``dfKS`` whose two
    members are both present in ``dfC.index``. Each arc spans the
    horizontal leaf positions of the two members and is coloured by
    ``dfC.loc[drug1, drug2]`` relative to the largest value in ``dfC``.

    Parameters
    ----------
    Z : 2-D array
        Linkage matrix handed to ``scipy.cluster.hierarchy.dendrogram``.
        It is indexed as ``Z[-10, 2]`` and ``Z[-9, 2]``, so it needs at
        least 10 rows.
    dfC : pandas.DataFrame
        Frame whose index holds the item labels and which can be looked up
        as ``dfC.loc[label1, label2]``.
        NOTE(review): presumably the rows are in the order of the
        observations that produced ``Z`` -- confirm with the caller.
    dfKS : pandas.DataFrame
        Object whose ``.values`` rows each unpack into exactly two labels.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding both panels.
    """

    n_clusters = 10

    fig, ax = plt.subplots(2, 1, figsize=(15, 7), gridspec_kw={'height_ratios': [1, 3]})

    # The line width and the link colour palette are process-wide settings;
    # the finally clause guarantees they are reset even if drawing fails.
    origLineWidth = matplotlib.rcParams['lines.linewidth']
    try:
        matplotlib.rcParams['lines.linewidth'] = 0.5
        palette = cm.gist_ncar(np.linspace(0, 0.5, n_clusters + 1))
        hierarchy.set_link_color_palette([matplotlib.colors.rgb2hex(rgb[:3]) for rgb in palette])

        # Colour links below the midpoint of two consecutive merge heights.
        threshold = (Z[-n_clusters, 2] + Z[-n_clusters + 1, 2]) / 2
        D = dendrogram(Z, ax=ax[0], color_threshold=threshold, above_threshold_color='k',
                       orientation='top', no_labels=True)
        names = dfC.index[D['leaves']]
        ax[0].axis("off")

        # Leaf position of every label (first occurrence wins), built once
        # instead of scanning all leaf names for every pair.
        leaf_position = {}
        for position, name in enumerate(names):
            leaf_position.setdefault(name, position)

        n_items = len(dfC)
        maxco = dfC.stack().max()

        for drug1, drug2 in dfKS.values:
            if drug1 not in dfC.index or drug2 not in dfC.index:
                continue

            # Leaf positions rescaled to fractions of the number of rows.
            p1 = leaf_position[drug1] / n_items
            p2 = leaf_position[drug2] / n_items
            spr = np.abs(p1 - p2) / 2
            co = dfC.loc[drug1, drug2]

            # Half ring centred between the two leaves, reaching both of them.
            w = Wedge((min(p1, p2) + spr, 1), spr, 180, 0, width=0.001,
                      color=cm.hot_r(0.25 + (co / (2. * maxco))))
            ax[1].add_artist(w)

        ax[1].set_ylim([0.5, 1.0])
        ax[1].axis("off")

        plt.subplots_adjust(hspace=0)
    finally:
        hierarchy.set_link_color_palette(None)
        matplotlib.rcParams['lines.linewidth'] = origLineWidth

    return fig

def plotHeatmapPredictedSynergy(dfC, Z, pvalues):
    """Draw a count heatmap of the pairs in ``pvalues`` with dendrograms.

    The figure is a 2x2 grid: a dendrogram of ``Z`` on top, the same
    dendrogram rotated on the right, the heatmap in the lower-left cell and
    a horizontal colour bar labelled ``'Count'`` in the upper-right cell.
    Rows and columns of the heatmap follow the dendrogram leaf order, and
    cells with a zero count are drawn in light grey.

    Parameters
    ----------
    dfC : pandas.DataFrame
        Only its index is used: it defines the labels (and their original
        order) of the heatmap rows and columns.
        NOTE(review): presumably the rows are in the order of the
        observations that produced ``Z`` -- confirm with the caller.
    Z : 2-D array
        Linkage matrix handed to ``scipy.cluster.hierarchy.dendrogram``.
        It is indexed as ``Z[-10, 2]`` and ``Z[-9, 2]``, so it needs at
        least 10 rows.
    pvalues : iterable of 3-tuples
        Keys used to build a three-level ``MultiIndex``. The first level is
        dropped after de-duplication and the last two levels are treated as
        an unordered pair of labels.
        NOTE(review): the meaning of the first level is not visible here.

    Returns
    -------
    matplotlib.figure.Figure
        The assembled figure.
    """

    n_clusters = 10

    fig, ax = plt.subplots(2, 2, figsize=(7, 7), gridspec_kw={'height_ratios': [1, 3], 'width_ratios': [3, 1]})

    # The line width and the link colour palette are process-wide settings;
    # the finally clause guarantees they are reset even if drawing fails.
    origLineWidth = matplotlib.rcParams['lines.linewidth']
    try:
        matplotlib.rcParams['lines.linewidth'] = 0.5
        palette = cm.gist_ncar(np.linspace(0, 0.5, n_clusters + 1))
        hierarchy.set_link_color_palette([matplotlib.colors.rgb2hex(rgb[:3]) for rgb in palette])

        # Colour links below the midpoint of two consecutive merge heights.
        threshold = (Z[-n_clusters, 2] + Z[-n_clusters + 1, 2]) / 2
        dendrogram(Z, ax=ax[1, 1], color_threshold=threshold,
                   above_threshold_color='k', orientation='right', no_labels=True)
        D = dendrogram(Z, ax=ax[0, 0], color_threshold=threshold,
                       above_threshold_color='k', orientation='top', no_labels=True)

        leaves = D['leaves']
        ax[1, 1].axis("off")
        ax[0, 0].axis("off")
        ax[1, 0].axis("off")

        # One entry of value 1 per key in pvalues.
        se = pd.Series(index=pd.MultiIndex.from_tuples(pvalues), data=1)
        # Add every key again with its last two levels swapped (symmetry).
        se = pd.concat([se, se.reorder_levels([0, 2, 1])])
        se = se.loc[~se.index.duplicated()]
        # Drop the first level and count the entries per remaining pair.
        se = se.droplevel(0)
        se = se.groupby(level=[0, 1]).sum()
        # Discard pairs whose two labels are equal (the diagonal).
        pair_levels = se.index.to_frame()
        se = se.loc[pair_levels[0] != pair_levels[1]]
        # Square count matrix aligned to dfC.index on both axes.
        dfDr = se.unstack(0).fillna(0.).reindex(dfC.index, axis=0).reindex(dfC.index, axis=1).fillna(0.).astype(int)

        # Zero counts become NaN so that they are masked in the image.
        dfDr = dfDr.replace(0, np.nan)
        # Rows follow the reversed leaf order, columns the leaf order.
        dfDr = dfDr.iloc[leaves[::-1], leaves]
        masked_M = np.ma.array(dfDr.values, mask=np.isnan(dfDr.values))

        # Work on a copy so the registered 'bwr' colormap is not modified.
        cmap = copy.copy(plt.cm.bwr)
        cmap.set_bad('lightgrey')

        im = ax[1, 0].imshow(masked_M, cmap=cmap, aspect='auto', interpolation='None',
                             extent=(-0.5, masked_M.shape[0] - 0.5, masked_M.shape[1] - 0.5, -0.5))

        ax[0, 1].axis("off")
        clb = fig.colorbar(im, ax=ax[0, 1], fraction=0.5, shrink=0.85, orientation='horizontal', label='Count')
        clb.ax.tick_params(labelsize=10)

        plt.subplots_adjust(hspace=0.001)
        fig.tight_layout()
    finally:
        hierarchy.set_link_color_palette(None)
        matplotlib.rcParams['lines.linewidth'] = origLineWidth

    return fig

def makeBarplotSingleDatasets(df_res_single, figsize=(10, 8), c=['green', 'gold', 'navy', 'grey', 'crimson'], width=0.15, labelsAbove=False, saveName=None, dpi=300):
    """Draw one group of bars with error bars per row of ``df_res_single``.

    Each row is unstacked into a small table that must provide an ``'avg'``
    column (bar heights) and a ``'sem'`` column (error bars). One bar is
    drawn per table row, and the table index supplies the legend labels.

    Parameters
    ----------
    df_res_single : pandas.DataFrame
        One row per bar group. Index entries are indexed as ``v[0]`` and
        ``v[1]`` (both strings) to build the group labels.
        NOTE(review): presumably a two-level row index and two-level
        columns of (method, statistic) -- confirm with the caller.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    c : list
        Bar colours passed to ``ax.bar``.
    width : float
        Width of a single bar; neighbouring bar centres are ``0.8 * width``
        apart.
    labelsAbove : bool
        If true, write the group labels above the bars and hide the x
        ticks. Otherwise use rotated x tick labels.
    saveName : str or None
        If not ``None``, the figure is saved to this path.
    dpi : int
        Resolution used when saving.

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing the bar plot.
    """

    def outlined(text):
        # White outline keeps the label readable on top of the bars.
        text.set_path_effects([path_effects.Stroke(linewidth=3, foreground='white'), path_effects.Normal()])

    legend_bars = None
    legend_labels = None

    fig, ax = plt.subplots(figsize=figsize)
    for pos in range(len(df_res_single)):
        df_temp = df_res_single.iloc[pos].unstack()

        # Offsets centred on the group position and spaced 4/5 of a bar
        # width apart; for five bars this is -8/5, -4/5, 0, 4/5, 8/5 widths.
        n_bars = len(df_temp)
        bpos = width * ((2 * np.arange(n_bars) - (n_bars - 1)) * 2) / 5

        bars = ax.bar(pos + bpos, df_temp['avg'].values, width, label=df_res_single.index[pos], yerr=df_temp['sem'].values, color=c,
                      align='center', alpha=1.0, ecolor='black', capsize=2, edgecolor='w', linewidth=0.25)

        # The first group supplies the legend handles and labels.
        if legend_bars is None:
            legend_bars = bars
            legend_labels = df_temp.index

        if labelsAbove:
            key = df_res_single.index.values[pos]
            top = df_temp['avg'].max()
            outlined(ax.text(pos, top + 0.08, key[0].replace('_', ' '), ha='center'))
            outlined(ax.text(pos, top + 0.05, key[1], ha='center'))

    if labelsAbove:
        ax.set_xticks([])
    else:
        ax.set_xticks(range(len(df_res_single)))
        ax.set_xticklabels([v[0].replace('_', ' ').capitalize() + '\n' + v[1].replace('_', ' ') for v in df_res_single.index.values], rotation=90, va='top')

    ax.set_ylabel('Pearson corr. coef.', fontsize=16)
    ax.axhline(0, color='k', linewidth=0.5)
    # With no rows there is nothing to put in a legend.
    if legend_bars is not None:
        ax.legend(legend_bars, legend_labels, frameon=False, loc='upper right', fontsize=12)
    ax.set_ylim([-0.1, 1.095])
    ax.tick_params(axis='y', labelsize=16)

    fig.tight_layout()

    if saveName is not None:
        fig.savefig(saveName, facecolor='w', dpi=dpi)

    return fig

def makeBarplotCrossDatasets(df_res_cross, figsize=(12, 7), c=['green', 'gold', 'navy', 'grey', 'crimson'], width=0.15, labelsAbove=False, saveName=None, dpi=300):
    """Draw one group of bars with error bars per row of ``df_res_cross``.

    Each row is unstacked into a small table that must provide an ``'avg'``
    column (bar heights) and a ``'sem'`` column (error bars). One bar is
    drawn per table row, and the table index supplies the legend labels.

    Parameters
    ----------
    df_res_cross : pandas.DataFrame
        One row per bar group. Index entries are indexed as ``v[0]``,
        ``v[1]`` and ``v[2]`` (all strings) to build the group labels.
        NOTE(review): presumably a three-level row index and two-level
        columns of (method, statistic) -- confirm with the caller.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    c : list
        Bar colours passed to ``ax.bar``.
    width : float
        Width of a single bar; neighbouring bar centres are ``0.8 * width``
        apart.
    labelsAbove : bool
        If true, write the three label parts above the bars and hide the x
        ticks. Otherwise use rotated x tick labels.
    saveName : str or None
        If not ``None``, the figure is saved to this path.
    dpi : int
        Resolution used when saving.

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing the bar plot.
    """

    def outlined(text):
        # White outline keeps the label readable on top of the bars.
        text.set_path_effects([path_effects.Stroke(linewidth=3, foreground='white'), path_effects.Normal()])

    legend_bars = None
    legend_labels = None

    fig, ax = plt.subplots(figsize=figsize)
    for pos in range(len(df_res_cross)):
        df_temp = df_res_cross.iloc[pos].unstack()

        # Offsets centred on the group position and spaced 4/5 of a bar
        # width apart; for five bars this is -8/5, -4/5, 0, 4/5, 8/5 widths.
        n_bars = len(df_temp)
        bpos = width * ((2 * np.arange(n_bars) - (n_bars - 1)) * 2) / 5

        bars = ax.bar(pos + bpos, df_temp['avg'].values, width, label=df_res_cross.index[pos], yerr=df_temp['sem'].values, color=c,
                      align='center', alpha=1.0, ecolor='black', capsize=2, edgecolor='w', linewidth=0.25)

        # The first group supplies the legend handles and labels.
        if legend_bars is None:
            legend_bars = bars
            legend_labels = df_temp.index

        if labelsAbove:
            key = df_res_cross.index.values[pos]
            top = df_temp['avg'].max()
            outlined(ax.text(pos, top + 0.08, key[0].replace('_', ' '), ha='center'))
            outlined(ax.text(pos, top + 0.05, key[1], ha='center'))
            outlined(ax.text(pos, top + 0.02, key[2], ha='center'))

    if labelsAbove:
        ax.set_xticks([])
    else:
        ax.set_xticks(range(len(df_res_cross)))
        ax.set_xticklabels([v[0].replace('_', ' ').capitalize() + '\n' + v[1].replace('_', ' ') + '-' + v[2].replace('_', ' ') for v in df_res_cross.index.values], rotation=90, va='top')

    ax.set_ylabel('Pearson corr. coef.', fontsize=16)
    ax.axhline(0, color='k', linewidth=0.5)
    # With no rows there is nothing to put in a legend.
    if legend_bars is not None:
        ax.legend(legend_bars, legend_labels, frameon=False, loc='upper right', fontsize=12)
    ax.set_ylim([-0.3, 0.9])
    ax.tick_params(axis='y', labelsize=16)

    fig.tight_layout()

    if saveName is not None:
        fig.savefig(saveName, facecolor='w', dpi=dpi)

    return fig

def drawOneDownsampled(usedf, ax, panel, a, b, c, v, loc='lower right', xlabel='Training set size', ylabel='Pearson corr. coef.', col='val'):
    """Plot group means of one column as lines with SEM error bars.

    ``usedf`` is grouped by its first two index levels. The mean of ``col``
    is drawn as one line per value of the first level against the values of
    the second level, and the standard error of the mean is added as black
    error bars. A panel label is written near the top-left corner, slightly
    outside the axes.

    Parameters
    ----------
    usedf : pandas.DataFrame
        Data with at least two index levels and a column named ``col``.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    panel : str
        Text of the panel label.
    a, b, c : float
        The x-axis limits are set to ``[a - c, b + c]``.
    v : float
        Upper y-axis limit; the lower limit is 0.
    loc : str
        Legend location.
    xlabel, ylabel : str
        Axis labels.
    col : str
        Name of the column to aggregate.

    Returns
    -------
    None
    """

    # Select the column before aggregating, so that other (possibly
    # non-numeric) columns are never averaged.
    grouped = usedf.groupby(level=[0, 1])[col]
    means = grouped.mean().unstack(0)
    errors = grouped.sem().unstack(0)

    means.plot(ax=ax)
    ax.set_xlabel(xlabel, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.set_xlim([a - c, b + c])
    ax.set_ylim([0, v])

    for m in means.columns:
        ax.errorbar(means.index, means[m], yerr=errors[m], fmt='.k', capsize=3)

    ax.legend(loc=loc)

    # Place the panel label relative to the final axis limits.
    x_low, x_high = ax.get_xlim()
    y_low, y_high = ax.get_ylim()
    ax.text(x_low - 0.15 * (x_high - x_low), y_high - 0.05 * (y_high - y_low), panel, fontsize=16)