Bananas in ATAC output

Hi. I predicted ATAC in a 128 kb (131,072 bp) window using each model fold. The predictions are in good concordance with each other, except for the most important one, ALL_FOLDS, which shows a bend relative to all the other folds.

Is this expected? Is it a bug, or a feature of the distillation process (if ALL_FOLDS is indeed the distilled version)?

Steps to reproduce:

# Prediction window: 131,072 bp on chr2, centred on `tss`.
tss = 178_807_423  # centre position of the window
ext = 131_072 // 2  # half-width, so the full window spans 131,072 bp
lung = 'UBERON:2048'  # ontology term passed to predict_interval below
window = Interval(
    chromosome="chr2",
    start=tss - ext,
    end=tss + ext,
)

# Run the same ATAC prediction once per model version and keep each raw
# result, keyed by the ModelVersion member it came from.
output = {}
for fold in dna_client.ModelVersion:
    print(f"Processing fold: {fold}")
    # A separate client is created for every model version.
    fold_model = dna_client.create(api_key, model_version=fold)
    fold_prediction = fold_model.predict_interval(
        interval=window,
        requested_outputs=[dna_client.OutputType.ATAC],
        ontology_terms=[lung],
    )
    output[fold] = fold_prediction

# One column per fold; each column is that fold's ATAC values flattened
# to 1-D.
atac_df = pd.DataFrame(
    {
        fold_key: prediction.atac.values.flatten()
        for fold_key, prediction in output.items()
    }
)


# Pairwise comparison of the per-fold ATAC values in `atac_df`:
#   * diagonal panels  - log-binned histogram of a single fold;
#   * off-diagonal     - log-log scatter of fold_x (x axis) against
#                        fold_y (y axis) with a dashed y = x reference line.
folds = list(output)
n_folds = len(folds)

# squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so the
# axes[i, j] indexing below also works when only one fold is present.
fig, axes = plt.subplots(
    n_folds, n_folds, figsize=(15, 15), squeeze=False
)

for i, fold_y in enumerate(folds):
    y_values = atac_df[fold_y]
    for j, fold_x in enumerate(folds):
        ax = axes[i, j]
        x_values = atac_df[fold_x]

        if i == j:
            # Diagonal: histogram with 50 log-spaced bin edges. The lower
            # edge uses the smallest strictly positive value because
            # log10 is undefined at zero.
            smallest_positive = x_values[x_values > 0].min()
            bin_edges = np.logspace(
                np.log10(smallest_positive),
                np.log10(x_values.max()),
                50,
            )
            ax.hist(x_values, bins=bin_edges, alpha=0.7)
            ax.set_xscale('log')
            ax.set_title(fold_x)
        else:
            ax.scatter(x_values, y_values, alpha=0.3, s=1)
            ax.set_xscale('log')
            ax.set_yscale('log')
            # Reference diagonal, restricted to the range visible on both
            # axes so that it does not stretch the panel limits.
            x_low, x_high = ax.get_xlim()
            y_low, y_high = ax.get_ylim()
            lims = [max(x_low, y_low), min(x_high, y_high)]
            ax.plot(lims, lims, 'r--', alpha=0.5)

        # Label only the outer edge of the grid to reduce clutter.
        if j == 0:
            ax.set_ylabel(fold_y)
        if i == n_folds - 1:
            ax.set_xlabel(fold_x)

plt.tight_layout()
plt.show()