Hi,
Thanks for bringing us this great model!
I have some questions about predicting PSI for a sequence. According to the paper, the PSI3/5 predictions are calculated from the predicted junction read counts for donors and acceptors. When I run the model, I see only one value for each junction, which I assume is the predicted read count.
- I randomly picked a 1 Mb region and saw that most of the predicted values were very small (< 10^-3). Is that normal?
- For most donors/acceptors, I only see results for one direction. How can I calculate PSI3/5 in that case?
interval = genome.Interval( chromosome="chr1", start=15117807, end=15118327, strand='-', name="", ) interval = interval.resize(dna_client.SEQUENCE_LENGTH_16KB) ontology_curie = [ "UBERON:0008952", # Lung ] output = dna_model.predict_interval( interval=interval, requested_outputs=[dna_client.OutputType.SPLICE_JUNCTIONS], ontology_terms=ontology_curie, )
Output:
JunctionData(junctions=array([Interval(chromosome='chr1', start=15114991, end=15117132, strand='+', name=''),
Interval(chromosome='chr1', start=15112541, end=15117132, strand='+', name=''),
Interval(chromosome='chr1', start=15112541, end=15114470, strand='+', name=''),
Interval(chromosome='chr1', start=15121991, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15121059, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15118904, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15118746, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15118584, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15117807, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15117804, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15117338, end=15124227, strand='-', name=''),
Interval(chromosome='chr1', start=15118904, end=15120971, strand='-', name=''),
Interval(chromosome='chr1', start=15118746, end=15120971, strand='-', name=''),
Interval(chromosome='chr1', start=15118584, end=15120971, strand='-', name=''),
Interval(chromosome='chr1', start=15117807, end=15120971, strand='-', name=''),
Interval(chromosome='chr1', start=15117804, end=15120971, strand='-', name=''),
Interval(chromosome='chr1', start=15117338, end=15120971, strand='-', name=''),
Interval(chromosome='chr1', start=15118904, end=15120786, strand='-', name=''),
Interval(chromosome='chr1', start=15118746, end=15120786, strand='-', name=''),
Interval(chromosome='chr1', start=15118584, end=15120786, strand='-', name=''),
Interval(chromosome='chr1', start=15117807, end=15120786, strand='-', name=''),
Interval(chromosome='chr1', start=15117804, end=15120786, strand='-', name=''),
Interval(chromosome='chr1', start=15117338, end=15120786, strand='-', name=''),
Interval(chromosome='chr1', start=15118904, end=15120576, strand='-', name=''),
Interval(chromosome='chr1', start=15118746, end=15120576, strand='-', name=''),
Interval(chromosome='chr1', start=15118584, end=15120576, strand='-', name=''),
Interval(chromosome='chr1', start=15117807, end=15120576, strand='-', name=''),
Interval(chromosome='chr1', start=15117804, end=15120576, strand='-', name=''),
Interval(chromosome='chr1', start=15117338, end=15120576, strand='-', name=''),
Interval(chromosome='chr1', start=15118746, end=15118821, strand='-', name=''),
Interval(chromosome='chr1', start=15118584, end=15118821, strand='-', name=''),
Interval(chromosome='chr1', start=15117807, end=15118821, strand='-', name=''),
Interval(chromosome='chr1', start=15117804, end=15118821, strand='-', name=''),
Interval(chromosome='chr1', start=15117338, end=15118821, strand='-', name=''),
Interval(chromosome='chr1', start=15118584, end=15118641, strand='-', name=''),
Interval(chromosome='chr1', start=15117807, end=15118641, strand='-', name=''),
Interval(chromosome='chr1', start=15117804, end=15118641, strand='-', name=''),
Interval(chromosome='chr1', start=15117338, end=15118641, strand='-', name=''),
Interval(chromosome='chr1', start=15117807, end=15118326, strand='-', name=''),
Interval(chromosome='chr1', start=15117804, end=15118326, strand='-', name=''),
Interval(chromosome='chr1', start=15117338, end=15118326, strand='-', name='')],
dtype=object), values=array([[9.03921537e-05, 4.46208607e-04],
[3.75819545e-05, 1.15691386e-04],
[1.01393624e-03, 6.83864579e-03],
[7.16255017e-05, 3.77635588e-04],
[2.21747512e-04, 1.35367631e-03],
[8.96488127e-05, 3.47073539e-04],
[3.90054156e-05, 1.31100125e-04],
[4.47979546e-05, 1.64986850e-04],
[5.18145789e-05, 1.58849682e-04],
[3.99900200e-05, 1.27642270e-04],
[4.15748764e-05, 1.30350294e-04],
[1.82703006e-04, 9.80059849e-04],
[4.39373725e-05, 1.82334552e-04],
[5.50519289e-05, 2.45094183e-04],
[8.30852950e-05, 3.00117885e-04],
[3.99218152e-05, 1.52624780e-04],
[4.99283924e-05, 1.81747819e-04],
[8.25972311e-05, 3.78494849e-04],
[4.25024118e-05, 1.38587711e-04],
[4.74643821e-05, 1.62616692e-04],
[5.31442092e-05, 1.66276295e-04],
[4.05318387e-05, 1.22874597e-04],
[4.27420273e-05, 1.24147322e-04],
[5.94982885e-05, 2.55754683e-04],
[4.19346470e-05, 1.18447075e-04],
[4.42714954e-05, 1.35137627e-04],
[4.39011383e-05, 1.27054969e-04],
[4.11875517e-05, 1.12630893e-04],
[4.05040628e-05, 1.07849504e-04],
[1.35374576e-04, 6.55636657e-04],
[8.07774195e-05, 4.03029990e-04],
[6.97672149e-05, 2.80175678e-04],
[4.22936173e-05, 1.49126194e-04],
[4.17271294e-05, 1.44993959e-04],
[6.07975562e-05, 2.34780789e-04],
[6.04256566e-05, 2.70105666e-04],
[4.51626947e-05, 1.55694477e-04],
[4.12592599e-05, 1.38720730e-04],
[1.77618087e-04, 8.54160928e-04],
[5.70015945e-05, 2.59108056e-04],
[6.45233667e-05, 2.67930649e-04]], dtype=float32), metadata= name ontology_curie \
0 junction_UBERON:0008952 gtex Lung polyA plus R... UBERON:0008952
1 junction_UBERON:0008952 total RNA-seq UBERON:0008952
biosample_name biosample_type biosample_life_stage gtex_tissue \
0 upper lobe of left lung tissue adult Lung
1 upper lobe of left lung tissue adult
data_source Assay title
0 gtex polyA plus RNA-seq
1 encode total RNA-seq , interval=Interval(chromosome='chr1', start=15109875, end=15126259, strand='.', name=''), uns=None)
Thanks!