Commit cc5e8088 authored by Guillaume Poirier-Morency's avatar Guillaume Poirier-Morency
Browse files

Binding energy model for supplementary (fix #63)

Modulate the catalytic rate with the probability of cleavage site being
bound.

For Wee et al., we consider a two state system with and without 3'
supplementary bound.

For Yan et al. model, we consider the 5 discrete states resulting from
the finite state automata and compute their binding energy.

Modulate kcat of individual interactions with the binding probability of
nucleotide surrounding the cleavage site.

Fix missing RT factor in 'binding_energy'.

Regenerate 3mer and 4mer tables.
parent b1c452dc
No preview for this file type
No preview for this file type
......@@ -190,17 +190,17 @@ is_g_bulge (MirbookingTarget *target, gsize position)
toupper (*mirbooking_sequence_get_subsequence (MIRBOOKING_SEQUENCE (target), position + 3, 1)) == 'G';
}
static gfloat
binding_energy (gfloat *G, gsize n)
static gdouble
binding_energy (gdouble *G, gsize n)
{
guint i;
gfloat Gtot = 0, Z = 0;
gdouble Gtot = 0, Z = 0;
for (i = 0; i < n; i++)
{
if (isfinite (G[i]))
{
Gtot += exp (-G[i]) * G[i];
Z += exp (-G[i]);
Gtot += exp (-G[i] / (R * T)) * G[i];
Z += exp (-G[i] / (R * T));
}
}
......@@ -212,6 +212,27 @@ binding_energy (gfloat *G, gsize n)
return Gtot / Z;
}
static gdouble
binding_probability (gdouble *G, gsize n, gsize j)
{
guint i;
gdouble Z = 0;
for (i = 0; i < n; i++)
{
if (isfinite (G[i]))
{
Z += exp (-G[i] / (R * T));
}
}
if (Z == 0)
{
return INFINITY;
}
return exp (-G[j] / (R * T)) / Z;
}
static gboolean
compute_score (MirbookingScoreTable *score_table,
MirbookingMirna *mirna,
......@@ -222,9 +243,9 @@ compute_score (MirbookingScoreTable *score_table,
{
MirbookingDefaultScoreTable *self = MIRBOOKING_DEFAULT_SCORE_TABLE (score_table);
MirbookingScore ret = {.kf = KF, .kcat = KCAT};
MirbookingScore ret = {.kf = KF, .kcat = 0};
gfloat A_score = 0.0f;
gdouble A_score = 0.0f;
if (position + SEED_LENGTH + 1 <= mirbooking_sequence_get_sequence_length (MIRBOOKING_SEQUENCE (target)) &&
toupper (*mirbooking_sequence_get_subsequence (MIRBOOKING_SEQUENCE (target), position + SEED_LENGTH, 1)) == 'A')
{
......@@ -281,11 +302,11 @@ compute_score (MirbookingScoreTable *score_table,
j = (1l << (2 * 4)) * mirbooking_sequence_get_subsequence_index (MIRBOOKING_SEQUENCE (target), position, 3) +
mirbooking_sequence_get_subsequence_index (MIRBOOKING_SEQUENCE (target), position + 4, 4);
gfloat Z[2] = {seed_score, sparse_matrix_get_float (&self->priv->seed_scores, i, j) + G_BULGED_SEED_SCORE};
gdouble Z[2] = {seed_score, sparse_matrix_get_float (&self->priv->seed_scores, i, j) + G_BULGED_SEED_SCORE};
seed_score = binding_energy (Z, 2);
}
gfloat supplementary_score = 0;
gdouble supplementary_score = 0;
if (self->priv->supplementary_model == MIRBOOKING_DEFAULT_SCORE_TABLE_SUPPLEMENTARY_MODEL_WEE_ET_AL_2012)
{
if (self->priv->supplementary_scores_bytes != NULL)
......@@ -297,8 +318,11 @@ compute_score (MirbookingScoreTable *score_table,
4,
SEED_OFFSET + SEED_LENGTH + 4,
4);
gdouble z[2] = {0, supplementary_score_};
supplementary_score = binding_energy (z, 2);
supplementary_score = MIN (supplementary_score, supplementary_score_);
// require at least the 3' supplementary bindings for cleavage
ret.kcat = binding_probability (z, 5, 1) * KCAT;
}
}
else if (self->priv->supplementary_model == MIRBOOKING_DEFAULT_SCORE_TABLE_SUPPLEMENTARY_MODEL_YAN_ET_AL_2018)
......@@ -349,27 +373,16 @@ compute_score (MirbookingScoreTable *score_table,
SEED_OFFSET + SEED_LENGTH + 9,
3);
gfloat mismatch_threshold = 0.0f;
gdouble z[5] = {0,
B_box,
B_box + C_box,
B_box + C_box + A_box,
B_box + C_box + A_box + D_box};
if (B_box < mismatch_threshold)
{
supplementary_score += B_box;
}
supplementary_score = binding_energy (z, 5);
if (B_box < mismatch_threshold && C_box < mismatch_threshold)
{
supplementary_score += C_box;
}
if (A_box < mismatch_threshold && B_box < mismatch_threshold && C_box <= mismatch_threshold)
{
supplementary_score += A_box;
}
if (A_box < mismatch_threshold && B_box < mismatch_threshold && C_box < mismatch_threshold && D_box < mismatch_threshold)
{
supplementary_score += D_box;
}
// at least A-box and B-box for cleaving
ret.kcat = (binding_probability (z, 5, 3) + binding_probability (z, 5, 4)) * KCAT;
}
}
......
......@@ -6,10 +6,9 @@
#include <string.h>
/*
* See @MirbookingDefaultScoreTable for the detail of the computation. Here,
* mcff returned -19.765 kcal/mol.
* When folding 'CUACCUC&GAGGUAG' with '-zzd', we obtain -18.380 kcal/mol.
*/
#define MCFF_AGO2_SCORE (AGO2_SCORE + 10.3f)
#define MCFF_7MER_GAP 9.01f
struct _MirbookingMcffScoreTable
{
......@@ -110,7 +109,7 @@ compute_score (MirbookingScoreTable *score_table,
gfloat mfe;
sscanf (standard_output, "%f", &mfe);
ret.kr = ret.kf * (1e12 * exp ((mfe + MCFF_AGO2_SCORE) / (R * T)));
ret.kr = ret.kf * (1e12 * exp ((mfe + MCFF_7MER_GAP + AGO2_SCORE) / (R * T)));
*score = ret;
......
......@@ -24,7 +24,7 @@
* (July 2, 2015): 84–95, https://doi.org/10.1016/j.cell.2015.06.029.
*/
#define KF 2.4e-4 // pM^-1s^-1
#define KCAT 3.6e-2 // s^-1
#define KCAT 2.8 // s^-1
/*
* Half life of an average microRNA is is ~119 hours.
......@@ -65,10 +65,14 @@
*
* In addition, the seed setup experiment for Salomon et al. had a 'A' at t1,
* which should account for a -0.56 kcal/mol additional contribution (Schirle
* et al. 2015).
* et al. 2015).
*
* For Wee et al. we have an entropic contribution of -5.65 kcal/mol which is
* consistent with the -5.43 kcal/mol obtained from Salomon et al. We take the
* For the supplementary contribution, both setup had intentionally poor
* supplementary bindings leading to 0.04 kcal/mol from ensemble analysis with
* Yan et al. 2018 model.
*
* For Wee et al. we have an entropic contribution of -5.69 kcal/mol which is
* consistent with the -5.47 kcal/mol obtained from Salomon et al. We take the
* average of the two values.
*
* Reference:
......@@ -80,7 +84,7 @@
* Argonaute2 to MicroRNA Targets,” ed. Phillip D Zamore, ELife 4 (September
* 11, 2015): e07646, https://doi.org/10.7554/eLife.07646.
*/
#define AGO2_SCORE (-5.54f)
#define AGO2_SCORE (-5.69)
/*
* AGO2 has a slight preference for sites starting with 'A' at position t1.
......@@ -90,7 +94,7 @@
* Anchors Argonaute2 to MicroRNA Targets,” ed. Phillip D Zamore, ELife 4
* (September 11, 2015): e07646, https://doi.org/10.7554/eLife.07646.
*/
#define T1_ADENOSINE_SCORE (-0.56f)
#define T1_ADENOSINE_SCORE (-0.56)
/*
* We allow a 'G' nucleation bulge at position t5.
......@@ -103,4 +107,4 @@
* Mode of MicroRNA Target Recognition,” Nature Structural & Molecular
* Biology 19, no. 3 (March 2012): 321–27, https://doi.org/10.1038/nsmb.2230.
*/
#define G_BULGED_SEED_SCORE (1.2f)
#define G_BULGED_SEED_SCORE (1.2)
This diff is collapsed.
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment