"""
**Author(s):** Alex Keeley (with Alex Holehouse)
"""
from .ctexceptions import CTException
import re
# ----------------------------------------------------------------------------------------------------------------------------------------------------------------
# ----------------------------------------------------------------------------------------------------------------------------------------------------------------
[docs]def compute_random_coil_chemical_shifts(protein_sequence, temperature=25, pH=7.4, use_ggxgg=True, use_perdeuteration=False, asFloat=False):
"""
Function that predicts the random coil chemical shifts for user-provided amino acid sequence corrected for user-provided
conditions. Specifically, chemical shift and general sequence correction factors are from [1], temperature corrections
and glycine corrections are from [2] and the underlying methods associated with correction-factor calculations are in [3].
The correction factors for pertdeuteration are from [4].
Input sequence can be a standard one-letter sequence, but phosphoresidues can also be included (see examples).
Code is based on JavaScript written by Alex Maltsev at the NIH and can be accessed here
(https://www1.bio.ku.dk/english/research/bms/research/sbinlab/randomchemicalshifts/).
Parameters
----------
sequence : str
The sequence of representative abbreviations for the sequence of amino acids
temperature : float or int
Experiment temperature of the sample of amino acids for use in corrected chemical shift calculations
(note units are in degrees celcius). Default value is 25 and should be between 0 and 100.
pH : float or int
pH of the sample of amino acids for use in corrected chemical shift calculations. Default value us 7.4
and should be between 0 and 14.
use_ggxgg : bool
Whether to use GGXGG-based neighbor correction for glycines. Default is True.
use_perdeuteration : bool
Whether perdeuterated correction factors should be used. Default is False. Note this cannot work with
phosphoresidues
asFloat : bool
Whether to populate output dictionaries with float or string variables containing chemical shift numbers.
False (strings) by default.
Returns
-------
output : list of dict
List containing a dictionary for each amino acid in the provided sequence detailing abbreviation and chemical
shifts for the main six different atoms.
Examples
--------
References
----------
[1] Kjaergaard, M. and Poulsen, F.M. (2011) Sequence correction of random coil chemical shifts: correlation between neighbor correction factors and changes in the Ramachandran distribution J. Biomol. NMR 50(2):157-165
[2] Kjaergaard, M., Brander, S. and Poulsen, F.M. (2011) Random coil chemical shifts for intrinsically disordered proteins: Effects of temperature and pH J. Biomol. NMR 49(2):139-49.
[3] Schwarzinger, S., Kroon, G.J., Foss, T.R., Chung. J., Wright, P.E., Dyson, H.J. (2001) Sequence-dependent correction of random coil NMR chemical shifts. JACS 123(13):2970-8.
[4] Cavanagh, J., Fairbrother, W.J., Palmer, A.G., Rance, M. and Skelton, N.J. (2007) Protein NMR Spectroscopy - Principles and practice. 2nd edition. Academic Press
"""
# sanity check temperature
if temperature > 100 or temperature < 0:
raise CTException('Temperature provided (%i) was non-physiological. Remember temperature should be in *celcius*.' %(temperature))
# pH sanity check
if pH < 0 or pH > 14:
raise CTException('pH provided (%i) was non-physiological. Remember pH should be in between 0 and 14.' %(pH))
# SETUP
# The array 'key' is used to translate amino acid letter code into
# numerical index. Value is -1 when there is no such amino acid letter
key_aa1 = [0, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11, -1, 12, 13, 14, 15, 16, -1, 17, 18, -1, 19, -1]
key_aa3 = {
"ALA": 0,
"CYS": 1,
"ASP": 2,
"GLU": 3,
"PHE": 4,
"GLY": 5,
"HIS": 6,
"ILE": 7,
"LYS": 8,
"LEU": 9,
"MET": 10,
"ASN": 11,
"PRO": 12,
"GLN": 13,
"ARG": 14,
"SER": 15,
"THR": 16,
"VAL": 17,
"TRP": 18,
"TYR": 19,
"PSER": 20,
"SEP": 20,
"PS": 20,
"PTHR": 21,
"TPO": 21,
"PT": 21,
"PTYR": 22,
"PTR": 22,
"PY": 22}
# The array 'sequence' keeps the set of amino acid indices for the given
# protein. The array size is not fixed and will accommodate a given
# sequence.
sequence = []
# The XX_av arrays contain uncorrected random coil values for atom type XX at 5C and pH 6.5
ca_av = [
52.747,
58.639,
54.586,
56.805,
57.804,
45.317,
55.933,
61.279,
56.542,
55.359,
55.565,
53.422,
63.236,
56.041,
56.287,
58.604,
62.151,
62.574,
57.464,
57.968,
58.235,
62.985,
57.977]
cb_av = [
19.048,
29.693,
40.942,
30.181,
39.482,
0,
29.974,
38.640,
33.024,
42.262,
32.677,
38.643,
32.155,
29.322,
30.786,
63.706,
69.877,
32.784,
29.262,
38.654,
65.725,
72.449,
38.698]
co_av = [
177.967,
174.736,
176.527,
176.707,
175.721,
174.363,
175.044,
176.451,
176.738,
177.580,
176.423,
175.436,
177.023,
176.118,
176.442,
174.798,
174.647,
176.330,
176.276,
175.773,
174.769,
174.566,
175.740]
n_av = [125.922, 121.101, 121.778, 122.87, 121.691, 110.613, 120.581, 123.509, 123.571, 124.07, 122.428,
119.966, 0, 122.304, 123.182, 117.629, 116.375, 122.879, 122.099, 121.787, 118.870, 119.095, 121.935]
hn_av = [8.575, 8.627, 8.572, 8.692, 8.438, 8.662, 8.653, 8.441, 8.592, 8.485, 8.63,
8.672, 0, 8.67, 8.61, 8.585, 8.413, 8.44, 8.267, 8.396, 9.134, 9.091, 8.336]
ha_av = [4.293, 4.498, 4.588, 4.266, 4.644, 3.977, 4.669, 4.134, 4.287, 4.338, 4.478,
4.696, 4.435, 4.321, 4.319, 4.437, 4.343, 4.076, 4.672, 4.588, 4.435, 4.253, 4.596]
# The XX_t arrays contain temperature corrections for atom type XX
ca_t = [-2.2, -0.9, 2.78, 0.94, -4.74, 3.28, 7.76, -1.98, -0.76, 1.73, 4.09,
2.78, 1.12, 2.26, -1.37, -1.7, -0.03, -2.79, -2.69, -4.99, 1.38, -5.27, -3.68]
cb_t = [4.73, 1.27, 6.53, 4.6, 2.42, 0, 15.54, 4.6, 2.41, 4.92, 9.37,
5.08, -0.19, 3.62, 3.54, 4.4, 2.15, 2.49, 3.07, 2.92, 4.82, 3.66, 0.01]
co_t = [-7.09, -2.55, -4.81, -4.9, -6.9, -3.21, -8.3, -8.73, -7.12, -8.18, -8.17, -
6.11, -4.01, -5.7, -6.9, -4.67, -5.24, -8.09, -7.88, -7.73, -1.71, -3.00, -3.00]
n_t = [-5.25, -8.2, -3.91, -3.7, -11.15, -6.15, 3.3, -12.73, -7.6, -2.85, -6.2, -
3.25, 0, -6.45, -5.3, -3.8, -6.7, -14.16, -10.1, -12, -22.69, -31.26, -25.64]
hn_t = [-8.95, -7, -6.2, -6.46, -7.5, -9.1, -8.3, -7.78, -7.5, -7.45, -7.05, -
6.95, 0, -7.2, -7.05, -7.6, -7.25, -7.64, -7.8, -7.73, -6.47, -8.34, -8.26]
ha_t = [0.69, 0, -0.06, 0.31, 0.4, -0.02, -0.93, 0.37, 0.38, 0.05, -0.48, -
2.92, -0.02, 0.26, 0.4, 0.05, -0.04, 0.47, 0.38, 0.53, 0.48, 1.36, 0.42]
# Neighbor correction factors for Ca. Notice extra zero for 'no Residue' (last element).
ca_a = [-0.007, -0.043, -0.019, -0.025, 0.168, -0.059, -0.036, -0.076, -0.003, 0.084, 0.004,
0.01, -0.206, 0, 0, 0.003, -0.007, -0.095, -0.003, 0.14, -0.304, -0.303, 0.264, 0]
ca_b = [-0.156, -0.011, 0.055, -0.013, -0.018, 0.132, -0.041, -0.27, -0.148, -0.205, -0.096,
0.047, -2.249, 0, -0.128, -0.024, -0.087, -0.242, 0.24, -0.004, 0.180, -0.041, 0.040, 0]
ca_c = [-0.076, 0.095, 0.086, -0.036, -0.301, 0.007, -0.066, -0.217, -0.082, -0.139, -0.039,
0.157, -0.072, 0, -0.062, 0.025, -0.053, -0.212, -0.226, -0.344, -0.160, -0.100, -0.403, 0]
ca_d = [0.007, 0.044, 0.131, 0.007, 0.105, 0.061, 0.09, -0.003, 0.048, -0.031, 0.003,
0.1, -0.024, 0, 0.074, 0.073, 0.05, -0.01, 0.112, 0.123, -0.009, 0.058, 0.131, 0]
# Neighbor correction factors for Cb. Notice extra zero for 'no Residue' (last element).
cb_a = [0.017, 0.07, -0.011, -0.002, -0.086, -0.011, -0.012, 0.097, 0.033, 0.058, 0.036, -
0.006, 0.149, 0, 0.11, -0.243, 0.039, 0.066, -0.159, -0.102, 0.013, 0.085, -0.154, 0]
cb_b = [-0.049, -0.117, -0.03, -0.096, -0.151, -0.128, -0.08, -0.087, -0.077, -0.231, -0.153, -
0.138, -0.701, 0, -0.098, -0.043, -0.044, -0.039, -0.343, -0.147, 0.076, 0.210, -0.511, 0]
cb_c = [0.078, -0.025, -0.104, 0.054, 0.113, 0.031, 0.052, 0, 0.021, -0.08, -0.031, -
0.069, 0.102, 0, -0.005, -0.028, 0.259, 0.034, 0.102, 0.14, -0.175, 0.002, 0.081, 0]
cb_d = [0.058, 0.049, -0.111, -0.004, -0.022, -0.067, -0.011, 0.099, 0.083, 0.119, 0.08, -
0.074, 0.107, 0, 0.031, 0.006, 0.027, 0.078, -0.145, -0.055, -0.065, -0.031, -0.016, 0]
# Neighbor correction factors for Co. Notice extra zero for 'no Residue' (last element).
co_a = [-0.043, -0.046, 0.043, -0.086, -0.144, -0.008, -0.07, -0.249, -0.055, -0.163, -0.138,
0.021, -0.228, 0, -0.066, 0.033, -0.025, -0.21, 0.05, -0.135, -0.136, -0.367, -0.031, 0]
co_b = [-0.181, -0.075, -0.251, 0.065, -0.357, 0.546, -0.195, -0.113, -0.042, -0.083, 0.02, -
0.23, -2.01, 0, -0.047, 0.148, 0.193, -0.06, -0.183, -0.338, 0.245, 0.074, -0.251, 0]
co_c = [0.062, -0.082, 0.143, 0, -0.546, 0.212, -0.194, -0.201, -0.068, -0.109, -0.123,
0.022, 0.096, 0, -0.046, -0.017, -0.112, -0.146, -0.665, -0.569, 0.002, -0.129, -0.605, 0]
co_d = [0.005, -0.044, 0.177, 0.068, -0.007, 0.069, 0.01, -0.037, -0.04, -0.018, -0.015,
0.075, -0.069, 0, -0.051, 0.024, 0.01, -0.017, -0.002, 0.012, 0.123, 0.096, 0.052, 0]
# Neighbor correction factors for N. Notice extra zero for 'no Residue' (last element).
n_a = [0.003, -0.05, 0.041, 0.018, 0.212, -0.024, -0.066, -0.069, -0.036, 0.039, 0.06, -
0.035, -0.091, 0, -0.042, 0.009, -0.02, -0.079, -0.073, 0.203, -0.018, -0.053, 0.247, 0]
n_b = [0.141, 0.147, -0.033, -0.035, -0.445, -0.043, -0.316, 0.249, 0.068, -0.158, -0.044, -
0.256, 1.433, 0, 0.17, 0.167, 0.229, 0.318, -0.231, -0.401, 0.721, 0.485, -0.532, 0]
n_c = [-2.25, 1.102, -1.407, -0.432, 0.187, -2.086, -0.033, 3.166, 0.152, -0.642, -0.053, -
1.25, -1.027, 0, 0.157, 0.212, 0.881, 2.83, 0.051, 0.395, -0.758, 1.619, 0.232, 0]
n_d = [-0.209, -0.072, -1.003, -0.139, -0.089, -0.445, -0.049, 0.649, 0.17, -0.12, 0.015, -
0.757, -0.082, 0, 0.183, -0.62, 0.014, 0.634, -0.764, -0.127, -0.821, -0.193, 0.081, 0]
# Neighbor correction factors for Hn. Notice extra zero for 'no Residue' (last element).
hn_a = [-0.002, -0.007, 0.001, 0, -0.011, 0.007, -0.009, -0.031, -0.005, -0.004, -0.009,
0.004, -0.024, 0, -0.004, 0.006, -0.001, -0.029, -0.044, -0.013, -0.010, -0.019, 0.043, 0]
hn_b = [-0.036, 0.032, 0.01, -0.012, -0.034, 0.032, -0.005, -0.027, -0.024, -0.004, -0.156,
0.031, -0.007, 0, -0.013, -0.046, 0.031, -0.028, -0.023, -0.02, -0.013, -0.044, 0.109, 0]
hn_c = [-0.101, 0.052, -0.148, -0.021, -0.265, -0.206, -0.115, 0.028, -0.078, -0.093, -0.041,
0.002, 0.079, 0, 0.017, -0.023, -0.016, 0.042, -0.53, -0.305, -0.123, 0.091, -0.344, 0]
hn_d = [-0.072, -0.05, -0.104, -0.034, -0.121, 0.001, 0.017, 0.008, -0.003, -0.069, -0.021, -
0.095, -0.03, 0, 0.009, -0.132, -0.035, 0.021, -0.358, -0.151, -0.173, -0.056, -0.137, 0]
# Neighbor correction factors for Ha. Notice extra zero for 'no Residue' (last element).
ha_a = [-0.002, 0.006, 0.015, 0.01, -0.068, 0.014, -0.025, -0.007, -0.012, -0.019, -0.015,
0.001, 0.014, 0, -0.013, 0.006, 0.006, -0.003, -0.075, -0.063, 0.030, 0.027, -0.080, 0]
ha_b = [-0.002, 0.042, 0.005, 0.009, -0.055, 0.032, -0.029, 0.035, 0.002, 0.016, 0.008,
0.016, 0.305, 0, 0.008, 0.069, 0.106, 0.047, -0.067, -0.05, 0.033, 0.073, -0.059, 0]
ha_c = [-0.013, 0.022, -0.007, 0, -0.027, 0.014, 0.006, 0.025, 0.002, 0.012, 0.013,
0.003, -0.027, 0, 0.008, 0.043, 0.036, 0.02, -0.149, -0.037, 0.033, 0.008, -0.055, 0]
ha_d = [-0.002, 0.006, -0.022, -0.005, -0.057, 0.005, -0.012, -0.011, -0.008, 0.001, -0.002, -
0.008, 0.005, 0, -0.007, -0.007, -0.012, -0.012, -0.166, -0.063, -0.028, -0.039, -0.069, 0]
# Neighbor correction factors for Ca for Gly.
gly_ca_a = [-0.011, -0.001, -0.085, -0.001, -0.048, 0, -0.05, -0.054, 0.016, -0.057,
0.019, -0.05, -0.221, 0, 0, -0.006, -0.003, -0.042, -0.082, -0.034, 0, 0, 0, 0]
gly_ca_b = [-0.149, -0.046, -0.045, -0.14, -0.244, 0, -0.09, -0.19, -0.01, -0.188, -
0.03, -0.015, -0.8, -0.002, -0.07, -0.051, -0.041, -0.174, -0.193, -0.245, 0, 0, 0, 0]
gly_ca_c = [0.076, 0.117, 0.329, 0.141, 0.072, 0, 0.02, 0.029, -0.061, 0.052,
0.114, 0.238, 0.033, 0.028, -0.01, 0.129, 0.134, 0.024, 0.11, 0.068, 0, 0, 0, 0]
gly_ca_d = [0.017, 0.007, 0.066, 0.053, 0.006, 0, 0.01, 0.024, 0.013, -0.053, 0.011,
0.018, 0.048, 0.017, 0.02, -0.003, 0.001, 0.016, -0.08, -0.004, 0, 0, 0, 0]
# Neighbor correction factors for Co for Gly.
gly_co_a = [-0.113, -0.076, -0.112, -0.113, -0.297, 0, -0.137, -0.226, -0.075, -0.153, -
0.092, -0.085, -0.558, -0.039, -0.038, -0.073, -0.084, -0.232, -0.296, -0.303, 0, 0, 0, 0]
gly_co_b = [-0.754, -0.46, -0.753, -0.44, -0.832, 0, -0.706, -0.558, -0.494, -0.491, -
0.424, -0.714, -2.799, -0.477, -0.444, -0.4, -0.203, -0.526, -0.565, -0.863, 0, 0, 0, 0]
gly_co_c = [-0.042, -0.24, 0.165, -0.024, -0.23, 0, -0.174, -0.152, -0.187, -0.104, -
0.165, -0.079, -0.058, -0.148, -0.186, -0.13, -0.127, -0.154, -0.31, -0.23, 0, 0, 0, 0]
gly_co_d = [-0.018, -0.056, 0.049, 0.013, -0.097, 0, -0.039, -0.029, -0.034, -0.006, -
0.024, -0.025, -0.024, -0.027, -0.021, -0.054, -0.047, -0.024, -0.185, -0.156, 0, 0, 0, 0]
# Neighbor correction factors for N for Gly.
gly_n_a = [-0.044, -0.081, -0.104, 0.094, -0.142, 0, -0.087, -0.176, -0.056, -0.094, -
0.084, -0.104, -0.115, -0.074, -0.039, -0.045, -0.055, -0.15, -0.234, -0.163, 0, 0, 0, 0]
gly_n_b = [-0.03, -0.078, 0.045, -0.023, -0.415, 0, -0.427, -0.111, -0.143, -0.145, -
0.161, -0.149, -0.134, -0.097, -0.117, 0.032, 0, -0.054, -0.317, -0.381, 0, 0, 0, 0]
gly_n_c = [-0.535, 2.6, 0.764, 1.235, 2.41, 0, 0.913, 4.157, 1.248, 0.809, 1.288,
0.692, 0.756, 1.304, 1.296, 2.114, 2.444, 3.754, 2.703, 2.709, 0, 0, 0, 0]
gly_n_d = [-0.158, -0.003, -0.027, -0.094, -0.408, 0, 0.729, -0.03, -0.061, -0.143, -
0.03, -0.16, -0.157, -0.056, -0.031, -0.154, -0.024, -0.057, -0.821, -0.495, 0, 0, 0, 0]
# Set pSER, pTHR, and pTYR Gly corrections to their SER, THR and TYR equivalents
gly_ca_a[key_aa3["PSER"]] = gly_ca_a[key_aa3["SER"]]
gly_ca_a[key_aa3["PTHR"]] = gly_ca_a[key_aa3["THR"]]
gly_ca_a[key_aa3["PTYR"]] = gly_ca_a[key_aa3["TYR"]]
gly_ca_b[key_aa3["PSER"]] = gly_ca_b[key_aa3["SER"]]
gly_ca_b[key_aa3["PTHR"]] = gly_ca_b[key_aa3["THR"]]
gly_ca_b[key_aa3["PTYR"]] = gly_ca_b[key_aa3["TYR"]]
gly_ca_c[key_aa3["PSER"]] = gly_ca_a[key_aa3["SER"]]
gly_ca_a[key_aa3["PTHR"]] = gly_ca_a[key_aa3["THR"]]
gly_ca_a[key_aa3["PTYR"]] = gly_ca_a[key_aa3["TYR"]]
gly_ca_d[key_aa3["PSER"]] = gly_ca_d[key_aa3["SER"]]
gly_ca_d[key_aa3["PTHR"]] = gly_ca_d[key_aa3["THR"]]
gly_ca_d[key_aa3["PTYR"]] = gly_ca_d[key_aa3["TYR"]]
gly_co_a[key_aa3["PSER"]] = gly_co_a[key_aa3["SER"]]
gly_co_a[key_aa3["PTHR"]] = gly_co_a[key_aa3["THR"]]
gly_co_a[key_aa3["PTYR"]] = gly_co_a[key_aa3["TYR"]]
gly_co_b[key_aa3["PSER"]] = gly_co_b[key_aa3["SER"]]
gly_co_b[key_aa3["PTHR"]] = gly_co_b[key_aa3["THR"]]
gly_co_b[key_aa3["PTYR"]] = gly_co_b[key_aa3["TYR"]]
gly_co_c[key_aa3["PSER"]] = gly_co_a[key_aa3["SER"]]
gly_co_a[key_aa3["PTHR"]] = gly_co_a[key_aa3["THR"]]
gly_co_a[key_aa3["PTYR"]] = gly_co_a[key_aa3["TYR"]]
gly_co_d[key_aa3["PSER"]] = gly_co_d[key_aa3["SER"]]
gly_co_d[key_aa3["PTHR"]] = gly_co_d[key_aa3["THR"]]
gly_co_d[key_aa3["PTYR"]] = gly_co_d[key_aa3["TYR"]]
gly_n_a[key_aa3["PSER"]] = gly_n_a[key_aa3["SER"]]
gly_n_a[key_aa3["PTHR"]] = gly_n_a[key_aa3["THR"]]
gly_n_a[key_aa3["PTYR"]] = gly_n_a[key_aa3["TYR"]]
gly_n_b[key_aa3["PSER"]] = gly_n_b[key_aa3["SER"]]
gly_n_b[key_aa3["PTHR"]] = gly_n_b[key_aa3["THR"]]
gly_n_b[key_aa3["PTYR"]] = gly_n_b[key_aa3["TYR"]]
gly_n_c[key_aa3["PSER"]] = gly_n_a[key_aa3["SER"]]
gly_n_a[key_aa3["PTHR"]] = gly_n_a[key_aa3["THR"]]
gly_n_a[key_aa3["PTYR"]] = gly_n_a[key_aa3["TYR"]]
gly_n_d[key_aa3["PSER"]] = gly_n_d[key_aa3["SER"]]
gly_n_d[key_aa3["PTHR"]] = gly_n_d[key_aa3["THR"]]
gly_n_d[key_aa3["PTYR"]] = gly_n_d[key_aa3["TYR"]]
# Arrays for calculation of pH corrected shifts
asp_ph_0 = [[53.05, 37.81, 175.25, 120.10, 8.68, 4.70],
[54.59, 40.95, 176.53, 121.78, 8.57, 4.58]]
glu_ph_0 = [[55.90, 28.65, 176.19, 122.04, 8.58, 4.36],
[56.81, 30.19, 176.71, 122.86, 8.69, 4.26]]
his_ph_0 = [[55.30, 28.89, 174.37, 120.00, 8.78, 4.69],
[56.69, 31.27, 175.85, 121.46, 8.44, 4.63]]
sep_ph_0 = [[57.613, 66.527, 174.062, 116.940, 8.805, 4.525],
[58.433, 65.463, 174.977, 119.488, 9.242, 4.409]]
tpo_ph_0 = [[61.827, 73.986, 174.063, 115.936, 8.615, 4.407],
[63.524, 71.718, 174.793, 121.101, 9.397, 4.167]]
ptr_ph_0 = [[57.905, 38.760, 175.605, 121.797, 8.403, 4.604],
[58.002, 38.685, 175.788, 121.953, 8.320, 4.595]]
# Arrays for keeping the chemical shifts calculated for the entered pH value
asp_ph_corr = [0, 0, 0, 0, 0, 0]
glu_ph_corr = [0, 0, 0, 0, 0, 0]
his_ph_corr = [0, 0, 0, 0, 0, 0]
sep_ph_corr = [0, 0, 0, 0, 0, 0]
tpo_ph_corr = [0, 0, 0, 0, 0, 0]
ptr_ph_corr = [0, 0, 0, 0, 0, 0]
# Arrays for CS corrections for deuterated proteins
ca_deut = [-0.68, -0.55, -0.55, -0.69, -0.55, -0.39, -0.55, -0.77, -0.69, -
0.62, -0.69, -0.55, -0.69, -0.69, -0.69, -0.55, -0.55, -0.84, -0.55, -0.55]
cb_deut = [-1.00, -0.71, -0.71, -0.97, -0.71, 0.00, -0.71, -1.28, -1.11, -
1.26, -0.97, -0.71, -1.11, -0.97, -1.11, -0.71, -0.71, -1.20, -0.71, -0.71]
# RUN
cur = 0
delta_T = temperature - 5 # difference between the given temperature and 5 degrees C
ca_pred = 0
cb_pred = 0
co_pred = 0
n_pred = 0
hn_pred = 0
ha_pred = 0
output = []
# Calculate deprotonated fractions of Asp, Glu and His at the given pH
asp_deprot_frac = 7.78 * (10**-5) / (7.78 * (10**-5) + (10**(-pH)))
glu_deprot_frac = 3.43 * (10**-5) / (3.43 * (10**-5) + (10**(-pH)))
his_deprot_frac = 1.67 * (10**-7) / (1.67 * (10**-7) + (10**(-pH)))
sep_deprot_frac = 9.76 * (10**-7) / (9.76 * (10**-7) + (10**(-pH)))
tpo_deprot_frac = 5.00 * (10**-7) / (5.00 * (10**-7) + (10**(-pH)))
ptr_deprot_frac = 1.47 * (10**-6) / (1.47 * (10**-6) + (10**(-pH)))
# Calculate pH corrected chemical shifts
for i in range(6):
asp_ph_corr[i] = asp_deprot_frac * asp_ph_0[1][i] + (1 - asp_deprot_frac) * asp_ph_0[0][i]
glu_ph_corr[i] = glu_deprot_frac * glu_ph_0[1][i] + (1 - glu_deprot_frac) * glu_ph_0[0][i]
his_ph_corr[i] = his_deprot_frac * his_ph_0[1][i] + (1 - his_deprot_frac) * his_ph_0[0][i]
sep_ph_corr[i] = sep_deprot_frac * sep_ph_0[1][i] + (1 - sep_deprot_frac) * sep_ph_0[0][i]
tpo_ph_corr[i] = tpo_deprot_frac * tpo_ph_0[1][i] + (1 - tpo_deprot_frac) * tpo_ph_0[0][i]
ptr_ph_corr[i] = ptr_deprot_frac * ptr_ph_0[1][i] + (1 - ptr_deprot_frac) * ptr_ph_0[0][i]
print(protein_sequence)
sequences = __set_sequence(protein_sequence, key_aa1, key_aa3)
sequence = sequences[0]
aminos = sequences[1]
for j in range(len(sequence) - 4):
output.append({"Res": aminos[j], "Index": j})
# deuterated parameters not available for phosphorylated Residues
if ((22 in sequence) or (25 in sequence) or (28 in sequence)) and use_perdeuteration:
raise CTException('Phosphorylated amino acids not supported in deuterated proteins')
while cur < (len(sequence) - 4):
if sequence[cur + 2] == 2: # Aspartate
ca_pred = asp_ph_corr[0]
cb_pred = asp_ph_corr[1]
co_pred = asp_ph_corr[2]
n_pred = asp_ph_corr[3]
hn_pred = asp_ph_corr[4]
ha_pred = asp_ph_corr[5]
elif sequence[cur + 2] == 3: # Glutamate
ca_pred = glu_ph_corr[0]
cb_pred = glu_ph_corr[1]
co_pred = glu_ph_corr[2]
n_pred = glu_ph_corr[3]
hn_pred = glu_ph_corr[4]
ha_pred = glu_ph_corr[5]
elif sequence[cur + 2] == 6: # Histidine
ca_pred = his_ph_corr[0]
cb_pred = his_ph_corr[1]
co_pred = his_ph_corr[2]
n_pred = his_ph_corr[3]
hn_pred = his_ph_corr[4]
ha_pred = his_ph_corr[5]
elif sequence[cur + 2] == key_aa3["SEP"]: # phospho-SER
ca_pred = sep_ph_corr[0]
cb_pred = sep_ph_corr[1]
co_pred = sep_ph_corr[2]
n_pred = sep_ph_corr[3]
hn_pred = sep_ph_corr[4]
ha_pred = sep_ph_corr[5]
elif sequence[cur + 2] == key_aa3["TPO"]: # phospho-THR
ca_pred = tpo_ph_corr[0]
cb_pred = tpo_ph_corr[1]
co_pred = tpo_ph_corr[2]
n_pred = tpo_ph_corr[3]
hn_pred = tpo_ph_corr[4]
ha_pred = tpo_ph_corr[5]
elif sequence[cur + 2] == key_aa3["PTR"]: # phospho-TYR
ca_pred = ptr_ph_corr[0]
cb_pred = ptr_ph_corr[1]
co_pred = ptr_ph_corr[2]
n_pred = ptr_ph_corr[3]
hn_pred = ptr_ph_corr[4]
ha_pred = ptr_ph_corr[5]
else: # any other amino acid
ca_pred = ca_av[sequence[cur + 2]]
cb_pred = cb_av[sequence[cur + 2]]
co_pred = co_av[sequence[cur + 2]]
n_pred = n_av[sequence[cur + 2]]
hn_pred = hn_av[sequence[cur + 2]]
ha_pred = ha_av[sequence[cur + 2]]
# Apply the neighbor and temperature corrections
if sequence[cur + 2] == 5 and use_ggxgg: # special case of glycine
ca_pred += gly_ca_a[sequence[cur + 4]] + gly_ca_b[sequence[cur + 3]] + \
gly_ca_c[sequence[cur + 1]] + gly_ca_d[sequence[cur]] + (delta_T * ca_t[sequence[cur + 2]] / 1000)
co_pred += gly_co_a[sequence[cur + 4]] + gly_co_b[sequence[cur + 3]] + \
gly_co_c[sequence[cur + 1]] + gly_co_d[sequence[cur]] + (delta_T * co_t[sequence[cur + 2]] / 1000)
n_pred += gly_n_a[sequence[cur + 4]] + gly_n_b[sequence[cur + 3]] + \
gly_n_c[sequence[cur + 1]] + gly_n_d[sequence[cur]] + (delta_T * n_t[sequence[cur + 2]] / 1000)
else: # all other Residues
ca_pred += ca_a[sequence[cur + 4]] + ca_b[sequence[cur + 3]] + ca_c[sequence[cur + 1]] + \
ca_d[sequence[cur]] + (delta_T * ca_t[sequence[cur + 2]] / 1000)
co_pred += co_a[sequence[cur + 4]] + co_b[sequence[cur + 3]] + co_c[sequence[cur + 1]] + \
co_d[sequence[cur]] + (delta_T * co_t[sequence[cur + 2]] / 1000)
n_pred += n_a[sequence[cur + 4]] + n_b[sequence[cur + 3]] + n_c[sequence[cur + 1]] + \
n_d[sequence[cur]] + (delta_T * n_t[sequence[cur + 2]] / 1000)
cb_pred += cb_a[sequence[cur + 4]] + cb_b[sequence[cur + 3]] + cb_c[sequence[cur + 1]] + \
cb_d[sequence[cur]] + (delta_T * cb_t[sequence[cur + 2]] / 1000)
hn_pred += hn_a[sequence[cur + 4]] + hn_b[sequence[cur + 3]] + hn_c[sequence[cur + 1]] + \
hn_d[sequence[cur]] + (delta_T * hn_t[sequence[cur + 2]] / 1000)
ha_pred += ha_a[sequence[cur + 4]] + ha_b[sequence[cur + 3]] + ha_c[sequence[cur + 1]] + \
ha_d[sequence[cur]] + (delta_T * ha_t[sequence[cur + 2]] / 1000)
if use_perdeuteration:
ca_pred += ca_deut[sequence[cur + 2]]
cb_pred += cb_deut[sequence[cur + 2]]
# write to output
if sequence[cur + 2] == 5: # special output for gly
output[cur].update({"CA": __round3(ca_pred, asFloat)})
output[cur].update({"CB": "**.***"})
output[cur].update({"CO": __round3(co_pred, asFloat)})
else:
output[cur].update({"CA": __round3(ca_pred, asFloat)})
output[cur].update({"CB": __round3(cb_pred, asFloat)})
output[cur].update({"CO": __round3(co_pred, asFloat)})
if sequence[cur + 2] == 12: # special output for pro
output[cur].update({"N": "***.***"})
output[cur].update({"HN": "*.***"})
else:
output[cur].update({"N": __round3(n_pred, asFloat)})
output[cur].update({"HN": __round3(hn_pred, asFloat)})
if use_perdeuteration:
output[cur].update({"HA": "*.***"})
else:
output[cur].update({"HA": __round3(ha_pred, asFloat)})
cur += 1
return output
def __set_sequence(sequence, key1, key3):
"""Translates the amino acid input string into a list of integers representing the same set of amino acids.
Parameters
----------
sequence : str
The alphabetical list of amino acid abbreviations input by the user.
key1 : list
The list of numeric keys used to translate single-letter amino acid abbreviations into representative numbers
key3 : dictionary
The dictionary of alphabetical keys used to translate 2/3-letter amino acid abbreviations into representative numbers
Returns
-------
sequences : list of list
List containing both a list of numeric representatives of input amino acids and a list of the abbreviations for said amino acids
"""
key_aa1 = key1
key_aa3 = key3
i = 0
code = 0
inp = sequence
sequence = []
aminos = []
sequence.append(23)
sequence.append(23)
# Strip white space at beginning and end
inp = inp.strip()
regex = re.findall(r"\(([^)]+)\)|(.)", inp)
for i in range(len(regex)):
set = regex[i]
if set[0] is '':
aa1 = set[1]
aminos.append(aa1)
code = ord(aa1[0]) - 65
if (code < 0 or code > 35) or (key_aa1[code] == -1):
continue
sequence.append(key_aa1[code])
else:
aa3 = set[0].upper()
aminos.append(aa3)
if aa3 in key_aa3:
sequence.append(key_aa3[aa3])
else:
continue
sequence.append(23)
sequence.append(23)
sequences = [sequence, aminos]
return sequences
# ------------------------------------------------------------------------------------------------------------------------------------------------------------------
def __round3(num, asFloat=False):
"""Performs statistics-consistent rounding of float variables to precisely three decimal places which can be returned as either a string or a float.
Parameters
----------
num : float
Float number to be rounded
asFloat : bool
Determines whether to return rounded number as string or float. False (returns string type) by default
Returns
-------
strng : str
Rounded number as string
"""
strng = "" + str(round(num * 1000 + 10**(-len(str(num * 1000)) - 1)) / 1000)
strng2 = "" + str(round(num + 10**(-len(str(num)) - 1)))
delta = len(strng) - len(strng2)
if delta == 0:
strng += ".000"
if delta == 2:
strng += "00"
if delta == 3:
strng += "0"
if asFloat:
return strng
else:
return float(strng)