Source code for pychemelt.thermal_oligomer

"""
Main class to handle thermal denaturation data of mono- and oligomers up to tetramers
The current model assumes the proteins' unfolding is reversible
"""

import pandas as pd
import numpy as np

from itertools import chain
from copy import deepcopy

from .main import Sample

from .utils.signals import (
    map_two_state_model_to_signal_fx,
    map_three_state_model_to_signal_fx,
)

from .utils.math import (
    temperature_to_kelvin,
    temperature_to_celsius,
    relative_errors,
    constant_baseline_only_temp,
    linear_baseline_only_temp,
    quadratic_baseline_only_temp,
    exponential_baseline_only_temp
)

from .utils.processing import (
    guess_Tm_from_derivative,
    set_param_bounds,
    adjust_value_to_interval,
    re_arrange_params,
    re_arrange_predictions,
    subset_data,
    estimate_signal_baseline_params,
    oligomer_number,
    transform_to_list
)

from .utils.fitting import (
    fit_oligomer_unfolding_single_slopes,
    fit_oligomer_unfolding_shared_slopes_many_signals,
    fit_oligomer_unfolding_many_signals,
    fit_oligomer_unfolding_three_states_single_slopes,
    fit_oligomer_unfolding_three_states_shared_slopes_many_signals,
    fit_oligomer_unfolding_three_states_many_signals,
    evaluate_fitting_and_refit,
    baseline_fx_name_to_req_params
)



[docs] class ThermalOligomer(Sample): """ Class to hold the data of a DSF experiment of thermal unfolding with different concentrations of an oligomer. """
[docs] def __init__(self, name='Test'): super().__init__(name) self.nr_olig = 0 # Number of oligomer concentrations in data self.model = None # Oligomer model type self.oligomeric = True # Flag for oligomer or denaturant
[docs] def set_model(self, model_name, intermediate_name=None): """ Set thermodynamic model of oligomer used for the analysis. Currently supported are 2 state models of monomeres, dimers, trimeres and tetrameres Parameters ---------- model_name : str name of the used model. Can be: "Monomer", "Dimer", "Trimer", "Tetramer". Case insensitive Raises ------ ValueError If the provided model name is not in the supported list. Notes ----- This method creates/updates the following attributes on the instance: - self.model: oligomeric model used for analysis """ allowed_models = ["monomer", "dimer", "trimer", "tetramer"] allowed_intermediate_models = ["monomeric", "dimeric", "trimeric"] allowed_combinations = ['monomer_monomeric_intermediate', 'dimer_monomeric_intermediate', 'dimer_dimeric_intermediate', 'trimer_monomeric_intermediate', 'trimer_trimeric_intermediate', 'tetramer_monomeric_intermediate'] model = model_name.lower() if model not in allowed_models: raise ValueError( f"Invalid model '{model_name}'. " f"Allowed models are: {', '.join(m.capitalize() for m in allowed_models)}." ) if intermediate_name is not None: intermediate = intermediate_name.lower() if intermediate not in allowed_intermediate_models: raise ValueError( f"Invalid intermediate '{intermediate_name}'. " f"Allowed models are: {', '.join(m.capitalize() for m in allowed_intermediate_models)}." ) model = model + '_' + intermediate + '_intermediate' if model not in allowed_combinations: raise ValueError( f"Invalid intermediate model '{model.capitalize()}'. " f"Allowed models are: {', '.join(m.capitalize() for m in allowed_combinations)}." ) # Save model with first letter uppercase self.model = model.capitalize() return None
[docs] def set_concentrations(self, concentrations=None): """ Set the oligomeric concentrations for the sample Parameters ---------- concentrations : list, optional List of oligomer concentrations. If None, use the sample conditions Notes ----- Creates/updates attribute `oligomer_concentrations_pre` (numpy.ndarray) """ if concentrations is None: concentrations = self.conditions concentrations = transform_to_list(concentrations) self.oligomer_concentrations_pre = np.array(concentrations) return None
[docs] def select_conditions( self, boolean_lst=None): """ For each signal, select the conditions to be used for the analysis Parameters ---------- boolean_lst : list of bool, optional List of booleans selecting which conditions to keep. If None, keep all. Notes ----- Creates/updates several attributes used by downstream fitting: - signal_lst_multiple, temp_lst_multiple : lists of lists with selected data - oligomer_concentrations : list of selected oligomer concentrations - oligomer_concentrations_expanded : flattened numpy array matching expanded signals - boolean_lst, nr_olig : control flags/values """ # If boolean_lst is a boolean, convert it to a list of one boolean boolean_lst = transform_to_list(boolean_lst) if boolean_lst is None: self.signal_lst_multiple = self.signal_lst_pre_multiple self.temp_lst_multiple = self.temp_lst_pre_multiple self.oligomer_concentrations = self.oligomer_concentrations_pre else: self.signal_lst_multiple = [None for _ in range(len(self.signal_lst_pre_multiple))] self.temp_lst_multiple = [None for _ in range(len(self.temp_lst_pre_multiple))] for i in range(len(self.signal_lst_pre_multiple)): self.signal_lst_multiple[i] = [x for j, x in enumerate(self.signal_lst_pre_multiple[i]) if boolean_lst[j]] self.temp_lst_multiple[i] = [x for j, x in enumerate(self.temp_lst_pre_multiple[i]) if boolean_lst[j]] self.oligomer_concentrations = [x for i, x in enumerate(self.oligomer_concentrations_pre) if boolean_lst[i]] self.nr_olig = len(self.oligomer_concentrations) # For compatibility with the plotting functions, we need to have the "nr_den" variable which is functionally the # same as "nr_olig": self.nr_den = self.nr_olig # Expand the number of oligomer concentrations to match the number of signals oligomer_concentrations = [self.oligomer_concentrations for _ in range(self.nr_signals)] self.oligomer_concentrations_expanded = np.concatenate(oligomer_concentrations, axis=0) self.boolean_lst = boolean_lst self.oligomer_concentrations = np.array(self.oligomer_concentrations) # Needed for compatibility self.denaturant_concentrations = self.oligomer_concentrations return None
[docs] def guess_Cp(self): """ Guess the Cp of the assembled oligomer by the number of residues. Raises ------ ValueError If `self.n_residues` is not set. Notes ----- The number of residues represent the total number of residues in the oligomer This method creates/updates attributes used later in fitting: - Cp0 assigned to self.Cp0 """ # If the number of residues is still zero, raise an error if self.n_residues == 0: raise ValueError('The number of residues is still zero. Please set n_residues before calling guess_Cp') Cp0 = self.n_residues * 0.0148 - 0.1267 # Cp0 needs to be positive Cp0 = max(Cp0, 0) self.Cp0 = Cp0 return None
[docs] def estimate_baseline_parameters( self, native_baseline_type, unfolded_baseline_type, window_range_native=12, window_range_unfolded=12): """ Estimate the baseline parameters for multiple signals of the oligomer. The native baseline represents the curve for the assemble doligomer while the unfolded baseline represents the curve for the unfolded and disassembled oligomer. Parameters ---------- native_baseline_type : str one of 'constant', 'linear', 'quadratic', 'exponential' unfolded_baseline_type : str one of 'constant', 'linear', 'quadratic', 'exponential' window_range_native : int, optional Range of the window (in degrees) to estimate the baselines and slopes of the native state window_range_unfolded : int, optional Range of the window (in degrees) to estimate the baselines and slopes of the unfolded state Notes ----- This method sets or updates these attributes: - bNs_per_signal, bUs_per_signal, kNs_per_signal, kUs_per_signal, qNs_per_signal, qUs_per_signal - poly_order_native, poly_order_unfolded """ self.first_param_Ns_per_signal = [] self.first_param_Us_per_signal = [] self.second_param_Ns_per_signal = [] self.second_param_Us_per_signal = [] self.third_param_Ns_per_signal = [] self.third_param_Us_per_signal = [] oligomer_concentrations = np.repeat(self.oligomer_concentrations, np.array(self.signal_lst_multiple).shape[-1]) oligomer_concentrations = np.split(oligomer_concentrations, len(self.oligomer_concentrations)) for i in range(len(self.signal_lst_multiple)): self.adjusted_signal_lst_multiple = list(np.array(self.signal_lst_multiple[i])/ np.array(oligomer_concentrations)) p1Ns, p1Us, p2Ns, p2Us, p3Ns, p3Us = estimate_signal_baseline_params( self.adjusted_signal_lst_multiple, self.temp_lst_multiple[i], native_baseline_type, unfolded_baseline_type, window_range_native, window_range_unfolded, oligomer_number(self.model) ) self.first_param_Ns_per_signal.append(p1Ns) self.first_param_Us_per_signal.append(p1Us) self.second_param_Ns_per_signal.append(p2Ns) self.second_param_Us_per_signal.append(p2Us) self.third_param_Ns_per_signal.append(p3Ns) self.third_param_Us_per_signal.append(p3Us) baseline_fx_dic = { 'constant': constant_baseline_only_temp, 'linear': linear_baseline_only_temp, 'quadratic': quadratic_baseline_only_temp, 'exponential': exponential_baseline_only_temp } self.window_range_native = window_range_native self.window_range_unfolded = window_range_unfolded self.baseline_N_fx = baseline_fx_dic[native_baseline_type] self.baseline_U_fx = baseline_fx_dic[unfolded_baseline_type] self.native_baseline_type = native_baseline_type self.unfolded_baseline_type = unfolded_baseline_type return None
[docs] def create_dg_df(self): """ Create a dataframe of the dg values versus temperature """ # Create a dataframe of the parameters Tm, DHm, Cp0 = self.global_fit_params[:3] T_c = np.arange(0, 150, 0.5) T = temperature_to_kelvin(T_c) Tm = temperature_to_kelvin(Tm) DG = DHm * (1 - T / Tm) + Cp0 * (T - Tm - T * np.log(T / Tm)) dg_df = pd.DataFrame({ 'DG (kcal/mol)': DG, 'Temperature (°C)': T_c }) self.dg_df = dg_df return None
[docs] def fit_thermal_unfolding_global( self, cp_limits=None, dh_limits=None, tm_limits=None, cp_value=None): """ Fit the thermal unfolding of the sample using the signal and temperature data We fit all the curves at once, with global thermodynamic parameters but local slopes and local baselines) Multiple signals can be fitted at the same time, such as 350nm and 330nm Parameters ---------- cp_limits : list, optional List of two values, the lower and upper bounds for the Cp value. If None, bounds set automatically dh_limits : list, optional List of two values, the lower and upper bounds for the dH value. If None, bounds set automatically tm_limits : list, optional List of two values, the lower and upper bounds for the Tm value. If None, bounds set automatically cp_value : float, optional If provided, the Cp value is fixed to this value, the bounds are ignored Notes ----- This is a heavy routine that creates/updates many fitting-related attributes, including: - bNs_expanded, bUs_expanded, kNs_expanded, kUs_expanded, qNs_expanded, qUs_expanded - p0, low_bounds, high_bounds, global_fit_params, rel_errors - predicted_lst_multiple, params_names, params_df, dg_df - flags: global_fit_done, limited_tm, limited_dh, limited_cp, fixed_cp """ # Requires Cp0 if self.Cp0 <= 0: raise ValueError('Cp0 must be positive. Please run guess_Cp before fitting globally.') # Get Guess of Tm: tm_lst = [] x1 = 6 x2 = 11 if not hasattr(self, "deriv_lst_multiple"): self.estimate_derivative() for i in range(len(self.signal_lst_multiple)): tm_lst.append(guess_Tm_from_derivative( self.temp_deriv_lst_multiple[i], self.deriv_lst_multiple[i], x1, x2 )) Tm = np.average(tm_lst) dh = 100 if self.model == 'Dimer': dh = 120 elif self.model == 'Trimer': dh = 150 elif self.model == 'Tetramer': dh = 180 p0 = [Tm, dh, self.Cp0] params_names = [ 'Tm (°C)', 'ΔH (kcal/mol)', 'Cp (kcal/mol/°C)'] self.first_param_Ns_expanded = np.concatenate(self.first_param_Ns_per_signal, axis=0) self.first_param_Us_expanded = np.concatenate(self.first_param_Us_per_signal, axis=0) self.second_param_Ns_expanded = np.concatenate(self.second_param_Ns_per_signal, axis=0) self.second_param_Us_expanded = np.concatenate(self.second_param_Us_per_signal, axis=0) self.third_param_Ns_expanded = np.concatenate(self.third_param_Ns_per_signal, axis=0) self.third_param_Us_expanded = np.concatenate(self.third_param_Us_per_signal, axis=0) p0 = np.concatenate([p0, self.first_param_Ns_expanded, self.first_param_Us_expanded]) # We need to append as many bN and bU as the number of oligomer concentrations # times the number of signal types for signal in self.signal_names: params_names += (['intercept_native - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) for signal in self.signal_names: params_names += (['intercept_unfolded - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.native_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' p0 = np.concatenate([p0, self.second_param_Ns_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' p0 = np.concatenate([p0, self.second_param_Us_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' p0 = np.concatenate([p0, self.third_param_Ns_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' p0 = np.concatenate([p0, self.third_param_Us_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) low_bounds = (p0.copy()) high_bounds = (p0.copy()) low_bounds[3:], high_bounds[3:] = set_param_bounds(p0[3:],params_names[3:]) # Adjusting boundaries based on the size of the oligomer self.limited_tm = tm_limits is not None if self.limited_tm: tm_lower, tm_upper = tm_limits else: tm_lower = p0[0] - 12 if self.model == 'Monomer' else p0[0] - 20 tm_upper = p0[0] + 20 if self.model == 'Monomer' else p0[0] + 30 if self.model in ['Trimer', 'Tetramer']: tm_lower, tm_upper, p0[0] = tm_lower + 10, tm_upper + 10, p0[0] + 10 if self.model == 'Tetramer': tm_lower, tm_upper, p0[0] = tm_lower + 10, tm_upper + 20, p0[0] + 20 low_bounds[0] = tm_lower high_bounds[0] = tm_upper # Verify that the initial guess is within the user-defined limits p0[0] = adjust_value_to_interval(p0[0], tm_lower, tm_upper,1) self.limited_dh = dh_limits is not None if self.limited_dh: dh_lower, dh_upper = dh_limits p0[1] = adjust_value_to_interval(p0[1], dh_lower, dh_upper, 1) else: dh_lower = 10 dh_upper = 500 low_bounds[1] = dh_lower high_bounds[1] = dh_upper self.cp_value = cp_value self.fixed_cp = cp_value is not None self.limited_cp = cp_limits is not None and not self.fixed_cp if self.limited_cp: cp_lower, cp_upper = cp_limits else: cp_lower, cp_upper = 0.1, 5 if self.fixed_cp: # Remove the Cp from p0, low_bounds and high_bounds # Remove Cp0 from the parameter names p0 = np.delete(p0, 2) low_bounds = np.delete(low_bounds, 2) high_bounds = np.delete(high_bounds, 2) params_names.pop(2) else: low_bounds[2] = cp_lower high_bounds[2] = cp_upper # Verify that the Cp initial guess is within the user-defined limits p0[2] = adjust_value_to_interval(p0[2], cp_lower, cp_upper, 0.5) # Populate the expanded signal and temperature lists self.expand_multiple_signal() signal_fx = map_two_state_model_to_signal_fx(self.model) kwargs = { 'oligomer_concentrations' : self.oligomer_concentrations_expanded, 'initial_parameters': p0, 'low_bounds' : low_bounds, 'high_bounds' : high_bounds, 'cp_value' : cp_value, 'baseline_native_fx' : self.baseline_N_fx, 'baseline_unfolded_fx' : self.baseline_U_fx, 'signal_fx' : signal_fx } fit_fx = fit_oligomer_unfolding_single_slopes # Do a quick prefit with a reduced data set if self.pre_fit: kwargs['list_of_temperatures'] = self.temp_lst_expanded_subset kwargs['list_of_signals'] = self.signal_lst_expanded_subset global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) p0 = global_fit_params # Now use the whole dataset kwargs['list_of_temperatures'] = self.temp_lst_expanded kwargs['list_of_signals'] = self.signal_lst_expanded global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) global_fit_params, cov, predicted, p0, low_bounds, high_bounds, result, minimizer = evaluate_fitting_and_refit( global_fit_params, cov, predicted, high_bounds, low_bounds, p0, False, self.limited_cp, self.limited_dh, self.limited_tm, self.fixed_cp, kwargs, fit_fx, result=result, minimizer=minimizer, fit_m_value=False, ) rel_errors = relative_errors(global_fit_params, cov) self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions(predicted, self.nr_signals, self.nr_olig) self.result = result self.minimizer = minimizer self.global_fit_done = True self.params_names = params_names self.create_params_df() self.create_dg_df() return None
[docs] def fit_thermal_unfolding_three_state_global( self, t1_init=0, t2_init=0, dh_limits=None, tm_limits=None, CpTh=None): """ Fit the thermal unfolding of the sample using the signal and temperature data on a three state model We fit all the curves at once, with global thermodynamic parameters but local slopes and local baselines) Multiple signals can be fitted at the same time, such as 350nm and 330nm Parameters ---------- t1_init, t2_init : float, optional initial user given values of the melting temperatures of the three states, t1_init: Native to intermediate, t2_init: intermediate to unfolded dh_limits : list of lists, optional List of two lists with two values each, the lower and upper bounds for the dH values. If None, bounds set automatically tm_limits : list of lists, optional List of two lists with two values each, the lower and upper bounds for the Tm values. If None, bounds set automatically CpTh : float, optional Given estimate of the Total Cp of the system. If given, the Cp value of the transition from native to intermediate will be fitted as Cp1. If not given, the system assumes a total Cp of 0 Notes ----- This is a heavy routine that creates/updates many fitting-related attributes, including: - bNs_expanded, bUs_expanded, kNs_expanded, kUs_expanded, qNs_expanded, qUs_expanded - p0, low_bounds, high_bounds, global_fit_params, rel_errors - predicted_lst_multiple, params_names, params_df, dg_df - flags: global_fit_done, limited_tm, limited_dh, limited_cp, fixed_cp """ # Initial parameters have to be in order: # Global melting temperature 1, Global enthalpy of unfolding 1, # Global melting temperature 2, Global enthalpy of unfolding 2, # Optionally Cp1 # Single intercepts folded, Single slopes folded, # Single intercepts unfolded, Single slopes unfolded if not hasattr(self, "deriv_lst_multiple"): self.estimate_derivative() if CpTh is not None: if CpTh <= 0.1: raise ValueError('CpTh must be large enough for fitting. If you do not wish to fit the Cp values, omit this parameter.') # Parameters for T1, T2, will be added later via gridsearch p0 = [0, 200, 0, 200, self.Cp0] else: p0 = [0, 200, 0, 200, 0] params_names = [ 'Tm1 (°C)', 'ΔH1 (kcal/mol)', 'Tm2 (°C)', 'ΔH2 (kcal/mol)', 'Cp1 (kcal/mol/°C)'] self.first_param_Ns_expanded = np.concatenate(self.first_param_Ns_per_signal, axis=0) self.first_param_Us_expanded = np.concatenate(self.first_param_Us_per_signal, axis=0) self.second_param_Ns_expanded = np.concatenate(self.second_param_Ns_per_signal, axis=0) self.second_param_Us_expanded = np.concatenate(self.second_param_Us_per_signal, axis=0) self.third_param_Ns_expanded = np.concatenate(self.third_param_Ns_per_signal, axis=0) self.third_param_Us_expanded = np.concatenate(self.third_param_Us_per_signal, axis=0) #Estimating intermediate intercept self.intercept_intermediate = (self.first_param_Ns_expanded + self.first_param_Us_expanded) / 2 p0 = np.concatenate([p0, self.first_param_Ns_expanded, self.first_param_Us_expanded, self.intercept_intermediate]) # We need to append as many bN and bU as the number of oligomer concentrations # times the number of signal types for signal in self.signal_names: params_names += (['intercept_native - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) for signal in self.signal_names: params_names += (['intercept_unfolded - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) for signal in self.signal_names: params_names += (['intercept_intermediate - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.native_baseline_type in ['linear', 'quadratic', 'exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' p0 = np.concatenate([p0, self.second_param_Ns_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.unfolded_baseline_type in ['linear', 'quadratic', 'exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' p0 = np.concatenate([p0, self.second_param_Us_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' p0 = np.concatenate([p0, self.third_param_Ns_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' p0 = np.concatenate([p0, self.third_param_Us_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) low_bounds = (p0.copy()) high_bounds = (p0.copy()) low_bounds[5:], high_bounds[5:] = set_param_bounds(p0[5:], params_names[5:]) self.limited_tm = tm_limits is not None if self.limited_tm: tm1_lower, tm1_upper, tm2_lower, tm2_upper = np.array(tm_limits).flatten().tolist() else: tm1_lower = 15 tm1_upper = self.user_max_temp + 20 tm2_lower = 30 tm2_upper = self.user_max_temp + 20 low_bounds[0] = tm1_lower high_bounds[0] = tm1_upper low_bounds[2] = tm2_lower high_bounds[2] = tm2_upper # Verify that the initial guesses are within the user-defined limits p0[0] = adjust_value_to_interval(p0[0], tm1_lower, tm1_upper, 1) p0[2] = adjust_value_to_interval(p0[2], tm2_lower, tm2_upper, 1) self.limited_dh = dh_limits is not None if self.limited_dh: dh1_lower, dh1_upper, dh2_lower, dh2_upper, = np.array(dh_limits).flatten().tolist() p0[1] = adjust_value_to_interval(p0[1], dh1_lower, dh1_upper, 1) p0[3] = adjust_value_to_interval(p0[3], dh2_lower, dh2_upper, 1) else: # Set dh1_lower 30 for monomer, 50 for dimer, 70 for trimer and 90 for tetramer, and dh1_upper to 500 for all lower_value = 30 if "Dimer" in self.model: lower_value = 50 elif "Trimer" in self.model: lower_value = 70 elif "Tetramer" in self.model: lower_value = 90 dh1_lower = lower_value dh1_upper = 500 dh2_lower = lower_value dh2_upper = 500 low_bounds[1] = dh1_lower high_bounds[1] = dh1_upper low_bounds[3] = dh2_lower high_bounds[3] = dh2_upper self.cp_value = CpTh self.fixed_cp = CpTh is not None if not self.fixed_cp: # Remove the Cp from p0, low_bounds and high_bounds # Remove Cp0 from the parameter names p0 = np.delete(p0, 4) low_bounds = np.delete(low_bounds, 4) high_bounds = np.delete(high_bounds, 4) params_names.pop(4) else: cp_lower, cp_upper = 0.1, CpTh - 0.4 low_bounds[4] = cp_lower high_bounds[4] = cp_upper # Verify that the Cp initial guess is within the user-defined limits p0[4] = adjust_value_to_interval(p0[4], cp_lower, cp_upper, 0.5) # Populate the expanded signal and temperature lists self.expand_multiple_signal() signal_fx = map_three_state_model_to_signal_fx(self.model) if t1_init != 0: p0[0], low_bounds[0], high_bounds[0] = t1_init, np.max([t1_init - 15, 20]), t1_init + 20 if t2_init != 0: p0[2], low_bounds[2], high_bounds[2] = t2_init, np.max([t2_init - 15, 20]), t2_init + 20 kwargs = { 'oligomer_concentrations': self.oligomer_concentrations_expanded, 'initial_parameters': p0, 'low_bounds': low_bounds, 'high_bounds': high_bounds, 'baseline_native_fx': self.baseline_N_fx, 'baseline_unfolded_fx': self.baseline_U_fx, 'signal_fx': signal_fx, 'CpTh_value': CpTh, } fit_fx = fit_oligomer_unfolding_three_states_single_slopes step = 7 if t1_init == 0 or t2_init == 0: if self.limited_tm: test_T1s = np.arange(np.max([tm1_lower, 20]), tm1_upper, step) test_T2s = np.arange(np.max([tm2_lower, 35]) + step, tm2_upper, step) else: test_T1s = np.arange(np.max([self.global_min_temp + 10, 20]), self.global_max_temp, step) test_T2s = np.arange(np.max([self.global_min_temp + 20, 20]) + step, self.global_max_temp + step, step) if t1_init != 0: test_T1s = np.array([t1_init]) elif t2_init != 0: test_T2s = np.array([t2_init]) combinations = [(t1, t2) for t1 in test_T1s for t2 in test_T2s] combinations = [(t1, t2) for t1, t2 in combinations if t1 < t2] df = pd.DataFrame(combinations, columns=['t1', 't2']) # Remove all combinations where t1 is 30 degrees higher than t2 df = df[~(df['t1'] - df['t2'] >= 30)] df_tm = pd.DataFrame(np.zeros_like(combinations), columns=['t1', 't2'], dtype=float) df_dh = pd.DataFrame(np.zeros_like(combinations), columns=['dh1', 'dh2'], dtype=float) rss_all = [] #Using a subset for fitting kwargs['list_of_temperatures'] = self.temp_lst_expanded_subset kwargs['list_of_signals'] = self.signal_lst_expanded_subset for index, row in df.iterrows(): kwargs['t1'] = row['t1'] kwargs['t2'] = row['t2'] fit_params, cov, pred, result, minimizer = fit_oligomer_unfolding_three_states_single_slopes( **kwargs, max_nfev=6000) # We need to limit max_nfev for a faster initial grid search #using the fitted parameters as a base for fitting df_tm.iloc[index, 0] = fit_params[0] df_tm.iloc[index, 1] = fit_params[2] df_dh.iloc[index, 0] = fit_params[1] df_dh.iloc[index, 1] = fit_params[3] rss = np.nansum((np.array(pred) - np.array(self.signal_lst_expanded_subset)) ** 2) rss_all.append(rss) idx = np.argmin(rss_all) t1_init, t2_init = df_tm['t1'][idx], df_tm['t2'][idx] p0[0], p0[2] = t1_init, t2_init dh1_init, dh2_init = df_dh['dh1'][idx], df_dh['dh2'][idx] p0[1], p0[3] = dh1_init, dh2_init low_bounds[0], low_bounds[2] = t1_init - 14, t2_init - 14 high_bounds[0], high_bounds[2] = t1_init + 18, t2_init + 18 low_bounds[1], low_bounds[3] = np.max([dh1_init - 100, 30]), np.max([dh2_init - 100, 30]) high_bounds[1], high_bounds[3] = dh1_init + 150, dh2_init + 150 kwargs['initial_parameters'] = p0 kwargs['low_bounds'] = low_bounds kwargs['high_bounds'] = high_bounds kwargs['t1'] = None kwargs['t2'] = None # Now use the whole dataset kwargs['list_of_temperatures'] = self.temp_lst_expanded kwargs['list_of_signals'] = self.signal_lst_expanded global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) global_fit_params, cov, predicted, p0, low_bounds, high_bounds, result, minimizer = evaluate_fitting_and_refit( global_fit_params, cov, predicted, high_bounds, low_bounds, p0, False, False, self.limited_dh, self.limited_tm, self.fixed_cp, kwargs, fit_fx, result=result, minimizer=minimizer, fit_m_value=False, three_state_model=True ) rel_errors = relative_errors(global_fit_params, cov) self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions(predicted, self.nr_signals, self.nr_olig) self.result = result self.minimizer = minimizer self.global_fit_done = True self.params_names = params_names self.create_params_df_three_state() self.create_dg_df() return None
[docs] def fit_thermal_unfolding_global_global(self): """ Fit the thermal unfolding of the sample using the signal and temperature data We fit all the curves at once, with global thermodynamic parameters and global slopes (but local baselines) Multiple refers to the fact that we fit many signals at the same time, such as 350nm and 330nm Must be run after fit_thermal_unfolding_global_multiple Notes ----- Updates global fitting attributes and sets `global_global_fit_done` when complete. """ # Requires global fit done if not self.global_fit_done: self.fit_thermal_unfolding_global() if self.signal_ids is None: self.set_signal_id() param_init = 2 + (self.cp_value is None) p0 = self.global_fit_params[:param_init] low_bounds = self.low_bounds[:param_init] high_bounds = self.high_bounds[:param_init] n_datasets = self.nr_olig * self.nr_signals p1Ns = self.global_fit_params[param_init:param_init + n_datasets] p1Us = self.global_fit_params[param_init + n_datasets:param_init + 2 * n_datasets] low_bounds_p1Ns = self.low_bounds[param_init:param_init + n_datasets] low_bounds_p1Us = self.low_bounds[param_init + n_datasets:param_init + 2 * n_datasets] high_bounds_p1Ns = self.high_bounds[param_init:param_init + n_datasets] high_bounds_p1Us = self.high_bounds[param_init + n_datasets:param_init + 2 * n_datasets] id_start = param_init + 2 * n_datasets params_names = self.params_names[:id_start] if self.native_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' p2Ns = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p2Ns = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p2Ns = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' p2Us = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p2Us = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p2Us = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' p3Ns = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p3Ns = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p3Ns = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' p3Us = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p3Us = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p3Us = self.high_bounds[id_start:id_start + n_datasets] p0 = np.concatenate([p0, p1Ns, p1Us]) low_bounds = np.concatenate([low_bounds, low_bounds_p1Ns, low_bounds_p1Us]) high_bounds = np.concatenate([high_bounds, high_bounds_p1Ns, high_bounds_p1Us]) # Baselines are still independent for each signal and oligomer concentration # Slopes and quadratic terms are shared - per signal only if self.native_baseline_type in ['linear', 'quadratic','exponential']: p2Ns = re_arrange_params(p2Ns, self.nr_signals) low_bounds_p2Ns = re_arrange_params(low_bounds_p2Ns, self.nr_signals) high_bounds_p2Ns = re_arrange_params(high_bounds_p2Ns, self.nr_signals) for kNs_i, low_bounds_kNs_i, high_bounds_kNs_i in zip(p2Ns, low_bounds_p2Ns, high_bounds_p2Ns): p0 = np.append(p0, np.median(kNs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_kNs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_kNs_i)) if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: p2Us = re_arrange_params(p2Us, self.nr_signals) low_bounds_p2Us = re_arrange_params(low_bounds_p2Us, self.nr_signals) high_bounds_p2Us = re_arrange_params(high_bounds_p2Us, self.nr_signals) for kUs_i, low_bounds_kUs_i, high_bounds_kUs_i in zip(p2Us, low_bounds_p2Us, high_bounds_p2Us): p0 = np.append(p0, np.median(kUs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_kUs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_kUs_i)) if self.native_baseline_type in ['quadratic', 'exponential']: p3Ns = re_arrange_params(p3Ns, self.nr_signals) low_bounds_p3Ns = re_arrange_params(low_bounds_p3Ns, self.nr_signals) high_bounds_p3Ns = re_arrange_params(high_bounds_p3Ns, self.nr_signals) for qNs_i, low_bounds_qNs_i, high_bounds_qNs_i in zip(p3Ns, low_bounds_p3Ns, high_bounds_p3Ns): p0 = np.append(p0, np.median(qNs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_qNs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_qNs_i)) if self.unfolded_baseline_type in ['quadratic', 'exponential']: p3Us = re_arrange_params(p3Us, self.nr_signals) low_bounds_p3Us = re_arrange_params(low_bounds_p3Us, self.nr_signals) high_bounds_p3Us = re_arrange_params(high_bounds_p3Us, self.nr_signals) for qUs_i, low_bounds_qUs_i, high_bounds_qUs_i in zip(p3Us, low_bounds_p3Us, high_bounds_p3Us): p0 = np.append(p0, np.median(qUs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_qUs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_qUs_i)) signal_fx = map_two_state_model_to_signal_fx(self.model) kwargs = { 'oligomer_concentrations': self.oligomer_concentrations_expanded, 'list_of_temperatures': self.temp_lst_expanded_subset, 'list_of_signals': self.signal_lst_expanded_subset, 'initial_parameters': p0, 'low_bounds': low_bounds, 'high_bounds': high_bounds, 'cp_value': self.cp_value, 'signal_ids':self.signal_ids, 'baseline_native_fx': self.baseline_N_fx, 'baseline_unfolded_fx': self.baseline_U_fx, 'signal_fx' : signal_fx, } fit_fx = fit_oligomer_unfolding_shared_slopes_many_signals if self.pre_fit: # Do a pre-fit with a reduced data set global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) p0 = global_fit_params # End of pre-fit # Use whole dataset kwargs['list_of_temperatures'] = self.temp_lst_expanded kwargs['list_of_signals'] = self.signal_lst_expanded global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) global_fit_params, cov, predicted, p0, low_bounds, high_bounds, result, minimizer = evaluate_fitting_and_refit( global_fit_params, cov, predicted, high_bounds, low_bounds, p0, False, self.limited_cp, self.limited_dh, self.limited_tm, self.fixed_cp, kwargs, fit_fx, result=result, minimizer=minimizer, fit_m_value=False, ) rel_errors = relative_errors(global_fit_params, cov) self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions( predicted, self.nr_signals, self.nr_olig) self.params_names = params_names self.result = result self.minimizer = minimizer self.create_params_df() self.create_dg_df() self.global_global_fit_done = True return None
[docs] def fit_thermal_unfolding_three_state_global_global(self): """ Fit the thermal unfolding with three states of the sample using the signal and temperature data We fit all the curves at once, with global thermodynamic parameters and global slopes (but local baselines) Multiple refers to the fact that we fit many signals at the same time, such as 350nm and 330nm Must be run after fit_thermal_unfolding_global Notes ----- Updates global fitting attributes and sets `global_global_fit_done` when complete. """ # Requires global fit done if not self.global_fit_done: self.fit_thermal_unfolding_three_state_global() if self.signal_ids is None: self.set_signal_id() param_init = 4 if self.fixed_cp: param_init += 1 p0 = self.global_fit_params[:param_init] low_bounds = self.low_bounds[:param_init] high_bounds = self.high_bounds[:param_init] n_datasets = self.nr_olig * self.nr_signals p1Ns = self.global_fit_params[param_init:param_init + n_datasets] p1Us = self.global_fit_params[param_init + n_datasets:param_init + 2 * n_datasets] intermediate_baseline = self.global_fit_params[param_init + 2* n_datasets:param_init + 3 * n_datasets] low_bounds_p1Ns = self.low_bounds[param_init:param_init + n_datasets] low_bounds_p1Us = self.low_bounds[param_init + n_datasets:param_init + 2 * n_datasets] low_bounds_intermediate_baseline = self.low_bounds[param_init + 2 * n_datasets:param_init + 3 * n_datasets] high_bounds_p1Ns = self.high_bounds[param_init:param_init + n_datasets] high_bounds_p1Us = self.high_bounds[param_init + n_datasets:param_init + 2 * n_datasets] high_bounds_intermediate_baseline = self.high_bounds[param_init + 2 * n_datasets:param_init + 3 * n_datasets] id_start = param_init + 3 * n_datasets params_names = self.params_names[:id_start] if self.native_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' p2Ns = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p2Ns = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p2Ns = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' p2Us = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p2Us = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p2Us = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' p3Ns = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p3Ns = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p3Ns = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' p3Us = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p3Us = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p3Us = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets p0 = np.concatenate([p0, p1Ns, p1Us, intermediate_baseline]) low_bounds = np.concatenate([low_bounds, low_bounds_p1Ns, low_bounds_p1Us, low_bounds_intermediate_baseline]) high_bounds = np.concatenate([high_bounds, high_bounds_p1Ns, high_bounds_p1Us, high_bounds_intermediate_baseline]) # Baselines are still independent for each signal and oligomer concentration # Slopes and quadratic terms are shared - per signal only if self.native_baseline_type in ['linear', 'quadratic','exponential']: p2Ns = re_arrange_params(p2Ns, self.nr_signals) low_bounds_p2Ns = re_arrange_params(low_bounds_p2Ns, self.nr_signals) high_bounds_p2Ns = re_arrange_params(high_bounds_p2Ns, self.nr_signals) for kNs_i, low_bounds_kNs_i, high_bounds_kNs_i in zip(p2Ns, low_bounds_p2Ns, high_bounds_p2Ns): p0 = np.append(p0, np.median(kNs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_kNs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_kNs_i)) if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: p2Us = re_arrange_params(p2Us, self.nr_signals) low_bounds_p2Us = re_arrange_params(low_bounds_p2Us, self.nr_signals) high_bounds_p2Us = re_arrange_params(high_bounds_p2Us, self.nr_signals) for kUs_i, low_bounds_kUs_i, high_bounds_kUs_i in zip(p2Us, low_bounds_p2Us, high_bounds_p2Us): p0 = np.append(p0, np.median(kUs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_kUs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_kUs_i)) if self.native_baseline_type in ['quadratic', 'exponential']: p3Ns = re_arrange_params(p3Ns, self.nr_signals) low_bounds_p3Ns = re_arrange_params(low_bounds_p3Ns, self.nr_signals) high_bounds_p3Ns = re_arrange_params(high_bounds_p3Ns, self.nr_signals) for qNs_i, low_bounds_qNs_i, high_bounds_qNs_i in zip(p3Ns, low_bounds_p3Ns, high_bounds_p3Ns): p0 = np.append(p0, np.median(qNs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_qNs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_qNs_i)) if self.unfolded_baseline_type in ['quadratic', 'exponential']: p3Us = re_arrange_params(p3Us, self.nr_signals) low_bounds_p3Us = re_arrange_params(low_bounds_p3Us, self.nr_signals) high_bounds_p3Us = re_arrange_params(high_bounds_p3Us, self.nr_signals) for qUs_i, low_bounds_qUs_i, high_bounds_qUs_i in zip(p3Us, low_bounds_p3Us, high_bounds_p3Us): p0 = np.append(p0, np.median(qUs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_qUs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_qUs_i)) signal_fx = map_three_state_model_to_signal_fx(self.model) kwargs = { 'oligomer_concentrations': self.oligomer_concentrations_expanded, 'list_of_temperatures': self.temp_lst_expanded_subset, 'list_of_signals': self.signal_lst_expanded_subset, 'initial_parameters': p0, 'low_bounds': low_bounds, 'high_bounds': high_bounds, 'signal_ids':self.signal_ids, 'baseline_native_fx': self.baseline_N_fx, 'baseline_unfolded_fx': self.baseline_U_fx, 'signal_fx' : signal_fx, 'CpTh_value' : self.cp_value, } fit_fx = fit_oligomer_unfolding_three_states_shared_slopes_many_signals if self.pre_fit: # Do a pre-fit with a reduced data set global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) p0 = global_fit_params # End of pre-fit # Use whole dataset kwargs['list_of_temperatures'] = self.temp_lst_expanded kwargs['list_of_signals'] = self.signal_lst_expanded global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) global_fit_params, cov, predicted, p0, low_bounds, high_bounds, result, minimizer = evaluate_fitting_and_refit( global_fit_params, cov, predicted, high_bounds, low_bounds, p0, False, False, self.limited_dh, self.limited_tm, self.fixed_cp, kwargs, fit_fx, result=result, minimizer=minimizer, fit_m_value=False, three_state_model=True ) rel_errors = relative_errors(global_fit_params, cov) self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions( predicted, self.nr_signals, self.nr_olig) self.params_names = params_names self.create_params_df_three_state() self.create_dg_df() self.global_global_fit_done = True return None
[docs] def fit_thermal_unfolding_global_global_global( self, model_scale_factor=True): """ Fit the thermal unfolding of the sample using the signal and temperature data We fit all the curves at once, with global thermodynamic parameters, global slopes and global baselines Must be run after fit_thermal_unfolding_global_global Parameters ---------- model_scale_factor : bool, optional If True, model a scale factor for each oligomer concentration Notes ----- Updates many global fitting attributes and sets `global_global_global_fit_done` when complete. If `model_scale_factor` is True the method also creates scaled signal attributes: - signal_lst_multiple_scaled, predicted_lst_multiple_scaled """ # Requires global global fit done if not self.global_global_fit_done: self.fit_thermal_unfolding_global_global() param_init = 2 + (self.cp_value is None) params_names = self.params_names[:param_init] p0 = self.global_fit_params[:param_init] low_bounds = self.low_bounds[:param_init] high_bounds = self.high_bounds[:param_init] n_datasets = self.nr_olig * self.nr_signals p1Ns = self.global_fit_params[param_init:param_init + n_datasets] p1Us = self.global_fit_params[param_init + n_datasets:param_init + 2 * n_datasets] p1Ns_per_signal = re_arrange_params(p1Ns, self.nr_signals) p1Us_per_signal = re_arrange_params(p1Us, self.nr_signals) p1Ns = np.mean(p1Ns_per_signal, axis=1) p1Us = np.mean(p1Us_per_signal, axis=1) p1Ns_low_bounds = [p1N / 100 if p1N > 0 else 100 * p1N for p1N in p1Ns] p1Us_low_bounds = [p1U / 100 if p1U > 0 else 100 * p1U for p1U in p1Us] p1Ns_high_bounds = [p1N * 100 if p1N > 0 else p1N / 100 for p1N in p1Ns] p1Us_high_bounds = [p1U * 100 if p1U > 0 else p1U / 100 for p1U in p1Us] idx = param_init + 2 * n_datasets params_names += ['intercept_native - ' + signal_name for signal_name in self.signal_names] params_names += ['intercept_unfolded - ' + signal_name for signal_name in self.signal_names] if self.native_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' kNs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_kNs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_kNs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' kUs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_kUs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_kUs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' qNs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_qNs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_qNs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' qUs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_qUs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_qUs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] p0 = np.concatenate([p0, p1Ns, p1Us]) low_bounds = np.concatenate([low_bounds, p1Ns_low_bounds, p1Us_low_bounds]) high_bounds = np.concatenate([high_bounds, p1Ns_high_bounds, p1Us_high_bounds]) if self.native_baseline_type in ['linear', 'quadratic','exponential']: p0 = np.concatenate([p0, kNs]) low_bounds = np.concatenate([low_bounds, low_bounds_kNs]) high_bounds = np.concatenate([high_bounds, high_bounds_kNs]) if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: p0 = np.concatenate([p0, kUs]) low_bounds = np.concatenate([low_bounds, low_bounds_kUs]) high_bounds = np.concatenate([high_bounds, high_bounds_kUs]) if self.native_baseline_type in ['quadratic', 'exponential']: p0 = np.concatenate([p0, qNs]) low_bounds = np.concatenate([low_bounds, low_bounds_qNs]) high_bounds = np.concatenate([high_bounds, high_bounds_qNs]) if self.unfolded_baseline_type in ['quadratic', 'exponential']: p0 = np.concatenate([p0, qUs]) low_bounds = np.concatenate([low_bounds, low_bounds_qUs]) high_bounds = np.concatenate([high_bounds, high_bounds_qUs]) # If required, include a scale factor for each oligomer concentration if model_scale_factor: # The last oligomer concentration is fixed to 1, the rest are fitted scale_factors = [1 for _ in range(self.nr_olig - 1)] scale_factors_low = [0.5882 for _ in range(self.nr_olig - 1)] scale_factors_high = [1.7 for _ in range(self.nr_olig - 1)] p0 = np.concatenate([p0, scale_factors]) low_bounds = np.concatenate([low_bounds, scale_factors_low]) high_bounds = np.concatenate([high_bounds, scale_factors_high]) params_names += ['Scale factor - ' + str(d) + ' (M). ID: ' + str(i) for i, d in enumerate(self.oligomer_concentrations)] params_names.pop() # Remove the last one, as it is fixed to 1 scale_factor_exclude_ids = [self.nr_olig - 1] if model_scale_factor else [] signal_fx = map_two_state_model_to_signal_fx(self.model) # Do a prefit with a reduced dataset kwargs = { 'list_of_temperatures' : self.temp_lst_expanded_subset, 'list_of_signals' : self.signal_lst_expanded_subset, 'signal_ids' : self.signal_ids, 'oligomer_concentrations': self.oligomer_concentrations_expanded, 'initial_parameters': p0, 'low_bounds': low_bounds, 'high_bounds': high_bounds, 'model_scale_factor':model_scale_factor, 'cp_value' : self.cp_value, 'scale_factor_exclude_ids':scale_factor_exclude_ids, 'signal_fx' : signal_fx, 'baseline_native_fx' : self.baseline_N_fx, 'baseline_unfolded_fx' : self.baseline_U_fx } fit_fx = fit_oligomer_unfolding_many_signals if self.pre_fit: global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) # Assign the fitted parameters to the initial guess for the full dataset p0 = global_fit_params # End of prefit with reduced dataset # Use the whole dataset kwargs['list_of_signals'] = self.signal_lst_expanded kwargs['list_of_temperatures'] = self.temp_lst_expanded global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) # Remove scale factors that are not significant if model_scale_factor: # 2 parameters corresponding to Tm, dH # plus Cp if fitted idx_start = 2 + (self.cp_value is None) native_factor = 1+np.sum(baseline_fx_name_to_req_params(self.baseline_N_fx)) unfolded_factor = 1+np.sum(baseline_fx_name_to_req_params(self.baseline_U_fx)) # Add index according to the native baseline polynomial order idx_start += native_factor * self.nr_signals # Add index according to the unfolded baseline polynomial order idx_start += unfolded_factor * self.nr_signals for _ in range(5): # Sort in ascending order the IDs to exclude scale_factor_exclude_ids = sorted(scale_factor_exclude_ids) n_fixed_factors = len(scale_factor_exclude_ids) n_fit_factors = self.nr_olig - n_fixed_factors if n_fit_factors == 0: break sf_params = global_fit_params[idx_start:(idx_start + n_fit_factors)] idxs_to_remove = [] re_fit = False # Add dummy variable where we need to skip the index for id in scale_factor_exclude_ids: sf_params = np.insert(sf_params, id, np.nan) for i, sf in enumerate(sf_params): if i in scale_factor_exclude_ids: continue if 0.995 <= sf <= 1.015: # Exclude the scale factor from the fit scale_factor_exclude_ids.append(i) re_fit = True j1 = np.sum(np.array(scale_factor_exclude_ids) < i) j2 = len(idxs_to_remove) idxs_to_remove.append(idx_start + i - j1 + j2) if not re_fit: break else: for idx in reversed(idxs_to_remove): global_fit_params = np.delete(global_fit_params, idx) low_bounds = np.delete(low_bounds, idx) high_bounds = np.delete(high_bounds, idx) del params_names[idx] kwargs['initial_parameters'] = global_fit_params kwargs['low_bounds'] = low_bounds kwargs['high_bounds'] = high_bounds kwargs['scale_factor_exclude_ids'] = scale_factor_exclude_ids global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) rel_errors = relative_errors(global_fit_params, cov) self.params_names = params_names self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions( predicted, self.nr_signals, self.nr_olig) self.create_params_df() self.create_dg_df() self.global_global_global_fit_done = True # Obtained the scaled signal too if model_scale_factor: # signal scaled hos one sublist per selected signal type signal_scaled = deepcopy(self.signal_lst_multiple) predicted_scaled = deepcopy(self.predicted_lst_multiple) for value, param in zip(self.global_fit_params, self.params_names): if 'Scale factor' in param: id = int(param.split('(M). ID: ')[-1]) for i in range(len(signal_scaled)): signal_scaled[i][id] /= value predicted_scaled[i][id] /= value self.signal_lst_multiple_scaled = signal_scaled self.predicted_lst_multiple_scaled = predicted_scaled return None
[docs] def fit_thermal_unfolding_three_state_global_global_global( self, model_scale_factor=True): """ Fit the thermal unfolding of the sample using the signal and temperature data for models assuming three states We fit all the curves at once, with global thermodynamic parameters, global slopes and global baselines Must be run after fit_thermal_unfolding_global_global Parameters ---------- model_scale_factor : bool, optional If True, model a scale factor for each oligomer concentration Notes ----- Updates many global fitting attributes and sets `global_global_global_fit_done` when complete. If `model_scale_factor` is True the method also creates scaled signal attributes: - signal_lst_multiple_scaled, predicted_lst_multiple_scaled """ # Requires global global fit done if not self.global_global_fit_done: self.fit_thermal_unfolding_three_state_global_global() param_init = 4 if self.fixed_cp: param_init += 1 params_names = self.params_names[:param_init] p0 = self.global_fit_params[:param_init] low_bounds = self.low_bounds[:param_init] high_bounds = self.high_bounds[:param_init] n_datasets = self.nr_olig * self.nr_signals p1Ns = self.global_fit_params[param_init:param_init + n_datasets] p1Us = self.global_fit_params[param_init + n_datasets:param_init + 2 * n_datasets] intermediate_baselines = self.global_fit_params[param_init + 2 * n_datasets:param_init + 3 * n_datasets] p1Ns_per_signal = re_arrange_params(p1Ns, self.nr_signals) p1Us_per_signal = re_arrange_params(p1Us, self.nr_signals) intermediate_baselines_per_signal = re_arrange_params(intermediate_baselines, self.nr_signals) p1Ns = np.mean(p1Ns_per_signal, axis=1) p1Us = np.mean(p1Us_per_signal, axis=1) intermediate_baselines = np.mean(intermediate_baselines_per_signal, axis=1) p1Ns_low_bounds = [p1N / 100 if p1N > 0 else 100 * p1N for p1N in p1Ns] p1Us_low_bounds = [p1U / 100 if p1U > 0 else 100 * p1U for p1U in p1Us] intermediate_baselines_low_bounds = [intermediate_baseline / 100 if intermediate_baseline > 0 else 100 * intermediate_baseline for intermediate_baseline in intermediate_baselines] p1Ns_high_bounds = [p1N * 100 if p1N > 0 else p1N / 100 for p1N in p1Ns] p1Us_high_bounds = [p1U * 100 if p1U > 0 else p1U / 100 for p1U in p1Us] intermediate_baselines_high_bounds = [ intermediate_baseline * 100 if intermediate_baseline > 0 else intermediate_baseline / 100 for intermediate_baseline in intermediate_baselines] idx = param_init + 3 * n_datasets params_names += ['intercept_native - ' + signal_name for signal_name in self.signal_names] params_names += ['intercept_unfolded - ' + signal_name for signal_name in self.signal_names] params_names += ['intercept_intermediate - ' + signal_name for signal_name in self.signal_names] if self.native_baseline_type in ['linear', 'quadratic', 'exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' kNs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_kNs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_kNs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.unfolded_baseline_type in ['linear', 'quadratic', 'exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' kUs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_kUs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_kUs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' qNs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_qNs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_qNs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' qUs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_qUs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_qUs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] p0 = np.concatenate([p0, p1Ns, p1Us, intermediate_baselines]) low_bounds = np.concatenate([low_bounds, p1Ns_low_bounds, p1Us_low_bounds, intermediate_baselines_low_bounds]) high_bounds = np.concatenate([high_bounds, p1Ns_high_bounds, p1Us_high_bounds, intermediate_baselines_high_bounds]) if self.native_baseline_type in ['linear', 'quadratic', 'exponential']: p0 = np.concatenate([p0, kNs]) low_bounds = np.concatenate([low_bounds, low_bounds_kNs]) high_bounds = np.concatenate([high_bounds, high_bounds_kNs]) if self.unfolded_baseline_type in ['linear', 'quadratic', 'exponential']: p0 = np.concatenate([p0, kUs]) low_bounds = np.concatenate([low_bounds, low_bounds_kUs]) high_bounds = np.concatenate([high_bounds, high_bounds_kUs]) if self.native_baseline_type in ['quadratic', 'exponential']: p0 = np.concatenate([p0, qNs]) low_bounds = np.concatenate([low_bounds, low_bounds_qNs]) high_bounds = np.concatenate([high_bounds, high_bounds_qNs]) if self.unfolded_baseline_type in ['quadratic', 'exponential']: p0 = np.concatenate([p0, qUs]) low_bounds = np.concatenate([low_bounds, low_bounds_qUs]) high_bounds = np.concatenate([high_bounds, high_bounds_qUs]) # If required, include a scale factor for each oligomer concentration if model_scale_factor: # The last oligomer concentration is fixed to 1, the rest are fitted scale_factors = [1 for _ in range(self.nr_olig - 1)] scale_factors_low = [0.5882 for _ in range(self.nr_olig - 1)] scale_factors_high = [1.7 for _ in range(self.nr_olig - 1)] p0 = np.concatenate([p0, scale_factors]) low_bounds = np.concatenate([low_bounds, scale_factors_low]) high_bounds = np.concatenate([high_bounds, scale_factors_high]) params_names += ['Scale factor - ' + str(d) + ' (M). ID: ' + str(i) for i, d in enumerate(self.oligomer_concentrations)] params_names.pop() # Remove the last one, as it is fixed to 1 scale_factor_exclude_ids = [self.nr_olig - 1] if model_scale_factor else [] signal_fx = map_three_state_model_to_signal_fx(self.model) # Do a prefit with a reduced dataset kwargs = { 'list_of_temperatures' : self.temp_lst_expanded_subset, 'list_of_signals' : self.signal_lst_expanded_subset, 'signal_ids' : self.signal_ids, 'oligomer_concentrations': self.oligomer_concentrations_expanded, 'initial_parameters': p0, 'low_bounds': low_bounds, 'high_bounds': high_bounds, 'model_scale_factor':model_scale_factor, 'scale_factor_exclude_ids':scale_factor_exclude_ids, 'signal_fx' : signal_fx, 'baseline_native_fx' : self.baseline_N_fx, 'baseline_unfolded_fx' : self.baseline_U_fx, 'CpTh_value' : self.cp_value, } fit_fx = fit_oligomer_unfolding_three_states_many_signals if self.pre_fit: global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) # Assign the fitted parameters to the initial guess for the full dataset p0 = global_fit_params # End of prefit with reduced dataset # Use the whole dataset kwargs['list_of_signals'] = self.signal_lst_expanded kwargs['list_of_temperatures'] = self.temp_lst_expanded global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) # Remove scale factors that are not significant if model_scale_factor: # 4 parameters corresponding to Tm, dH idx_start = 4 # Additional parameter if CpTh is given if self.fixed_cp: idx_start += 1 native_factor = 1+np.sum(baseline_fx_name_to_req_params(self.baseline_N_fx)) unfolded_factor = 1+np.sum(baseline_fx_name_to_req_params(self.baseline_U_fx)) # Add index according to the native baseline polynomial order idx_start += native_factor * self.nr_signals # Add index according to the unfolded baseline polynomial order idx_start += unfolded_factor * self.nr_signals for _ in range(5): # Sort in ascending order the IDs to exclude scale_factor_exclude_ids = sorted(scale_factor_exclude_ids) n_fixed_factors = len(scale_factor_exclude_ids) n_fit_factors = self.nr_olig - n_fixed_factors if n_fit_factors == 0: break sf_params = global_fit_params[idx_start:(idx_start + n_fit_factors)] idxs_to_remove = [] re_fit = False # Add dummy variable where we need to skip the index for id in scale_factor_exclude_ids: sf_params = np.insert(sf_params, id, np.nan) for i, sf in enumerate(sf_params): if i in scale_factor_exclude_ids: continue if 0.995 <= sf <= 1.015: # Exclude the scale factor from the fit scale_factor_exclude_ids.append(i) re_fit = True j1 = np.sum(np.array(scale_factor_exclude_ids) < i) j2 = len(idxs_to_remove) idxs_to_remove.append(idx_start + i - j1 + j2) if not re_fit: break else: for idx in reversed(idxs_to_remove): global_fit_params = np.delete(global_fit_params, idx) low_bounds = np.delete(low_bounds, idx) high_bounds = np.delete(high_bounds, idx) del params_names[idx] kwargs['initial_parameters'] = global_fit_params kwargs['low_bounds'] = low_bounds kwargs['high_bounds'] = high_bounds kwargs['scale_factor_exclude_ids'] = scale_factor_exclude_ids global_fit_params, cov, predicted, result, minimizer = fit_fx(**kwargs) rel_errors = relative_errors(global_fit_params, cov) self.params_names = params_names self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions( predicted, self.nr_signals, self.nr_olig) self.create_params_df_three_state() self.create_dg_df() self.global_global_global_fit_done = True # Obtained the scaled signal too if model_scale_factor: # signal scaled hos one sublist per selected signal type signal_scaled = deepcopy(self.signal_lst_multiple) predicted_scaled = deepcopy(self.predicted_lst_multiple) for value, param in zip(self.global_fit_params, self.params_names): if 'Scale factor' in param: id = int(param.split('(M). ID: ')[-1]) for i in range(len(signal_scaled)): signal_scaled[i][id] /= value predicted_scaled[i][id] /= value self.signal_lst_multiple_scaled = signal_scaled self.predicted_lst_multiple_scaled = predicted_scaled return None
[docs] def signal_to_df(self, signal_type='raw', scaled=False): """ Create a dataframe with three columns: Temperature, Signal, and oligomer. Optimized for speed by avoiding per-curve DataFrame creation. Parameters ---------- signal_type : {'raw', 'fitted', 'derivative'}, optional Which signal to include in the dataframe. 'raw' uses experimental data, 'fitted' uses model predictions, 'derivative' uses the estimated derivative signal. scaled : bool, optional If True and signal_type == 'fitted' or 'raw', use the scaled versions if available. Returns ------- pd.DataFrame A DataFrame with columns: ['Temperature', 'Signal', 'Oligomer', 'ID']. """ # Flatten all arrays and repeat oligomer values accordingly if signal_type == 'derivative': deriv_lst = self.deriv_lst_multiple[0] temp_lst = self.temp_deriv_lst_multiple[0] signal_all = np.concatenate(deriv_lst) temp_all = np.concatenate(temp_lst) else: # temperature is shared for the experimental and fitted signals temp_lst = self.temp_lst_multiple[0] if self.max_points is not None: temp_lst = [subset_data(x, self.max_points) for x in temp_lst] temp_all = np.concatenate(temp_lst) # fitted data signal does not need subset! if signal_type == 'fitted': if not scaled: predicted_lst = self.predicted_lst_multiple[0] else: predicted_lst = self.predicted_lst_multiple_scaled[0] signal_all = np.concatenate(predicted_lst) temp_all = np.concatenate(temp_lst) # Signal_type set to 'raw' else: if not scaled: signal_lst = self.signal_lst_multiple[0] else: signal_lst = self.signal_lst_multiple_scaled[0] if self.max_points is not None: signal_lst = [subset_data(x, self.max_points) for x in signal_lst] signal_all = np.concatenate(signal_lst) oligomer_all = np.concatenate([ np.full_like(temp_lst[i], self.oligomer_concentrations[i], dtype=np.float64) for i in range(len(temp_lst)) ]) # Add an ID column, so we can identify the curves, even with the same oligomer concentration id_all = np.concatenate([ np.full_like(temp_lst[i], i, dtype=np.int32) for i in range(len(temp_lst)) ]) signal_df = pd.DataFrame({ 'Temperature': temp_all, 'Signal': signal_all, 'Oligomer': oligomer_all, 'ID': id_all }) return signal_df
[docs] def create_params_df_three_state(self): """ Create a dataframe of the parameters """ # convert the first param to Celsius self.global_fit_params[0] = temperature_to_celsius(self.global_fit_params[0]) self.low_bounds[0] = temperature_to_celsius(self.low_bounds[0]) self.high_bounds[0] = temperature_to_celsius(self.high_bounds[0]) self.global_fit_params[2] = temperature_to_celsius(self.global_fit_params[2]) self.low_bounds[2] = temperature_to_celsius(self.low_bounds[2]) self.high_bounds[2] = temperature_to_celsius(self.high_bounds[2]) # Create a dataframe of the parameters self.params_df = pd.DataFrame({ 'Parameter': self.params_names, 'Value': self.global_fit_params, 'Relative error (%)': self.rel_errors, 'Fitting low Bound': self.low_bounds, 'Fitting high Bound': self.high_bounds }) return None