# Source code for pychemelt.thermal_oligomer

"""
Main class to handle thermal denaturation data of mono- and oligomers up to tetramers
The current model assumes the proteins' unfolding is reversible
"""

import pandas as pd
import numpy as np
from warnings import warn

from itertools import chain
from copy import deepcopy

from .main import Sample

from .utils.signals import (
    map_two_state_model_to_signal_fx,
)

from .utils.math import (
    temperature_to_kelvin,
    relative_errors,
    constant_baseline,
    linear_baseline,
    quadratic_baseline,
    exponential_baseline,
)

from .utils.processing import (
    guess_Tm_from_derivative,
    set_param_bounds,
    adjust_value_to_interval,
    re_arrange_params,
    re_arrange_predictions,
    subset_data,
    estimate_signal_baseline_params,
    oligomer_number,
    transform_to_list
)

from .utils.fitting import (
    fit_line_robust,
    fit_oligomer_unfolding_single_slopes,
    fit_oligomer_unfolding_shared_slopes_many_signals,
    fit_oligomer_unfolding_many_signals,
    evaluate_fitting_and_refit,
    baseline_fx_name_to_req_params
)



class ThermalOligomer(Sample):
    """
    Class to hold the data of a DSF experiment of thermal unfolding with
    different concentrations of an oligomer.
    """
[docs] def __init__(self, name='Test'): super().__init__(name) self.nr_olig = 0 # Number of oligomer concentrations in data self.model = None # Oligomer model type self.oligomeric = True # Flag for oligomer or denaturant
[docs] def set_model(self, model_name): """ Set subunit number of the oligomer used for the analysis. Currently supported are two state models of monomers, dimers, trimers and tetramers Parameters ---------- model_name : str name of the used model. Can be: "Monomer", "Dimer", "Trimer", "Tetramer". Case insensitive Raises ------ ValueError If the provided model name is not in the supported list. Notes ----- This method creates/updates the following attributes on the instance: - self.model: oligomeric model used for analysis """ allowed_models = ["monomer", "dimer", "trimer", "tetramer"] model = model_name.lower() if model not in allowed_models: raise ValueError( f"Invalid model '{model_name}'. " f"Allowed models are: {', '.join(m.capitalize() for m in allowed_models)}." ) # Save model with first letter uppercase self.model = model.capitalize() return None
[docs] def set_concentrations(self, concentrations=None): """ Set the oligomeric concentrations for the sample Parameters ---------- concentrations : list, optional List of oligomer concentrations. If None, use the sample conditions Notes ----- Creates/updates attribute `oligomer_concentrations_pre` (numpy.ndarray) """ if concentrations is None: concentrations = self.conditions concentrations = transform_to_list(concentrations) self.oligomer_concentrations_pre = np.array(concentrations) return None
    def select_conditions(self, boolean_lst=None, normalise_to_global_max=True):
        """
        For each signal, select the conditions to be used for the analysis.

        Parameters
        ----------
        boolean_lst : list of bool, optional
            List of booleans selecting which conditions to keep. If None,
            keep all.
        normalise_to_global_max : bool, optional
            If True, normalise the signal to the global maximum - per
            signal type.

        Notes
        -----
        Creates/updates several attributes used by downstream fitting:
        - signal_lst_multiple, temp_lst_multiple : lists of lists with
          selected data
        - oligomer_concentrations : selected oligomer concentrations
        - oligomer_concentrations_expanded : flattened numpy array matching
          expanded signals
        - boolean_lst, normalise_to_global_max, nr_olig : control
          flags/values
        """
        # If boolean_lst is a boolean, convert it to a list of one boolean
        # NOTE(review): transform_to_list runs before the None check below —
        # presumably it returns None unchanged for a None input; confirm.
        boolean_lst = transform_to_list(boolean_lst)
        if boolean_lst is None:
            # Keep everything: reuse the pre-selection containers directly
            self.signal_lst_multiple = self.signal_lst_pre_multiple
            self.temp_lst_multiple = self.temp_lst_pre_multiple
            self.oligomer_concentrations = self.oligomer_concentrations_pre
        else:
            # Filter each signal type's conditions by the boolean mask
            self.signal_lst_multiple = [None for _ in range(len(self.signal_lst_pre_multiple))]
            self.temp_lst_multiple = [None for _ in range(len(self.temp_lst_pre_multiple))]
            for i in range(len(self.signal_lst_pre_multiple)):
                self.signal_lst_multiple[i] = [x for j, x in enumerate(self.signal_lst_pre_multiple[i]) if boolean_lst[j]]
                self.temp_lst_multiple[i] = [x for j, x in enumerate(self.temp_lst_pre_multiple[i]) if boolean_lst[j]]
            self.oligomer_concentrations = [x for i, x in enumerate(self.oligomer_concentrations_pre) if boolean_lst[i]]
        if normalise_to_global_max:
            # Flatten the nested (signal type -> condition -> points) lists
            flat = list(chain.from_iterable(chain.from_iterable(self.signal_lst_multiple)))
            global_max = np.max(flat)  # Global maximum across all signals
            self.data_global_max = global_max
            # Scale every curve so the global maximum maps to 100
            for i in range(len(self.signal_lst_multiple)):
                self.signal_lst_multiple[i] = [x / global_max * 100 for x in self.signal_lst_multiple[i]]
        self.nr_olig = len(self.oligomer_concentrations)
        # For compatibility
        self.nr_den = self.nr_olig
        # Expand the number of oligomer concentrations to match the number of signals
        oligomer_concentrations = [self.oligomer_concentrations for _ in range(self.nr_signals)]
        self.oligomer_concentrations_expanded = np.concatenate(oligomer_concentrations, axis=0)
        self.boolean_lst = boolean_lst
        self.normalise_to_global_max = normalise_to_global_max
        self.oligomer_concentrations = np.array(self.oligomer_concentrations)
        # Needed for compatibility
        self.denaturant_concentrations = self.oligomer_concentrations
        return None
[docs] def guess_Cp(self): """ Guess the Cp of the assembled oligomer by the number of residues. Raises ------ ValueError If `self.n_residues` is not set. Notes ----- The number of residues represent the total number of residues in the oligomer This method creates/updates attributes used later in fitting: - Cp0 assigned to self.Cp0 """ # If the number of residues is still zero, raise an error if self.n_residues == 0: raise ValueError('The number of residues is still zero. Please set n_residues before calling guess_Cp') Cp0 = self.n_residues * 0.0148 - 0.1267 # Cp0 needs to be positive Cp0 = max(Cp0, 0) self.Cp0 = Cp0 return None
    def estimate_baseline_parameters(self, native_baseline_type, unfolded_baseline_type, window_range_native=12, window_range_unfolded=12):
        """
        Estimate the baseline parameters for multiple signals of the oligomer.

        The native baseline represents the curve for the assembled oligomer
        while the unfolded baseline represents the curve for the unfolded and
        disassembled oligomer.

        Parameters
        ----------
        native_baseline_type : str
            One of 'constant', 'linear', 'quadratic', 'exponential'.
        unfolded_baseline_type : str
            One of 'constant', 'linear', 'quadratic', 'exponential'.
        window_range_native : int, optional
            Range of the window (in degrees) to estimate the baselines and
            slopes of the native state.
        window_range_unfolded : int, optional
            Range of the window (in degrees) to estimate the baselines and
            slopes of the unfolded state.

        Notes
        -----
        This method sets or updates these attributes:
        - first/second/third_param_Ns_per_signal and _Us_per_signal
        - baseline_N_fx, baseline_U_fx
        - native_baseline_type, unfolded_baseline_type
        """
        self.first_param_Ns_per_signal = []
        self.first_param_Us_per_signal = []
        self.second_param_Ns_per_signal = []
        self.second_param_Us_per_signal = []
        self.third_param_Ns_per_signal = []
        self.third_param_Us_per_signal = []
        # If the sample is oligomeric, we need to correct the signal for the
        # concentrations of the oligomer
        if self.oligomeric:
            # Repeat each concentration once per data point, then regroup per
            # condition so the division below broadcasts over the curves
            oligomer_concentrations = np.repeat(self.oligomer_concentrations, np.array(self.signal_lst_multiple).shape[-1])
            oligomer_concentrations = np.split(oligomer_concentrations, len(self.oligomer_concentrations))
        for i in range(len(self.signal_lst_multiple)):
            if self.oligomeric:
                # Concentration-corrected copy of the i-th signal type
                adjusted_signal_lst_multiple = list(np.array(self.signal_lst_multiple[i]) / np.array(oligomer_concentrations))
            p1Ns, p1Us, p2Ns, p2Us, p3Ns, p3Us = estimate_signal_baseline_params(
                self.signal_lst_multiple[i] if not self.oligomeric else adjusted_signal_lst_multiple,
                self.temp_lst_multiple[i],
                native_baseline_type,
                unfolded_baseline_type,
                window_range_native,
                window_range_unfolded,
                oligomer_number(self.model)
            )
            self.first_param_Ns_per_signal.append(p1Ns)
            self.first_param_Us_per_signal.append(p1Us)
            self.second_param_Ns_per_signal.append(p2Ns)
            self.second_param_Us_per_signal.append(p2Us)
            self.third_param_Ns_per_signal.append(p3Ns)
            self.third_param_Us_per_signal.append(p3Us)
        # Map the requested baseline names to their model functions
        baseline_fx_dic = {
            'constant': constant_baseline,
            'linear': linear_baseline,
            'quadratic': quadratic_baseline,
            'exponential': exponential_baseline
        }
        self.baseline_N_fx = baseline_fx_dic[native_baseline_type]
        self.baseline_U_fx = baseline_fx_dic[unfolded_baseline_type]
        self.native_baseline_type = native_baseline_type
        self.unfolded_baseline_type = unfolded_baseline_type
        return None
[docs] def create_dg_df(self): """ Create a dataframe of the dg values versus temperature """ # Create a dataframe of the parameters Tm, DHm, Cp0 = self.global_fit_params[:3] T_c = np.arange(0, 150, 0.5) T = temperature_to_kelvin(T_c) Tm = temperature_to_kelvin(Tm) DG = DHm * (1 - T / Tm) + Cp0 * (T - Tm - T * np.log(T / Tm)) dg_df = pd.DataFrame({ 'DG (kcal/mol)': DG, 'Temperature (°C)': T_c }) self.dg_df = dg_df return None
[docs] def fit_thermal_unfolding_global( self, cp_limits=None, dh_limits=None, tm_limits=None, cp_value=None): """ Fit the thermal unfolding of the sample using the signal and temperature data We fit all the curves at once, with global thermodynamic parameters but local slopes and local baselines) Multiple signals can be fitted at the same time, such as 350nm and 330nm Parameters ---------- cp_limits : list, optional List of two values, the lower and upper bounds for the Cp value. If None, bounds set automatically dh_limits : list, optional List of two values, the lower and upper bounds for the dH value. If None, bounds set automatically tm_limits : list, optional List of two values, the lower and upper bounds for the Tm value. If None, bounds set automatically cp_value : float, optional If provided, the Cp value is fixed to this value, the bounds are ignored Notes ----- This is a heavy routine that creates/updates many fitting-related attributes, including: - bNs_expanded, bUs_expanded, kNs_expanded, kUs_expanded, qNs_expanded, qUs_expanded - p0, low_bounds, high_bounds, global_fit_params, rel_errors - predicted_lst_multiple, params_names, params_df, dg_df - flags: global_fit_done, limited_tm, limited_dh, limited_cp, fixed_cp """ # Requires Cp0 if self.Cp0 <= 0: raise ValueError('Cp0 must be positive. 
Please run guess_Cp before fitting globally.') # Get Guess of Tm: tm_lst = [] x1 = 6 x2 = 11 if not hasattr(self, "deriv_lst_multiple"): self.estimate_derivative() for i in range(len(self.signal_lst_multiple)): tm_lst.append(guess_Tm_from_derivative( self.temp_deriv_lst_multiple[i], self.deriv_lst_multiple[i], x1, x2 )) Tm = np.average(tm_lst) dh = 100 if self.model == 'Dimer': dh = 120 elif self.model == 'Trimer': dh = 150 elif self.model == 'Tetramer': dh = 180 p0 = [Tm, dh, self.Cp0] params_names = [ 'Tm (°C)', 'ΔH (kcal/mol)', 'Cp (kcal/mol/°C)'] self.first_param_Ns_expanded = np.concatenate(self.first_param_Ns_per_signal, axis=0) self.first_param_Us_expanded = np.concatenate(self.first_param_Us_per_signal, axis=0) self.second_param_Ns_expanded = np.concatenate(self.second_param_Ns_per_signal, axis=0) self.second_param_Us_expanded = np.concatenate(self.second_param_Us_per_signal, axis=0) self.third_param_Ns_expanded = np.concatenate(self.third_param_Ns_per_signal, axis=0) self.third_param_Us_expanded = np.concatenate(self.third_param_Us_per_signal, axis=0) p0 = np.concatenate([p0, self.first_param_Ns_expanded, self.first_param_Us_expanded]) # We need to append as many bN and bU as the number of oligomer concentrations # times the number of signal types for signal in self.signal_names: params_names += (['intercept_native - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) for signal in self.signal_names: params_names += (['intercept_unfolded - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.native_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' p0 = np.concatenate([p0, self.second_param_Ns_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in 
range(self.nr_olig)]) if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' p0 = np.concatenate([p0, self.second_param_Us_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' p0 = np.concatenate([p0, self.third_param_Ns_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' p0 = np.concatenate([p0, self.third_param_Us_expanded]) for signal in self.signal_names: params_names += ([param_name + ' - ' + str(self.oligomer_concentrations[i]) + ' - ' + str(signal) for i in range(self.nr_olig)]) low_bounds = (p0.copy()) high_bounds = (p0.copy()) low_bounds[3:], high_bounds[3:] = set_param_bounds(p0[3:],params_names[3:]) # Adjusting boundaries based on the size of the oligomer self.limited_tm = tm_limits is not None if self.limited_tm: tm_lower, tm_upper = tm_limits else: tm_lower = p0[0] - 12 if self.model == 'Monomer' else p0[0] - 20 tm_upper = p0[0] + 20 if self.model == 'Monomer' else p0[0] + 30 if self.model in ['Trimer', 'Tetramer']: tm_lower, tm_upper, p0[0] = tm_lower + 10, tm_upper + 10, p0[0] + 10 if self.model == 'Tetramer': tm_lower, tm_upper, p0[0] = tm_lower + 10, tm_upper + 20, p0[0] + 20 low_bounds[0] = tm_lower high_bounds[0] = tm_upper # Verify that the initial guess is within the user-defined limits p0[0] = 
adjust_value_to_interval(p0[0], tm_lower, tm_upper,1) self.limited_dh = dh_limits is not None if self.limited_dh: dh_lower, dh_upper = dh_limits p0[1] = adjust_value_to_interval(p0[1], dh_lower, dh_upper, 1) else: dh_lower = 10 dh_upper = 500 low_bounds[1] = dh_lower high_bounds[1] = dh_upper self.cp_value = cp_value self.fixed_cp = cp_value is not None self.limited_cp = cp_limits is not None and not self.fixed_cp if self.limited_cp: cp_lower, cp_upper = cp_limits else: cp_lower, cp_upper = 0.1, 5 if self.fixed_cp: # Remove the Cp from p0, low_bounds and high_bounds # Remove Cp0 from the parameter names p0 = np.delete(p0, 2) low_bounds = np.delete(low_bounds, 2) high_bounds = np.delete(high_bounds, 2) params_names.pop(2) else: low_bounds[2] = cp_lower high_bounds[2] = cp_upper # Verify that the Cp initial guess is within the user-defined limits p0[2] = adjust_value_to_interval(p0[2], cp_lower, cp_upper, 0.5) # Populate the expanded signal and temperature lists self.expand_multiple_signal() signal_fx = map_two_state_model_to_signal_fx(self.model) kwargs = { 'oligomer_concentrations' : self.oligomer_concentrations_expanded, 'initial_parameters': p0, 'low_bounds' : low_bounds, 'high_bounds' : high_bounds, 'cp_value' : cp_value, 'baseline_native_fx' : self.baseline_N_fx, 'baseline_unfolded_fx' : self.baseline_U_fx, 'signal_fx' : signal_fx, } fit_fx = fit_oligomer_unfolding_single_slopes # Do a quick prefit with a reduced data set if self.pre_fit: kwargs['list_of_temperatures'] = self.temp_lst_expanded_subset kwargs['list_of_signals'] = self.signal_lst_expanded_subset global_fit_params, cov, predicted = fit_fx(**kwargs) p0 = global_fit_params # Now use the whole dataset kwargs['list_of_temperatures'] = self.temp_lst_expanded kwargs['list_of_signals'] = self.signal_lst_expanded global_fit_params, cov, predicted = fit_fx(**kwargs) global_fit_params, cov, predicted, p0, low_bounds, high_bounds = evaluate_fitting_and_refit( global_fit_params, cov, predicted, high_bounds, 
low_bounds, p0, False, self.limited_cp, self.limited_dh, self.limited_tm, self.fixed_cp, kwargs, fit_fx, fit_m_value=False, ) # If the fitting returns a large error it is recommended to turn off the CP0 value fitting error = np.nansum((np.array(predicted) - np.array(self.signal_lst_expanded)) ** 2) if self.normalise_to_global_max: if error > 10000: warn('The fitted signal deviates heavily from the experimental data in the global fit. ' 'Consider not fitting the CP0 value by setting "cp_value=0" in the fit_thermal_unfolding_global() function') else: flat = list(chain.from_iterable(chain.from_iterable(self.signal_lst_multiple))) global_max = np.max(flat) # Global maximum across all signals error = error / global_max * 100 if not self.normalise_to_global_max else error if error > 10: warn('The fitted signal deviates heavily from the experimental data in the global fit. ' 'Consider not fitting the CP0 value by setting "cp_value=0" in the fit_thermal_unfolding_global() function') rel_errors = relative_errors(global_fit_params, cov) self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions(predicted, self.nr_signals, self.nr_olig) self.global_fit_done = True self.params_names = params_names self.create_params_df() self.create_dg_df() return None
[docs] def fit_thermal_unfolding_global_global(self): """ Fit the thermal unfolding of the sample using the signal and temperature data We fit all the curves at once, with global thermodynamic parameters and global slopes (but local baselines) Multiple refers to the fact that we fit many signals at the same time, such as 350nm and 330nm Must be run after fit_thermal_unfolding_global_multiple Notes ----- Updates global fitting attributes and sets `global_global_fit_done` when complete. """ # Requires global fit done if not self.global_fit_done: self.fit_thermal_unfolding_global() if self.signal_ids is None: self.set_signal_id() param_init = 2 + (self.cp_value is None) p0 = self.global_fit_params[:param_init] low_bounds = self.low_bounds[:param_init] high_bounds = self.high_bounds[:param_init] n_datasets = self.nr_olig * self.nr_signals p1Ns = self.global_fit_params[param_init:param_init + n_datasets] p1Us = self.global_fit_params[param_init + n_datasets:param_init + 2 * n_datasets] low_bounds_p1Ns = self.low_bounds[param_init:param_init + n_datasets] low_bounds_p1Us = self.low_bounds[param_init + n_datasets:param_init + 2 * n_datasets] high_bounds_p1Ns = self.high_bounds[param_init:param_init + n_datasets] high_bounds_p1Us = self.high_bounds[param_init + n_datasets:param_init + 2 * n_datasets] id_start = param_init + 2 * n_datasets params_names = self.params_names[:id_start] if self.native_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' p2Ns = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p2Ns = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p2Ns = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: param_name = 
'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' p2Us = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p2Us = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p2Us = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' p3Ns = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p3Ns = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p3Ns = self.high_bounds[id_start:id_start + n_datasets] id_start += n_datasets if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' p3Us = self.global_fit_params[id_start:id_start + n_datasets] params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] low_bounds_p3Us = self.low_bounds[id_start:id_start + n_datasets] high_bounds_p3Us = self.high_bounds[id_start:id_start + n_datasets] p0 = np.concatenate([p0, p1Ns, p1Us]) low_bounds = np.concatenate([low_bounds, low_bounds_p1Ns, low_bounds_p1Us]) high_bounds = np.concatenate([high_bounds, high_bounds_p1Ns, high_bounds_p1Us]) # Baselines are still independent for each signal and oligomer concentration # Slopes and quadratic terms are shared - per signal only if self.native_baseline_type in ['linear', 'quadratic','exponential']: p2Ns = re_arrange_params(p2Ns, self.nr_signals) low_bounds_p2Ns = re_arrange_params(low_bounds_p2Ns, self.nr_signals) high_bounds_p2Ns = re_arrange_params(high_bounds_p2Ns, self.nr_signals) for kNs_i, low_bounds_kNs_i, 
high_bounds_kNs_i in zip(p2Ns, low_bounds_p2Ns, high_bounds_p2Ns): p0 = np.append(p0, np.median(kNs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_kNs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_kNs_i)) if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: p2Us = re_arrange_params(p2Us, self.nr_signals) low_bounds_p2Us = re_arrange_params(low_bounds_p2Us, self.nr_signals) high_bounds_p2Us = re_arrange_params(high_bounds_p2Us, self.nr_signals) for kUs_i, low_bounds_kUs_i, high_bounds_kUs_i in zip(p2Us, low_bounds_p2Us, high_bounds_p2Us): p0 = np.append(p0, np.median(kUs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_kUs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_kUs_i)) if self.native_baseline_type in ['quadratic', 'exponential']: p3Ns = re_arrange_params(p3Ns, self.nr_signals) low_bounds_p3Ns = re_arrange_params(low_bounds_p3Ns, self.nr_signals) high_bounds_p3Ns = re_arrange_params(high_bounds_p3Ns, self.nr_signals) for qNs_i, low_bounds_qNs_i, high_bounds_qNs_i in zip(p3Ns, low_bounds_p3Ns, high_bounds_p3Ns): p0 = np.append(p0, np.median(qNs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_qNs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_qNs_i)) if self.unfolded_baseline_type in ['quadratic', 'exponential']: p3Us = re_arrange_params(p3Us, self.nr_signals) low_bounds_p3Us = re_arrange_params(low_bounds_p3Us, self.nr_signals) high_bounds_p3Us = re_arrange_params(high_bounds_p3Us, self.nr_signals) for qUs_i, low_bounds_qUs_i, high_bounds_qUs_i in zip(p3Us, low_bounds_p3Us, high_bounds_p3Us): p0 = np.append(p0, np.median(qUs_i)) low_bounds = np.append(low_bounds, np.min(low_bounds_qUs_i)) high_bounds = np.append(high_bounds, np.max(high_bounds_qUs_i)) signal_fx = map_two_state_model_to_signal_fx(self.model) kwargs = { 'oligomer_concentrations': self.oligomer_concentrations_expanded, 'list_of_temperatures': self.temp_lst_expanded_subset, 'list_of_signals': 
self.signal_lst_expanded_subset, 'initial_parameters': p0, 'low_bounds': low_bounds, 'high_bounds': high_bounds, 'cp_value': self.cp_value, 'signal_ids':self.signal_ids, 'baseline_native_fx': self.baseline_N_fx, 'baseline_unfolded_fx': self.baseline_U_fx, 'signal_fx' : signal_fx, } fit_fx = fit_oligomer_unfolding_shared_slopes_many_signals if self.pre_fit: # Do a pre-fit with a reduced data set global_fit_params, cov, predicted = fit_fx(**kwargs) p0 = global_fit_params # End of pre-fit # Use whole dataset kwargs['list_of_temperatures'] = self.temp_lst_expanded kwargs['list_of_signals'] = self.signal_lst_expanded global_fit_params, cov, predicted = fit_fx(**kwargs) global_fit_params, cov, predicted, p0, low_bounds, high_bounds = evaluate_fitting_and_refit( global_fit_params, cov, predicted, high_bounds, low_bounds, p0, False, self.limited_cp, self.limited_dh, self.limited_tm, self.fixed_cp, kwargs, fit_fx, fit_m_value=False, ) rel_errors = relative_errors(global_fit_params, cov) self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions( predicted, self.nr_signals, self.nr_olig) self.params_names = params_names self.create_params_df() self.create_dg_df() self.global_global_fit_done = True return None
[docs] def fit_thermal_unfolding_global_global_global( self, model_scale_factor=True): """ Fit the thermal unfolding of the sample using the signal and temperature data We fit all the curves at once, with global thermodynamic parameters, global slopes and global baselines Must be run after fit_thermal_unfolding_global_global Parameters ---------- model_scale_factor : bool, optional If True, model a scale factor for each oligomer concentration Notes ----- Updates many global fitting attributes and sets `global_global_global_fit_done` when complete. If `model_scale_factor` is True the method also creates scaled signal attributes: - signal_lst_multiple_scaled, predicted_lst_multiple_scaled """ # Requires global global fit done if not self.global_global_fit_done: self.fit_thermal_unfolding_global_global() param_init = 2 + (self.cp_value is None) params_names = self.params_names[:param_init] p0 = self.global_fit_params[:param_init] low_bounds = self.low_bounds[:param_init] high_bounds = self.high_bounds[:param_init] n_datasets = self.nr_olig * self.nr_signals p1Ns = self.global_fit_params[param_init:param_init + n_datasets] p1Us = self.global_fit_params[param_init + n_datasets:param_init + 2 * n_datasets] p1Ns_per_signal = re_arrange_params(p1Ns, self.nr_signals) p1Us_per_signal = re_arrange_params(p1Us, self.nr_signals) m1s, b1s, m1s_low, b1s_low, m1s_high, b1s_high = [], [], [], [], [], [] m2s, b2s, m2s_low, b2s_low, m2s_high, b2s_high = [], [], [], [], [], [] for p1Ns, p1Us in zip(p1Ns_per_signal, p1Us_per_signal): # Estimate the slope of bNs versus oligomer concentration m1, b1 = fit_line_robust(self.oligomer_concentrations, p1Ns) m1_low = m1 / 100 if m1 > 0 else 100 * m1 m1_high = 100 * m1 if m1 > 0 else m1 / 100 b1_low = b1 / 100 if b1 > 0 else 100 * b1 b1_high = 100 * b1 if b1 > 0 else b1 / 100 # Estimate the slope of bUs versus oligomer concentration m2, b2 = fit_line_robust(self.oligomer_concentrations, p1Us) m2_low = m2 / 100 if m2 > 0 else 100 * m2 m2_high = 
100 * m2 if m2 > 0 else m2 / 100 b2_low = b2 / 100 if b2 > 0 else 100 * b2 b2_high = 100 * b2 if b2 > 0 else b2 / 100 m1s.append(m1) b1s.append(b1) m1s_low.append(m1_low) b1s_low.append(b1_low) m1s_high.append(m1_high) b1s_high.append(b1_high) m2s.append(m2) b2s.append(b2) m2s_low.append(m2_low) b2s_low.append(b2_low) m2s_high.append(m2_high) b2s_high.append(b2_high) idx = param_init + 2 * n_datasets params_names += ['intercept_native - ' + signal_name for signal_name in self.signal_names] params_names += ['intercept_unfolded - ' + signal_name for signal_name in self.signal_names] if self.native_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_native' if self.native_baseline_type == 'exponential' else 'slope_term_native' kNs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_kNs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_kNs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: param_name = 'pre_exponential_factor_unfolded' if self.unfolded_baseline_type == 'exponential' else 'slope_term_unfolded' kUs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_kUs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_kUs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] params_names += ['oligomer_slope_term_native - ' + signal_name for signal_name in self.signal_names] params_names += ['oligomer_slope_term_unfolded - ' + signal_name for signal_name in self.signal_names] if self.native_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_native' if self.native_baseline_type == 'exponential' else 'quadratic_term_native' qNs = self.global_fit_params[idx:idx + self.nr_signals] 
low_bounds_qNs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_qNs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] if self.unfolded_baseline_type in ['quadratic', 'exponential']: param_name = 'exponential_coefficient_unfolded' if self.unfolded_baseline_type == 'exponential' else 'quadratic_term_unfolded' qUs = self.global_fit_params[idx:idx + self.nr_signals] low_bounds_qUs = self.low_bounds[idx:idx + self.nr_signals] high_bounds_qUs = self.high_bounds[idx:idx + self.nr_signals] idx += self.nr_signals params_names += [param_name + ' - ' + signal_name for signal_name in self.signal_names] p0 = np.concatenate([p0, b1s, b2s]) low_bounds = np.concatenate([low_bounds, b1s_low, b2s_low]) high_bounds = np.concatenate([high_bounds, b1s_high, b2s_high]) if self.native_baseline_type in ['linear', 'quadratic','exponential']: p0 = np.concatenate([p0, kNs]) low_bounds = np.concatenate([low_bounds, low_bounds_kNs]) high_bounds = np.concatenate([high_bounds, high_bounds_kNs]) if self.unfolded_baseline_type in ['linear', 'quadratic','exponential']: p0 = np.concatenate([p0, kUs]) low_bounds = np.concatenate([low_bounds, low_bounds_kUs]) high_bounds = np.concatenate([high_bounds, high_bounds_kUs]) p0 = np.concatenate([p0, m1s, m2s]) low_bounds = np.concatenate([low_bounds, m1s_low, m2s_low]) high_bounds = np.concatenate([high_bounds, m1s_high, m2s_high]) if self.native_baseline_type in ['quadratic', 'exponential']: p0 = np.concatenate([p0, qNs]) low_bounds = np.concatenate([low_bounds, low_bounds_qNs]) high_bounds = np.concatenate([high_bounds, high_bounds_qNs]) if self.unfolded_baseline_type in ['quadratic', 'exponential']: p0 = np.concatenate([p0, qUs]) low_bounds = np.concatenate([low_bounds, low_bounds_qUs]) high_bounds = np.concatenate([high_bounds, high_bounds_qUs]) # Increase the bounds for c_N and c_U # Find index in the param names for signal_name in 
self.signal_names: c_N_name = 'oligomer_slope_term_native - ' + signal_name c_U_name = 'oligomer_slope_term_unfolded - ' + signal_name c_N_idx = params_names.index(c_N_name) c_U_idx = params_names.index(c_U_name) low_bounds[c_N_idx] -= 5 high_bounds[c_N_idx] += 5 low_bounds[c_U_idx] -= 5 high_bounds[c_U_idx] += 5 # If required, include a scale factor for each oligomer concentration if model_scale_factor: # The last oligomer concentration is fixed to 1, the rest are fitted scale_factors = [1 for _ in range(self.nr_olig - 1)] scale_factors_low = [0.5882 for _ in range(self.nr_olig - 1)] scale_factors_high = [1.7 for _ in range(self.nr_olig - 1)] p0 = np.concatenate([p0, scale_factors]) low_bounds = np.concatenate([low_bounds, scale_factors_low]) high_bounds = np.concatenate([high_bounds, scale_factors_high]) params_names += ['Scale factor - ' + str(d) + ' (M). ID: ' + str(i) for i, d in enumerate(self.oligomer_concentrations)] params_names.pop() # Remove the last one, as it is fixed to 1 scale_factor_exclude_ids = [self.nr_olig - 1] if model_scale_factor else [] signal_fx = map_two_state_model_to_signal_fx(self.model) # Do a prefit with a reduced dataset kwargs = { 'list_of_temperatures' : self.temp_lst_expanded_subset, 'list_of_signals' : self.signal_lst_expanded_subset, 'signal_ids' : self.signal_ids, 'oligomer_concentrations': self.oligomer_concentrations_expanded, 'initial_parameters': p0, 'low_bounds': low_bounds, 'high_bounds': high_bounds, 'model_scale_factor':model_scale_factor, 'cp_value' : self.cp_value, 'scale_factor_exclude_ids':scale_factor_exclude_ids, 'signal_fx' : signal_fx, 'baseline_native_fx' : self.baseline_N_fx, 'baseline_unfolded_fx' : self.baseline_U_fx, 'fit_native_olig_slope' : True, 'fit_unfolded_olig_slope' : True } fit_fx = fit_oligomer_unfolding_many_signals if self.pre_fit: global_fit_params, cov, predicted = fit_fx(**kwargs) # Assign the fitted parameters to the initial guess for the full dataset p0 = global_fit_params # End of prefit 
with reduced dataset # Use the whole dataset kwargs['list_of_signals'] = self.signal_lst_expanded kwargs['list_of_temperatures'] = self.temp_lst_expanded global_fit_params, cov, predicted = fit_fx(**kwargs) # Remove scale factors that are not significant if model_scale_factor: # 2 parameters corresponding to Tm, dH # plus Cp if fitted idx_start = 2 + (self.cp_value is None) native_factor = 2+np.sum(baseline_fx_name_to_req_params(self.baseline_N_fx)) unfolded_factor = 2+np.sum(baseline_fx_name_to_req_params(self.baseline_U_fx)) # Add index according to the native baseline polynomial order idx_start += native_factor * self.nr_signals # Add index according to the unfolded baseline polynomial order idx_start += unfolded_factor * self.nr_signals for _ in range(5): # Sort in ascending order the IDs to exclude scale_factor_exclude_ids = sorted(scale_factor_exclude_ids) n_fixed_factors = len(scale_factor_exclude_ids) n_fit_factors = self.nr_olig - n_fixed_factors if n_fit_factors == 0: break sf_params = global_fit_params[idx_start:(idx_start + n_fit_factors)] idxs_to_remove = [] re_fit = False # Add dummy variable where we need to skip the index for id in scale_factor_exclude_ids: sf_params = np.insert(sf_params, id, np.nan) for i, sf in enumerate(sf_params): if i in scale_factor_exclude_ids: continue if 0.995 <= sf <= 1.015: # Exclude the scale factor from the fit scale_factor_exclude_ids.append(i) re_fit = True j1 = np.sum(np.array(scale_factor_exclude_ids) < i) j2 = len(idxs_to_remove) idxs_to_remove.append(idx_start + i - j1 + j2) if not re_fit: break else: for idx in reversed(idxs_to_remove): global_fit_params = np.delete(global_fit_params, idx) low_bounds = np.delete(low_bounds, idx) high_bounds = np.delete(high_bounds, idx) del params_names[idx] kwargs['initial_parameters'] = global_fit_params kwargs['low_bounds'] = low_bounds kwargs['high_bounds'] = high_bounds kwargs['scale_factor_exclude_ids'] = scale_factor_exclude_ids global_fit_params, cov, predicted = 
fit_fx(**kwargs) rel_errors = relative_errors(global_fit_params, cov) self.params_names = params_names self.p0 = p0 self.low_bounds = low_bounds self.high_bounds = high_bounds self.global_fit_params = global_fit_params self.rel_errors = rel_errors self.predicted_lst_multiple = re_arrange_predictions( predicted, self.nr_signals, self.nr_olig) self.create_params_df() self.create_dg_df() self.global_global_global_fit_done = True # Obtained the scaled signal too if model_scale_factor: # signal scaled hos one sublist per selected signal type signal_scaled = deepcopy(self.signal_lst_multiple) predicted_scaled = deepcopy(self.predicted_lst_multiple) for value, param in zip(self.global_fit_params, self.params_names): if 'Scale factor' in param: id = int(param.split('(M). ID: ')[-1]) for i in range(len(signal_scaled)): signal_scaled[i][id] /= value predicted_scaled[i][id] /= value self.signal_lst_multiple_scaled = signal_scaled self.predicted_lst_multiple_scaled = predicted_scaled return None
[docs] def signal_to_df(self, signal_type='raw', scaled=False): """ Create a dataframe with three columns: Temperature, Signal, and oligomer. Optimized for speed by avoiding per-curve DataFrame creation. Parameters ---------- signal_type : {'raw', 'fitted', 'derivative'}, optional Which signal to include in the dataframe. 'raw' uses experimental data, 'fitted' uses model predictions, 'derivative' uses the estimated derivative signal. scaled : bool, optional If True and signal_type == 'fitted' or 'raw', use the scaled versions if available. Returns ------- pd.DataFrame A DataFrame with columns: ['Temperature', 'Signal', 'Oligomer', 'ID']. """ # Flatten all arrays and repeat oligomer values accordingly if signal_type == 'derivative': deriv_lst = self.deriv_lst_multiple[0] temp_lst = self.temp_deriv_lst_multiple[0] signal_all = np.concatenate(deriv_lst) temp_all = np.concatenate(temp_lst) else: # temperature is shared for the experimental and fitted signals temp_lst = self.temp_lst_multiple[0] if self.max_points is not None: temp_lst = [subset_data(x, self.max_points) for x in temp_lst] temp_all = np.concatenate(temp_lst) # fitted data signal does not need subset! 
if signal_type == 'fitted': if not scaled: predicted_lst = self.predicted_lst_multiple[0] else: predicted_lst = self.predicted_lst_multiple_scaled[0] signal_all = np.concatenate(predicted_lst) temp_all = np.concatenate(temp_lst) # Signal_type set to 'raw' else: if not scaled: signal_lst = self.signal_lst_multiple[0] else: signal_lst = self.signal_lst_multiple_scaled[0] if self.max_points is not None: signal_lst = [subset_data(x, self.max_points) for x in signal_lst] signal_all = np.concatenate(signal_lst) oligomer_all = np.concatenate([ np.full_like(temp_lst[i], self.oligomer_concentrations[i], dtype=np.float64) for i in range(len(temp_lst)) ]) # Add an ID column, so we can identify the curves, even with the same oligomer concentration id_all = np.concatenate([ np.full_like(temp_lst[i], i, dtype=np.int32) for i in range(len(temp_lst)) ]) signal_df = pd.DataFrame({ 'Temperature': temp_all, 'Signal': signal_all, 'Oligomer': oligomer_all, 'ID': id_all }) return signal_df