Source code of data collapse functions

Source code of data collapse functions#

Listing 1 implements a data collapse of the form

\[ \tilde{y}_i = p\left(\tilde{x}_i\right), \]

where \(p\) is a polynomial and \(\tilde{x}_i\), \(\tilde{y}_i\) are obtained with the scaling assumption

\[ \tilde{x}_i, \sigma_{\tilde{x}_i}, \tilde{y}_i, \sigma_{\tilde{y}_i} = f\left(L_i, x_i, \sigma_{{x}_i}, y_i, \sigma_{{y}_i}, par\right) \]

By minimizing the quality of fit function

\[ S = \frac{1}{N} \sum_i \frac{\left[ p\left(\tilde{x}_i\right) - \tilde{y}_i \right]^2} {\sigma^2_{\tilde{y}_i} + p'\left(\tilde{x}_i\right)^2 \sigma^2_{\tilde{x}_i}} \]

trough varying \(p\) and \(par\). The uncertainty of the results are estimated with a bootstrap resampling scheme [118].

Listing 1 Content of file collapse.py that implements an automatic data collapse.#
  1"""Provides functions for data collapse."""
  2# pylint: disable=invalid-name
  3# pylint: disable=redefined-outer-name
  4# pylint: disable=too-many-arguments
  5
  6import numpy as np
  7from numpy.random import default_rng
  8from numpy.polynomial.polynomial import polyval, polyfit, polyder
  9from scipy.optimize import minimize
 10import matplotlib.pyplot as plt
 11
 12
 13def collapse(func, x_name, y_name, df, Ls, par0,
 14             N_boot=100, plot=True, rank=3, x_err=False,
 15             verbose=False, size_name='l1'):
 16    """Perform a data collapse.
 17
 18    Rescales data according to `func` and fits polynomial of rank `rank`.
 19    Minimizes a quality of fit function to achieve data collapse and
 20    estimates error through bootstrap method.
 21
 22    Parameters
 23    ----------
 24    func : function
 25        Function func(L, x, dx, y, dy, par) defining the rescaling to achieve
 26        data collapse, with:
 27
 28        Parameters
 29        ----------
 30        L: int
 31            system size.
 32        x: array of floats
 33            x values.
 34        dx: array of float or None
 35            Error of x values
 36        y: array of floats
 37            y values.
 38        dy: array of float or None
 39            Error of y values.
 40        par: List of float
 41            parameters for data collapse, e.g. critical field, exponents.
 42
 43        Returns
 44        -------
 45        x_scaled : array of float
 46            Scaled x values.
 47        dx_scaled : array of float or None
 48            Scaled x error.
 49        y_scaled : array of float
 50            Scaled y values.
 51        dy_scaled : array of float
 52            Scaled y error.
 53    x_name : str
 54        Name of x variable.
 55    y_name : str
 56        Name of y variable.
 57    df : pandas DataFrame
 58        Contains data.
 59    Ls : list of int
 60        System sizes to consider.
 61    par0 : List of floats
 62        Starting parameters for data collapse.
 63    N_boot : int, default=100
 64        Number of bootstrap bins to generate for error analysis.
 65    plot : bool, default=True
 66        Plot data collapse via matplotlib.
 67    rank : integer, default=3
 68        Rank of polynomial to fit.
 69    x_err : bool, default=False
 70        Consider x errors.
 71    verbose : bool, default=False
 72        Be verbose.
 73    size_name : str, default="l1"
 74        Name of parameter corresponding to system size.
 75
 76    Returns
 77    -------
 78    {
 79        'Ls': Ls,
 80        'L0': Ls[0],
 81        'NL': len(Ls),
 82        'popt': popt,
 83        'perr': perr,
 84        'S': S,
 85    }
 86    with :
 87        Ls : list of int
 88            Input argument Ls
 89        popt : array of float
 90            Best parameters found for data collapse, including parameters for
 91            polynomial fit function.
 92        perr : array of float
 93            Standard error of data collapse parameters.
 94        S : float
 95            Quality of data collapse. Smaller is better, should be of order 1.
 96    """
 97    data = Data_obj(df, x_name, y_name, Ls, x_err=x_err, size_name=size_name)
 98
 99    if x_err:
100        def quality(par, Npar, L, x, dx, y, dy):
101            """The quality of fit function."""
102            der = polyder(par[Npar:])
103            S = 0.
104            n = 0
105            for L_, x_, dx_, y_, dy_ in zip(L, x, dx, y, dy):
106                x_scaled, dx_scaled, y_scaled, dy_scaled = \
107                    func(L_, x_, dx_, y_, dy_, par[:Npar])
108                # Alternative quality of fit function.
109                # S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
110                #       / (dy_scaled**2 + (polyval(x_scaled, der)*dx_scaled)**2)).mean()
111                # n += 1
112                S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
113                      / (dy_scaled**2 + (polyval(x_scaled, der)*dx_scaled)**2)).sum()
114                n += len(x_scaled)
115            return S / n
116    else:
117        def quality(par, Npar, L, x, dx, y, dy):
118            """The quality of fit function."""
119            S = 0.
120            n = 0
121            for L_, x_, dx_, y_, dy_ in zip(L, x, dx, y, dy):
122                x_scaled, dx_scaled, y_scaled, dy_scaled = \
123                    func(L_, x_, dx_, y_, dy_, par[:Npar])
124                # Alternative quality of fit function.
125                # S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
126                #       / dy_scaled**2).mean()
127                # n += 1
128                S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
129                      / dy_scaled**2).sum()
130                n += len(x_scaled)
131            return S / n
132
133    # Initial fit.
134    Npar = len(par0)
135    x_scaled, dx_scaled, y_scaled, dy_scaled = func(*data.get_data_flat(), par0)
136    if x_err:
137        der = polyder(p)
138        p = polyfit(x_scaled, y_scaled,
139                    w=1/(dy_scaled**2 + (polyval(x, der)*dx_scaled)**2),
140                    deg=rank, full=False)
141    else:
142        p = polyfit(x_scaled, y_scaled, w=1/dy_scaled**2, deg=rank, full=False)
143    L, x, dx, y, dy = data.get_data()
144    res0 = minimize(quality, (*par0, *p), args=(Npar, L, x, dx, y, dy))
145    if verbose:
146        print(res0.fun)
147
148    # Return results of initial fit, if no bootstrap error estimation.
149    if N_boot == 0:
150        if plot:
151            plot_collapse(func, x_name, y_name, df, Ls,
152                          res0.x[:Npar], rank=rank, x_err=x_err, size_name=size_name)
153        return {'Ls': Ls, 'L0': Ls[0], 'NL': len(Ls),
154                'popt': res0.x, 'perr': [None]*len(res0.x), 'S': res0.fun}
155
156    # Execute bootstrap error estimation.
157    xs = np.empty((N_boot, len(res0.x)))
158    for n in range(N_boot):
159        L, x, dx, y, dy = data.generate_data()
160        res = minimize(quality, res0.x, args=(Npar, L, x, dx, y, dy))
161        xs[n] = res.x
162    popt = np.mean(xs, 0)
163    perr = np.std(xs, 0)
164    L, x, dx, y, dy = data.get_data()
165    S = quality(popt, Npar, L, x, dx, y, dy)
166
167    if plot:
168        plot_collapse(func, x_name, y_name, df, Ls, popt[:Npar],
169                      rank=rank, x_err=x_err, size_name=size_name)
170    return {'Ls': Ls, 'L0': Ls[0], 'NL': len(Ls),
171            'popt': popt, 'perr': perr, 'S': S}
172
173
174def plot_collapse(
175    func, x_name, y_name, df, Ls, par, rank=3, x_err=False,
176    fmts=None, size_name='l1'):
177    """Plot data collapse via matplotlib. See :func:`collapse`."""
178    if fmts is None:
179        fmts = {4: '<', 6: '>', 8: '^', 10: 'v', 12: '*',
180                14: 'x', 16: 'o', 18: '+', 20: '.'}
181    data = Data_obj(df, x_name, y_name, Ls, x_err=x_err, size_name=size_name)
182    x_min = np.inf
183    x_max = -np.inf
184
185    for L, x, dx, y, dy in zip(*data.get_data()):
186        x_scaled, dx_scaled, y_scaled, dy_scaled = func(L, x, dx, y, dy, par)
187        x_min = min(x_min, x_scaled.min())
188        x_max = max(x_max, x_scaled.max())
189        plt.errorbar(x_scaled, y_scaled, dy_scaled, xerr=dx_scaled,
190                     fmt=fmts[L], label=f'L={L}')
191
192    L, x, dx, y, dy = data.get_data_flat()
193    x_scaled, dx_scaled, y_scaled, dy_scaled = func(L, x, dx, y, dy, par)
194    if x_err:
195        der = polyder(p)
196        p = polyfit(x_scaled, y_scaled,
197                    w=1/(dy_scaled**2 + (polyval(x_scaled, der)*dx_scaled)**2),
198                    deg=rank, full=False)
199    else:
200        p = polyfit(x_scaled, y_scaled, w=1/dy_scaled**2, deg=rank, full=False)
201
202    x = np.linspace(x_min, x_max)
203    plt.plot(x, polyval(x, p))
204    plt.legend()
205
206
207class Data_obj:
208    """Encapsulate data for bootstrap."""
209
210    def __init__(self, df, x_name, y_name, Ls, x_err=False, size_name='l1'):
211        self.x_err = x_err
212        self.len = 0
213        self.x = []
214        self.dx = []
215        self.y = []
216        self.dy = []
217        self.Ls = Ls
218        self.rng = default_rng()
219        for L in Ls:
220            df1 = df[df[size_name] == L]
221            self.len += len(df1)
222            self.x.append(np.array(df1[x_name]))
223            if x_err:
224                self.dx.append(np.array(df1[x_name+'_err']))
225            else:
226                self.dx.append(None)
227            self.y.append(np.array(df1[y_name]))
228            self.dy.append(np.array(df1[y_name+'_err']))
229
230    def get_data(self):
231        return self.Ls, self.x, self.dx, self.y, self.dy
232
233    def get_data_flat(self):
234        L = np.empty((self.len,))
235        x = np.empty((self.len,))
236        if self.x_err:
237            dx = np.empty((self.len,))
238        else:
239            dx = None
240        y = np.empty((self.len,))
241        dy = np.empty((self.len,))
242        i = 0
243        for L_temp, x_temp, dx_temp, y_temp, dy_temp in \
244                zip(self.Ls, self.x, self.dx, self.y, self.dy):
245            le = len(x_temp)
246            L[i:i+le] = L_temp
247            x[i:i+le] = x_temp
248            if self.x_err:
249                dx[i:i+le] = dx_temp
250            y[i:i+le] = y_temp
251            dy[i:i+le] = dy_temp
252            i += le
253        return L, x, dx, y, dy
254
255    def generate_data(self):
256        y_generated = []
257        for y, dy, in zip(self.y, self.dy):
258            y_generated.append(self.rng.normal(loc=y, scale=dy))
259
260        if self.x_err:
261            x_generated = []
262            for x, dx, in zip(self.x, self.dx):
263                x_generated.append(self.rng.normal(loc=x, scale=dx))
264        else:
265            x_generated = self.x
266
267        return self.Ls, x_generated, self.dx, y_generated, self.dy