Source code of data collapse functions#
Listing 1 implements a data collapse of the form
\[
\tilde{y}_i = p\left(\tilde{x}_i\right),
\]
where \(p\) is a polynomial and \(\tilde{x}_i\), \(\tilde{y}_i\) are obtained with the scaling assumption
\[
\tilde{x}_i, \sigma_{\tilde{x}_i}, \tilde{y}_i, \sigma_{\tilde{y}_i}
= f\left(L_i, x_i, \sigma_{{x}_i}, y_i, \sigma_{{y}_i}, par\right)
\]
By minimizing the quality of fit function
\[
S = \frac{1}{N} \sum_i \frac{\left[
p\left(\tilde{x}_i\right) - \tilde{y}_i
\right]^2}
{\sigma^2_{\tilde{y}_i}
+ p'\left(\tilde{x}_i\right)^2 \sigma^2_{\tilde{x}_i}}
\]
trough varying \(p\) and \(par\). The uncertainty of the results are estimated with a bootstrap resampling scheme [118].
1"""Provides functions for data collapse."""
2# pylint: disable=invalid-name
3# pylint: disable=redefined-outer-name
4# pylint: disable=too-many-arguments
5
6import numpy as np
7from numpy.random import default_rng
8from numpy.polynomial.polynomial import polyval, polyfit, polyder
9from scipy.optimize import minimize
10import matplotlib.pyplot as plt
11
12
13def collapse(func, x_name, y_name, df, Ls, par0,
14 N_boot=100, plot=True, rank=3, x_err=False,
15 verbose=False, size_name='l1'):
16 """Perform a data collapse.
17
18 Rescales data according to `func` and fits polynomial of rank `rank`.
19 Minimizes a quality of fit function to achieve data collapse and
20 estimates error through bootstrap method.
21
22 Parameters
23 ----------
24 func : function
25 Function func(L, x, dx, y, dy, par) defining the rescaling to achieve
26 data collapse, with:
27
28 Parameters
29 ----------
30 L: int
31 system size.
32 x: array of floats
33 x values.
34 dx: array of float or None
35 Error of x values
36 y: array of floats
37 y values.
38 dy: array of float or None
39 Error of y values.
40 par: List of float
41 parameters for data collapse, e.g. critical field, exponents.
42
43 Returns
44 -------
45 x_scaled : array of float
46 Scaled x values.
47 dx_scaled : array of float or None
48 Scaled x error.
49 y_scaled : array of float
50 Scaled y values.
51 dy_scaled : array of float
52 Scaled y error.
53 x_name : str
54 Name of x variable.
55 y_name : str
56 Name of y variable.
57 df : pandas DataFrame
58 Contains data.
59 Ls : list of int
60 System sizes to consider.
61 par0 : List of floats
62 Starting parameters for data collapse.
63 N_boot : int, default=100
64 Number of bootstrap bins to generate for error analysis.
65 plot : bool, default=True
66 Plot data collapse via matplotlib.
67 rank : integer, default=3
68 Rank of polynomial to fit.
69 x_err : bool, default=False
70 Consider x errors.
71 verbose : bool, default=False
72 Be verbose.
73 size_name : str, default="l1"
74 Name of parameter corresponding to system size.
75
76 Returns
77 -------
78 {
79 'Ls': Ls,
80 'L0': Ls[0],
81 'NL': len(Ls),
82 'popt': popt,
83 'perr': perr,
84 'S': S,
85 }
86 with :
87 Ls : list of int
88 Input argument Ls
89 popt : array of float
90 Best parameters found for data collapse, including parameters for
91 polynomial fit function.
92 perr : array of float
93 Standard error of data collapse parameters.
94 S : float
95 Quality of data collapse. Smaller is better, should be of order 1.
96 """
97 data = Data_obj(df, x_name, y_name, Ls, x_err=x_err, size_name=size_name)
98
99 if x_err:
100 def quality(par, Npar, L, x, dx, y, dy):
101 """The quality of fit function."""
102 der = polyder(par[Npar:])
103 S = 0.
104 n = 0
105 for L_, x_, dx_, y_, dy_ in zip(L, x, dx, y, dy):
106 x_scaled, dx_scaled, y_scaled, dy_scaled = \
107 func(L_, x_, dx_, y_, dy_, par[:Npar])
108 # Alternative quality of fit function.
109 # S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
110 # / (dy_scaled**2 + (polyval(x_scaled, der)*dx_scaled)**2)).mean()
111 # n += 1
112 S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
113 / (dy_scaled**2 + (polyval(x_scaled, der)*dx_scaled)**2)).sum()
114 n += len(x_scaled)
115 return S / n
116 else:
117 def quality(par, Npar, L, x, dx, y, dy):
118 """The quality of fit function."""
119 S = 0.
120 n = 0
121 for L_, x_, dx_, y_, dy_ in zip(L, x, dx, y, dy):
122 x_scaled, dx_scaled, y_scaled, dy_scaled = \
123 func(L_, x_, dx_, y_, dy_, par[:Npar])
124 # Alternative quality of fit function.
125 # S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
126 # / dy_scaled**2).mean()
127 # n += 1
128 S += ((polyval(x_scaled, par[Npar:]) - y_scaled)**2
129 / dy_scaled**2).sum()
130 n += len(x_scaled)
131 return S / n
132
133 # Initial fit.
134 Npar = len(par0)
135 x_scaled, dx_scaled, y_scaled, dy_scaled = func(*data.get_data_flat(), par0)
136 if x_err:
137 der = polyder(p)
138 p = polyfit(x_scaled, y_scaled,
139 w=1/(dy_scaled**2 + (polyval(x, der)*dx_scaled)**2),
140 deg=rank, full=False)
141 else:
142 p = polyfit(x_scaled, y_scaled, w=1/dy_scaled**2, deg=rank, full=False)
143 L, x, dx, y, dy = data.get_data()
144 res0 = minimize(quality, (*par0, *p), args=(Npar, L, x, dx, y, dy))
145 if verbose:
146 print(res0.fun)
147
148 # Return results of initial fit, if no bootstrap error estimation.
149 if N_boot == 0:
150 if plot:
151 plot_collapse(func, x_name, y_name, df, Ls,
152 res0.x[:Npar], rank=rank, x_err=x_err, size_name=size_name)
153 return {'Ls': Ls, 'L0': Ls[0], 'NL': len(Ls),
154 'popt': res0.x, 'perr': [None]*len(res0.x), 'S': res0.fun}
155
156 # Execute bootstrap error estimation.
157 xs = np.empty((N_boot, len(res0.x)))
158 for n in range(N_boot):
159 L, x, dx, y, dy = data.generate_data()
160 res = minimize(quality, res0.x, args=(Npar, L, x, dx, y, dy))
161 xs[n] = res.x
162 popt = np.mean(xs, 0)
163 perr = np.std(xs, 0)
164 L, x, dx, y, dy = data.get_data()
165 S = quality(popt, Npar, L, x, dx, y, dy)
166
167 if plot:
168 plot_collapse(func, x_name, y_name, df, Ls, popt[:Npar],
169 rank=rank, x_err=x_err, size_name=size_name)
170 return {'Ls': Ls, 'L0': Ls[0], 'NL': len(Ls),
171 'popt': popt, 'perr': perr, 'S': S}
172
173
174def plot_collapse(
175 func, x_name, y_name, df, Ls, par, rank=3, x_err=False,
176 fmts=None, size_name='l1'):
177 """Plot data collapse via matplotlib. See :func:`collapse`."""
178 if fmts is None:
179 fmts = {4: '<', 6: '>', 8: '^', 10: 'v', 12: '*',
180 14: 'x', 16: 'o', 18: '+', 20: '.'}
181 data = Data_obj(df, x_name, y_name, Ls, x_err=x_err, size_name=size_name)
182 x_min = np.inf
183 x_max = -np.inf
184
185 for L, x, dx, y, dy in zip(*data.get_data()):
186 x_scaled, dx_scaled, y_scaled, dy_scaled = func(L, x, dx, y, dy, par)
187 x_min = min(x_min, x_scaled.min())
188 x_max = max(x_max, x_scaled.max())
189 plt.errorbar(x_scaled, y_scaled, dy_scaled, xerr=dx_scaled,
190 fmt=fmts[L], label=f'L={L}')
191
192 L, x, dx, y, dy = data.get_data_flat()
193 x_scaled, dx_scaled, y_scaled, dy_scaled = func(L, x, dx, y, dy, par)
194 if x_err:
195 der = polyder(p)
196 p = polyfit(x_scaled, y_scaled,
197 w=1/(dy_scaled**2 + (polyval(x_scaled, der)*dx_scaled)**2),
198 deg=rank, full=False)
199 else:
200 p = polyfit(x_scaled, y_scaled, w=1/dy_scaled**2, deg=rank, full=False)
201
202 x = np.linspace(x_min, x_max)
203 plt.plot(x, polyval(x, p))
204 plt.legend()
205
206
207class Data_obj:
208 """Encapsulate data for bootstrap."""
209
210 def __init__(self, df, x_name, y_name, Ls, x_err=False, size_name='l1'):
211 self.x_err = x_err
212 self.len = 0
213 self.x = []
214 self.dx = []
215 self.y = []
216 self.dy = []
217 self.Ls = Ls
218 self.rng = default_rng()
219 for L in Ls:
220 df1 = df[df[size_name] == L]
221 self.len += len(df1)
222 self.x.append(np.array(df1[x_name]))
223 if x_err:
224 self.dx.append(np.array(df1[x_name+'_err']))
225 else:
226 self.dx.append(None)
227 self.y.append(np.array(df1[y_name]))
228 self.dy.append(np.array(df1[y_name+'_err']))
229
230 def get_data(self):
231 return self.Ls, self.x, self.dx, self.y, self.dy
232
233 def get_data_flat(self):
234 L = np.empty((self.len,))
235 x = np.empty((self.len,))
236 if self.x_err:
237 dx = np.empty((self.len,))
238 else:
239 dx = None
240 y = np.empty((self.len,))
241 dy = np.empty((self.len,))
242 i = 0
243 for L_temp, x_temp, dx_temp, y_temp, dy_temp in \
244 zip(self.Ls, self.x, self.dx, self.y, self.dy):
245 le = len(x_temp)
246 L[i:i+le] = L_temp
247 x[i:i+le] = x_temp
248 if self.x_err:
249 dx[i:i+le] = dx_temp
250 y[i:i+le] = y_temp
251 dy[i:i+le] = dy_temp
252 i += le
253 return L, x, dx, y, dy
254
255 def generate_data(self):
256 y_generated = []
257 for y, dy, in zip(self.y, self.dy):
258 y_generated.append(self.rng.normal(loc=y, scale=dy))
259
260 if self.x_err:
261 x_generated = []
262 for x, dx, in zip(self.x, self.dx):
263 x_generated.append(self.rng.normal(loc=x, scale=dx))
264 else:
265 x_generated = self.x
266
267 return self.Ls, x_generated, self.dx, y_generated, self.dy