{ "cells": [ { "cell_type": "markdown", "id": "10e0ce23", "metadata": {}, "source": [ "
\n", " import numpy as np\n", "
\n", "\n",
" np.array? öffnet documentation
\n",
" Bsp : \n",
"
a = np.array([1, 2, 3]) , np.zeros(n) , np.ones(n) , np.ones(n, dtype=int oder float ) , \n",
"
a *= 2 👉 [2, 4, 6] , a[-1] 👉 6 letztes El. \n",
"
b = np.array([[1, 2, 3], [97, 98, 99]])\n",
"
a.shape 👉 Dimension (Zeilen, Spalten) \n",
"
\n",
" a[i_start : i_stop : i_step] (falls a[::] 👉 i_start = 0, stop = \"array.length\", step = 1) \n",
"
\n",
" a[i_start : i_stop] indexierung bei n-dim. (hier 2-D) Arrays : b[1, 0:] 👉 [97, 98, 99] Operationen auf Teilbereichen ebenfalls möglich \n",
"
\n",
" print?
\n",
" Bsp :
print(a, end=';') 👉 [2, 4, 6]; falls loop durch array dann 2;4;6; \n",
"
print('x = {}'.format(7)) {:d} (int), {:f} (float), {:0.nf} (float mit n nachkommastellen) 👉 x = 7 \n",
"
\n",
" Loops\n",
"
Bsp :
for index, value in enumerate(a):\n",
"
a[index] = value**2\n",
"
a 👉 [4, 16, 36] \n",
"
\n",
"
for i in range(i_start, i_end, i_step):\n",
"
for i in a[1:]:\n",
"
\n", " import os\n", "
\n", "\n",
" Navigation
\n",
" Bsp :
\n",
" cwd = os.getcwd() , os.listdir(cwd) 👉 ['file1', 'file2', ...] , file = os.path.join(cwd, 'filename') , os.chdir(r\"path\")
\n",
"
\n",
" Speichern / Lesen \n",
"
Bsp :\n",
"
data = np.loadtxt(file, dtype='float', comments='#', delimiter=',' , skiprows=n ) falls Dataset-dimension und Anzahl Zeilen z.B. 2 ist geht auch x, y = np.loadtxt(...)
z.B.: print(data) 👉 [7, 7, 7, 420, 69] \n",
"
np.savetxt('filename', (x, y, z) arrays oder variablen , delimiter=',', header='Beispieldatei Datenanalyse, 14.02.2023. Format: x, y, z') (x, y, z ➡️ np.array([...]))\n",
"
\n",
" ...\"low-level\" : \n",
"
\n",
" with open('filename', 'r') as f: \n",
"
lines = f.readlines()\n",
"
with open('filename', 'w') as f: \n",
"
f.writelines('x, y, z\\n')\n",
"
for i in range(0, len(x)):\n",
"
f.writelines('{:0.5f}, {:0.5f}, {:0.5f}\\n'.format(x[i], y[i], z[i]))\n",
"
\n",
" Additional Stuff\n",
"
np.sum , np.empty (faster than np.zeros) , np.arange(a_min, a_max, distance_from_points) , np.linspace(a_min, a_max, amount_of_points) , len(a) , np.fromstring , np.append, np.transpose\n",
"
\n",
" Funktionen\n",
"
def function_name(parameter1, parameter2, keyword_argument=initial_value): \n",
"
... \n",
"
return return_value1, return_value2\n",
"
❗️ ACHTUNG : Funktionsname nicht als Variablenname brauchen ❗️\n",
"
\n", " import matplotlib.pyplot as plt\n", "
\n", "\n",
" plt.plot?\n",
"
plt.plot(x, y) x.shape == y.shape , plt.figure , plt.show , plt.title , plt.xlabel , plt.ylabel , plt.grid , plt.subplot(row, column, index) multiple plots in one fig. color attribute : color = 'color' or 'hex' , plt.bar , plt.barh horizontal , plt.hist\n",
"
Bsp : \n",
"
fig = plt.figure() \n",
"
a = fig.add_subplot(1, 1, 1)\n",
"
a.plot(t, V)\n",
"
a.set_xlabel('t (s)')\n",
"
a.set_ylabel('V (U)')\n",
"
a.set_title('Rauschen einer Messung')\n",
"
\n",
" np.histogram?\n",
"
np.histogram(V, bins)\n",
"
Bsp : \n",
"
cm = 1 / 2.54 Umrechnung Centimeter zu Zoll (figsize erwartet Zoll) \n",
"
binsize = 0.01\n",
"
bins = np.arange(np.min(V), np.max(V), binsize)\n",
"
hist, _ = np.histogram(V, bins)\n",
"
fig = plt.figure(figsize=(30*cm, 25.5*cm))\n",
"
ax1 = fig.add_subplot(2,2,1)\n",
"
ax1.bar(bins[:-1], hist, width=0.8*binsize) width < binsize\n",
"
ax1.set_title('Binsize: {:.2f}V'.format(binsize))\n",
"
Standardabw. σ = np.sqrt((1/(len(array)-1)) * np.sum((array - µ)**2)) , Mittelwert µ = (1/(len(array))) * np.sum(array) \n",
"
np.std 1/n anstatt 1/(n-1) (Approximation für grosse Datensätze) \n",
"
= np.sqrt(np.var)\n",
"
\n",
" Additional Stuff\n",
"
np.random.rand, np.random.randint(x, size=y) , np.mean , plt.suptitle , plt.scatter , plt.legend\n",
"
colors = np.array([0, 10, ... , 100]) , plt.scatter(x, y, c=colors, cmap='viridis')
np.min , np.max\n",
"
ax.axvline(Mittelwert, color='r') , ax.set_xlim(13, 16) Zoom in x-Achse 13-16 \n",
"
Normal distribution , Standard deviation , Variance\n",
"
Zentraler Grenzwertsatz (3blue1brown)\n",
"
\n",
" Signalauflösung Uₘᵢₙ ~ ΔUₘᵢₙ > 0 (kleinster Abstand zwischen zwei gemessenen Werten, Einheit: Signalgrösse) \n",
"
Bsp :\n",
"
Auflösungsfunktion :\n",
"
#pre : non-empty array with positive values, epsilon > 0 (smaller than Umin (z.B.: 1 * 10**-7))
#post : Auflösung der Signalgrösse Umin = 'float' \n",
"
\n",
"
def resolution(array, epsilon):\n",
"
diffArray = np.diff(np.sort(array))\n",
"
maxNum = np.max(diffArray)\n",
"
\n",
"
#get rid of all zeros in diffArray since Umin > 0 \n",
"
for i, val in enumerate(diffArray):\n",
"
if diffArray[i] < epsilon:\n",
"
diffArray[i] = maxNum\n",
"
\n",
"
return min(diffArray)\n",
"
\n",
" Messrate / Sampling Rate, fₛ = 1/Δt \n",
"
Bsp : \n",
"
fₛ = 1 / (t[1] - t[0])\n",
"
\n",
" Additional Stuff\n",
"
Interactive plots:\n",
"
🟢 on : %matplotlib notebook \n",
"
🔴 off : %matplotlib inline\n",
"
\n",
"
Binsize < Uₘᵢₙ 👉 empty bins\n",
"
Multimodal distribution\n",
"
\n",
" Fehler des Mittelwerts Δµ = np.std(array)/np.sqrt(N) , N = Anzahl Messungen \n",
"
Bsp:\n",
"
\n",
"
N = np.logspace(1, np.log10(len(array)), 100, dtype=int)\n",
"
\n",
"
mean_n = np.zeros(len(N))\n",
"
std_n = np.zeros(len(N))\n",
"
error_mean = np.zeros(len(N))\n",
"
\n",
"
for i, n in enumerate(N):\n",
"
mean_n[i] = np.mean(array[:n]) \n",
"
std_n[i] = np.std(array[:n]) \n",
"
error_mean[i] = np.std(array[:n]) / np.sqrt(n) \n",
"
\n",
" Unsicherheit von F(x1, x2,...) für unkorrelierte Variablen x1, x2, ...\n",
"
σF² = (∂F/∂x1)²σₓ₁² + (∂F/∂x2)²σₓ₂² + ... \n",
"
falls σₓ₁ = σₓ₂ := σₓ 👉 σF² = ((∂F/∂x1)² + (∂F/∂x2)² + ...)σₓ² \n",
"
σF = √σF²\n",
"
\n",
"
Unsicherheit zu gross 👉 Asymmetrische Verteilung\n",
"
\n",
" Additional Stuff\n",
"
np.logspace , plt.semilogx log scaling on the x axis , plt.semilogy , ax.set_yscale , ax.set_xscale , plt.errorbar\n",
"
Standard error / Fehler des Mittelwerts\n",
"
\n",
" Autokovarianz\n",
"
Rₓₓ(Δ) = (1/(N - 1)) * Σₙ ₌ ₁ᴺ(xₙ - x̄)*(xₙ₊Δ - x̄) Indexverschiebung \n",
"
Rₓₓ(𝜏) = (1/(N - 1)) * Σₜ ₌ ₜ₁ᵗᴺ(x(t) - x̄)*(x(t + 𝜏) - x̄) Zeitverschiebung \n",
"
Bsp:\n",
"
Autokovarianzfunktion (fast):\n",
"
\n",
"
def autocov(x, shift):\n",
"
if shift == 0:\n",
"
acov = np.var(x) \n",
"
else:\n",
"
N = len(x) \n",
"
x_mean = np.mean(x) \n",
"
x_residue = x - x_mean \n",
"
acov = np.sum(x_residue[:-shift] * x_residue[shift:]) / (N - shift) \n",
"
return acov\n",
"
\n",
"
𝜏 → ∞ 👉 Rₓₓ(𝜏) ≈ 0\n",
"
Korrelationszeit 𝜏꜀ 👉 Rₓₓ(𝜏꜀) ≈ 0 \n",
"
\n",
"
Autokorrelationskoeffizient\n",
"
ρₓₓ = Rₓₓ(𝜏)/σₓ² ∈ [-1, 1] \n",
"
\n",
" Additional Stuff\n",
"
σₓ = np.sqrt((σₓₓ)²)\n",
"
plt.tight_layout() keine Überlappung \n",
"
Bestimmung der Dauer eines kurzen Laserpulses 👉 Autokovarianz oder Autokorrelation, da z.B Laserpulse (z.B 150fs) kürzer als Abtastraten von Detektoren (z.B bis zu 1ns) \n",
"
Covariance\n",
"
Autocovariance\n",
"
\n", " import math\n", "
\n", "\n",
" Nyquist-Frequenz\n",
" fₘₐₓ die ohne Aliasing Effekt gesampled werden kann \n",
"
fₘₐₓ = 1/(2Δt) \n",
"
= 1 / (2 * (t[1] - t[0]))\n",
"
\n",
" Frequenzauflösung \n",
"
Δf = 2*fₘₐₓ/N \n",
"
= 1 / (t[-1] - t[0]) oft t[0] = 0 \n",
"
\n",
" Fourier Transformation\n",
"
X(fₙ) = (1/N) * Σₖ ₌ ₀ᴺ ⁻ ¹x(tₖ) * exp(-i2πfₙtₖ)\n",
" \n",
"
fₙ = nΔf\n",
"
Δf = 1 / tₜₒₜ\n",
"
tₖ = kΔt\n",
" \n",
"
\n",
"
np.fft.fftfreq(len(array), Δt), np.fft.fft(array)\n",
"
\n",
"
Bsp:\n",
"
A = np.fft.fft(array)\n",
"
A[0]\n",
" zero-frequency term (the sum of the signal), which is always purely real for real inputs \n",
"
A[1:math.floor(len(array)/2)] \n",
" positive-frequency terms \n",
"
A[math.floor((len(V) / 2) + 1):]\n",
" negative-frequency terms, in order of decreasingly negative frequency \n",
"
AmaxEven = A[len(array)//2]\n",
" represents both positive and negative Nyquist frequency, and is also purely real for real input \n",
"
AmaxOddPos = A[math.floor((len(V) - 1) / 2)]\n",
" contains the largest positive frequency \n",
"
AmaxOddNeg = A[math.floor((len(V) + 1) / 2)]\n",
" contains the largest negative frequency \n",
"
\n",
"
\n",
" \n",
" f = np.fft.fftfreq(len(array), t[1] - t[0])\n",
"
spectrum = np.fft.fft(array)\n",
"
psd = ((t[1] - t[0]) / len(array)) * np.abs(spectrum)**2\n",
"
\n",
" \n",
"
Rücktrafo:\n",
" \n",
"
f = np.fft.ifft(spectrum)\n",
"
real part: np.real(f), imaginary part: np.imag(f)\n",
" \n",
"
\n",
" PSD\n",
"
Sₓₓ(fₙ) = Δt * |X(fₙ)|² Normierung mit Δt 👉 PSD, sonst PD \n",
"
👉 wie viel Oszillationsenergie in einem bestimmten Teil des Spektrums vorhanden ist \n",
"
\n",
"
[Sₓₓ(fₙ)] = Signal²/Frequenz(Hz) \n",
"
plt.plot(f)(Index, Frequenz) , plt.plot(f, psd)(Frequenz, psd) \n",
"
\n",
" Additional Stuff\n",
"
Fourier Transformation (3blue1brown)\n",
"
PSD\n",
"
Nyquist-Frequenz\n",
"
\n",
" PSDs und Varianzen unkorrelierter Variablen sind additiv:\n",
"
\n",
" \n",
" Sₓₓ(fₙ) = Sₐₐ(fₙ) + S_bb(fₙ)\n",
" \n",
"
σₓ² = σₐ² + σ_b² \n",
" (Parseval) \n",
"
folgt aus Substitution 👉 x(tₖ) = a(tₖ) + b(tₖ) \n",
"
\n", " Gleitender Mittelwert\n", " 𝑥̃ᵢ = mean({𝑥ᵢ−Δ,...,𝑥ᵢ+Δ})\n", "
\n", "\n",
" Filtern im Zeitraum\n",
"
Bsp:\n",
"
Glättungsfunktion (mit gleitendem Mittelwert 𝑥̃ᵢ):\n",
"
\n",
" \n",
" #pre: 1-dimensional array, delta is a natural number\n",
"
#post: delta > 0 👉 smoothened array, delta = 0 👉 copy of array \n",
"
\n",
" def smooth(array, delta): \n",
"
sArray = np.empty(len(array))\n",
"
for i, val1 in enumerate(array):\n",
"
if i - delta >= 0 and i + delta < len(array):\n",
"
s = 0\n",
"
for j in array[i - delta: i + (delta + 1)]:\n",
"
s = s + j\n",
"
sArray[i] = s / ((2*delta) + 1)\n",
"
else:\n",
"
sArray[i] = 0\n",
"
\n",
"
return sArray\n",
"
\n",
"
Effekt: \n",
" Schnelles Rauschen wird entfernt, langsame Ausschläge werden beibehalten.\n",
"
Je grösser delta ist desto glatter wird das Signal, falls aber delta zu gross (abhängig von Datensatz) gewählt wird werden die peaks verbreitert.\n",
"
\n",
" Filtern in Frequenzraum\n",
"
Bsp:\n",
"
Filterfunktion:\n",
" \n",
"
#pre: time axis t, signal, minimum and maximum frequency fMin, fMax\n",
"
#post: filtered signal (in time domain) 👉 Frequenzen ausserhalb von [fMin, fMax] wurden auf 0 gesetzt \n",
"
def freqFilter(t, signal, fMin, fMax):\n",
"
f = np.fft.fftfreq(len(signal), t[1] - t[0])\n",
"
spectrum = np.fft.fft(signal)\n",
"
for i in range(len(f)):\n",
"
if abs(f[i]) < fMin or abs(f[i]) > fMax:\n",
"
spectrum[i] = 0\n",
"
filteredf = np.real(np.fft.ifft(spectrum))\n",
"
\n",
"
return filteredf\n",
"
\n",
"
\n",
" \n",
" Je nach Wahl von fMin und fMax (abhängig von Datensatz), kann man von einem Tief-/Hochpassfilter oder sogar Bandpassfilter sprechen\n",
"
Tiefpassfilter 👉 kann Einhüllende des Pulses extrahieren (bis auf einen Skalierungsfaktor), dann diese weiter analysieren um z.B. Pulslänge zu bestimmen (z.B. Annäherung durch Gaussfunktion od. numerische ermittlung von FWHM (Week 10))\n",
" \n",
"
\n",
" Additional Stuff\n",
"
Michelson Interferometer\n",
"
Parseval Theorem\n",
"
Gleitender Mittelwert (Moving average)\n",
"
\n",
" Satz von Bayes\n",
"
\n",
" P(B|A) = P(A|B)P(B)/P(A)\n",
"
\n",
" Probability Mass Function\n",
"
Aᵢ diskreter Satz von Ereignissen dann gilt:\n",
"
P(Aᵢ) ≥ 0\n",
"
ΣᵢP(Aᵢ) = 1\n",
"
\n",
" Bsp:\n",
"
\n",
" def PMF(data, resolution):\n",
"
# Definieren der Bins und Berechnen des Histogramms \n",
"
# Wir addieren resolution / 1000 zur Obergrenze, um sicherzustellen, dass der letzte Wert auch im Array ist. \n",
"
bin_centers = np.arange(np.min(data), np.max(data) + resolution / 1000, resolution)\n",
"
bin_edges = np.linspace(bin_centers[0] - resolution / 2, bin_centers[-1] + resolution / 2, len(bin_centers) + 1)\n",
"
hist, _ = np.histogram(data, bin_edges)\n",
"
# Normieren \n",
"
px = hist / np.sum(hist)\n",
"
return bin_centers, px\n",
"
bin_centers 👉 Werte(Signal), px 👉 PMF \n",
"
\n",
" Probability Density Function\n",
"
Falls das Ergebnis des Experiments eine kontinuierliche Zufallsvariable x ist dann gilt:\n",
"
f(x) ≥ 0\n",
"
-∞→∞∫f(x)dx = 1\n",
"
P(a ≤ x ≤ b) = ∫ₐᵇf(x)dx\n",
"
❗️ Achtung: f(x) ist nicht die Wahrscheinlichkeit, dass x auftritt und [P(Aᵢ)] ≠ [f(x)] ❗️ \n",
"
\n",
" folgende Formulierungen gelten für PMF und PDF (∫ ↔ Σ), sind jedoch nicht für jede Verteilung sinnvoll \n",
"
\n",
"
Mode\n",
"
xₘₒ𝒹ₑ := {x ∈ ℝ : f(x) = max(f(x))}\n",
"
Für PDF mit nur einem Maximum 👉 xₘₒ𝒹ₑ erfüllt df(xₘₒ𝒹ₑ)/dx = 0\n",
"
\n",
" Bsp:\n",
"
\n",
" def mode(data, res):\n",
"
x, px = PMF(data, res)\n",
"
return x[np.argmax(px)]\n",
"
\n",
" Median\"die Mitte der PDF\" \n",
"
1/2 = -∞→xₘₑ𝒹ᵢₐₙ∫f(x)dx\n",
"
Bsp:\n",
"
\n",
" def median(data, resolution):\n",
"
x, px = PMF(data, resolution)\n",
"
# Wir summieren die Wahrscheinlichkeiten auf, bis wir 50% erreicht haben, der Median entspricht \n",
"
# dem Wert bei 50%. \n",
"
curr_sum = 0\n",
"
for i in range(len(x)):\n",
"
curr_sum += px[i]\n",
"
if curr_sum >= 0.5:\n",
"
break\n",
"
return x[i]\n",
"
\n",
" Full Width at Half Maximum (FWHM)\n",
"
f(a) = f(b) = (1/2)·f(xₘₒ𝒹ₑ)\n",
"
FWHM = b - a\n",
"
FWHMGₐᵤₛₛ = 2σsqrt(2log2)\n",
"
\n",
" bei bimodaler Verteilung weniger sinnvoll... \n",
"
\n",
" Bsp:\n",
"
\n",
" def FWHM(x, y):\n",
"
# np.where findet alle Werte die die gegebene Bedingung erfüllen und gibt deren Indizes aus. \n",
"
# Wir wählen also den ersten und letzten Wert der grösser ist als die hälfte des Maximums, \n",
"
# und geben deren Abstand wieder. \n",
"
x0 = x[np.where(y >= np.max(y)/2)[0][0]]\n",
"
x1 = x[np.where(y >= np.max(y)/2)[0][-1]]\n",
"
return abs(x1 - x0)\n",
"
\n",
" Momente\n",
"
Mₘ := -∞→∞∫xᵐf(x)dx m-te Moment einer PDF \n",
"
M̃ₘ = 〈(x - M₁)ᵐ〉m-te zentrale Moment einer PDF (Aussagen relativ vom Mittelwert) \n",
"
\n",
"
\n",
" \n",
" 0.Moment: M₀ = 1\n",
"
1.Moment: M₁ = Mittelwert \n",
"
\n",
"
2.Zentrale Moment: M̃₂ = Varianz σ²\n",
"
3. 👉 \"Schiefe\" der PDF \n",
"
4. 👉 \"Wölbung\" der PDF\n",
" \n",
"
\n",
" Bsp:\n",
"
\n",
" #n-tes Moment eines Datensatzes \n",
"
def moment(data, n):\n",
"
return np.sum(data**n)/len(data)\n",
"
\n",
" Additional Stuff\n",
"
np.argmax(), np.where()\n",
"
Bayes Theorem (3blue1brown)\n",
"
Bayesian Statistics\n",
"
Probability Density Function, Probability Mass Function\n",
"
Mode, Median, FWHM\n",
"
\n",
" Gauss-Verteilung/Normalverteilung\n",
"
f(x) = 1/(sqrt(2π)σ) * exp(-(x - µ)²/(2σ²)) Erwartungswert µ \n",
"
Die Verteilung der Mittelwerte von grossen Stichproben von Zufallsvariablen nähern sich der Gauss \n",
" Verteilung an. Unabhängig davon welche Verteilung der einzelnen Zufallsvariable zugrunde liegt. \n",
"
\n",
" Binomialverteilung\n",
"
P(𝛙ₖ) = (N!/(k!(N-k)!))pᵏqᴺ⁻ᵏ \n",
"
p = P(u), q = 1 - p = P(d), wobei u und d die zwei möglichen Zustände sind \n",
"
Σₖ₌₀ᴺP(𝛙ₖ) = 1 👉 0.Moment & Wahrsch.verteilung normiert \n",
"
\n",
"
Mₘ = Σₖ₌₀ᴺ(kᵐP(𝛙ₖ))\n",
"
Mₘ₊₁ = NpMₘ + pq(∂Mₘ/∂p)\n",
"
👉 σ = sqrt(Npq) \n",
"
\n",
" Poisson Verteilung Grenzfall der Binomialverteilung für sehr seltene Ereignisse \n",
"
Bsp: Photonenstatistik in einem Laserstrahl \n",
"
\n",
"
P(k) = (N >> k)lim(P(𝛙ₖ))\n",
"
µ = Np (M₁ der Binomialverteilung) \n",
"
\n",
"
P(k) = (µᵏ/k!)exp(-µ)\n",
"
Mₘ₊₁ = µMₘ + µ(∂Mₘ/∂µ)\n",
"
\n",
" Additional Stuff\n",
"
np.random.normal()\n",
"
Binomialverteilung\n",
"
Poisson Verteilung\n",
"
Zentraler Grenzwertsatz (3blue1brown)\n",
"
\n",
" Annahme: Polynomieller Zusammenhang\n",
"
👉 Messgrösse y(x) = f(x, a) = Σₙ ₌ ₀ᵐ(aₙxⁿ) , a = (a₀,..., aₘ) \n",
"
\n", " Likelihood Funktion keine Wahrscheinlichkeitsverteilung \n", "
\n", "\n", " Log-Likelihood Funktion\n", "
\n", "\n", " Maximum Likelihood\n", "
\n", "\n",
" Lineares Fitten nicht immer Sinnvoll wenn z.B. kein Physikalisches Modell zum Fitten benutzt wird \n",
"
Minimieren der Summe der Residuenquadrate\n",
"
S = Σᵢ ₌ ₁ᴺ(wᵢ(yᵢ - f(xᵢ, a))²), wᵢ = 1/σᵢ² \n",
"
Als Funktion: \n",
"
def S_(x, y, sigma, f, a):\n",
"
return np.sum((y - f(x,a))**2 / sigma**2) \n",
"
\n",
"
Bsp:\n",
"
\n",
" # Wertebereiche für a0 und a1 definieren \n",
"
a0_range = np.linspace(-0.5, 1.5, 50)\n",
"
a1_range = np.linspace(0, 2, 60)\n",
"
\n",
"
# Array für die berechneten S initialisieren \n",
"
S = np.zeros(shape=(50, 60))\n",
"
\n",
"
# S für alle Kombinationen berechnen \n",
"
for i, a0 in enumerate(a0_range):\n",
"
for j, a1 in enumerate(a1_range):\n",
"
S[i, j] = np.sum((y_val - (a0 + a1 * x_val))**2 / sigma_y**2)\n",
"
\n",
"
\n",
" im = ax.pcolormesh(a0_range, a1_range, np.log(S).T, shading='nearest')\n",
"
fig.colorbar(im, label=r'$\\log S$')\n",
"
ax.set_xlabel('$a_0$ (N)')\n",
"
ax.set_ylabel('$a_1$ (N/cm)')\n",
" \n",
"
Bsp:\n",
"
\n",
" Analytische Methode\n",
" \n",
" import matplotlib.gridspec as gs\n",
" \n",
" gs0 = gs.GridSpec(nrows=2, ncols=3, height_ratios=[3, 1], hspace=.1, figure=fig)\n",
"
Nâ = Y Normalmatrix 👉 â = N⁻¹Y\n",
"
â = (â₀,..., âₘ)ᵀ, N = ((Σᵢwᵢ, Σᵢwᵢxᵢ),(Σᵢwᵢxᵢ, Σᵢwᵢxᵢ²))ᵀ, Y = (Σᵢwᵢyᵢ, Σᵢwᵢxᵢyᵢ)ᵀ\n",
"
wᵢ = 1/σᵢ²\n",
"
\n",
" Bsp 1:\n",
"
Explizite Berechnung bei Grad 2:\n",
"
\n",
" w = 1/sigmaF**2\n",
"
\n",
"
N = np.array([[np.sum(w), np.sum(w * x)], [np.sum(w * x), np.sum(w * (x**2))]])\n",
"
Y = np.array([np.sum(w * F), np.sum(w * x * F)])\n",
"
\n",
" Ninv = np.linalg.inv(N) Matrix Invertieren \n",
"
\n",
"
fitParams = Ninv @ Y , @ oder np.dot(Ninv, Y) Matrixmultiplikation (❗️auf Dimension achten❗️) \n",
"
#a0, a1 = fitParams \n",
"
\n",
"
sigma_a0 = np.sqrt(Ninv[0, 0])\n",
"
sigma_a1 = np.sqrt(Ninv[1, 1])\n",
"
Diagonaleinträge der Kovarianzmatrix sind die Varianzen (sigma²) der Fitparameter 👉 Wurzel = Standardabw. (sigma) \n",
"
\n",
"
\n",
" Bsp 2:\n",
"
Fit-Funktion für Polynome bel. Ordnung:\n",
"
\n",
" #pre: datasets x, y, sigma(y), Polynomgrad deg \n",
"
#post: Kovarianzmatrix, Fitparameter (Spalten)vektor \n",
"
def linearFit(x, y, sigma, deg):\n",
"
w = 1/sigma**2\n",
"
N = np.zeros(shape=(deg, deg))\n",
"
Y = np.zeros(shape=(deg))\n",
"
\n",
"
for i in range(deg):\n",
"
Y[i] = np.sum(w * x**i * y)\n",
"
for j in range(deg):\n",
"
N[i, j] = np.sum(w * x**(i+j))\n",
"
\n",
"
#kovarianzmatrix \n",
"
Ninv = np.linalg.inv(N)\n",
"
\n",
"
fitParams = Ninv @ Y\n",
"
\n",
"
return Ninv, fitParams\n",
"
\n",
"
\n",
" \n",
" fit_n = np.zeros(len(x))\n",
"
for index, x_i in enumerate(x):\n",
"
sum_i = 0\n",
"
for n, a_n in enumerate(fitParams_n):\n",
"
sum_i += a_n * x_i**n\n",
"
\n",
"
fit_n[index] = sum_i \n",
" \n",
"
👉 ax = fig.add_subplot(gs0[0, 0])\n",
"
ax.set_xticklabels([]) Entfernen der Achsenindizes \n",
"
ax.text(x, y, \"text\") Text im Plot \n",
"
\n",
" Additional Stuff\n",
"
Underfitting 👉 Abweichung von Datenpunkten und Fit, Struktur in den Residuen\n",
"
Overfitting 👉 Residuen zufällig Verteilt (gut) aber Fitparameter verlieren physikalische Bedeutung also n-1 fittet den Datensatz vielleicht genauso gut\n",
"
\n",
"
a.errorbar(x_val, y_val, sigma_y, capsize=3, linestyle='None', marker='.')\n",
"
np.zeros(shape=(x, y))\n",
"
Likelihood Function\n",
"
Linear Regression (video)\n",
"
\n", " Gradientenverfahren\n", "
\n", "\n",
" Newton Verfahren basically Gradientenverfahren aber 2.Term der Taylor Entwicklung wird auch berücksichtigt \n",
"
\n",
"
\n",
" Marquardts Methode\n",
"
\n",
"
\n", " from scipy.optimize import curve_fit\n", "
\n", "\n",
" Additional Stuff\n",
"
np.linalg.norm, np.diag\n",
"
Gradientenverfahren\n",
"
\n", " import pandas as pd\n", "
\n", "\n",
" pandas basics\n",
"
df = pd.read_csv('file.csv') \"dataframe\" \n",
"
df.head() erste 5 Zeilen anzeigen \n",
"
df[5:10] slicing wie bei numpy 👉 Zeilen 5-9 \n",
"
df[['Spaltenname1', 'Spaltenname2',...]]\n",
"
df.drop(['Spaltenname1', 'Spaltenname2', ...], axis=1) Spalte ausschliessen \n",
"
\n", " Decision Trees\n", "
\n", "from sklearn
\n", "sklearn.model_selection import train_test_split
\n", "sklearn.tree import DecisionTreeClassifier
\n", "sklearn.tree import plot_tree
\n", "sklearn.metrics import accuracy_score
\n", "\n",
" Test und Trainingssatz erstellen, Tree plotten, Accuracy Score bestimmen\n",
"
Bsp:\n",
"
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, train_size=0.7) \n",
"
train size sollte grösser als test sein, wenn nur test angegeben wird, dann ist train das Komplement davon \n",
"
clf = DecisionTreeClassifier(max_depth=4).fit(X_train, y_train)\n",
"
plot_tree(clf)\n",
"
y_pred = clf.predict(X_test)\n",
"
print('Accuracy Score: {}'.format(accuracy_score(y_pred, y_test)))\n",
"
\n",
"
Neue Predictions mit trainiertem Modell 👉 clf.predict(df2)\n",
"
\n", " Linear Regression\n", "
\n", "from sklearn
\n", "sklearn.linear_model import LinearRegression
\n", "sklearn.metrics import r2_score
\n", "\n",
" R2-Score\n",
"
\n",
"
\n",
" Test und Trainingssatz erstellen, R2-Score bestimmen ähnlicher Ablauf wie bei Decision Trees \n",
"
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)\n",
"
reg = LinearRegression().fit(X_train, y_train)\n",
"
y_pred = reg.predict(X_test) \n",
"
print(\"R2-Score: {:.3f}\".format(r2_score(y_test, y_pred)))\n",
"
\n", " Logistic Regression same wie oben... \n", "
\n", "from sklearn
\n", "sklearn.linear_model import LogisticRegression
\n", "\n", " logreg = LogisticRegression(max_iter=1000).fit(X_train, y_train) max_iter optional aber oft gut \n", "
\n", "\n",
" Additional Stuff\n",
"
Gini-Coefficient\n",
"
scikit-learn\n",
"
R2-Score\n",
"
from sklearn
\n", "sklearn.model_selection import GridSearchCV
\n", "sklearn.preprocessing import PolynomialFeatures
\n", "sklearn.pipeline import Pipeline
\n", "sklearn.neural_network import MLPClassifier
\n", "\n",
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, train_size=0.7)\n",
"
#mlp = MLPClassifier(hidden_layer_sizes=(32,32,16,16,8), max_iter=5000, random_state=0) verschiedene hidden layers probieren \n",
"
mlp = MLPClassifier(hidden_layer_sizes=(32, 32, 32, 32, 32, 32), max_iter=5000, random_state=0)\n",
"
mlp.fit(X_train, y_train)\n",
"
\n",
"
y_pred = mlp.predict(X_test)\n",
"
print('Accuracy Score: {}'.format(accuracy_score(y_pred, y_test)))\n",
"
\n",
"
X_final = pd.read_csv('X_final.csv')\n",
"
y_final = mlp.predict(X_final.values)\n",
"
\n",
"
fig = plt.figure()\n",
"
ax = fig.add_subplot(1, 1, 1)\n",
"
ax.scatter(X_final['X1'], X_final['X2'], c=y_final)\n",
"
ax.set_aspect(1)\n",
"
\n",
"
ax.set_xlabel('X1')\n",
"
ax.set_ylabel('X2')\n",
"
\n", " ...many functions (see sol.)\n", "
\n", "\n",
" Additional Stuff\n",
"
np.reshape()\n",
"
Bsp:\n",
"
X = np.reshape(df['x'].values, newshape=(-1, 1)) (-1, 1) bedeutet: eine Spalte mit sovielen Zeilen wie nötig \n",
"
Dictionaries very similar to objects in JS \n",
"
\n", " import skimage.io\n", "
\n", "\n",
" analog zu einlesen von Textdaten \n",
"
\n",
" img = skimage.io.imread(files[n], as_gray=True (od. False) , ...) returns 2D np.array \n",
"
fig = plt.figure()\n",
"
a = fig.add_subplot(1, 1, 1)\n",
"
a.imshow(img (, cmap='gray') ) Bild \"plotten\" \n",
"
a.set_xticks([]) Achsenbeschriftung entfernen \n",
"
\n", " import pickle\n", "
\n", "\n",
" eine weitere Art um Daten einzulesen... \n",
"
input = pickle.load(open('filename', 'rb'))\n",
"
... plt.imshow(input, cmap='gray_r')\n",
"
\n", " ...many functions (see sol.)\n", "
\n", "\n",
" Additional Stuff\n",
"
ax.set_aspect(1) aspect ratio \n",
"
\n", " (2023 Aleksandar Tuzlak | atuzlak@ethz.ch | aleksandartuzlak.com | n.ethz.ch/~atuzlak )\n", "
\n", "