{ "cells": [ { "cell_type": "markdown", "id": "10e0ce23", "metadata": {}, "source": [ "
\n", "
\n", "
\n", "

Week 1
\n", "

Basics, Arrays / Numpy-Arrays, Laden und Speichern von Daten
\n", " \n", "
\n", "
\n", "

\n", " import numpy as np\n", "

\n", "

\n", " np.array? öffnet documentation
\n", " Bsp : \n", "
a = np.array([1, 2, 3]) , np.zeros(n) , np.ones(n) , np.ones(n, dtype=int oder float ) , \n", "
a *= 2 👉 [2, 4, 6] , a[-1] 👉 6 letztes El. \n", "
b = np.array([[1, 2, 3], [97, 98, 99]])\n", "
a.shape 👉 Dimension (Zeilen, Spalten) \n", "

\n", " a[i_start : i_stop : i_step] (falls a[::] 👉 i_start = 0, stop = \"array.length\", step = 1) \n", "
\n", " a[i_start : i_stop] indexierung bei n-dim. (hier 2-D) Arrays : b[1, 0:] 👉 [97, 98, 99] Operationen auf Teilbereichen ebenfalls möglich \n", "

\n", "

\n", " print?
\n", " Bsp :
print(a, end=';') 👉 [2, 4, 6]; falls loop durch array dann 2;4;6; \n", "
print('x = {}'.format(7)) {:d} (int), {:f} (float), {:0.nf} (float mit n nachkommastellen) 👉 x = 7 \n", "

\n", "

\n", " Loops\n", "
Bsp :
for index, value in enumerate(a):\n", "
a[index] = value**2\n", "
a 👉 [4, 16, 36] \n", "
\n", "
for i in range(i_start, i_end, i_step):\n", "
for i in a[1:]:\n", "

\n", "

\n", " import os\n", "

\n", "

\n", " Navigation
\n", " Bsp :
\n", " cwd = os.getcwd() , os.listdir(cwd) 👉 ['file1', 'file2', ...] , file = os.path.join(cwd, 'filename') , os.chdir(r\"path\")
\n", "

\n", "

\n", " Speichern / Lesen \n", "
Bsp :\n", "
data = np.loadtxt(file, dtype='float', comments='#', delimiter=',' , skiprows=n ) falls Dataset-dimension und Anzahl Zeilen z.B. 2 ist geht auch x, y = np.loadtxt(...)
z.B.: print(data) 👉 [7, 7, 7, 420, 69]
\n", "

np.savetxt('filename', (x, y, z) arrays oder variablen , delimiter=',', header='Beispieldatei Datenanalyse, 14.02.2023. Format: x, y, z') (x, y, z ➡️ np.array([...]))\n", "

\n", " ...\"low-level\" : \n", "
\n", " with open('filename', 'r') as f: \n", "
lines = f.readlines()\n", "

with open('filename', 'w') as f: \n", "
f.writelines('x, y, z\\n')\n", "
for i in range(0, len(x)):\n", "
f.writelines('{:0.5f}, {:0.5f}, {:0.5f}\\n'.format(x[i], y[i], z[i]))\n", "

\n", "

\n", " Additional Stuff\n", "
np.sum , np.empty (faster than np.zeros) , np.arange(a_min, a_max, distance_from_points) , np.linspace(a_min, a_max, amount_of_points) , len(a) , np.fromstring , np.append, np.transpose\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 2
\n", "

Funktionen, Mittelwert / Standardabweichung, Histogramm, Normalverteilung
\n", " \n", "
\n", "
\n", "

\n", " Funktionen\n", "
def function_name(parameter1, parameter2, keyword_argument=initial_value): \n", "
... \n", "
return return_value1, return_value2\n", "

❗️ ACHTUNG : Funktionsname nicht als Variablenname brauchen ❗️\n", "

\n", "

\n", " import matplotlib.pyplot as plt\n", "

\n", "

\n", " plt.plot?\n", "
plt.plot(x, y) x.shape == y.shape , plt.figure , plt.show , plt.title , plt.xlabel , plt.ylabel , plt.grid , plt.subplot(row, column, index) multiple plots in one fig. color attribute : color = 'color' or 'hex' , plt.bar , plt.barh horizontal , plt.hist\n", "
Bsp : \n", "
fig = plt.figure() \n", "
a = fig.add_subplot(1, 1, 1)\n", "
a.plot(t, V)\n", "
a.set_xlabel('t (s)')\n", "
a.set_ylabel('V (U)')\n", "
a.set_title('Rauschen einer Messung')\n", "

\n", "

\n", " np.histogram?\n", "
np.histogram(V, bins)\n", "
Bsp : \n", "
cm = 1 / 2.54 Umrechnung Centimeter zu Zoll (figsize erwartet Zoll) \n", "
binsize = 0.01\n", "
bins = np.arange(np.min(V), np.max(V), binsize)\n", "
hist, _ = np.histogram(V, bins)\n", "
fig = plt.figure(figsize=(30*cm, 25.5*cm))\n", "
ax1 = fig.add_subplot(2,2,1)\n", "
ax1.bar(bins[:-1], hist, width=0.8*binsize) width < binsize\n", "
ax1.set_title('Binsize: {:.2f}V'.format(binsize))\n", "

Standardabw. σ = np.sqrt((1/(len(array)-1)) * np.sum((array - µ)**2)) , µ = (1/(len(array))) * np.sum(array) Mittelwert \n", "
np.std 1/n anstatt 1/(n-1) (Approximation für grosse Datensätze) \n", "
= np.sqrt(np.var)\n", "

\n", "

\n", " Additional Stuff\n", "
np.random.rand, np.random.randint(x, size=y) , np.mean , plt.suptitle , plt.scatter , plt.legend\n", "
colors = np.array([0, 10, ... , 100]) , plt.scatter(x, y, c=colors, cmap='viridis') colormap , plt.colorbar\n", "
np.min , np.max\n", "
ax.axvline(Mittelwert, color='r') , ax.set_xlim(13, 16) Zoom in x-Achse 13-16 \n", "
Normal distribution , Standard deviation , Variance\n", "
Zentraler Grenzwertsatz (3blue1brown)\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 3
\n", "

Auflösung (des Signalwerts) einer Messung, Oszillierendes Signal & Samplingrate
\n", " \n", "
\n", "
\n", "

\n", " Signalauflösung Uₘᵢₙ ~ ΔUₘᵢₙ > 0 (kleinster Abstand zwischen zwei gemessenen Werten, Einheit: Signalgrösse) \n", "
Bsp :\n", "
Auflösungsfunktion :\n", "
#pre : non-empty array with positive values, epsilon > 0 (smaller than Umin (z.B.: 1 * 10**-7))
#post : Auflösung der Signalgrösse Umin = 'float'
\n", "
\n", "
def resolution(array, epsilon):\n", "
diffArray = np.diff(np.sort(array))\n", "
maxNum = np.max(diffArray)\n", "
\n", "
#get rid of all zeros in diffArray since Umin > 0 \n", "
for i, val in enumerate(diffArray):\n", "
if diffArray[i] < epsilon:\n", "
diffArray[i] = maxNum\n", "
\n", "
return min(diffArray)\n", "

\n", "

\n", " Messrate / Sampling Rate, fₛ = 1/Δt \n", "
Bsp : \n", "
fₛ = 1 / (t[1] - t[0])\n", "

\n", "

\n", " Additional Stuff\n", "
Interactive plots:\n", "
🟢 on : %matplotlib notebook \n", "
🔴 off : %matplotlib inline\n", "
\n", "
Binsize < Uₘᵢₙ 👉 empty bins\n", "
Multimodal distribution\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 4
\n", "

Fehler des Mittelwertes (mit Gauss), Gausssche Fehlerfortpflanzung
\n", " \n", "
\n", "
\n", "

\n", " Fehler des Mittelwerts Δµ = np.std(array)/np.sqrt(N) , N = Anzahl Messungen \n", "
Bsp:\n", "
\n", "
N = np.logspace(1, np.log10(len(array)), 100, dtype=int)\n", "
\n", "
mean_n = np.zeros(len(N))\n", "
std_n = np.zeros(len(N))\n", "
error_mean = np.zeros(len(N))\n", "
\n", "
for i, n in enumerate(N):\n", "
mean_n[i] = np.mean(array[:n]) \n", "
std_n[i] = np.std(array[:n]) \n", "
error_mean[i] = np.std(array[:n]) / np.sqrt(n) \n", "

\n", "

\n", " Unsicherheit von F(x1, x2,...) für unkorrelierte Variablen x1, x2, ...\n", "
σF² = (∂F/∂x1)²σₓ₁² + (∂F/∂x2)²σₓ₂² + ... \n", "
falls σₓ₁ = σₓ₂ := σₓ 👉 σF² = ((∂F/∂x1)² + (∂F/∂x2)² + ...)σₓ² \n", "
σF = √σF²\n", "
\n", "
Unsicherheit zu gross 👉 Asymmetrische Verteilung\n", "

\n", "

\n", " Additional Stuff\n", "
np.logspace , plt.semilogx log scaling on the x axis , plt.semilogy , ax.set_yscale , ax.set_xscale , plt.errorbar\n", "
Standard error / Fehler des Mittelwerts\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 5
\n", "

Kovarianz, Autokovarianz f(t), Korrelationskoeffizient
\n", " \n", "
\n", "
\n", " Kovarianz (σₓᵧ)² = cov(x, y) = (1/(N - 1)) * Σₙ ₌ ₁ᴺ(xₙ - x̄)*(yₙ - ȳ) , [(σₓᵧ)²] = [x] × [y] \n", "
np.cov(x, y) 👉 [[(σₓₓ)², (σₓᵧ)²], [(σᵧₓ)², (σᵧᵧ)²]] \n", "
\n", "
(normalisierter) Korrelationskoeffizient ρₓᵧ = (σₓᵧ)²/σₓσᵧ ∈ [-1, 1] \n", "
= 0 👉 unkorrelierte Variablen\n", "
= ±1 👉 maximal (anti-)korrelierte Variablen
\n", "
np.corrcoef(x, y) 👉 [[ρₓₓ=1 , ρₓᵧ], [ρᵧₓ, ρᵧᵧ =1 ]]\n", "

\n", "

\n", " Autokovarianz\n", "
Rₓₓ(Δ) = (1/(N - 1)) * Σₙ ₌ ₁ᴺ(xₙ - x̄)*(xₙ₊Δ - x̄) Indexverschiebung \n", "
Rₓₓ(𝜏) = (1/(N - 1)) * Σₜ ₌ ₜ₁ᵗᴺ(x(t) - x̄)*(x(t + 𝜏) - x̄) Zeitverschiebung \n", "
Bsp:\n", "
Autokovarianzfunktion (fast):\n", "
\n", "
def autocov(x, shift):\n", "
if shift == 0:\n", "
acov = np.var(x) \n", "
else:\n", "
N = len(x) \n", "
x_mean = np.mean(x) \n", "
x_residue = x - x_mean \n", "
acov = np.sum(x_residue[:-shift] * x_residue[shift:]) / (N - shift) \n", "
return acov\n", "
\n", "
𝜏 → ∞ 👉 Rₓₓ(𝜏) ≈ 0\n", "
Korrelationszeit 𝜏꜀ 👉 Rₓₓ(𝜏꜀) ≈ 0 \n", "
\n", "
Autokorrelationskoeffizient\n", "
ρₓₓ = Rₓₓ(𝜏)/σₓ² ∈ [-1, 1] \n", "

\n", "

\n", " Additional Stuff\n", "
σₓ = np.sqrt((σₓₓ)²)\n", "
plt.tight_layout() keine Überlappung \n", "
Bestimmung der Dauer eines kurzen Laserpulses 👉 Autokovarianz oder Autokorrelation, da z.B Laserpulse (z.B 150fs) kürzer als Abtastraten von Detektoren (z.B bis zu 1ns) \n", "
Covariance\n", "
Autocovariance\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 6
\n", "

Fouriertransformation (Discrete Fourier Transform DFT), Spektrale Leistungsdichte (Power Spectral Density PSD)
\n", " \n", "
\n", "
\n", "

\n", " import math\n", "

\n", "

\n", " Nyquist-Frequenz\n", " fₘₐₓ die ohne Aliasing Effekt gesampled werden kann \n", "
fₘₐₓ = 1/(2Δt) \n", "
= 1 / (2 * (t[1] - t[0]))\n", "

\n", "

\n", " Frequenzauflösung \n", "
Δf = 2*fₘₐₓ/N \n", "
= 1 / (t[-1] - t[0]) oft t[0] = 0 \n", "

\n", "

\n", " Fourier Transformation\n", "
X(fₙ) = (1/N) * Σₖ ₌ ₀ᴺ ⁻ ¹x(tₖ) * exp(-i2πfₙtₖ)\n", " \n", "
fₙ = nΔf\n", "
Δf = 1 / tₜₒₜ\n", "
tₖ = kΔt\n", "
\n", "
\n", "
np.fft.fftfreq(len(array), Δt), np.fft.fft(array)\n", "
\n", "
Bsp:\n", "
A = np.fft.fft(array)\n", "
A[0]\n", " zero-frequency term (the sum of the signal), which is always purely real for real inputs \n", "
A[1:math.floor(len(array)/2)] \n", " positive-frequency terms \n", "
A[math.floor((len(array) / 2) + 1):]\n", " negative-frequency terms, in order of decreasingly negative frequency \n", "
AmaxEven = A[len(array)//2]\n", " represents both positive and negative Nyquist frequency, and is also purely real for real input \n", "
AmaxOddPos = A[math.floor((len(array) - 1) / 2)]\n", " contains the largest positive frequency \n", "
AmaxOddNeg = A[math.floor((len(array) + 1) / 2)]\n", " contains the largest negative frequency \n", "
\n", "
\n", " \n", " f = np.fft.fftfreq(len(array), t[1] - t[0])\n", "
spectrum = np.fft.fft(array)\n", "
psd = ((t[1] - t[0]) / len(array)) * np.abs(spectrum)**2\n", "
\n", "
\n", "
Rücktrafo:\n", " \n", "
f = np.fft.ifft(spectrum)\n", "
real part: np.real(f), imaginary part: np.imag(f)\n", "
\n", "

\n", "

\n", " PSD\n", "
Sₓₓ(fₙ) = Δt * |X(fₙ)|² Normierung mit Δt 👉 PSD, sonst PD \n", "
👉 wie viel Oszillationsenergie in einem bestimmten Teil des Spektrums vorhanden ist \n", "
\n", "
[Sₓₓ(fₙ)] = Signal²/Frequenz(Hz) \n", "
plt.plot(f)(Index, Frequenz) , plt.plot(f, psd)(Frequenz, psd) \n", "

\n", "

\n", " Additional Stuff\n", "
Fourier Transformation (3blue1brown)\n", "
PSD\n", "
Nyquist-Frequenz\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 7
\n", "

Rauschen (in PSD), Parseval-Theorem, Glätten, gleitender Mittelwert, Filtern
\n", " \n", "
\n", "
\n", " \"Parseval-Theorem\"\n", "

\n", " PSDs und Varianzen unkorrelierter Variablen sind additiv:\n", "
\n", " \n", " Sₓₓ(fₙ) = Sₐₐ(fₙ) + S₆₆(fₙ)\n", " \n", "
σₓ² = σₐ² + σ₆²
\n", " (Parseval) \n", "
folgt aus Substitution 👉 x(tₖ) = a(tₖ) + b(tₖ) \n", "

\n", "

\n", " Gleitender Mittelwert\n", " 𝑥̃ᵢ = mean({𝑥ᵢ−Δ,...,𝑥ᵢ+Δ})\n", "

\n", "

\n", " Filtern im Zeitraum\n", "
Bsp:\n", "
Glättungsfunktion (mit gleitendem Mittelwert 𝑥̃ᵢ):\n", "
\n", " \n", " #pre: 1-dimensional array, delta is a natural number\n", "
#post: delta > 0 👉 smoothened array, delta = 0 👉 copy of array
\n", "
\n", " def smooth(array, delta): \n", "
sArray = np.empty(len(array))\n", "
for i, val1 in enumerate(array):\n", "
if i - delta >= 0 and i + delta < len(array):\n", "
s = 0\n", "
for j in array[i - delta: i + (delta + 1)]:\n", "
s = s + j\n", "
sArray[i] = s / ((2*delta) + 1)\n", "
else:\n", "
sArray[i] = 0\n", "
\n", "
return sArray\n", "
\n", "
Effekt: \n", " Schnelles Rauschen wird entfernt, langsame Ausschläge werden beibehalten.\n", "
Je grösser delta ist desto glatter wird das Signal, falls aber delta zu gross (abhängig von Datensatz) gewählt wird werden die peaks verbreitert.
\n", "

\n", "

\n", " Filtern in Frequenzraum\n", "
Bsp:\n", "
Filterfunktion:\n", " \n", "
#pre: time axis t, signal, minimum and maximum frequency fMin, fMax\n", "
#post: filtered signal (in time domain) 👉 Frequenzen ausserhalb von [fMin, fMax] wurden auf 0 gesetzt
\n", "
def freqFilter(t, signal, fMin, fMax):\n", "
f = np.fft.fftfreq(len(signal), t[1] - t[0])\n", "
spectrum = np.fft.fft(signal)\n", "
for i in range(len(f)):\n", "
if abs(f[i]) < fMin or abs(f[i]) > fMax:\n", "
spectrum[i] = 0\n", "
filteredf = np.real(np.fft.ifft(spectrum))\n", "
\n", "
return filteredf\n", "
\n", "
\n", " \n", " Je nach Wahl von fMin und fMax (abhängig von Datensatz), kann man von einem Tief-/Hochpassfilter oder sogar Bandpassfilter sprechen\n", "
Tiefpassfilter 👉 kann Einhüllende des Pulses extrahieren (bis auf einen Skalierungsfaktor), dann diese weiter analysieren um z.B. Pulslänge zu bestimmen (z.B. Annäherung durch Gaussfunktion od. numerische ermittlung von FWHM (Week 10))\n", "
\n", "

\n", "

\n", " Additional Stuff\n", "
Michelson Interferometer\n", "
Parseval Theorem\n", "
Gleitender Mittelwert (Moving average)\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 8 \"🏖️\"
\n", "

\n", " \n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "

Week 9 (Bayesian Statistics ausgelassen)
\n", "

Bayesian/Frequentist approach, Wahrscheinlichkeitsverteilung (Charakteristika, Momente), Probability Mass/Density Function (PMF/PDF)
\n", " \n", "
\n", "
\n", "

\n", " Satz von Bayes\n", "
\n", " P(B|A) = P(A|B)P(B)/P(A)\n", "

\n", "

\n", " Probability Mass Function\n", "
Aᵢ diskreter Satz von Ereignissen dann gilt:\n", "
P(Aᵢ) ≥ 0\n", "
ΣᵢP(Aᵢ) = 1\n", "
\n", " Bsp:\n", "
\n", " def PMF(data, resolution):\n", "
# Definieren der Bins und Berechnen des Histogramms \n", "
# Wir addieren resolution / 1000 zur Obergrenze, um sicherzustellen, dass der letzte Wert auch im Array ist. \n", "
bin_centers = np.arange(np.min(data), np.max(data) + resolution / 1000, resolution)\n", "
bin_edges = np.linspace(bin_centers[0] - resolution / 2, bin_centers[-1] + resolution / 2, len(bin_centers) + 1)\n", "

hist, _ = np.histogram(data, bin_edges)\n", "

# Normieren \n", "
px = hist / np.sum(hist)\n", "

return bin_centers, px\n", "
bin_centers 👉 Werte(Signal), px 👉 PMF \n", "

\n", "

\n", " Probability Density Function\n", "
Falls das Ergebnis des Experiments eine kontinuierliche Zufallsvariable x ist dann gilt:\n", "
f(x) ≥ 0\n", "
-∞→∞∫f(x)dx = 1\n", "
P(a ≤ x ≤ b) = ∫ₐᵇf(x)dx\n", "
❗️ Achtung: f(x) ist nicht die Wahrscheinlichkeit, dass x auftritt und [P(Aᵢ)] ≠ [f(x)] ❗️ \n", "

\n", "
\n", "

\n", " folgende Formulierungen gelten für PMF und PDF (∫ ↔ Σ), sind jedoch nicht für jede Verteilung sinnvoll \n", "
\n", "
Mode\n", "
xₘₒ𝒹ₑ := {x ∈ ℝ : f(x) = max(f(x))}\n", "
Für PDF mit nur einem Maximum 👉 df(xₘₒ𝒹ₑ)/dx = 0\n", "
\n", " Bsp:\n", "
\n", " def mode(data, res):\n", "
x, px = PMF(data, res)\n", "
return x[np.argmax(px)]\n", "

\n", "

\n", " Median\"die Mitte der PDF\" \n", "
1/2 = -∞→xₘₑ𝒹ᵢₐₙ∫f(x)dx\n", "
Bsp:\n", "
\n", " def median(data, resolution):\n", "
x, px = PMF(data, resolution)\n", "
# Wir summieren die Wahrscheinlichkeiten auf, bis wir 50% erreicht haben, der Median entspricht \n", "
# dem Wert bei 50%. \n", "

curr_sum = 0\n", "
for i in range(len(x)):\n", "
curr_sum += px[i]\n", "
if curr_sum >= 0.5:\n", "
break\n", "

return x[i]\n", "

\n", "

\n", " Full Width at Half Maximum (FWHM)\n", "
f(a) = f(b) = 1/2f(xₘₒ𝒹ₑ)\n", "
FWHM = b - a\n", "
FWHMGₐᵤₛₛ = 2σsqrt(2log2)\n", "
\n", " bei bimodaler Verteilung weniger sinnvoll... \n", "
\n", " Bsp:\n", "
\n", " def FWHM(x, y):\n", "
# np.where findet alle Werte die die gegebene Bedingung erfüllen und gibt deren Indizes aus. \n", "
# Wir wählen also den ersten und letzten Wert der grösser ist als die hälfte des Maximums, \n", "
# und geben deren Abstand wieder. \n", "
x0 = x[np.where(y >= np.max(y)/2)[0][0]]\n", "
x1 = x[np.where(y >= np.max(y)/2)[0][-1]]\n", "
return abs(x1 - x0)\n", "

\n", "

\n", " Momente\n", "
Mₘ := -∞→∞∫xᵐf(x)dx m-te Moment einer PDF \n", "
M̃ₘ = 〈(x - M₁)ᵐ〉m-te zentrale Moment einer PDF (Aussagen relativ vom Mittelwert) \n", "
\n", "
\n", " \n", " 0.Moment: M₀ = 1\n", "
1.Moment: M₁ = Mittelwert \n", "
\n", "
2.Zentrale Moment: M̃₂ = Varianz σ²\n", "
3. 👉 \"Schiefe\" der PDF \n", "
4. 👉 \"Wölbung\" der PDF\n", "
\n", "
\n", " Bsp:\n", "
\n", " #n-tes Moment eines Datensatzes \n", "
def moment(data, n):\n", "
return np.sum(data**n)/len(data)\n", "

\n", "
\n", "

\n", " Additional Stuff\n", "
np.argmax(), np.where()\n", "
Bayes Theorem (3blue1brown)\n", "
Bayesian Statistics\n", "
Probability Density Function, Probability Mass Function\n", "
Mode, Median, FWHM\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 10
\n", "

Binomial-/Poisson Verteilung, Zentraler Grenzwertsatz, Gauss Verteilung
\n", " \n", "
\n", "
\n", "

\n", " Gauss-Verteilung/Normalverteilung\n", "
f(x) = 1/(sqrt(2π)σ) * exp(-(x - µ)²/(2σ²)) Erwartungswert µ \n", "
Die Verteilung der Mittelwerte von grossen Stichproben von Zufallsvariablen nähern sich der Gauss \n", " Verteilung an. Unabhängig davon welche Verteilung der einzelnen Zufallsvariable zugrunde liegt. \n", "

\n", "

\n", " Binomialverteilung\n", "
P(𝛙ₖ) = (N!/(k!(N-k)!))pᵏqᴺ⁻ᵏ \n", "
p = P(u), q = 1 - p = P(d), wobei u und d die zwei möglichen Zustände sind \n", "
Σₖ₌₀ᴺP(𝛙ₖ) = 1 👉 0.Moment & Wahrsch.verteilung normiert \n", "
\n", "
Mₘ = Σₖ₌₀ᴺ(kᵐP(𝛙ₖ))\n", "
Mₘ₊₁ = NpMₘ + pq(∂Mₘ/∂p)\n", "
👉 σ = sqrt(Npq) \n", "

\n", "

\n", " Poisson Verteilung Grenzfall der Binomialverteilung für sehr seltene Ereignisse \n", "
Bsp: Photonenstatistik in einem Laserstrahl \n", "
\n", "
P(k) = (N >> k, p << 1)lim(P(𝛙ₖ))\n", "
µ = Np (M₁ der Binomialverteilung) \n", "
\n", "
P(k) = (µᵏ/k!)exp(-µ)\n", "
Mₘ₊₁ = µMₘ + µ(∂Mₘ/∂µ)\n", "

\n", "

\n", " Additional Stuff\n", "
np.random.normal()\n", "
Binomialverteilung\n", "
Poisson Verteilung\n", "
Zentraler Grenzwertsatz (3blue1brown)\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 11 (Likelihood Function ausgelassen)
\n", "

Likelihood Funktion, Lineares Fitten von Polynomen
\n", " \n", "
\n", "
\n", "

\n", " Annahme: Polynomieller Zusammenhang\n", "
👉 Messgrösse y(x) = f(x, a) = Σₙ ₌ ₀ᵐ(aₙxⁿ) , a = (a₀,..., aₘ) \n", "

\n", "

\n", " Likelihood Funktion keine Wahrscheinlichkeitsverteilung \n", "

\n", "

\n", " Log-Likelihood Funktion\n", "

\n", "

\n", " Maximum Likelihood\n", "

\n", "

\n", " Lineares Fitten nicht immer Sinnvoll wenn z.B. kein Physikalisches Modell zum Fitten benutzt wird \n", "
Minimieren der Summe der Residuenquadrate\n", "
S = Σᵢ ₌ ₁ᴺ(wᵢ(yᵢ - f(xᵢ, a))²), wᵢ = 1/σᵢ² \n", "
Als Funktion: \n", "
def S_(x, y, sigma, f, a):\n", "
return np.sum((y - f(x,a))**2 / sigma**2) \n", "
\n", "
Bsp:\n", "
\n", " # Wertebereiche für a0 und a1 definieren \n", "
a0_range = np.linspace(-0.5, 1.5, 50)\n", "
a1_range = np.linspace(0, 2, 60)\n", "
\n", "
# Array für die berechneten S initialisieren \n", "
S = np.zeros(shape=(50, 60))\n", "
\n", "
# S für alle Kombinationen berechnen \n", "
for i, a0 in enumerate(a0_range):\n", "
for j, a1 in enumerate(a1_range):\n", "
S[i, j] = np.sum((y_val - (a0 + a1 * x_val))**2 / sigma_y**2)\n", "
\n", "
\n", " im = ax.pcolormesh(a0_range, a1_range, np.log(S).T, shading='nearest')\n", "
fig.colorbar(im, label=r'$\\log S$')\n", "
ax.set_xlabel('$a_0$ (N)')\n", "
ax.set_ylabel('$a_1$ (N/cm)')\n", "
\n", "
Bsp:\n", "

\n", " \"log(s)\"\n", "

\n", " Analytische Methode\n", "
Nâ = Y Normalmatrix 👉 â = N⁻¹Y\n", "
â = (â₀,..., âₘ)ᵀ, N = ((Σᵢwᵢ, Σᵢwᵢxᵢ),(Σᵢwᵢxᵢ, Σᵢwᵢxᵢ²))ᵀ, Y = (Σᵢwᵢyᵢ, Σᵢwᵢxᵢyᵢ)ᵀ\n", "
wᵢ = 1/σᵢ²\n", "
\n", " Bsp 1:\n", "
Explizite Berechnung für eine Gerade (2 Fitparameter a0, a1):\n", "
\n", " w = 1/sigmaF**2\n", "
\n", "
N = np.array([[np.sum(w), np.sum(w * x)], [np.sum(w * x), np.sum(w * (x**2))]])\n", "
Y = np.array([np.sum(w * F), np.sum(w * x * F)])\n", "
\n", " Ninv = np.linalg.inv(N) Matrix Invertieren \n", "
\n", "
fitParams = Ninv @ Y , @ oder np.dot(Ninv, Y) Matrixmultiplikation (❗️auf Dimension achten❗️) \n", "
#a0, a1 = fitParams \n", "
\n", "
sigma_a0 = np.sqrt(Ninv[0, 0])\n", "
sigma_a1 = np.sqrt(Ninv[1, 1])\n", "
Diagonaleinträge der Kovarianzmatrix sind die Varianzen (sigma²) der Fitparameter, deren Wurzel die Standardabw. (sigma) \n", "
\n", "
\n", " Bsp 2:\n", "
Fit-Funktion für Polynome bel. Ordnung:\n", "
\n", " #pre: datasets x, y, sigma(y), deg = Anzahl Fitparameter (Polynomgrad + 1) \n", "
#post: Kovarianzmatrix, Fitparameter (Spalten)vektor \n", "
def linearFit(x, y, sigma, deg):\n", "
w = 1/sigma**2\n", "
N = np.zeros(shape=(deg, deg))\n", "
Y = np.zeros(shape=(deg))\n", "
\n", "
for i in range(deg):\n", "
Y[i] = np.sum(w * x**i * y)\n", "
for j in range(deg):\n", "
N[i, j] = np.sum(w * x**(i+j))\n", "
\n", "
#kovarianzmatrix \n", "
Ninv = np.linalg.inv(N)\n", "
\n", "
fitParams = Ninv @ Y\n", "
\n", "
return Ninv, fitParams\n", "
\n", "
\n", " \n", " fit_n = np.zeros(len(x))\n", "
for index, x_i in enumerate(x):\n", "
sum_i = 0\n", "
for n, a_n in enumerate(fitParams_n):\n", "
sum_i += a_n * x_i**n\n", "
\n", "
fit_n[index] = sum_i \n", "
\n", "

\n", " import matplotlib.gridspec as gs\n", "

\n", "

\n", " gs0 = gs.GridSpec(nrows=2, ncols=3, height_ratios=[3, 1], hspace=.1, figure=fig)\n", "
👉 ax = fig.add_subplot(gs0[0, 0])\n", "
ax.set_xticklabels([]) Entfernen der Achsenindizes \n", "
ax.text(x, y, \"text\") Text im Plot \n", "

\n", "

\n", "

\n", " Additional Stuff\n", "
Underfitting 👉 Abweichung von Datenpunkten und Fit, Struktur in den Residuen\n", "
Overfitting 👉 Residuen zufällig Verteilt (gut) aber Fitparameter verlieren physikalische Bedeutung also n-1 fittet den Datensatz vielleicht genauso gut\n", "
\n", "
a.errorbar(x_val, y_val, sigma_y, capsize=3, linestyle='None', marker='.')\n", "
np.zeros(shape=(x, y))\n", "
Likelihood Function\n", "
Linear Regression (video)\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 12
\n", "

Fitten von nicht linearen Funktionen, Gradientenverfahren
\n", " \n", "
\n", "
\n", "

\n", " Gradientenverfahren\n", "

\n", "

\n", " Newton Verfahren basically Gradientenverfahren aber 2.Term der Taylor Entwicklung wird auch berücksichtigt \n", "
\n", "

\n", "

\n", " Marquardts Methode\n", "
\n", "

\n", "

\n", " from scipy.optimize import curve_fit\n", "

\n", "

\n", " Additional Stuff\n", "
np.linalg.norm, np.diag\n", "
Gradientenverfahren\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 13
\n", "

Machine Learning, Entscheidungsbäume, Lineare Regression, Logistische Regression, R2-Score
\n", " \n", "
\n", "
\n", "

\n", " import pandas as pd\n", "

\n", "

\n", " pandas basics\n", "
df = pd.read_csv('file.csv') \"dataframe\" \n", "
df.head() erste 5 Zeilen anzeigen \n", "
df[5:10] slicing wie bei numpy 👉 Zeilen 5-9 \n", "
df[['Spaltenname1', 'Spaltenname2',...]]\n", "
df.drop(['Spaltenname1', 'Spaltenname2', ...], axis=1) Spalte ausschliessen \n", "

\n", "

\n", " Decision Trees\n", "

\n", "

from sklearn

\n", "

sklearn.model_selection import train_test_split

\n", "

sklearn.tree import DecisionTreeClassifier

\n", "

sklearn.tree import plot_tree

\n", "

sklearn.metrics import accuracy_score

\n", "

\n", " Test und Trainingssatz erstellen, Tree plotten, Accuracy Score bestimmen\n", "
Bsp:\n", "
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, train_size=0.7) \n", "
train size sollte grösser als test sein, wenn nur test angegeben wird, dann ist train das Komplement davon \n", "
clf = DecisionTreeClassifier(max_depth=4).fit(X_train, y_train)\n", "
plot_tree(clf)\n", "
y_pred = clf.predict(X_test)\n", "
print('Accuracy Score: {}'.format(accuracy_score(y_pred, y_test)))\n", "
\n", "
Neue Predictions mit trainiertem Modell 👉 clf.predict(df2)\n", "

\n", "

\n", " Linear Regression\n", "

\n", "

from sklearn

\n", "

sklearn.linear_model import LinearRegression

\n", "

sklearn.metrics import r2_score

\n", "

\n", " R2-Score\n", "
\n", "

\n", "

\n", " Test und Trainingssatz erstellen, R2-Score bestimmen ähnlicher Ablauf wie bei Decision Trees \n", "
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)\n", "
reg = LinearRegression().fit(X_train, y_train)\n", "
y_pred = reg.predict(X_test) \n", "
print(\"R2-Score: {:.3f}\".format(r2_score(y_test, y_pred)))\n", "

\n", "

\n", " Logistic Regression same wie oben... \n", "

\n", "

from sklearn

\n", "

sklearn.linear_model import LogisticRegression

\n", "

\n", " logreg = LogisticRegression(max_iter=1000).fit(X_train, y_train) optional aber oft gut \n", "

\n", "

\n", " Additional Stuff\n", "
Gini-Coefficient\n", "
scikit-learn\n", "
R2-Score\n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 14
\n", "

Grid Search, (K-Fold) Cross-Validation, Over-/Underfitting, Neural Network
\n", " \n", "
\n", "
\n", "

from sklearn

\n", "

sklearn.model_selection import GridSearchCV

\n", "

sklearn.preprocessing import PolynomialFeatures

\n", "

sklearn.pipeline import Pipeline

\n", "

sklearn.neural_network import MLPClassifier

\n", "

\n", " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, train_size=0.7)\n", "
#mlp = MLPClassifier(hidden_layer_sizes=(32,32,16,16,8), max_iter=5000, random_state=0) verschiedene hidden layers probieren \n", "
mlp = MLPClassifier(hidden_layer_sizes=(32, 32, 32, 32, 32, 32), max_iter=5000, random_state=0)\n", "
mlp.fit(X_train, y_train)\n", "
\n", "
y_pred = mlp.predict(X_test)\n", "
print('Accuracy Score: {}'.format(accuracy_score(y_pred, y_test)))\n", "
\n", "
X_final = pd.read_csv('X_final.csv')\n", "
y_final = mlp.predict(X_final.values)\n", "
\n", "
fig = plt.figure()\n", "
ax = fig.add_subplot(1, 1, 1)\n", "
ax.scatter(X_final['X1'], X_final['X2'], c=y_final)\n", "
ax.set_aspect(1)\n", "
\n", "
ax.set_xlabel('X1')\n", "
ax.set_ylabel('X2')\n", "

\n", "

\n", " ...many functions (see sol.)\n", "

\n", "

\n", " Additional Stuff\n", "
np.reshape()\n", "
Bsp:\n", "
X = np.reshape(df['x'].values, newshape=(-1, 1)) (-1, 1) bedeutet: eine Spalte mit sovielen Zeilen wie nötig \n", "
Dictionaries very similar to objects in JS \n", "

\n", "
\n", "
\n", "
\n", "
\n", "

Week 15
\n", "

Filtern mit Faltungsmatrizen (, k-means Clustering in 2D)
\n", " \n", "
\n", "
\n", "

\n", " import skimage.io\n", "

\n", "

\n", " analog zu einlesen von Textdaten \n", "
\n", " img = skimage.io.imread(files[n], as_gray=True (od. False) , ...) returns 2D np.array \n", "
fig = plt.figure()\n", "
a = fig.add_subplot(1, 1, 1)\n", "
a.imshow(img (, cmap='gray') ) Bild \"plotten\" \n", "
a.set_xticks([]) Achsenbeschriftung entfernen \n", "

\n", "

\n", " import pickle\n", "

\n", "

\n", " eine weitere Art um Daten einzulesen... \n", "
input = pickle.load(open('filename', 'rb'))\n", "
... plt.imshow(input, cmap='gray_r')\n", "

\n", "

\n", " ...many functions (see sol.)\n", "

\n", "

\n", " Additional Stuff\n", "
ax.set_aspect(1) aspect ratio \n", "

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "

\n", " (2023 n.ethz.ch/~atuzlak )\n", "

\n", "
\n", "
\n", "
" ] }, { "cell_type": "markdown", "id": "e282a83b", "metadata": {}, "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }