Week 9 (4/4-4/10)
Notebook
Download the notebook file: week_9_class.ipynb
Weekly digest
Data formats
Dates and times
Project
Resources
1. OpenCage Geocoding API
[1]:
# URL of the OpenCage geocoding endpoint (API v1, JSON responses, per the path).
oc_url = "https://api.opencagedata.com/geocode/v1/json"
# NOTE(review): the API key is hard-coded in the notebook. If this is a real
# credential, load it from an environment variable or an untracked config
# file instead of committing it.
key = "65b1171691324bb68feac286a27"
2. DataFrame with times
[2]:
import pandas as pd
# Three sample events, one row each. The start/end values are ISO 8601
# timestamps with an explicit +00:00 offset, kept here as plain strings.
df = pd.DataFrame({
    "event": ["event_1", "event_2", "event_3"],
    "start": [
        '2021-03-04T23:03:12.351+00:00',
        '2021-03-05T12:12:12.010+00:00',
        '2021-03-04T13:29:45.594+00:00',
    ],
    "end": [
        '2021-03-04T23:04:08.669+00:00',
        '2021-03-05T17:56:17.123+00:00',
        '2021-03-06T00:04:46.032+00:00',
    ],
})
3. Marathon results
4. Histogram demo
[ ]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed
# IPython magic (valid only inside a notebook/IPython session): request
# 'retina' figure output from the inline plotting backend.
%config InlineBackend.figure_format = 'retina'
# Use seaborn's "darkgrid" style and "bright" palette for the plots below.
sns.set_theme(style="darkgrid", palette="bright")
def hist(data, bins=10, shift=0):
    """
    Draw a density-normalized histogram of data with a rug plot.

    data:
        Values to plot; converted to a NumPy array.
    bins:
        Number of bins.
    shift:
        Offset added to both ends of the bin range. It moves the
        bin boundaries while the x-axis limits stay fixed.
    """
    plt.figure(figsize=(12, 4))

    pad = 5  # margin added on each side of the data range
    values = np.array(data)
    left = values.min() - pad
    right = values.max() + pad

    # The visible window is fixed; only the bin range moves with shift.
    plt.xlim(left, right)
    sns.histplot(
        values,
        stat="density",
        bins=bins,
        binrange=(left + shift, right + shift),
    )
    # White ticks along the x-axis, one per data point.
    sns.rugplot(values, height=0.05, color='w')
    plt.show()
# Demo sample: N draws from a normal distribution with mean 10 and standard
# deviation 5. The generator is seeded, so the sample is reproducible.
N = 50
rng = np.random.default_rng(10)
data = rng.normal(loc=10, scale=5, size=N)
# Interactive histogram: (min, max, step) tuples become sliders for shift
# (0 to 5, step 0.25) and bins (1 to 30, step 1); data is passed unchanged
# via fixed(). The trailing semicolon keeps the notebook from echoing the
# call's return value.
interact(hist, shift=(0, 5, 0.25), bins=(1, 30, 1), data=fixed(data));
5. KDE plot
[ ]:
from scipy.stats import norm
def kde(data, h, x):
    """
    Compute KDE of data with Gaussian kernels.

    data:
        Sample points. One Gaussian kernel is centered at each point.
    h:
        Kernel bandwidth (used as the scale of each Gaussian).
    x:
        KDE value will be computed for each value
        of this array. Integer arrays and plain lists
        are accepted as well as float arrays.

    Returns a float array with the same shape as x holding the
    average of the kernels evaluated at x.
    """
    x = np.asarray(x)
    # Accumulate in floating point regardless of x's dtype. An accumulator
    # created with zeros_like(x) would be an integer array for integer x,
    # and adding float densities to it in place raises a casting error.
    y = np.zeros(x.shape, dtype=float)
    for X in data:
        y += norm.pdf(x, loc=X, scale=h)
    return y / len(data)
def plot_kde(data, h=0.1, show_kernels=False):
    """
    Plot the Gaussian KDE of data.

    data:
        An array with data defining KDE.
    h:
        Kernel bandwidth.
    show_kernels:
        Boolean. If true, each individual kernel (scaled by
        1/len(data)) is drawn as a thin dashed red line.
    """
    # Evaluation grid: 400 points reaching 3 units past the data on each side.
    grid = np.linspace(data.min() - 3, data.max() + 3, 400)

    plt.figure(figsize=(14, 4))
    # Thick translucent curve: the KDE itself.
    plt.plot(grid, kde(data, h, grid), lw=7, c='steelblue', alpha=0.4)
    # Red tick marks at y = 0, one per data point.
    plt.plot(data, [0] * len(data), 'r|', ms=15, mew=2)

    if not show_kernels:
        return

    weight = 1 / len(data)
    for center in data:
        plt.plot(
            grid,
            weight * norm.pdf(grid, loc=center, scale=h),
            'r--',
            lw=0.7,
        )
6. KDE integral widget
[ ]:
from ipywidgets import interact, fixed, FloatSlider
from scipy.stats import gaussian_kde
def integrate(data, lower=7, upper=12):
    """
    Plot integral of KDE.

    Fits a scipy gaussian_kde to data, plots it, shades the region
    between lower and upper (only when lower <= upper), and shows
    the value returned by integrate_box(lower, upper) in the title.

    data:
        An array with data defining KDE.
    lower, upper:
        Limits of integration.
    """
    # Coerce up front so sequences without .min()/.max() work too,
    # matching what hist() does with its input.
    data = np.asarray(data)

    kde_data = gaussian_kde(data)
    integral = kde_data.integrate_box(lower, upper)

    x_min = data.min() - 1
    x_max = data.max() + 1
    x = np.linspace(x_min, x_max, 400)

    plt.figure(figsize=(10, 5))
    plt.xlim(x_min - 1, x_max + 1)
    # Raw f-string: "\i" and "\ " are not valid Python escapes, so a
    # non-raw literal emits a SyntaxWarning on newer interpreters. The
    # text passed to matplotlib is unchanged.
    plt.title(
        rf"$\int^{{{upper:.2f}}}_{{{lower:.2f}}}\ f(t)dt = {integral:.3f}$",
        fontsize=20,
        y=1.1,
    )
    plt.plot(x, kde_data(x))
    if lower <= upper:
        # Build the fill grid only when there is a region to shade.
        xfill = np.linspace(lower, upper, 100)
        plt.fill_between(
            xfill,
            kde_data(xfill),
            0,
            color='steelblue',
            alpha=0.3,
        )
    # Red tick marks at y = 0, one per data point.
    plt.plot(data, [0] * len(data), 'r|', ms=15)
# Demo sample: N draws from a normal distribution with mean 10 and standard
# deviation 5, using the same seed (10) as the histogram demo.
N = 50
rng = np.random.default_rng(10)
data = rng.normal(loc=10, scale=5, size=N)
# Interactive integral plot: both limits get a slider spanning the data range
# padded by 1 on each side, in steps of 0.2. continuous_update=False is set on
# both sliders. data is passed unchanged via fixed(). The trailing semicolon
# keeps the notebook from echoing the call's return value.
interact(integrate,
data=fixed(data),
lower=FloatSlider(min=data.min() - 1,
max=data.max() + 1,
step=0.2,
continuous_update=False),
upper=FloatSlider(min=data.min() - 1,
max=data.max() + 1,
step=0.2,
continuous_update=False));