Week 9 (4/4-4/10)¶
Notebook¶
- Download the notebook file: week_9_class.ipynb 
Weekly digest¶
Data formats¶
- Dates and times 
Project¶
Resources¶
1. OpenCage Geocoding API¶
[1]:
# OpenCage Geocoding API endpoint (v1, JSON path).
oc_url =  "https://api.opencagedata.com/geocode/v1/json"
# NOTE(review): the API key is hard-coded in the notebook; presumably a
# placeholder/teaching key -- confirm, and prefer reading real keys from an
# environment variable rather than committing them.
key = "65b1171691324bb68feac286a27"
2. DataFrame with times¶
[2]:
import pandas as pd
# Sample events with start/end timestamps stored as ISO 8601 strings
# (one list per column, rows aligned by position).
df = pd.DataFrame({
    "event": ["event_1", "event_2", "event_3"],
    "start": [
        '2021-03-04T23:03:12.351+00:00',
        '2021-03-05T12:12:12.010+00:00',
        '2021-03-04T13:29:45.594+00:00',
    ],
    "end": [
        '2021-03-04T23:04:08.669+00:00',
        '2021-03-05T17:56:17.123+00:00',
        '2021-03-06T00:04:46.032+00:00',
    ],
})
3. Marathon results¶
4. Histogram demo¶
[ ]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed
# IPython magic (notebook only): set the inline backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'
# Use seaborn's "darkgrid" style and "bright" palette as the plotting theme.
sns.set_theme(style="darkgrid", palette="bright")
def hist(data, bins=10, shift=0):
    '''
    Draw a density histogram of data with a rug plot of the raw values.

    data:
        values to plot (converted to a NumPy array).
    bins:
        number of bins
    shift:
        offset added to both ends of the bin range, which
        moves the bin boundaries.
    '''
    plt.figure(figsize=(12, 4))
    margin = 5  # padding added on each side of the data range
    values = np.array(data)
    lowest, highest = values.min(), values.max()

    # The visible x-range stays fixed; only the bin range moves with `shift`.
    plt.xlim(lowest - margin, highest + margin)
    bin_limits = (lowest - margin + shift, highest + margin + shift)
    sns.histplot(values, stat="density", bins=bins, binrange=bin_limits)
    sns.rugplot(values, height=0.05, color='w')
    plt.show()
# Demo sample: N draws from a normal distribution (loc=10, scale=5) using a
# generator seeded with 10, so the same sample is produced on every run.
N = 50
rng = np.random.default_rng(10)
data = rng.normal(loc=10, scale=5, size=N)
# Interactive histogram: `shift` and `bins` are given as (min, max, step)
# tuples, while `data` is passed through unchanged via fixed().
# The trailing ';' is presumably there to suppress the cell's output repr.
interact(hist, shift=(0, 5, 0.25), bins=(1, 30, 1), data=fixed(data));
5. KDE plot¶
[ ]:
from scipy.stats import norm
def kde(data, h, x):
    """
    Compute KDE of data with Gaussian kernels.

    data:
       Sample values; one Gaussian kernel is centered
       at each of them.
    h:
       Kernel bandwidth (standard deviation of each kernel).
    x:
       KDE value will be computed for each value
       of this array.

    Returns an array of floats with the shape of x: the average
    of the kernels evaluated at x.
    """
    # Accumulate in floating point regardless of the dtype of x. Using
    # zeros_like(x) would create an integer accumulator for an integer
    # grid, and the in-place addition of float densities would then fail.
    y = np.zeros(np.shape(x), dtype=float)
    for X in data:
        y += norm.pdf(x, loc=X, scale=h)
    return y / len(data)
def plot_kde(data, h=0.1, show_kernels=False):
    """
    Draw the KDE curve of data together with the data points.

    data:
        An array with data defining KDE.
    h:
        Kernel bandwidth.
    show_kernels:
        Boolean. If true, each individual kernel (scaled
        by 1/len(data)) is drawn as a dashed line as well.
    """
    # Evaluation grid: 400 points extending 3 units beyond the data range.
    grid = np.linspace(data.min() - 3, data.max() + 3, 400)
    plt.figure(figsize=(14, 4))
    plt.plot(grid, kde(data, h, grid), lw=7, c='steelblue', alpha=0.4)
    n_points = len(data)
    # Mark every data point with a red tick on the x-axis.
    plt.plot(data, [0] * n_points, 'r|', ms=15, mew=2)
    if not show_kernels:
        return
    weight = 1 / n_points
    for center in data:
        kernel = weight * norm.pdf(grid, loc=center, scale=h)
        plt.plot(grid, kernel, 'r--', lw=0.7)
6. KDE integral widget¶
[ ]:
from ipywidgets import interact, fixed, FloatSlider
from  scipy.stats import gaussian_kde
def integrate(data, lower=7, upper=12):
    """
    Plot integral of KDE

    Fits a Gaussian KDE to data, plots it, shows the value of its
    integral from lower to upper in the title, and shades the
    corresponding area under the curve.

    data:
        An array with data defining KDE
    lower, upper:
        Limits of integration. The area is shaded only
        when lower <= upper.
    """
    kde_data = gaussian_kde(data)
    integral = kde_data.integrate_box(lower, upper)

    # Plot the KDE on a grid extending 1 unit beyond the data range.
    x_min = data.min() - 1
    x_max = data.max() + 1
    x = np.linspace(x_min, x_max, 400)
    plt.figure(figsize=(10, 5))
    plt.xlim(x_min - 1, x_max + 1)
    # Raw f-string: the LaTeX backslashes (\int, "\ ") must stay literal.
    # In a non-raw string they are invalid escape sequences, which newer
    # Python versions warn about. Doubled braces emit literal { } for LaTeX.
    plt.title(rf"$\int^{{{upper:.2f}}}_{{{lower:.2f}}}\ f(t)dt = {integral:.3f}$",
              fontsize=20,
              y=1.1)
    plt.plot(x, kde_data(x))
    if lower <= upper:
        # Grid restricted to the integration interval, used for shading.
        xfill = np.linspace(lower, upper, 100)
        plt.fill_between(xfill,
                         kde_data(xfill),
                         0,
                         color='steelblue',
                         alpha=0.3)
    # Mark every data point with a red tick on the x-axis.
    plt.plot(data, [0] * len(data), 'r|', ms=15)
# Demo sample: N draws from a normal distribution (loc=10, scale=5) using a
# generator seeded with 10, so the same sample is produced on every run.
N = 50
rng = np.random.default_rng(10)
data = rng.normal(loc=10, scale=5, size=N)
# Interactive integral plot: both limits get a slider spanning the data range
# padded by 1 on each side, in steps of 0.2. continuous_update=False is
# presumably meant to redraw only when the slider is released -- confirm
# against the ipywidgets documentation.
interact(integrate,
         data=fixed(data),
         lower=FloatSlider(min=data.min() - 1,
                           max=data.max() + 1,
                           step=0.2,
                           continuous_update=False),
         upper=FloatSlider(min=data.min() - 1,
                           max=data.max() + 1,
                           step=0.2,
                           continuous_update=False));