Week 9 (4/4-4/10)

Notebook

Weekly digest

Data formats

  • JSON

  • Dates and times

Project

Resources

1. OpenCage Geocoding API

Documentation

[1]:
import os

# Endpoint of the OpenCage geocoding API (the path ends in /json).
oc_url = "https://api.opencagedata.com/geocode/v1/json"
# NOTE(review): an API key is hard-coded in the source. Prefer supplying it
# through the OPENCAGE_API_KEY environment variable; the literal is kept only
# as a fallback so the notebook keeps working when the variable is not set.
key = os.environ.get("OPENCAGE_API_KEY", "65b1171691324bb68feac286a27")

2. DataFrame with times

[2]:
import pandas as pd

# Three sample events; the start/end timestamps are kept as plain strings.
df = pd.DataFrame(
    [
        ("event_1",
         "2021-03-04T23:03:12.351+00:00",
         "2021-03-04T23:04:08.669+00:00"),
        ("event_2",
         "2021-03-05T12:12:12.010+00:00",
         "2021-03-05T17:56:17.123+00:00"),
        ("event_3",
         "2021-03-04T13:29:45.594+00:00",
         "2021-03-06T00:04:46.032+00:00"),
    ],
    columns=["event", "start", "end"],
)

3. Marathon results

4. Histogram demo

[ ]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed

# IPython magic (only valid inside IPython/Jupyter, not plain Python):
# set the inline backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'
# Use seaborn's "darkgrid" style and "bright" palette for the plots.
sns.set_theme(style="darkgrid", palette="bright")

def hist(data, bins=10, shift=0):
    """
    Draw a density histogram of data with a rug plot on top of it.

    data:
        values to plot; converted to a NumPy array.
    bins:
        number of bins
    shift:
        offset added to both ends of the bin range, which moves
        the bin boundaries.
    """

    margin = 5
    plt.figure(figsize=(12, 4))
    values = np.array(data)

    # The axis limits stay fixed; only the bin range moves with shift.
    left = values.min() - margin
    right = values.max() + margin
    plt.xlim(left, right)

    sns.histplot(
        values,
        stat="density",
        bins=bins,
        binrange=(left + shift, right + shift),
    )
    sns.rugplot(values, height=0.05, color='w')
    plt.show()


# Draw N samples from a normal distribution using a fixed seed.
N = 50
rng = np.random.default_rng(seed=10)
data = rng.normal(10, 5, N)
# Sliders control shift and bins; data is passed to hist as a fixed argument.
interact(
    hist,
    shift=(0, 5, 0.25),
    bins=(1, 30, 1),
    data=fixed(data),
);

5. KDE plot

[ ]:
from scipy.stats import norm

def kde(data, h, x):
    """
    Compute KDE of data with Gaussian kernels.

    data:
       Sample points; one Gaussian kernel is centred on each.
    h:
       Kernel bandwidth (passed to norm.pdf as scale).
    x:
       KDE value will be computed for each value
       of this array.

    Returns an array of floats with the same shape as x.
    """

    # Accumulate in floating point regardless of the dtype of x: with an
    # integer x, zeros_like(x) would give an integer accumulator and the
    # in-place addition of float densities would raise a casting error.
    x = np.asarray(x, dtype=float)
    y = np.zeros(x.shape, dtype=float)
    for center in data:
        y += norm.pdf(x, loc=center, scale=h)
    return y / len(data)


def plot_kde(data, h=0.1, show_kernels=False):
    """
    Plot KDE

    data:
        A sequence of values defining the KDE (converted to an array).
    h:
        Kernel bandwidth.
    show_kernels:
        Boolean. If true, individual kernels are
        plotted too.
    """

    # Accept plain sequences as well: .min()/.max() below need an array.
    data = np.asarray(data)
    x = np.linspace(data.min() - 3, data.max() + 3, 400)
    plt.figure(figsize=(14,4))
    plt.plot(x, kde(data, h, x), lw=7, c='steelblue', alpha=0.4)
    # Mark each data point with a red tick on the x-axis.
    plt.plot(data, [0]*len(data), 'r|', ms=15, mew=2)
    if show_kernels:
        # Each kernel is scaled by 1/len(data), i.e. its share of the KDE.
        weight = 1/len(data)
        for X in data:
            plt.plot(x, weight*norm.pdf(x, loc=X, scale=h), 'r--', lw=0.7)

6. KDE integral widget

[ ]:
from ipywidgets import interact, fixed, FloatSlider
from  scipy.stats import gaussian_kde

def integrate(data, lower=7, upper=12):
    """
    Plot the KDE of data and shade its integral between two limits.

    data:
        A sequence of values defining the KDE (converted to an array).
    lower, upper:
        Limits of integration. The value of the integral is shown in
        the plot title; the area is shaded only when lower <= upper.
    """

    # Accept plain sequences as well: .min()/.max() below need an array.
    data = np.asarray(data)
    kde_data = gaussian_kde(data)
    integral = kde_data.integrate_box(lower, upper)

    x_min = data.min() - 1
    x_max = data.max() + 1
    x = np.linspace(x_min, x_max, 400)

    plt.figure(figsize=(10, 5))
    plt.xlim(x_min - 1, x_max + 1)
    # Raw f-string: the LaTeX backslashes are not valid Python escape
    # sequences, so a non-raw literal triggers an invalid-escape warning.
    # The resulting text is unchanged.
    plt.title(rf"$\int^{{{upper:.2f}}}_{{{lower:.2f}}}\ f(t)dt = {integral:.3f}$",
              fontsize=20,
              y=1.1)
    plt.plot(x, kde_data(x))
    if lower <= upper:
        # The fill grid is only needed when there is an area to shade.
        xfill = np.linspace(lower, upper, 100)
        plt.fill_between(xfill,
                         kde_data(xfill),
                         0,
                         color='steelblue',
                         alpha=0.3)
    # Mark each data point with a red tick on the x-axis.
    plt.plot(data, [0] * len(data), 'r|', ms=15)


# Draw N samples from a normal distribution using a fixed seed.
N = 50
rng = np.random.default_rng(10)
data = rng.normal(loc=10, scale=5, size=N)
# Start the sliders at integrate's default limits (7 and 12). Without an
# explicit value both sliders start at the same position, so the first
# plot would show a zero-width integral.
interact(integrate,
         data=fixed(data),
         lower=FloatSlider(value=7,
                           min=data.min() - 1,
                           max=data.max() + 1,
                           step=0.2,
                           continuous_update=False),
         upper=FloatSlider(value=12,
                           min=data.min() - 1,
                           max=data.max() + 1,
                           step=0.2,
                           continuous_update=False));