# Was ist eigentlich ein Histogramm und was eine empirische Verteilungsfunktion?
#
# von
#
# Jonas Alexander Lache <jonas.lache(at)ruhr-uni-bochum.de>
# Daniel Meißner <daniel.meissner-i4k(at)ruhr-uni-bochum.de>
#
# Dieses Werk ist lizenziert unter einer Creative Commons
# Namensnennung-Weitergabe unter gleichen Bedingungen 4.0 International
# Lizenz. Um eine Kopie der Lizenz zu erhalten, besuchen Sie
# http://creativecommons.org/licenses/by-sa/4.0/.
#
# SPDX-License-Identifier: CC-BY-SA-4.0

from manim import *
import numpy as np
import math

# Colour palette, given as hex RGB strings.
ORCA_BLUE = "#002b44"
ORCA_RED = "#e61300"
ORCA_GREEN = "#4cb011"
ORCA_GREY = "#f5f5f5"
ORCA_WHITE = "#ffffff"

# Use a white background for rendering (set on manim's global config object).
config.background_color = ORCA_WHITE

def scene_heading(title):
    """Build the heading mobject for a scene.

    The title is wrapped in ``\\textsf{\\bfseries ...}``, typeset in
    ORCA_BLUE at font size 56 inside a ``flushleft`` environment, and moved
    to the upper-left edge of the frame.

    Parameters
    ----------
    title
        Heading text; it is inserted verbatim into the LaTeX source.

    Returns
    -------
    Tex
        The positioned heading. It is not added to any scene here.
    """
    template = r"\textsf{{\bfseries {}}}"
    style = {
        "color": ORCA_BLUE,
        "font_size": 56,
        "tex_environment": "flushleft",
    }
    title_mobject = Tex(template.format(title), **style)
    title_mobject.to_edge(UL)
    return title_mobject

# Sample data for the scenes below: 24 package weights in grams.
data = np.array([
    46.37,
    46.76,
    46.16,
    46.20,
    47.15,
    46.32,
    46.30,
    46.53,
    46.25,
    46.90,
    46.01,
    46.22,
    46.26,
    46.25,
    46.18,
    45.90,
    45.96,
    46.08,
    46.16,
    46.19,
    45.94,
    46.17,
    46.12,
    45.86
])

def compute_histogram(data):
    """Compute a density-normalised histogram of ``data`` on fixed bins.

    The bins are hard-coded (seven bins of width 0.2 between 45.8 and 47.2)
    so that the plot gets round tick values. Every bin is half-open,
    ``[left, right)``, and its height is ``count / len(data) / bin_width``.
    The bar areas therefore sum to 1 whenever all observations lie inside
    ``[45.8, 47.2)``; observations outside that range are not counted.

    Parameters
    ----------
    data : array-like of float
        The observations.

    Returns
    -------
    intervals : list of float
        The eight bin edges in ascending order.
    values : numpy.ndarray
        The seven bar heights, one per pair of neighbouring edges. All
        zeros when ``data`` is empty.
    """
    # hardcode values for nicer appearance
    a = 45.8
    b = 47.2
    width = 0.2
    n_bins = round((b - a) / width)

    # Build the edges as a + i*width, then round to one decimal: the raw sum
    # can land an ulp away from the intended decimal value, which would make
    # the half-open comparisons below misplace observations that sit
    # exactly on an edge.
    intervals = [round(a + i * width, 1) for i in range(n_bins + 1)]

    data = np.asarray(data, dtype=float)
    # Normalise by the actual sample size, not by a fixed constant.
    n = data.size
    if n == 0:
        return intervals, np.zeros(n_bins)

    values = np.array([
        np.count_nonzero((data >= lo) & (data < hi)) / n / (hi - lo)
        for lo, hi in zip(intervals, intervals[1:])
    ])
    return intervals, values

class StepByStepHistogram(Scene):
    """Scene that builds the histogram of ``data`` one step at a time.

    Timeline: heading, table of the raw values, the values as dots on a
    number line, an interlude listing three rule names, the vertical axis,
    the bars (one per bin) and finally three remarks next to the shrunken
    plot.
    """

    def construct(self):
        """Play the complete animation of the scene."""
        title = scene_heading("Das Histogramm")
        self.play(Create(title))
        self.wait()

        # Raw data as a table of 4 rows with 6 values each, two decimals.
        def weight_cell(idx):
            return Tex(
                r"\textsf{"+f"{data[idx]:.2f}"+r"}",
                color=ORCA_BLUE
            )

        table = MobjectTable([
            [weight_cell(idx) for idx in range(6*row, 6*(row + 1))]
            for row in range(4)
        ])
        self.play(Create(table))
        self.wait(9)
        self.play(FadeOut(table))

        # The observations as red dots on the (initially bare) x-axis.
        histogram = compute_histogram(data)
        axes = self._create_axes(histogram)
        x_axis, y_axis = axes[0], axes[1]
        dots = VGroup(*[
            Dot(x_axis.number_to_point(weight), color=ORCA_RED)
            for weight in data
        ])
        self.play(Create(x_axis))
        self.play(Create(dots))
        self.wait(0.5)
        self.play(x_axis.animate.add_ticks().add_numbers(color=ORCA_BLUE))
        self.wait(1.5)

        # Interlude: hide the plot while the rule names are shown.
        self.play(FadeOut(x_axis, dots), run_time=0.5)
        self._show_bin_rules()
        self.play(FadeIn(x_axis, dots), run_time=0.5)
        self.wait(1)

        # Bring in the vertical axis while moving everything downwards.
        self.play(
            Create(y_axis),
            VGroup(axes, dots).animate.to_edge(0.5*DOWN, buff=0.8),
        )
        self.wait()

        # One bar per bin; each bar joins the group so that it is scaled
        # and faded together with the axes later on.
        histogram_with_axes = VGroup(axes, dots)
        edges, densities = histogram
        for left, right, density in zip(edges, edges[1:], densities):
            p_left, p_right = x_axis.number_to_point([left, right])
            p_base, p_top = y_axis.number_to_point([0, density])
            bar = Rectangle(
                width=(p_right - p_left)[0],
                height=(p_top - p_base)[1],
                color=ORCA_BLUE,
            )
            # Move the bar so that its bottom edge lies on the x-axis
            # between the two bin edges (assumes a new Rectangle is
            # centred at the origin).
            bar.shift((p_left + p_right)/2 + bar.height/2 * UP)
            bar.set_stroke(color=ORCA_BLUE, width=1)
            bar.set_fill(ORCA_BLUE, opacity=0.5)
            histogram_with_axes.add(bar)
            self.play(Create(bar), run_time=0.5)
        self.wait(8.5)

        self.play(histogram_with_axes.animate.scale(0.6).to_edge(LEFT))

        # Three remarks, stacked and pushed to the right edge.
        remarks = Tex(
            r"\textsf{Treppenfunktion}",
            r"\textsf{R-Befehl:} \texttt{hist}",
            r"\textsf{standardmäßig keine Normierung}",
            color=ORCA_BLUE
        )
        remarks.arrange(DOWN)
        for remark in remarks:
            remark.to_edge(RIGHT, buff=1)

        # Reveal the remarks one by one, each followed by its own pause.
        for index, pause in enumerate((12, 5, 2.5)):
            self.play(Create(remarks[index]))
            self.wait(pause)

        self.play(FadeOut(
            title,
            histogram_with_axes,
            remarks
        ))
        self.wait(0.5)

    def _show_bin_rules(self):
        """Show three rule names stacked in the frame centre, then fade them.

        The names are Sturges, Freedman-Diaconis and Scott; judging by the
        method name they stand for rules to choose the histogram bins.
        """
        rule_names = Tex(
            r"\textsf{Sturges}",
            r"\textsf{Freedman-Diaconis}",
            r"\textsf{Scott für normalverteilte Daten}",
            color=ORCA_BLUE
        )
        rule_names.center()
        rule_names.arrange(DOWN)

        self.play(Create(rule_names), run_time=2)
        self.play(FadeOut(*rule_names))


    def _create_axes(self, histogram):
        """Create the two axes on which the histogram is drawn.

        Parameters
        ----------
        histogram
            Pair ``(bins, values)``; only the bin edges are used here.

        Returns
        -------
        VGroup
            ``VGroup(x_axis, y_axis)``. The x-axis runs from the smallest
            bin edge to half a bin width past the largest one and starts
            without ticks or numbers. The y-axis covers 0 to 2 in steps of
            0.5, is rotated by 90 degrees and shifted so that its zero
            coincides with the smallest bin edge on the x-axis.
        """
        edges, _ = histogram
        lower = min(edges)
        upper = max(edges)
        bin_count = len(edges) - 1
        bin_width = (upper - lower)/bin_count

        # Settings shared by both axes.
        shared = {
            "include_tip": True,
            "tip_width": 0.25,
            "tip_height": 0.25,
            "color": ORCA_BLUE,
        }
        x_axis = NumberLine(
            [lower, upper + bin_width/2, bin_width],
            length=10,
            include_ticks=False,
            include_numbers=False,
            decimal_number_config={"num_decimal_places": 1},
            **shared
        )
        y_axis = NumberLine(
            [0, 2, 0.5],
            length=6,
            include_ticks=True,
            include_numbers=True,
            rotation=90*DEGREES,
            label_direction=LEFT,
            **shared
        )
        y_axis.numbers.set_color(ORCA_BLUE)

        # Attach the y-axis to the left end of the x-axis.
        offset = x_axis.number_to_point(lower) - y_axis.number_to_point(0)
        y_axis.shift(offset)
        return VGroup(x_axis, y_axis)

class Comparison(Scene):
    """Scene contrasting histogram and empirical distribution function.

    Two column titles are shown below the heading; afterwards three
    statements per column are revealed one at a time.
    """

    def construct(self):
        """Play the complete animation of the scene."""
        title = scene_heading("Unterschiede zwischen den Darstellungen")
        self.play(Create(title))
        self.wait()

        # Column titles for histogram and ECDF.
        hist = Tex(r"\textsf{Histogramm}", color=ORCA_BLUE)
        ecdf = Tex(
            r"\textsf{empirische Verteilungsfunktion}",
            color=ORCA_BLUE
        )
        hist.to_edge(LEFT)
        ecdf.to_edge(RIGHT)
        column_titles = VGroup(hist, ecdf)
        column_titles.next_to(title, DOWN)
        column_titles.align_to(title, LEFT)

        # Styling shared by both lists of statements.
        statement_style = {
            "color": ORCA_BLUE,
            "tex_environment": "flushleft",
            "font_size": 36,
        }

        hist_points = Tex(
            r"\textsf{Schätzer für Dichtefunktion}\\[5ex]",
            r"\textsf{Häufiger im Alltag zu sehen}\\[5ex]",
            r"\textsf{Gute Übersicht über Verteilung}",
            **statement_style
        )
        hist_points.shift(0.5*DOWN)
        hist_points.to_edge(LEFT)

        ecdf_points = Tex(
            r"\textsf{Schätzer für Verteilungsfunktion}\\[5ex]",
            r"\textsf{Kein Informationsverlust}\\[5ex]",
            r"\textsf{Verwendung in mathematischer Statistik}",
            **statement_style
        )
        ecdf_points.shift(0.5*DOWN)
        ecdf_points.align_to(ecdf, LEFT)

        self.play(Create(hist), Create(ecdf))
        self.wait(5)

        # Order in which the statements appear, each with the pause (in
        # seconds) that follows it.
        timeline = [
            (hist_points[0], 3),
            (ecdf_points[0], 3),
            (hist_points[1], 7.5),
            (hist_points[2], 7),
            (ecdf_points[1], 4),
            (ecdf_points[2], 3),
        ]
        for statement, pause in timeline:
            self.play(Create(statement))
            self.wait(pause)

class StepByStepECDF(Scene):
    """Scene that draws the empirical distribution function of ``data``.

    Timeline: heading, axes, the observations as dots on the x-axis, the
    step function, the R command next to the shrunken plot, the defining
    formula with three highlighted parts, and finally plot and formula
    together.
    """

    def construct(self):
        """Play the complete animation of the scene."""

        def compute_ecdf(data):
            """Return the sorted distinct values of ``data`` and, for each
            of them, the fraction of observations that are <= that value."""
            jump_points = np.unique(data)
            levels = np.array([
                np.count_nonzero(data <= x) / len(data)
                for x in jump_points
            ])
            return jump_points, levels

        title = scene_heading("Die empirische Verteilungsfunktion")
        self.play(Create(title))
        self.wait(2)

        axis_style = {
            "include_tip":True,
            "include_numbers":True,
            "color":ORCA_BLUE,
            "font_size":24,
            "decimal_number_config":{"num_decimal_places": 1}
        }
        ax = Axes(
            x_range=[45.8, 47.3, 0.2],
            y_range=[0, 1.1, 0.1],
            x_length=10,
            y_length=5,
            axis_config=axis_style
        )

        # Colour the tick labels and shrink the arrow tips of both axes.
        for axis in (ax[0], ax[1]):
            axis.numbers.set_color(ORCA_BLUE)
        for axis in (ax[0], ax[1]):
            axis.tip.scale(0.7)
        ax.center()

        intervals, values = compute_ecdf(data)

        # Horizontal segments of the step function: each one starts at a
        # distinct data value and ends at the next one; the last segment
        # runs to the upper end of the x-range.
        segment_ends = np.append(intervals, ax.x_range[1])
        ecdf = VGroup(*[
            Line(
                start=ax.coords_to_point(left, level, 0),
                end=ax.coords_to_point(right, level, 0),
                color=ORCA_BLUE
            )
            for left, right, level in zip(
                segment_ends, segment_ends[1:], values
            )
        ])

        # A small dot at the left end of every segment; the dot for the
        # largest observation is appended separately at height 1.
        ecdf_dots = VGroup(*[
            Dot(
                ax.coords_to_point(left, level, 0),
                radius=0.05,
                color=ORCA_BLUE
            )
            for left, level in zip(intervals[:-1], values)
        ])
        ecdf_dots.add(Dot(
            ax.coords_to_point(max(data), 1, 0),
            radius=0.05,
            color=ORCA_BLUE
        ))

        # The observations themselves as red dots on the x-axis.
        dots = VGroup(*[
            Dot(ax.coords_to_point(x, 0, 0), color=ORCA_RED)
            for x in np.sort(data)
        ])

        # Everything that makes up the plot, for the group operations below.
        plot_parts = (ax, ecdf, ecdf_dots, dots)

        self.play(Create(ax), run_time=2)
        self.play(Create(dots), run_time=1.25)
        self.wait()
        self.play(Create(ecdf), Create(ecdf_dots), run_time=1.5)
        self.wait()
        self.play(Indicate(ax[0], color=ORCA_BLUE, scale_factor=1.1))
        self.wait(2)
        self.play(Indicate(ax[1], color=ORCA_BLUE, scale_factor=1.1))
        self.wait()
        self.play(
            VGroup(*plot_parts).animate.scale(0.5).to_edge(LEFT)
        )
        self.wait(2)

        # Positioned only now because it depends on where the plot ended up.
        r_command = Tex(
            r"\textsf{R-Befehl: }\texttt{plot.ecdf}",
            color=ORCA_BLUE
        )
        r_command.next_to(ax, RIGHT, buff=1.5)
        self.play(Create(r_command))
        self.wait(5)

        self.play(FadeOut(*plot_parts, r_command))

        formula = MathTex(
            r"F_n(x) = \frac{1}{n} \#\{1 \le i \le n \:\colon\: x_i \le x\}",
            color=ORCA_BLUE
        )
        formula.center()
        self.play(Create(formula))
        self.wait(2.5)

        # Highlight three parts of the formula in turn. The indices address
        # glyphs of the rendered formula; each entry also carries the pause
        # that follows the highlight.
        highlights = (
            (9, 3.5),
            (slice(17, 21), 3),
            (slice(6, 9), 4),
        )
        for glyphs, pause in highlights:
            self.play(
                Indicate(formula[0][glyphs], color=ORCA_RED, scale_factor=1.5)
            )
            self.wait(pause)

        # Enlarge the (currently hidden) plot again and fade it back in
        # above the formula.
        VGroup(*plot_parts).scale(1.5).center()
        self.play(
            formula.animate.to_edge(DOWN),
            FadeIn(*plot_parts)
        )
        self.wait(4.5)
        self.play(FadeOut(title, formula, *plot_parts))
        self.wait(0.5)