# "Was ist eigentlich ein Histogramm und was eine empirische
# Verteilungsfunktion?"
# (What actually is a histogram, and what is an empirical distribution
# function?)
#
# by
#
# Jonas Alexander Lache
# Daniel Meißner
#
# This work is licensed under a Creative Commons
# Attribution-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-sa/4.0/.
#
# SPDX-License-Identifier: CC-BY-SA-4.0

from manim import *
import numpy as np
import math

ORCA_BLUE = "#002b44"
ORCA_RED = "#e61300"
ORCA_GREEN = "#4cb011"
ORCA_GREY = "#f5f5f5"
ORCA_WHITE = "#ffffff"

config.background_color = ORCA_WHITE


def scene_heading(title):
    """Return the bold sans-serif heading mobject for a scene.

    The heading is typeset left-aligned in ``ORCA_BLUE`` and moved to the
    upper-left corner of the frame.
    """
    heading = Tex(
        r"\textsf{{\bfseries {}}}".format(title),
        color=ORCA_BLUE,
        font_size=56,
        tex_environment="flushleft",
    )
    heading.to_edge(UL)
    return heading


# weights of package in grams
data = np.array([
    46.37, 46.76, 46.16, 46.20, 47.15, 46.32,
    46.30, 46.53, 46.25, 46.90, 46.01, 46.22,
    46.26, 46.25, 46.18, 45.90, 45.96, 46.08,
    46.16, 46.19, 45.94, 46.17, 46.12, 45.86,
])


def compute_histogram(data):
    """Compute a density-normalised histogram of ``data`` on fixed bins.

    The bin edges are hard-coded (eight edges starting at 45.8, spaced 0.2
    apart) for a nicer appearance. Each bin is half-open, ``[lo, hi)``.

    Returns:
        A tuple ``(intervals, values)``: ``intervals`` is the list of bin
        edges and ``values`` is a NumPy array holding, for each bin, the
        relative frequency divided by the bin width.
    """
    # hardcode values for nicer appearance
    first_edge = 45.8
    bin_width = 0.2
    edge_count = 8

    samples = np.asarray(data)
    intervals = [first_edge + i * bin_width for i in range(edge_count)]

    # Normalise by the actual sample size rather than a fixed constant so
    # the heights stay correct when the data set changes.
    n = len(samples)
    if n == 0:
        # No observations: every bin is empty.
        return intervals, np.zeros(len(intervals) - 1)

    values = np.array([
        np.count_nonzero((samples >= lo) & (samples < hi)) / n / (hi - lo)
        for lo, hi in zip(intervals, intervals[1:])
    ])
    return intervals, values


class StepByStepHistogram(Scene):
    """Scene that builds the histogram of ``data`` step by step.

    It shows the raw values as a table, then as dots on a number line,
    lists names of bin-width rules, draws the bars one at a time and
    finally shows a few remarks next to the finished plot.
    """

    def construct(self):
        """Play the complete histogram animation."""
        heading = scene_heading("Das Histogramm")
        self.play(Create(heading))
        self.wait()

        # Raw data as a table with 4 rows and 6 columns.
        table = MobjectTable([
            [
                Tex(r"\textsf{"+f"{data[i]:.2f}"+r"}", color=ORCA_BLUE)
                for i in range(6 * j, 6 * (j + 1))
            ]
            for j in range(4)
        ])
        self.play(Create(table))
        self.wait(9)
        self.play(FadeOut(table))

        histogram = compute_histogram(data)
        axes = self._create_axes(histogram)

        # One red dot per observation on the x axis.
        dots = VGroup(*(
            Dot(axes[0].number_to_point(x), color=ORCA_RED)
            for x in data
        ))
        self.play(Create(axes[0]))
        self.play(Create(dots))
        self.wait(0.5)
        self.play(axes[0].animate.add_ticks().add_numbers(color=ORCA_BLUE))
        self.wait(1.5)

        # Temporarily hide the axis while the bin rules are displayed.
        self.play(FadeOut(axes[0], dots), run_time=0.5)
        self._show_bin_rules()
        self.play(FadeIn(axes[0], dots), run_time=0.5)
        self.wait(1)
        self.play(
            Create(axes[1]),
            VGroup(axes, dots).animate.to_edge(0.5*DOWN, buff=0.8),
        )
        self.wait()

        # Draw one bar per bin; every bar is added to the group so that the
        # whole plot can be scaled and moved together afterwards.
        histogram_with_axes = VGroup(axes, dots)
        bins, values = histogram
        for lo, hi, height in zip(bins, bins[1:], values):
            x1, x2 = axes[0].number_to_point([lo, hi])
            y1, y2 = axes[1].number_to_point([0, height])
            rect = Rectangle(
                width=(x2 - x1)[0],
                height=(y2 - y1)[1],
                color=ORCA_BLUE,
            )
            # Centre the bar horizontally on its bin and let it sit on the
            # x axis.
            rect.shift((x1 + x2) / 2 + np.array([0, rect.height / 2, 0]))
            rect.set_stroke(color=ORCA_BLUE, width=1)
            rect.set_fill(ORCA_BLUE, opacity=0.5)
            histogram_with_axes.add(rect)
            self.play(Create(rect), run_time=0.5)
        self.wait(8.5)

        self.play(histogram_with_axes.animate.scale(0.6).to_edge(LEFT))

        texts = Tex(
            r"\textsf{Treppenfunktion}",
            r"\textsf{R-Befehl:} \texttt{hist}",
            r"\textsf{standardmäßig keine Normierung}",
            color=ORCA_BLUE,
        )
        texts.arrange(DOWN)
        for text in texts:
            text.to_edge(RIGHT, buff=1)

        self.play(Create(texts[0]))
        self.wait(12)
        self.play(Create(texts[1]))
        self.wait(5)
        self.play(Create(texts[2]))
        self.wait(2.5)

        self.play(FadeOut(
            heading,
            histogram_with_axes,
            texts,
        ))
        self.wait(0.5)

    def _show_bin_rules(self):
        """Briefly show the names of three bin-width rules, then fade out."""
        rules = Tex(
            r"\textsf{Sturges}",
            r"\textsf{Freedman-Diaconis}",
            r"\textsf{Scott für normalverteilte Daten}",
            color=ORCA_BLUE,
        )
        rules.center().arrange(DOWN)
        self.play(Create(rules), run_time=2)
        self.play(FadeOut(*rules))

    def _create_axes(self, histogram):
        """Create the x and y number lines for the histogram.

        Args:
            histogram: The ``(intervals, values)`` pair returned by
                ``compute_histogram``; only the bin edges are used.

        Returns:
            A ``VGroup`` whose first element is the x axis and whose second
            element is the y axis.
        """
        bins, _ = histogram
        lower = min(bins)
        upper = max(bins)
        bin_count = len(bins) - 1
        dx = (upper - lower) / bin_count

        # Ticks and numbers are left off here; construct() adds them later
        # in a separate animation.
        x_axis = NumberLine(
            [lower, upper + dx / 2, dx],
            length=10,
            include_tip=True,
            include_ticks=False,
            tip_width=0.25,
            tip_height=0.25,
            include_numbers=False,
            decimal_number_config={"num_decimal_places": 1},
            color=ORCA_BLUE,
        )
        y_axis = NumberLine(
            [0, 2, 0.5],
            length=6,
            include_tip=True,
            include_ticks=True,
            tip_width=0.25,
            tip_height=0.25,
            include_numbers=True,
            color=ORCA_BLUE,
            rotation=90*DEGREES,
            label_direction=LEFT,
        )
        y_axis.numbers.set_color(ORCA_BLUE)
        # Move the y axis so that its zero coincides with the first bin edge
        # on the x axis.
        y_axis.shift(
            x_axis.number_to_point(lower) - y_axis.number_to_point(0)
        )
        return VGroup(x_axis, y_axis)


class Comparison(Scene):
    """Scene listing bullet points for histogram and ECDF in two columns."""

    def construct(self):
        """Play the comparison animation."""
        heading = scene_heading("Unterschiede zwischen den Darstellungen")
        self.play(Create(heading))
        self.wait()

        hist = Tex(r"\textsf{Histogramm}", color=ORCA_BLUE)
        ecdf = Tex(
            r"\textsf{empirische Verteilungsfunktion}",
            color=ORCA_BLUE,
        )
        hist.to_edge(LEFT)
        ecdf.to_edge(RIGHT)
        VGroup(hist, ecdf).next_to(heading, DOWN).align_to(heading, LEFT)

        hist_points = Tex(
            r"\textsf{Schätzer für Dichtefunktion}\\[5ex]",
            r"\textsf{Häufiger im Alltag zu sehen}\\[5ex]",
            r"\textsf{Gute Übersicht über Verteilung}",
            color=ORCA_BLUE,
            tex_environment="flushleft",
            font_size=36,
        ).shift(0.5*DOWN)
        ecdf_points = Tex(
            r"\textsf{Schätzer für Verteilungsfunktion}\\[5ex]",
            r"\textsf{Kein Informationsverlust}\\[5ex]",
            r"\textsf{Verwendung in mathematischer Statistik}",
            color=ORCA_BLUE,
            tex_environment="flushleft",
            font_size=36,
        ).shift(0.5*DOWN)
        hist_points.to_edge(LEFT)
        ecdf_points.align_to(ecdf, LEFT)

        # Column headings for histogram and ECDF
        self.play(Create(hist), Create(ecdf))
        self.wait(5)

        self.play(Create(hist_points[0]))
        self.wait(3)
        self.play(Create(ecdf_points[0]))
        self.wait(3)
        self.play(Create(hist_points[1]))
        self.wait(7.5)
        self.play(Create(hist_points[2]))
        self.wait(7)
        self.play(Create(ecdf_points[1]))
        self.wait(4)
        self.play(Create(ecdf_points[2]))
        self.wait(3)


class StepByStepECDF(Scene):
    """Scene that plots the empirical distribution function of ``data``.

    After the plot, the defining formula is shown and parts of it are
    highlighted; finally plot and formula are displayed together.
    """

    def construct(self):
        """Play the complete ECDF animation."""

        def compute_ecdf(data):
            """Return the jump points of the ECDF and its value at each.

            ``intervals`` holds the sorted distinct observations and
            ``values`` the fraction of observations less than or equal to
            each of them.
            """
            intervals = np.unique(data)
            values = np.array([
                np.count_nonzero(data <= x) / len(data)
                for x in intervals
            ])
            return intervals, values

        heading = scene_heading("Die empirische Verteilungsfunktion")
        self.play(Create(heading))
        self.wait(2)

        ax = Axes(
            x_range=[45.8, 47.3, 0.2],
            y_range=[0, 1.1, 0.1],
            x_length=10,
            y_length=5,
            axis_config={
                "include_tip": True,
                "include_numbers": True,
                "color": ORCA_BLUE,
                "font_size": 24,
                "decimal_number_config": {"num_decimal_places": 1},
            },
        )
        ax[0].numbers.set_color(ORCA_BLUE)
        ax[1].numbers.set_color(ORCA_BLUE)
        ax[0].tip.scale(0.7)
        ax[1].tip.scale(0.7)
        ax.center()

        intervals, values = compute_ecdf(data)

        # Horizontal segments of the step function: each one runs from a
        # jump point to the next; the last one runs to the end of the
        # x range.
        edges = np.append(intervals, ax.x_range[1])
        ecdf = VGroup(*(
            Line(
                start=ax.coords_to_point(left, y, 0),
                end=ax.coords_to_point(right, y, 0),
                color=ORCA_BLUE,
            )
            for left, right, y in zip(edges, edges[1:], values)
        ))

        # One dot at the left end of every segment. The last jump point is
        # max(data), where the ECDF equals 1, so it needs no special case.
        ecdf_dots = VGroup(*(
            Dot(
                ax.coords_to_point(x, y, 0),
                radius=0.05,
                color=ORCA_BLUE,
            )
            for x, y in zip(intervals, values)
        ))

        # The observations themselves, drawn on the x axis.
        dots = VGroup(*(
            Dot(ax.coords_to_point(x, 0, 0), color=ORCA_RED)
            for x in np.sort(data)
        ))

        self.play(Create(ax), run_time=2)
        self.play(Create(dots), run_time=1.25)
        self.wait()
        self.play(Create(ecdf), Create(ecdf_dots), run_time=1.5)
        self.wait()
        self.play(Indicate(ax[0], color=ORCA_BLUE, scale_factor=1.1))
        self.wait(2)
        self.play(Indicate(ax[1], color=ORCA_BLUE, scale_factor=1.1))
        self.wait()

        self.play(
            VGroup(ax, ecdf, ecdf_dots, dots).animate.scale(0.5).to_edge(LEFT)
        )
        self.wait(2)

        r_command = Tex(
            r"\textsf{R-Befehl: }\texttt{plot.ecdf}",
            color=ORCA_BLUE,
        )
        r_command.next_to(ax, RIGHT, buff=1.5)
        self.play(Create(r_command))
        self.wait(5)
        self.play(FadeOut(ax, ecdf, ecdf_dots, dots, r_command))

        formula = MathTex(
            r"F_n(x) = \frac{1}{n} \#\{1 \le i \le n \:\colon\: x_i \le x\}",
            color=ORCA_BLUE,
        )
        formula.center()
        self.play(Create(formula))
        self.wait(2.5)

        # Highlight individual glyph ranges of the formula one after another.
        self.play(Indicate(formula[0][9], color=ORCA_RED, scale_factor=1.5))
        self.wait(3.5)
        self.play(Indicate(formula[0][17:21], color=ORCA_RED, scale_factor=1.5))
        self.wait(3)
        self.play(Indicate(formula[0][6:9], color=ORCA_RED, scale_factor=1.5))
        self.wait(4)

        # Enlarge the (currently hidden) plot again and bring it back above
        # the formula.
        VGroup(ax, ecdf, ecdf_dots, dots).scale(1.5).center()
        self.play(
            formula.animate.to_edge(DOWN),
            FadeIn(ax, ecdf, ecdf_dots, dots),
        )
        self.wait(4.5)

        self.play(FadeOut(heading, formula, ax, ecdf, ecdf_dots, dots))
        self.wait(0.5)