Kernel Density Estimation =========================== .. _kernel_density_notebook: `Link to Notebook GitHub <https://github.com/statsmodels/statsmodels/blob/master/examples/notebooks/kernel_density.ipynb>`_ .. raw:: html <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">from</span> <span class="nn">scipy</span> <span class="k">import</span> <span class="n">stats</span> <span class="kn">import</span> <span class="nn">statsmodels.api</span> <span class="k">as</span> <span class="nn">sm</span> <span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span> <span class="kn">from</span> <span class="nn">statsmodels.distributions.mixture_rvs</span> <span class="k">import</span> <span class="n">mixture_rvs</span> </pre></div> </div> </div> </div> </div> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h4 id="A-univariate-example.">A univariate example.<a class="anchor-link" href="#A-univariate-example.">¶</a></h4> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">12345</span><span class="p">)</span> </pre></div> </div> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt 
input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">obs_dist1</span> <span class="o">=</span> <span class="n">mixture_rvs</span><span class="p">([</span><span class="o">.</span><span class="mi">25</span><span class="p">,</span><span class="o">.</span><span class="mi">75</span><span class="p">],</span> <span class="n">size</span><span class="o">=</span><span class="mi">10000</span><span class="p">,</span> <span class="n">dist</span><span class="o">=</span><span class="p">[</span><span class="n">stats</span><span class="o">.</span><span class="n">norm</span><span class="p">,</span> <span class="n">stats</span><span class="o">.</span><span class="n">norm</span><span class="p">],</span> <span class="n">kwargs</span> <span class="o">=</span> <span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">loc</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span><span class="n">scale</span><span class="o">=.</span><span class="mi">5</span><span class="p">),</span><span class="nb">dict</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span><span class="n">scale</span><span class="o">=.</span><span class="mi">5</span><span class="p">)))</span> </pre></div> </div> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">kde</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">nonparametric</span><span class="o">.</span><span class="n">KDEUnivariate</span><span class="p">(</span><span class="n">obs_dist1</span><span class="p">)</span> <span class="n">kde</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span> 
</pre></div> </div> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span><span class="mi">8</span><span class="p">))</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span class="p">(</span><span class="mi">111</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">hist</span><span class="p">(</span><span class="n">obs_dist1</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">normed</span><span class="o">=</span><span class="k">True</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s">'red'</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">kde</span><span class="o">.</span><span class="n">support</span><span class="p">,</span> <span class="n">kde</span><span class="o">.</span><span class="n">density</span><span class="p">,</span> <span class="n">lw</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s">'black'</span><span class="p">);</span> </pre></div> </div> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> 
<div class=" highlight hl-ipython3"><pre><span class="n">obs_dist2</span> <span class="o">=</span> <span class="n">mixture_rvs</span><span class="p">([</span><span class="o">.</span><span class="mi">25</span><span class="p">,</span><span class="o">.</span><span class="mi">75</span><span class="p">],</span> <span class="n">size</span><span class="o">=</span><span class="mi">10000</span><span class="p">,</span> <span class="n">dist</span><span class="o">=</span><span class="p">[</span><span class="n">stats</span><span class="o">.</span><span class="n">norm</span><span class="p">,</span> <span class="n">stats</span><span class="o">.</span><span class="n">beta</span><span class="p">],</span> <span class="n">kwargs</span> <span class="o">=</span> <span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">loc</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span><span class="n">scale</span><span class="o">=.</span><span class="mi">5</span><span class="p">),</span><span class="nb">dict</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span><span class="n">scale</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span><span class="n">args</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="o">.</span><span class="mi">5</span><span class="p">))))</span> <span class="n">kde2</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">nonparametric</span><span class="o">.</span><span class="n">KDEUnivariate</span><span class="p">(</span><span class="n">obs_dist2</span><span class="p">)</span> <span class="n">kde2</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span> </pre></div> </div> </div> </div> <div class="output_wrapper"> <div class="output"> <div class="output_area"><div 
class="prompt"></div> </div> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span><span class="mi">8</span><span class="p">))</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span class="p">(</span><span class="mi">111</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">hist</span><span class="p">(</span><span class="n">obs_dist2</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">normed</span><span class="o">=</span><span class="k">True</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s">'red'</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">kde2</span><span class="o">.</span><span class="n">support</span><span class="p">,</span> <span class="n">kde2</span><span class="o">.</span><span class="n">density</span><span class="p">,</span> <span class="n">lw</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s">'black'</span><span class="p">);</span> </pre></div> </div> </div> </div> </div> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> <div class="inner_cell"> <div class="text_cell_render 
border-box-sizing rendered_html"> <p>The fitted KDE object is a full non-parametric distribution.</p> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">obs_dist3</span> <span class="o">=</span> <span class="n">mixture_rvs</span><span class="p">([</span><span class="o">.</span><span class="mi">25</span><span class="p">,</span><span class="o">.</span><span class="mi">75</span><span class="p">],</span> <span class="n">size</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">dist</span><span class="o">=</span><span class="p">[</span><span class="n">stats</span><span class="o">.</span><span class="n">norm</span><span class="p">,</span> <span class="n">stats</span><span class="o">.</span><span class="n">norm</span><span class="p">],</span> <span class="n">kwargs</span> <span class="o">=</span> <span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">loc</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span><span class="n">scale</span><span class="o">=.</span><span class="mi">5</span><span class="p">),</span><span class="nb">dict</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span><span class="n">scale</span><span class="o">=.</span><span class="mi">5</span><span class="p">)))</span> <span class="n">kde3</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">nonparametric</span><span class="o">.</span><span class="n">KDEUnivariate</span><span class="p">(</span><span class="n">obs_dist3</span><span class="p">)</span> <span class="n">kde3</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span> </pre></div> </div> </div> </div> 
<div class="output_wrapper"> <div class="output"> <div class="output_area"><div class="prompt"></div> </div> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">kde3</span><span class="o">.</span><span class="n">entropy</span> </pre></div> </div> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">kde3</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> </pre></div> </div> </div> </div> <div class="output_wrapper"> <div class="output"> <div class="output_area"><div class="prompt"></div> </div> </div> </div> </div> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h4 id="CDF">CDF<a class="anchor-link" href="#CDF">¶</a></h4> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span><span class="mi">8</span><span class="p">))</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span 
class="p">(</span><span class="mi">111</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">kde3</span><span class="o">.</span><span class="n">support</span><span class="p">,</span> <span class="n">kde3</span><span class="o">.</span><span class="n">cdf</span><span class="p">);</span> </pre></div> </div> </div> </div> <div class="output_wrapper"> <div class="output"> <div class="output_area"><div class="prompt"></div> </div> </div> </div> </div> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h4 id="Cumulative-Hazard-Function">Cumulative Hazard Function<a class="anchor-link" href="#Cumulative-Hazard-Function">¶</a></h4> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span><span class="mi">8</span><span class="p">))</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span class="p">(</span><span class="mi">111</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">kde3</span><span class="o">.</span><span class="n">support</span><span class="p">,</span> <span class="n">kde3</span><span class="o">.</span><span class="n">cumhazard</span><span class="p">);</span> </pre></div> </div> </div> </div> <div 
class="output_wrapper"> <div class="output"> <div class="output_area"><div class="prompt"></div> </div> </div> </div> </div> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h4 id="Inverse-CDF">Inverse CDF<a class="anchor-link" href="#Inverse-CDF">¶</a></h4> </div> </div> </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span><span class="mi">8</span><span class="p">))</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span class="p">(</span><span class="mi">111</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">kde3</span><span class="o">.</span><span class="n">support</span><span class="p">,</span> <span class="n">kde3</span><span class="o">.</span><span class="n">icdf</span><span class="p">);</span> </pre></div> </div> </div> </div> <div class="output_wrapper"> <div class="output"> <div class="output_area"><div class="prompt"></div> </div> </div> </div> </div> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h4 id="Survival-Function">Survival Function<a class="anchor-link" href="#Survival-Function">¶</a></h4> </div> </div> </div> <div class="cell border-box-sizing 
code_cell rendered"> <div class="input"> <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span><span class="mi">8</span><span class="p">))</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span class="p">(</span><span class="mi">111</span><span class="p">)</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">kde3</span><span class="o">.</span><span class="n">support</span><span class="p">,</span> <span class="n">kde3</span><span class="o">.</span><span class="n">sf</span><span class="p">);</span> </pre></div> </div> </div> </div> <div class="output_wrapper"> <div class="output"> <div class="output_area"><div class="prompt"></div> </div> </div> </div> </div>
<!-- NOTE(review): this Rackspace-hosted MathJax URL looks like a legacy CDN endpoint; confirm it still resolves and, if not, point it at a maintained MathJax 2.x host (the configuration below uses the v2 MathJax.Hub API). -->
<script src="https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML" type="text/javascript"></script>
<script type="text/javascript">
  /*
   * Only block comments and explicit semicolons are used in this script so
   * that it stays valid JavaScript even if the page is whitespace-collapsed
   * onto a single line.
   */

  /*
   * Configure MathJax and queue a typeset pass over the page.
   * Does nothing when window.MathJax is not defined (e.g. the external
   * script above failed to load).
   */
  var init_mathjax = function () {
    if (window.MathJax) {
      /* MathJax loaded */
      MathJax.Hub.Config({
        tex2jax: {
          /* I'm not sure about the \( and \[ below. It messes with the
             prompt, and I think it's an issue with the template. -SS */
          inlineMath: [ ['$','$'], ["\\(","\\)"] ],
          displayMath: [ ['$$','$$'], ["\\[","\\]"] ]
        },
        displayAlign: 'left', /* Change this to 'center' to center equations. */
        "HTML-CSS": {
          styles: {'.MathJax_Display': {"margin": 0}}
        }
      });
      MathJax.Hub.Queue(["Typeset", MathJax.Hub]);
    }
  };
  init_mathjax();

  /*
   * since we have to load this in a ..raw:: directive we will add the css
   * after the fact
   *
   * loadcssfile(filename): create a <link rel="stylesheet" type="text/css">
   * element whose href is `filename` and append it to the first <head>
   * element of the document.
   */
  function loadcssfile(filename) {
    var fileref = document.createElement("link");
    fileref.setAttribute("rel", "stylesheet");
    fileref.setAttribute("type", "text/css");
    fileref.setAttribute("href", filename);
    document.getElementsByTagName("head")[0].appendChild(fileref);
  }

  /* loadcssfile({{pathto("_static/nbviewer.pygments.css", 1) }}) */
  /* loadcssfile({{pathto("_static/nbviewer.min.css", 1) }}) */
  loadcssfile("../../../_static/nbviewer.pygments.css");
  loadcssfile("../../../_static/ipython.min.css");
</script>