Prediction (out of sample)
============================


.. _predict_notebook:

`Link to Notebook GitHub <https://github.com/statsmodels/statsmodels/blob/master/examples/notebooks/predict.ipynb>`_

.. raw:: html

   
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">print_function</span>
   <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
   <span class="kn">import</span> <span class="nn">statsmodels.api</span> <span class="k">as</span> <span class="nn">sm</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="Artificial-data">Artificial data<a class="anchor-link" href="#Artificial-data">&#182;</a></h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="n">nsample</span> <span class="o">=</span> <span class="mi">50</span>
   <span class="n">sig</span> <span class="o">=</span> <span class="mf">0.25</span>
   <span class="n">x1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="n">nsample</span><span class="p">)</span>
   <span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">column_stack</span><span class="p">((</span><span class="n">x1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x1</span><span class="p">),</span> <span class="p">(</span><span class="n">x1</span><span class="o">-</span><span class="mi">5</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span><span class="p">))</span>
   <span class="n">X</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">add_constant</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
   <span class="n">beta</span> <span class="o">=</span> <span class="p">[</span><span class="mf">5.</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.02</span><span class="p">]</span>
   <span class="n">y_true</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">beta</span><span class="p">)</span>
   <span class="n">y</span> <span class="o">=</span> <span class="n">y_true</span> <span class="o">+</span> <span class="n">sig</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">size</span><span class="o">=</span><span class="n">nsample</span><span class="p">)</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="Estimation">Estimation<a class="anchor-link" href="#Estimation">&#182;</a></h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="n">olsmod</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">OLS</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">X</span><span class="p">)</span>
   <span class="n">olsres</span> <span class="o">=</span> <span class="n">olsmod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
   <span class="nb">print</span><span class="p">(</span><span class="n">olsres</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="In-sample-prediction">In-sample prediction<a class="anchor-link" href="#In-sample-prediction">&#182;</a></h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="n">ypred</span> <span class="o">=</span> <span class="n">olsres</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
   <span class="nb">print</span><span class="p">(</span><span class="n">ypred</span><span class="p">)</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   <div class="output_subarea output_stream output_stdout output_text">
   <pre>                            OLS Regression Results                            
   ==============================================================================
   Dep. Variable:                      y   R-squared:                       0.981
   Model:                            OLS   Adj. R-squared:                  0.980
   Method:                 Least Squares   F-statistic:                     793.2
   Date:                Mon, 20 Jul 2015   Prob (F-statistic):           1.34e-39
   Time:                        17:44:19   Log-Likelihood:                -3.9531
   No. Observations:                  50   AIC:                             15.91
   Df Residuals:                      46   BIC:                             23.55
   Df Model:                           3                                         
   Covariance Type:            nonrobust                                         
   ==============================================================================
                    coef    std err          t      P&gt;|t|      [95.0% Conf. Int.]
   ------------------------------------------------------------------------------
   const          4.9076      0.093     52.736      0.000         4.720     5.095
   x1             0.5215      0.014     36.334      0.000         0.493     0.550
   x2             0.5079      0.056      9.002      0.000         0.394     0.621
   x3            -0.0217      0.001    -17.249      0.000        -0.024    -0.019
   ==============================================================================
   Omnibus:                        0.097   Durbin-Watson:                   2.249
   Prob(Omnibus):                  0.953   Jarque-Bera (JB):                0.210
   Skew:                           0.096   Prob(JB):                        0.900
   Kurtosis:                       2.747   Cond. No.                         221.
   ==============================================================================
   
   Warnings:
   [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
   </pre>
   </div>
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="Create-a-new-sample-of-explanatory-variables-Xnew,-predict-and-plot">Create a new sample of explanatory variables Xnew, predict and plot<a class="anchor-link" href="#Create-a-new-sample-of-explanatory-variables-Xnew,-predict-and-plot">&#182;</a></h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="n">x1n</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mf">20.5</span><span class="p">,</span><span class="mi">25</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
   <span class="n">Xnew</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">column_stack</span><span class="p">((</span><span class="n">x1n</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x1n</span><span class="p">),</span> <span class="p">(</span><span class="n">x1n</span><span class="o">-</span><span class="mi">5</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span><span class="p">))</span>
   <span class="n">Xnew</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">add_constant</span><span class="p">(</span><span class="n">Xnew</span><span class="p">)</span>
   <span class="n">ynewpred</span> <span class="o">=</span>  <span class="n">olsres</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">Xnew</span><span class="p">)</span> <span class="c"># predict out of sample</span>
   <span class="nb">print</span><span class="p">(</span><span class="n">ynewpred</span><span class="p">)</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   <div class="output_subarea output_stream output_stdout output_text">
   <pre>[  4.36419585   4.86372083   5.32288364   5.71400582   6.01939792
      6.23426583   6.36749844   6.4402072    6.48225762   6.52736237
      6.60754246   6.74786624   6.96233076   7.25156207   7.60271237
      7.9915709    8.38654164   8.7538345    9.06301692   9.29201382
      9.43073454   9.48273062   9.4646124    9.40331987   9.33169661
      9.28309362   9.28588985   9.35882964   9.50794309   9.72555529
      9.99154729  10.27666165  10.54730926  10.77108802  10.92210636
     10.98523716  10.95860313  10.85388518  10.69440335  10.51128533
     10.33835231  10.20656224  10.13892353  10.14671482  10.22763195
     10.36616652  10.53615388  10.70507189  10.83938423  10.91005034]
   </pre>
   </div>
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="Plot-comparison">Plot comparison<a class="anchor-link" href="#Plot-comparison">&#182;</a></h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
   
   <span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x1</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="s">&#39;o&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Data&quot;</span><span class="p">)</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x1</span><span class="p">,</span> <span class="n">y_true</span><span class="p">,</span> <span class="s">&#39;b-&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;True&quot;</span><span class="p">)</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">((</span><span class="n">x1</span><span class="p">,</span> <span class="n">x1n</span><span class="p">)),</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">((</span><span class="n">ypred</span><span class="p">,</span> <span class="n">ynewpred</span><span class="p">)),</span> <span class="s">&#39;r&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;OLS prediction&quot;</span><span class="p">)</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s">&quot;best&quot;</span><span class="p">);</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   <div class="output_subarea output_stream output_stdout output_text">
   <pre>[ 10.88191737  10.71896653  10.44111456  10.09374917   9.73661657
      9.4291935    9.21612524   9.11629492   9.11820024   9.18276965]
   </pre>
   </div>
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="Predicting-with-Formulas">Predicting with Formulas<a class="anchor-link" href="#Predicting-with-Formulas">&#182;</a></h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <p>Using formulas can make both estimation and prediction a lot easier</p>
   
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">statsmodels.formula.api</span> <span class="k">import</span> <span class="n">ols</span>
   
   <span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="s">&quot;x1&quot;</span> <span class="p">:</span> <span class="n">x1</span><span class="p">,</span> <span class="s">&quot;y&quot;</span> <span class="p">:</span> <span class="n">y</span><span class="p">}</span>
   
   <span class="n">res</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s">&quot;y ~ x1 + np.sin(x1) + I((x1-5)**2)&quot;</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">data</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   
   
   
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <p>We use the <code>I</code> to indicate use of the Identity transform. Ie., we don't want any expansion magic from using <code>**2</code></p>
   
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="n">res</span><span class="o">.</span><span class="n">params</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <p>Now we only have to pass the single variable and we get the transformed right-hand side variables automatically</p>
   
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">In&nbsp;[&nbsp;]:</div>
   <div class="inner_cell">
       <div class="input_area">
   <div class=" highlight hl-ipython3"><pre><span class="n">res</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">exog</span><span class="o">=</span><span class="nb">dict</span><span class="p">(</span><span class="n">x1</span><span class="o">=</span><span class="n">x1n</span><span class="p">))</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   
   </div>
   
   </div>
   </div>
   
   </div>

   <script src="https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"type="text/javascript"></script>
   <script type="text/javascript">
   init_mathjax = function() {
       if (window.MathJax) {
           // MathJax loaded
           MathJax.Hub.Config({
               tex2jax: {
               // I'm not sure about the \( and \[ below. It messes with the
               // prompt, and I think it's an issue with the template. -SS
                   inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                   displayMath: [ ['$$','$$'], ["\\[","\\]"] ]
               },
               displayAlign: 'left', // Change this to 'center' to center equations.
               "HTML-CSS": {
                   styles: {'.MathJax_Display': {"margin": 0}}
               }
           });
           MathJax.Hub.Queue(["Typeset",MathJax.Hub]);
       }
   }
   init_mathjax();

   // since we have to load this in a ..raw:: directive we will add the css
   // after the fact
   function loadcssfile(filename){
       var fileref=document.createElement("link")
       fileref.setAttribute("rel", "stylesheet")
       fileref.setAttribute("type", "text/css")
       fileref.setAttribute("href", filename)

       document.getElementsByTagName("head")[0].appendChild(fileref)
   }
   // loadcssfile({{pathto("_static/nbviewer.pygments.css", 1) }})
   // loadcssfile({{pathto("_static/nbviewer.min.css", 1) }})
   loadcssfile("../../../_static/nbviewer.pygments.css")
   loadcssfile("../../../_static/ipython.min.css")
   </script>