<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"><channel><title><![CDATA[The DIY Data Scientist]]></title><description><![CDATA[Learn real-world, do-it-yourself (DIY) analytics skills that make you stand out at work with weekly hands-on tutorials designed for ANY professional, including Excel workbooks, data and code.]]></description><link>https://thediydatascientist.substack.com</link><image><url>https://substackcdn.com/image/fetch/$s_!51Dz!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9226edbd-0140-41ef-8ece-9b28d38e49dc_512x512.png</url><title>The DIY Data Scientist</title><link>https://thediydatascientist.substack.com</link></image><generator>Substack</generator><lastBuildDate>Fri, 15 May 2026 08:50:12 GMT</lastBuildDate><atom:link href="https://thediydatascientist.substack.com/feed" rel="self" type="application/rss+xml"/><copyright><![CDATA[David Langer]]></copyright><language><![CDATA[en]]></language><webMaster><![CDATA[thediydatascientist@substack.com]]></webMaster><itunes:owner><itunes:email><![CDATA[thediydatascientist@substack.com]]></itunes:email><itunes:name><![CDATA[David Langer]]></itunes:name></itunes:owner><itunes:author><![CDATA[David Langer]]></itunes:author><googleplay:owner><![CDATA[thediydatascientist@substack.com]]></googleplay:owner><googleplay:email><![CDATA[thediydatascientist@substack.com]]></googleplay:email><googleplay:author><![CDATA[David Langer]]></googleplay:author><itunes:block><![CDATA[Yes]]></itunes:block><item><title><![CDATA[Webinar Recording - Beyond Dashboards]]></title><description><![CDATA[Your Data Warehouse & AI 
Strategy]]></description><link>https://thediydatascientist.substack.com/p/webinar-recording-beyond-dashboards</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/webinar-recording-beyond-dashboards</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Wed, 13 May 2026 21:06:47 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/7730ea91-aaa5-4e24-b6ad-ef522882360a_1200x639.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!BPIx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!BPIx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 424w, https://substackcdn.com/image/fetch/$s_!BPIx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 848w, https://substackcdn.com/image/fetch/$s_!BPIx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 1272w, https://substackcdn.com/image/fetch/$s_!BPIx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!BPIx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png" width="900" height="814" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:814,&quot;width&quot;:900,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:628128,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197560263?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!BPIx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 424w, https://substackcdn.com/image/fetch/$s_!BPIx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 848w, https://substackcdn.com/image/fetch/$s_!BPIx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 1272w, https://substackcdn.com/image/fetch/$s_!BPIx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F20701d7a-4096-4c98-a3b1-45e9790edb39_900x814.png 1456w" 
sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#9989; Your data warehouse is more valuable than ever.</strong></p><p>Traditional BI assets are not being seen as central to a real-world AI strategy. Nothing can be further from the truth!</p><p>I&#8217;m betting that your data warehouse is probably more valuable than your organization realizes. The secret is transforming your data warehouse into a crystal ball to predict the future.</p><p><strong>&#9989; Hard financial impacts vs. 
squishy productivity benefits.</strong></p><p>When deploying AI tools like Copilot and ChatGPT across business functions, the expected benefits tend to fall into the category of &#8220;productivity benefits.&#8221;</p><p>Your data warehouse can be central to demonstrable financial benefits to your organization that your CEO will love.</p><p>&#9989; <strong>Your KPI knowledge is key.</strong></p><p>As the source of clean enterprise data for reporting and dashboards, data warehousing teams have intimate knowledge of the key performance indicators (KPIs) that matter to executives.</p><p>This knowledge is central to prioritizing use cases that deliver hard financial benefits to your organization and capture executives&#8217; attention.</p><p><strong>&#9989; You don&#8217;t need Data Scientists or AI Engineers.</strong></p><p>Successfully transforming your data warehouse is all about two things: </p><ol><li><p>Knowledge of your business processes. </p></li><li><p>Deep understanding of your data.</p></li></ol><p>BI and data teams already have both of these. All you need is to augment their existing skills, and you&#8217;ll unleash the true value of your data warehouse.</p><p><strong>Check out the full webinar recording &#128071;</strong></p>
      <p>
          <a href="https://thediydatascientist.substack.com/p/webinar-recording-beyond-dashboards">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Hierarchical Clustering with Python Part 6: Cluster Distances ]]></title><description><![CDATA[Don't worry. It's all intuition. There's no math in this one.]]></description><link>https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-641</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-641</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Tue, 12 May 2026 13:06:03 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/e3df34e7-d2a3-4322-9505-3b70f78be69c_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Are you new to this tutorial series? <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python">Check out Part 1 here</a></strong>.</p><p>You learned in <a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-3b8">Part 3</a> of this tutorial series how the agglomerative hierarchical clustering algorithm calculates the distance between two data points (i.e., rows) in a dataset.</p><p>This tutorial will cover the various methods the <em>AgglomerativeClustering</em> class in <em>scikit-learn</em> offers for calculating distances between clusters.</p><p>As covered in <a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-bb8">Part 2</a> of this tutorial series, the algorithm needs both types of distances to mine the cluster hierarchy (i.e., taxonomy) from a dataset.</p><p><strong>These methods for calculating distances between clusters are called </strong><em><strong>linkages</strong></em><strong>. 
The </strong><em><strong>AgglomerativeClustering </strong></em><strong>class supports four different linkages</strong><em><strong>.</strong></em></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>#1 - Single Link</h3><p>Understanding each linkage type is quite intuitive when you use a graphical representation. For this tutorial, assume we have the following two clusters:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!J_Q2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!J_Q2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!J_Q2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!J_Q2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 1272w, 
https://substackcdn.com/image/fetch/$s_!J_Q2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!J_Q2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png" width="560" height="269.61538461538464" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:701,&quot;width&quot;:1456,&quot;resizeWidth&quot;:560,&quot;bytes&quot;:92754,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!J_Q2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!J_Q2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 848w, 
https://substackcdn.com/image/fetch/$s_!J_Q2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!J_Q2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff5bcd4b7-679b-44fa-8432-c433c41bf23d_2332x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The first linkage is known as <em>single link</em>.</p><p>Single link calculates the distances between all the points in each of the 
clusters and then chooses the smallest of these distances to represent how <em>similar</em> (i.e., close together) the clusters are.</p><p>Graphically, single link would use the following two points for the distance between the clusters because it is the minimum:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!VwSd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VwSd!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!VwSd!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!VwSd!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!VwSd!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VwSd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png" width="561" height="270.09684065934067" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:701,&quot;width&quot;:1456,&quot;resizeWidth&quot;:561,&quot;bytes&quot;:109232,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!VwSd!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!VwSd!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!VwSd!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!VwSd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb52eb754-250a-4ad0-aa52-d1ab11827adf_2332x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Here's how to use single link with the <em>AgglomerativeClustering</em> class:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;962e51c1-4768-45f2-b09e-8aba01a6bc0b&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from sklearn.cluster import AgglomerativeClustering

# Use single link for cluster distances
agg_clustering = AgglomerativeClustering(linkage = 'single')</code></pre></div><p>NOTE - This code is the same whether you&#8217;re using Python in Excel, VS Code, or Jupyter Notebook.</p><div class="pullquote"><p><strong>If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><h3>#2 - Complete Link</h3><p>If there&#8217;s a linkage that uses the minimum distance between two points in each cluster, it stands to reason that there would be a linkage that uses the maximum distance.</p><p>This is known as <em>complete link</em>.</p><p>I know.</p><p>Why couldn&#8217;t they just call them min and max? &#129335;&#8205;&#9794;&#65039;</p><p>Here&#8217;s complete link shown graphically:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WlA7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WlA7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!WlA7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!WlA7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 1272w, 
https://substackcdn.com/image/fetch/$s_!WlA7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!WlA7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png" width="551" height="265.2822802197802" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:701,&quot;width&quot;:1456,&quot;resizeWidth&quot;:551,&quot;bytes&quot;:117389,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!WlA7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!WlA7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 848w, 
https://substackcdn.com/image/fetch/$s_!WlA7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!WlA7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffae67639-8707-4d6c-a6d1-e6a621603c66_2332x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And in code:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;676d90f6-4b73-4f32-8197-534b696d57f1&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from sklearn.cluster import AgglomerativeClustering

# Use complete link for cluster distances
agg_clustering = AgglomerativeClustering(linkage = 'complete')</code></pre></div><div><hr></div><h3>#3 - Average Link</h3><p>The next linkage is the <em>average link</em>. As you might have guessed, this linkage uses the average distance between all data points in two clusters.</p><p>To keep the graphic from being overwhelming, I&#8217;ve only included the distance lines for two data points from the orange cluster:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bV3m!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bV3m!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!bV3m!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!bV3m!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!bV3m!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!bV3m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png" width="550" height="264.8008241758242" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:701,&quot;width&quot;:1456,&quot;resizeWidth&quot;:550,&quot;bytes&quot;:155745,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bV3m!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!bV3m!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!bV3m!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 1272w, 
https://substackcdn.com/image/fetch/$s_!bV3m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46285832-fa01-4ab9-b298-b003244591ad_2332x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Imagine all the distance lines from every data point in the orange cluster to every data point in the green cluster. 
</p><p>The similarity of the clusters is the average of all those distance lines - a smaller average indicates more similar clusters.</p><p>Here&#8217;s the code to specify using average link:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;a42dc009-417c-4ac4-adf5-ead0ccd9f240&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from sklearn.cluster import AgglomerativeClustering

# Use average link for cluster distances
agg_clustering = AgglomerativeClustering(linkage = 'average')</code></pre></div><div><hr></div><h3>#4 - Ward Linkage</h3><p>The last linkage is the <em>AgglomerativeClustering</em> default: the <em>Ward linkage</em>. The Ward linkage (or <em>Ward&#8217;s method</em>) is more involved than the previous linkages.</p><p>If you&#8217;re familiar with the k-means clustering algorithm (e.g., from my <strong><a href="https://www.daveondata.com/cluster-analysis-with-python-info">Cluster Analysis with Python</a></strong> online course), then Ward linkage will be familiar to you.</p><p>The Ward linkage uses <em>prototypes</em>. Think of prototypes as being data points that the algorithm &#8220;makes up&#8221; to be the center of each cluster.</p><p>The most common type of prototype is the <em>centroid</em>, which is the average of all data points in the cluster.</p><p>Graphically, the orange and green dots are the centroids of the two clusters in our running example:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!mPuI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!mPuI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!mPuI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 848w, 
https://substackcdn.com/image/fetch/$s_!mPuI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!mPuI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!mPuI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png" width="550" height="264.8008241758242" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:701,&quot;width&quot;:1456,&quot;resizeWidth&quot;:550,&quot;bytes&quot;:95923,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!mPuI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 424w, 
https://substackcdn.com/image/fetch/$s_!mPuI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!mPuI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!mPuI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3decbbdd-ca66-4c8b-bb6e-e883161ef7ee_2332x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The ward linkage first evaluates each cluster by finding all the distances (i.e., the dotted lines) between the data points in a cluster and their centroid:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!vEdM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!vEdM!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!vEdM!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!vEdM!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!vEdM!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!vEdM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png" width="550" height="264.8008241758242" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:701,&quot;width&quot;:1456,&quot;resizeWidth&quot;:550,&quot;bytes&quot;:118753,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!vEdM!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 424w, https://substackcdn.com/image/fetch/$s_!vEdM!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 848w, https://substackcdn.com/image/fetch/$s_!vEdM!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!vEdM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc1a82f5e-c903-411f-a115-534fda0bffeb_2332x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Consider the orange cluster above. There are eight data points in the cluster and eight distances between each data point and the centroid.</p><p>The ward linkage takes each one of these distance values and squares them (i.e., the distance multiplied by itself). 
It then sums all these squared distances.</p><p>It then repeats this process for the green cluster.</p><p>Lastly, it then adds the sum of the squared distances for the orange cluster to the sum of the squared distances for the green cluster.</p><p>Think of this last calculation (i.e., adding the two sums together) as being the baseline.</p><p>The ward linkage then combines the two clusters and calculates a new centroid (represented in blue):</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2XdJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2XdJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 424w, https://substackcdn.com/image/fetch/$s_!2XdJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 848w, https://substackcdn.com/image/fetch/$s_!2XdJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 1272w, https://substackcdn.com/image/fetch/$s_!2XdJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!2XdJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png" width="554" height="262.5412087912088" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:690,&quot;width&quot;:1456,&quot;resizeWidth&quot;:554,&quot;bytes&quot;:88063,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2XdJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 424w, https://substackcdn.com/image/fetch/$s_!2XdJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 848w, https://substackcdn.com/image/fetch/$s_!2XdJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 1272w, 
https://substackcdn.com/image/fetch/$s_!2XdJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e4f17ee-c516-4349-afdf-97386641a19c_2387x1132.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The ward linkage then sums up all the squared distances for the new, larger cluster:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!VNQ5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VNQ5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 424w, https://substackcdn.com/image/fetch/$s_!VNQ5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 848w, https://substackcdn.com/image/fetch/$s_!VNQ5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 1272w, https://substackcdn.com/image/fetch/$s_!VNQ5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VNQ5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png" width="555" height="263.0151098901099" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:690,&quot;width&quot;:1456,&quot;resizeWidth&quot;:555,&quot;bytes&quot;:141200,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/197037536?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!VNQ5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 424w, https://substackcdn.com/image/fetch/$s_!VNQ5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 848w, https://substackcdn.com/image/fetch/$s_!VNQ5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 1272w, https://substackcdn.com/image/fetch/$s_!VNQ5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2593e4a0-9b5f-4f6d-8930-0489606b2c96_2387x1132.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Think of the sum of the squared distances for the new, larger cluster as being the new hotness.</p><p>Ward linkage calculates the difference between the baseline and the new hotness, treating it as a penalty for combining clusters. Ward linkage looks to make this penalty as small as possible when considering which clusters to combine.</p><p>By minimizing the penalty, ward linkage prioritizes combining the most similar clusters.</p><p>Even though Ward&#8217;s method is the default, here&#8217;s the code for reference:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;ea521016-a16b-421f-8560-8d568e0c98cb&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from sklearn.cluster import AgglomerativeClustering

# Use Ward's method for cluster distances
agg_clustering = AgglomerativeClustering(linkage = 'ward')</code></pre></div><div><hr></div><h3>Which Linkage to Use?</h3><p>It shouldn&#8217;t come as a surprise that using different linkages can produce different clusterings.</p><p>So, you may be asking yourself, &#8220;Which linkage should I use?&#8221;</p><p>When it comes to cluster analysis, I&#8217;m a big fan of experimenting to see what works best. For example, try different linkages and see which produces the most useful clusters.</p><p>That being said, here are some rules of thumb:</p><ul><li><p>Single link is good at handling non-spherical cluster shapes, but is sensitive to outliers.</p></li><li><p>Complete link is less susceptible to outliers, but can break large clusters, and it tends to produce spherical clusters.</p></li><li><p>Think of average link as being a compromise between the single and complete linkages.</p></li><li><p>Think of Ward&#8217;s method as being a bit of an improvement over average link.</p></li></ul><div><hr></div><p>That&#8217;s it for this tutorial.</p><p>My next newsletter will be the last in this series. 
The topic is using ML predictive models to help interpret clusters.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e7964d23-9984-4186-afa0-54d0912d303b_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a685568b-33dd-4b2e-8f2f-448cc217a864_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/78441089-72da-4172-8370-e69109f7f469_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/411b62e1-1ad8-4edd-afd6-507d95602e4a_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" 
data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">The DIY Data Scientist is a reader-supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 8: Partnering with AI]]></title><description><![CDATA[This is more than a prompt. It's how you make impact at work - fast.]]></description><link>https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-cd5</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-cd5</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Fri, 08 May 2026 18:59:40 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/62fe2d87-1b49-4738-8c0b-1fcffb137077_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Are you new to this tutorial series? <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">Check out Part 1 here</a></strong>.</p><p>While it&#8217;s tempting to want to feed your data to AI and trust that the output is correct, this is a bad idea for two reasons:</p><ol><li><p>AI tools like ChatGPT, Claude, and Copilot often make mistakes in analytics. 
For example, they often make incorrect assumptions and never tell you about them.</p></li><li><p>You&#8217;re accountable for the quality of your analytics. No executive I&#8217;ve ever worked with will buy the argument, &#8220;Don&#8217;t blame me! It&#8217;s the AI&#8217;s fault.&#8221;</p></li></ol><p><strong>That being said, once you&#8217;ve developed fundamental skills in data analysis, you can partner with AI tools to accelerate the impact you can make at work.</strong></p><div><hr></div><h3>Introducing Your AI Partner</h3><p>Here are some screenshots of me partnering with Copilot in Excel to build a sales forecast. Simply paste the prompt into Copilot in Excel to get started:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jRwp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jRwp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 424w, https://substackcdn.com/image/fetch/$s_!jRwp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 848w, https://substackcdn.com/image/fetch/$s_!jRwp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 1272w, 
https://substackcdn.com/image/fetch/$s_!jRwp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jRwp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png" width="1456" height="1212" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1212,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:252436,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196922952?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jRwp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 424w, https://substackcdn.com/image/fetch/$s_!jRwp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 848w, 
https://substackcdn.com/image/fetch/$s_!jRwp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 1272w, https://substackcdn.com/image/fetch/$s_!jRwp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb310970a-150d-4ca8-bbad-b23ff1e6837e_1466x1220.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>NOTE -</strong> Because of the random nature of AI tools like Copilot in Excel, your experience might not be 100% the 
same as what you will see in these images. </p><p>Also, the prompt is designed to work with LLM-based chat tools like ChatGPT, Claude, Copilot, etc. You&#8217;re not limited to Copilot in Excel.</p><div><hr></div><h3>It&#8217;s Not Just About Forecasting Sales</h3><p>While the example images show partnering with AI to forecast sales, the prompt will work for any domain:</p><ul><li><p>Forecasting sales</p></li><li><p>Forecasting product demand</p></li><li><p>Forecasting customer service call volume</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nVhm!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nVhm!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 424w, https://substackcdn.com/image/fetch/$s_!nVhm!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 848w, https://substackcdn.com/image/fetch/$s_!nVhm!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 1272w, https://substackcdn.com/image/fetch/$s_!nVhm!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!nVhm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png" width="1456" height="1225" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1225,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:342951,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196922952?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!nVhm!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 424w, https://substackcdn.com/image/fetch/$s_!nVhm!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 848w, https://substackcdn.com/image/fetch/$s_!nVhm!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 1272w, 
https://substackcdn.com/image/fetch/$s_!nVhm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff8d6746b-a92f-4a2d-9ce5-df2092ce8f80_1466x1233.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>Best Practices Are Included</h3><p>All of the best practices you learned about in my 7-part tutorial series are included with the AI prompt. 
For example, using line charts to assess the characteristics of a time series:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!o2L4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!o2L4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 424w, https://substackcdn.com/image/fetch/$s_!o2L4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 848w, https://substackcdn.com/image/fetch/$s_!o2L4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 1272w, https://substackcdn.com/image/fetch/$s_!o2L4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!o2L4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png" width="1446" height="1326" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1326,&quot;width&quot;:1446,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:357550,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196922952?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!o2L4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 424w, https://substackcdn.com/image/fetch/$s_!o2L4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 848w, https://substackcdn.com/image/fetch/$s_!o2L4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 1272w, https://substackcdn.com/image/fetch/$s_!o2L4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff48c4343-e9e7-44cd-beef-8a2183a28f47_1446x1326.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Are you ready to get started? </p><p>Let&#8217;s start with tips for using the prompt.</p>
      <p>
          <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-cd5">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Hierarchical Clustering with Python Part 5: Tuning]]></title><description><![CDATA[Cluster analysis is like your car. It needs to be tuned for optimal performance.]]></description><link>https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-46f</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-46f</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Wed, 06 May 2026 17:36:18 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/a418e76e-b5b9-4fd7-825b-35d15f8603a6_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Are you new to the tutorial series? <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python">Check out Part 1 here</a></strong>.</p><p>As you saw in the <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-66a">last tutorial</a></strong><a href="https://www.daveondata.com/hierarchical-clustering-tutorial-part-4">&#8203;</a>, allowing agglomerative hierarchical clustering to find all possible clusters is usually not a good idea except for the simplest of datasets.</p><p>The intuitive way to think about what happened in the last tutorial is to imagine your automobile (or car, as we say in the United States). Specifically, imagine your car&#8217;s engine.</p><p>For your car&#8217;s engine to work optimally, it needs to be tuned. For example, I live in a mountain valley in Montana. My car&#8217;s engine likely needs to be tuned differently than the same make/model at sea level (i.e., elevation of 4,800 feet vs. 0 feet).</p><p><strong>Similarly, as with a car, you must tune the clustering for the situation at hand. 
And this tuning will likely differ across datasets.</strong></p><p>Don&#8217;t worry if this sounds complicated, because it&#8217;s really not.</p><p>In this tutorial, you will learn a powerful technique for tuning the hierarchical clustering process to find a more optimal number of clusters.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>Loading the Dataset</h3><p>To follow along (highly recommended), download the <em>CustomerBehavior.xlsx</em> file from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p><em>NOTE - While I&#8217;m using Python in Excel for this tutorial, 99+% of the code is the same whether you use Excel, VS Code, or Jupyter Notebook.</em></p><p>To keep from repeating the same content, I&#8217;m going to assume that you&#8217;re familiar with <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-66a">Part 4</a></strong> of this tutorial series.</p><p>Here&#8217;s how I set up my <em>Python Code</em> worksheet with comments:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!aSqO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!aSqO!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 424w, https://substackcdn.com/image/fetch/$s_!aSqO!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 848w, https://substackcdn.com/image/fetch/$s_!aSqO!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 1272w, https://substackcdn.com/image/fetch/$s_!aSqO!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!aSqO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png" width="433" height="185.00458190148913" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:373,&quot;width&quot;:873,&quot;resizeWidth&quot;:433,&quot;bytes&quot;:80557,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!aSqO!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 424w, https://substackcdn.com/image/fetch/$s_!aSqO!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 848w, https://substackcdn.com/image/fetch/$s_!aSqO!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 1272w, https://substackcdn.com/image/fetch/$s_!aSqO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9fb6748-352c-4d15-acf9-9db7dcf17460_873x373.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Per my usual, I add the Python in Excel code to the right of each comment. 
The following code loads the <em>CustomerBehavior</em> Excel table into Python:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uVCN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uVCN!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 424w, https://substackcdn.com/image/fetch/$s_!uVCN!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 848w, https://substackcdn.com/image/fetch/$s_!uVCN!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 1272w, https://substackcdn.com/image/fetch/$s_!uVCN!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!uVCN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png" width="1456" height="302" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:302,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:43259,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uVCN!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 424w, https://substackcdn.com/image/fetch/$s_!uVCN!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 848w, https://substackcdn.com/image/fetch/$s_!uVCN!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 1272w, https://substackcdn.com/image/fetch/$s_!uVCN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc7780d-071a-4882-811c-545c61d4e4b2_1590x330.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Here&#8217;s the Python code for you to use in your Excel workbook:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;fba51340-6430-484e-b1ab-e2e7abbbb56e&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Load the CustomerBehavior Excel table
customer_behavior = xl("CustomerBehavior[#All]", headers = True)</code></pre></div><div class="pullquote"><p><strong>BTW - The code above is the only Python in this tutorial that is specific to Microsoft Excel. </strong></p><p><strong>If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><h3>A Problem of Scale</h3><p>Here&#8217;s a small subset of the data used in this tutorial series:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tCI6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tCI6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 424w, https://substackcdn.com/image/fetch/$s_!tCI6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 848w, https://substackcdn.com/image/fetch/$s_!tCI6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 1272w, https://substackcdn.com/image/fetch/$s_!tCI6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!tCI6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png" width="1456" height="567" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:567,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:114180,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tCI6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 424w, https://substackcdn.com/image/fetch/$s_!tCI6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 848w, https://substackcdn.com/image/fetch/$s_!tCI6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 1272w, https://substackcdn.com/image/fetch/$s_!tCI6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F794c3d81-b0cf-4163-b94b-c186e2fe77f7_1516x590.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Take a look at the data above, specifically the <em>Income</em> and <em>RecencyInDays</em> columns (i.e., <em>features</em>). Here are working definitions of each of these features:</p><ul><li><p><em><strong>Income:</strong></em> Yearly household income in US dollars.</p></li><li><p><em><strong>RecencyInDays:</strong></em> The number of days since the customer&#8217;s last purchase.</p></li></ul><p>Now, consider the magnitude of the data values in each of these features. 
For example, the first row of data has an <em>Income</em> of <em>$58,138</em> and a <em>RecencyInDays</em> of <em>58</em>.</p><p>Notice how the <em>Income</em> values are far larger than the <em>RecencyInDays</em> values? In technical terms, these two features are on different <em>scales</em>. </p><p>Features with different scales are very common in real-world analytics. For example, imagine you&#8217;re working in a supply chain organization, managing the shipment of products to customers. </p><p>A logistics dataset might contain <em>DistanceShipped</em> and <em>ProductWeight</em> features. If you&#8217;re shipping to consumers, it&#8217;s likely that <em>DistanceShipped</em> will contain values far larger than the <em>ProductWeight</em> feature (i.e., on different scales).</p><p>This is important because the most commonly used clustering techniques (i.e., <em>algorithms</em>) rely on <em>Euclidean distance</em> (see <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-3b8">Part 3</a></strong> for more details). And Euclidean distance works best when all the features are on the same scale.</p><p>To achieve the best tuning results (i.e., get the best clustering possible), the data needs to be transformed so all features are on the same scale.</p><div><hr></div><h3>Scaling the Dataset</h3><p>As it turns out, scaling datasets is so common in real-world DIY data science scenarios that Python&#8217;s <em>scikit-learn</em> library provides functionality for you to do this in just a few lines of code.</p><p>For example, the <em>StandardScaler</em> class implements one of the most common ways of scaling (i.e., <em>standardizing</em>) data - <strong><a href="https://en.wikipedia.org/wiki/Standard_score">the Z-score</a></strong>. Many professionals (maybe you) have learned about the Z-score in an introductory statistics class.</p><p>For the purposes of this tutorial, the exact mathematics behind the Z-score is not necessary. 
What is important is the following intuition:</p><ul><li><p>Clustering techniques work best when all of the features are on the same scale.</p></li><li><p>If you notice features that are not on the same scale in your dataset, then you should scale them.</p></li></ul><p>Now, this is super important.</p><p><strong>Nothing requires you to scale your data. Technically, the clustering will work just fine on unscaled data. However, to achieve the best results from your cluster analysis, scale the data when appropriate.</strong></p><p>The following code demonstrates how to scale your data using the <em>StandardScaler</em> class from the <em>scikit-learn</em> library:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Bn5n!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Bn5n!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 424w, https://substackcdn.com/image/fetch/$s_!Bn5n!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 848w, https://substackcdn.com/image/fetch/$s_!Bn5n!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 1272w, https://substackcdn.com/image/fetch/$s_!Bn5n!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Bn5n!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png" width="1456" height="679" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:679,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:132534,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Bn5n!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 424w, https://substackcdn.com/image/fetch/$s_!Bn5n!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 848w, https://substackcdn.com/image/fetch/$s_!Bn5n!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 1272w, https://substackcdn.com/image/fetch/$s_!Bn5n!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aada7fd-f6db-4c3f-82fe-10f61b430ad1_1665x777.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The magic of the code above happens with line 7. This is where the data are transformed (i.e., scaled using Z-scores). 
Clicking on <em>DataFrame &gt;</em> gives you a preview of the scaled data:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Mzy_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Mzy_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 424w, https://substackcdn.com/image/fetch/$s_!Mzy_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 848w, https://substackcdn.com/image/fetch/$s_!Mzy_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 1272w, https://substackcdn.com/image/fetch/$s_!Mzy_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Mzy_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png" width="1456" height="749" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:749,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:367224,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Mzy_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 424w, https://substackcdn.com/image/fetch/$s_!Mzy_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 848w, https://substackcdn.com/image/fetch/$s_!Mzy_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 1272w, https://substackcdn.com/image/fetch/$s_!Mzy_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79cbdc27-44d3-4752-95ec-f3b69523916d_1633x840.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In the preview above, you can see how the <em>Income</em> feature is now on the same scale as all the other features (e.g., an <em>Income</em> value of <em>0.31465</em> compared to a <em>RecencyInDays</em> value of <em>0.31083</em>).</p><p>One thing that I should mention if you&#8217;re feeling a bit overwhelmed by this.</p><p><strong>When in doubt, scale your data before performing your cluster analysis. In general, there&#8217;s no downside to scaling your numeric features other than making your computer do some extra work.</strong></p><p>With the data scaled, it&#8217;s now time to find the optimal number of clusters.</p><div><hr></div><h3>Tuning Hierarchical Clustering</h3><p>The goal of tuning a clustering algorithm is to determine the optimal number of clusters for a given dataset. 
Here are the critical things to keep in mind as you perform tuning:</p><ul><li><p>For any reasonably large (i.e., complex) dataset, you&#8217;re very unlikely to find the &#8220;correct&#8221; number of clusters. Every cluster analysis is wrong to a certain degree. The question is not whether the cluster analysis is &#8220;correct&#8221;, but whether the insights are useful to the business.</p></li><li><p>Think of the optimal number of clusters as being determined by the exact features and rows of data you use. If these change in any way, then you need to re-tune.</p></li></ul><p>The second bullet deserves a bit of additional context. While it&#8217;s intuitive that two different datasets are likely to have a different optimal number of clusters, the idea applies within a single cluster analysis as well.</p><p>Consider the dataset used in the tutorial series. Let&#8217;s say that you were able to get more rows of data (i.e., more customers). It&#8217;s possible that the optimal number of clusters might change as a result of the additional rows, but you wouldn&#8217;t know that unless you re-tuned.</p><p><strong>Whenever you add/remove features/rows from your datasets, repeat the tuning process and check whether the optimal number of clusters has changed. </strong></p><p>By default, the <em>AgglomerativeClustering </em>class is set to find two clusters. In practice, this default setting (i.e., <em>hyperparameter</em>) is usually too low for real-world cluster analysis. 
However, it represents the starting point for the tuning process.</p><p>At a high level, here is the process for tuning agglomerative hierarchical clustering using the <em>n_clusters</em> hyperparameter (i.e., the setting that controls the number of clusters found):</p><ol><li><p>Start with <em>n_clusters = 2</em> and evaluate the quality of the clusters.</p></li><li><p>Move to <em>n_clusters = 3</em> and evaluate the quality.</p></li><li><p>Repeat this process with higher values of <em>n_clusters</em>.</p></li><li><p>Pick the <em>n_clusters</em> value with the highest quality score.</p></li></ol><p>The above tuning process is conceptually simple, but there&#8217;s a catch.</p><p>How do you define &#8220;cluster quality&#8221;?</p><p>In this tutorial series, I will use a cluster quality calculation known as the <strong><a href="https://en.wikipedia.org/wiki/Silhouette_(clustering)">silhouette coefficient</a></strong><a href="https://en.wikipedia.org/wiki/Silhouette_(clustering)">&#8203;</a>. 
This calculation gives each data point (i.e., row in the dataset) a score ranging from -1 to 1.</p><div class="pullquote"><p>My <strong><a href="https://www.daveondata.com/cluster-analysis-with-python-info">Cluster Analysis with Python</a></strong> online course will teach you to tune the k-means and DBSCAN clustering algorithms in detail in as little as a weekend.</p></div><p>To implement the process above, a couple of helper objects are a good idea:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!NlwD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!NlwD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 424w, https://substackcdn.com/image/fetch/$s_!NlwD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 848w, https://substackcdn.com/image/fetch/$s_!NlwD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 1272w, https://substackcdn.com/image/fetch/$s_!NlwD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!NlwD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png" width="1456" height="408" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f070a1a7-376c-498d-967e-d72d449f6109_1665x467.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:408,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:54135,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!NlwD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 424w, https://substackcdn.com/image/fetch/$s_!NlwD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 848w, https://substackcdn.com/image/fetch/$s_!NlwD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 1272w, https://substackcdn.com/image/fetch/$s_!NlwD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff070a1a7-376c-498d-967e-d72d449f6109_1665x467.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;bc70207a-df37-437e-965b-c41857f3c91b&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Create a range object to hold cluster values
n_clusters = range(2, 15)

# Empty list to store average silhouette scores
silhouette_scores = []</code></pre></div><p>The <em>n_clusters</em> object contains the values 2, 3, 4, 5, &#8230; 14. These are the various numbers of clusters to be tested to find the optimal value.</p><p>The <em>silhouette_scores </em>list stores the average silhouette coefficient for each number of clusters. For example, the average silhouette coefficient score will differ between 2 and 5 clusters.</p><p>With the helper objects created, here&#8217;s the code to implement the tuning process:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bh3b!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bh3b!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 424w, https://substackcdn.com/image/fetch/$s_!bh3b!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 848w, https://substackcdn.com/image/fetch/$s_!bh3b!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 1272w, https://substackcdn.com/image/fetch/$s_!bh3b!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!bh3b!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png" width="1456" height="1050" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1050,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:240422,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bh3b!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 424w, https://substackcdn.com/image/fetch/$s_!bh3b!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 848w, https://substackcdn.com/image/fetch/$s_!bh3b!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 1272w, 
https://substackcdn.com/image/fetch/$s_!bh3b!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecc0c86e-b114-4d30-9f53-e52d14cf7908_1667x1202.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;dad617d0-d151-418c-8948-df6e92f630fa&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score

# Perform a clustering for each value of n_clusters
for num_clusters in n_clusters:
    agg_clustering = AgglomerativeClustering(n_clusters = num_clusters)  

    # Perform the clustering
    clustering = agg_clustering.fit(behavior_scaled_X)

    # Calculate the silhouette score and store it
    silhouette_scores.append(silhouette_score(behavior_scaled_X, 
                                              clustering.labels_))

# Examine the silhouette scores
print(silhouette_scores)</code></pre></div><p>The above code runs rather quickly, but know that the running time can increase dramatically if you&#8217;re evaluating many values of <em>n_clusters</em> and/or your dataset is large.</p><p><strong>The silhouette score works by looking at each data point in a clustering and then comparing two things:</strong></p><ul><li><p><strong>The average distance from the data point to all the other data points in the same cluster.</strong></p></li><li><p><strong>The average distance from the data point to all the other data points in </strong><em><strong>the next nearest cluster</strong></em><strong>.</strong></p></li></ul><p>This is why the <em>silhouette_score</em> function needs both the original dataset used for the clustering and the resulting cluster assignments (i.e., <em>labels_</em>).</p><p>Interpreting silhouette scores is quite simple - scores of 1 are ideal, and scores of -1 are terrible. For example, as shown in the output above, the average silhouette score for 2 clusters is <em>0.32147</em>, while the average score for 14 clusters is <em>0.05916.</em></p><p>Rather than looking at the raw average silhouette scores, I prefer to visualize them:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!SWoy!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!SWoy!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 424w, 
https://substackcdn.com/image/fetch/$s_!SWoy!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 848w, https://substackcdn.com/image/fetch/$s_!SWoy!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 1272w, https://substackcdn.com/image/fetch/$s_!SWoy!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!SWoy!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png" width="1456" height="485" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:485,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:82791,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!SWoy!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 424w, https://substackcdn.com/image/fetch/$s_!SWoy!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 848w, https://substackcdn.com/image/fetch/$s_!SWoy!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 1272w, https://substackcdn.com/image/fetch/$s_!SWoy!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c09701a-490e-44d7-bfcb-9420e8d43969_1665x555.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;8f5129f6-a1cf-4cdf-9124-a7e277b82df6&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from plotnine import ggplot, aes, theme_bw, geom_line, geom_point

# Build a plot of the silhouette scores
(ggplot(None, aes(x = n_clusters, y = silhouette_scores)) +
 theme_bw() + 
 geom_line() +
 geom_point())</code></pre></div><p>Clicking the <em>PngImageFile &gt;</em> gives you the visualization:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gfXH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gfXH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 424w, https://substackcdn.com/image/fetch/$s_!gfXH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 848w, https://substackcdn.com/image/fetch/$s_!gfXH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 1272w, https://substackcdn.com/image/fetch/$s_!gfXH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gfXH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png" width="1456" height="1167" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1167,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:76367,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196663467?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!gfXH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 424w, https://substackcdn.com/image/fetch/$s_!gfXH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 848w, https://substackcdn.com/image/fetch/$s_!gfXH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 1272w, https://substackcdn.com/image/fetch/$s_!gfXH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F362f5111-5f38-4960-a236-439649f7eb7f_1665x1335.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The above visualization shows that the highest average silhouette score is where <em>n_clusters = 2</em>. It is tempting to say that the optimal number of clusters is two and leave it at that.</p><p>However, there&#8217;s more going on.</p><p>Clustering quality metrics should always guide your analysis. For example, the above diagram shows:</p><ul><li><p>Consistently low values when <em>n_clusters &gt; 5</em>.</p></li><li><p>Potentially reasonable values of <em>n_clusters</em> are 2, 3, 4, and 5.</p></li></ul><p><strong>As the DIY data scientist, you should investigate reasonable values. 
It&#8217;s possible that a &#8220;suboptimal&#8221; value of </strong><em><strong>n_clusters</strong></em><strong> (e.g., 4) might produce the best business interpretation/insights.</strong></p><p>Part 7 of this tutorial series will teach you techniques for evaluating the quality of your clusters to generate business insights and determine an optimal number of clusters.</p><p>In other words, this ambiguity in the average silhouette scores is job security for humans at the time of this writing. &#129315;</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-641">Check out Part 6 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this tutorial.</p><p>My next newsletter will teach you the various ways agglomerative hierarchical clustering builds clusters (i.e., <em>linkages</em>).</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" 
data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/92bbb247-54a6-480f-a0cf-be81c4538dee_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/42665671-6d3b-4904-8b03-28518bf2f082_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/44153cef-c736-4656-ad4e-a4b2d4755f56_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7d6e8bc6-f5f8-447d-93a5-2d58f2f195ac_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">The DIY Data Scientist is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Hierarchical Clustering with Python Part 4: Python Code]]></title><description><![CDATA[Believe it or not, the code is the easy part.]]></description><link>https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-66a</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-66a</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Fri, 01 May 2026 12:51:04 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/840245cf-d709-433d-8948-799af15e3eae_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Are you new to the tutorial series? <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python">Check out Part 1 here</a></strong>.</p><p>After completing the first three tutorials, it&#8217;s time to apply what you&#8217;ve learned and perform hierarchical clustering on a dataset.</p><p>Today&#8217;s tutorial will use a dataset of customer behaviors. Specifically, grocery purchases. 
The dataset also includes customer characteristics (e.g., <em>Income</em> and <em>Age</em>).</p><p>To follow along, download the <em>CustomerBehavior.xlsx</em> file from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p><em>NOTE - While I&#8217;m using Python in Excel for this tutorial, 99+% of the code is the same whether you use Excel, VS Code, or Jupyter Notebook.</em></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>Loading the Data</h3><p>The dataset used in this tutorial series has the tabular form that you need for any analytics work you might perform:</p><ul><li><p>The rows of the dataset are the logical items to be analyzed. In this dataset, the rows are grocery store customers. However, the rows can be anything - claims, users, virtual machines, patients, employees, etc.</p></li><li><p>The columns of the dataset are the attributes of the items to be analyzed. 
Two examples from this dataset are the <em>NumStorePurchases</em> and <em>Age</em> columns.</p></li></ul><p>The following screenshot illustrates a subset of the dataset:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!u1C2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!u1C2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 424w, https://substackcdn.com/image/fetch/$s_!u1C2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 848w, https://substackcdn.com/image/fetch/$s_!u1C2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 1272w, https://substackcdn.com/image/fetch/$s_!u1C2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!u1C2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png" width="1456" height="567" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/84870561-d044-40d9-8e9c-93c39d455958_1516x590.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:567,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:114180,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!u1C2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 424w, https://substackcdn.com/image/fetch/$s_!u1C2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 848w, https://substackcdn.com/image/fetch/$s_!u1C2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 1272w, https://substackcdn.com/image/fetch/$s_!u1C2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84870561-d044-40d9-8e9c-93c39d455958_1516x590.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>When using Python in Excel, it&#8217;s best to organize your Python formulas (i.e., code) to make writing and maintaining your data analysis goodness easier.</p><p>I&#8217;m a big fan of putting all my Python formulas in a single worksheet, with the code laid out vertically, step by step. 
First, it&#8217;s good to add a dedicated worksheet:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!E2qZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!E2qZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 424w, https://substackcdn.com/image/fetch/$s_!E2qZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 848w, https://substackcdn.com/image/fetch/$s_!E2qZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 1272w, https://substackcdn.com/image/fetch/$s_!E2qZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!E2qZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png" width="350" height="48.54838709677419" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:86,&quot;width&quot;:620,&quot;resizeWidth&quot;:350,&quot;bytes&quot;:11367,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!E2qZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 424w, https://substackcdn.com/image/fetch/$s_!E2qZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 848w, https://substackcdn.com/image/fetch/$s_!E2qZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 1272w, https://substackcdn.com/image/fetch/$s_!E2qZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61616cf2-8cce-4240-a1cb-d75d478db592_620x86.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Next, I add comments to my Python worksheets for my own long-term sanity:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!xPHx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xPHx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 424w, https://substackcdn.com/image/fetch/$s_!xPHx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 848w, https://substackcdn.com/image/fetch/$s_!xPHx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 1272w, https://substackcdn.com/image/fetch/$s_!xPHx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xPHx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png" width="350" height="188.38526912181302" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cd693810-05af-49e8-9f09-923813b632f2_706x380.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:380,&quot;width&quot;:706,&quot;resizeWidth&quot;:350,&quot;bytes&quot;:28563,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xPHx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 424w, https://substackcdn.com/image/fetch/$s_!xPHx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 848w, https://substackcdn.com/image/fetch/$s_!xPHx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 1272w, https://substackcdn.com/image/fetch/$s_!xPHx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd693810-05af-49e8-9f09-923813b632f2_706x380.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>I then place my Python formulas immediately to the right of each comment. 
In this case, I would click cell <em>C2</em> to hold the Python formula that loads the <em>CustomerBehavior</em> table from the Excel workbook into Python.</p><p>The best way to write Python formulas is to use Excel&#8217;s new Python Editor, which you can access from the Ribbon:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3qIX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3qIX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 424w, https://substackcdn.com/image/fetch/$s_!3qIX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 848w, https://substackcdn.com/image/fetch/$s_!3qIX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 1272w, https://substackcdn.com/image/fetch/$s_!3qIX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3qIX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png" width="579" height="251.71859706362153" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:533,&quot;width&quot;:1226,&quot;resizeWidth&quot;:579,&quot;bytes&quot;:84885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3qIX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 424w, https://substackcdn.com/image/fetch/$s_!3qIX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 848w, https://substackcdn.com/image/fetch/$s_!3qIX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 1272w, https://substackcdn.com/image/fetch/$s_!3qIX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F704e5f3e-e76f-47ee-a7fd-12d25888145e_1226x533.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Clicking the Python Editor opens a new pane:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YYqK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YYqK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 424w, 
https://substackcdn.com/image/fetch/$s_!YYqK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 848w, https://substackcdn.com/image/fetch/$s_!YYqK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 1272w, https://substackcdn.com/image/fetch/$s_!YYqK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YYqK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png" width="323" height="454.50127226463104" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1106,&quot;width&quot;:786,&quot;resizeWidth&quot;:323,&quot;bytes&quot;:230007,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!YYqK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 424w, https://substackcdn.com/image/fetch/$s_!YYqK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 848w, https://substackcdn.com/image/fetch/$s_!YYqK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 1272w, https://substackcdn.com/image/fetch/$s_!YYqK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06b4f4e6-d970-4567-9823-fa06f58b0106_786x1106.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Notice how the button at the bottom of the Python Editor references cell <em>C2</em>? Clicking the button inserts a new Python formula in that cell where you write your code:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Jq0D!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Jq0D!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 424w, https://substackcdn.com/image/fetch/$s_!Jq0D!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 848w, https://substackcdn.com/image/fetch/$s_!Jq0D!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 1272w, https://substackcdn.com/image/fetch/$s_!Jq0D!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Jq0D!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png" width="1456" height="302" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:302,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:43259,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Jq0D!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 424w, https://substackcdn.com/image/fetch/$s_!Jq0D!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 848w, https://substackcdn.com/image/fetch/$s_!Jq0D!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 1272w, https://substackcdn.com/image/fetch/$s_!Jq0D!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9c760689-a3e1-46c6-8a49-a89c118c5bdc_1590x330.png 
1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The code in the image above loads the <em>CustomerBehavior</em> table from Excel into a Python <em>DataFrame </em>(<em>DataFrames</em> are how Python represents entire data tables).</p><p>Clicking the disk icon will save (i.e., run) the Python formula:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!i7EV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!i7EV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 424w, https://substackcdn.com/image/fetch/$s_!i7EV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 848w, https://substackcdn.com/image/fetch/$s_!i7EV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 1272w, https://substackcdn.com/image/fetch/$s_!i7EV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!i7EV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png" width="1456" height="304" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:304,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:49557,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!i7EV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 424w, https://substackcdn.com/image/fetch/$s_!i7EV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 848w, https://substackcdn.com/image/fetch/$s_!i7EV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 1272w, https://substackcdn.com/image/fetch/$s_!i7EV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb0aefbf6-7e21-4acd-bb52-f4ae810c77d9_1590x332.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Clicking on the <em>&gt;</em> in the Python Editor gives you a preview of the <em>DataFrame</em>:</p><div class="captioned-image-container"><figure><a class="image-link 
image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kyQL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kyQL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 424w, https://substackcdn.com/image/fetch/$s_!kyQL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 848w, https://substackcdn.com/image/fetch/$s_!kyQL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 1272w, https://substackcdn.com/image/fetch/$s_!kyQL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kyQL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png" width="1456" height="1271" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1271,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:169861,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kyQL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 424w, https://substackcdn.com/image/fetch/$s_!kyQL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 848w, https://substackcdn.com/image/fetch/$s_!kyQL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 1272w, https://substackcdn.com/image/fetch/$s_!kyQL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6990db1e-83ed-4730-a289-882e855b42b8_1592x1390.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Here&#8217;s the Python code for you to use in your Excel workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;4bd76246-fd9e-41be-b983-6a1d5008ebfc&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Load the CustomerBehavior Excel table
customer_behavior = xl("CustomerBehavior[#All]", headers = True)</code></pre></div><div class="pullquote"><p><strong>BTW - The code above is the only Python in this tutorial that is specific to Microsoft Excel. </strong></p><p><strong>If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><h3>Inspecting the Data</h3><p>Before applying any analytics technique, ensure the dataset meets its requirements. </p><p>For example, hierarchical clustering only works with complete datasets. If even a single cell is missing data, hierarchical clustering will throw an error.</p><p>The following code gets summary information about the <em>DataFrame</em>, which is super useful for inspecting a dataset:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nbqr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nbqr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 424w, https://substackcdn.com/image/fetch/$s_!nbqr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 848w, https://substackcdn.com/image/fetch/$s_!nbqr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 1272w, 
https://substackcdn.com/image/fetch/$s_!nbqr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!nbqr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png" width="1456" height="1088" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1088,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:230919,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!nbqr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 424w, https://substackcdn.com/image/fetch/$s_!nbqr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 848w, 
https://substackcdn.com/image/fetch/$s_!nbqr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 1272w, https://substackcdn.com/image/fetch/$s_!nbqr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67cbba1-1679-4544-a115-ebbcf78388df_1743x1302.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Here&#8217;s the code for your Excel workbook:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;ff202bbd-38e5-4944-a02a-f8046e694b5c&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Get summary info about the DataFrame
customer_behavior.info()</code></pre></div><p>We can use this dataset with hierarchical clustering for the following two reasons:</p><ul><li><p>There are 2,205 rows of data (i.e., <em>entries</em> in the output above), and no data is missing (i.e., <em>2205 non-null</em> in the output above). Translate <em>null</em> as meaning <em>missing</em>.</p></li><li><p>All of the features are numeric.</p></li></ul><p>In Python, the <em>scikit-learn</em> library is the de facto standard for machine learning, including hierarchical clustering. The <em>scikit-learn</em> library provides the <em>AgglomerativeClustering</em> class for hierarchical clustering of your datasets. </p><p>Like most clustering algorithms in <em>scikit-learn</em> (e.g., k-means and DBSCAN), the <em>AgglomerativeClustering</em> class uses Euclidean distance, which only works with numeric columns (i.e., <em>features</em>).</p><p>Since both requirements are met, this dataset is ready for hierarchical clustering.</p><div class="pullquote"><p>In real-world analytics, it&#8217;s common to need to perform cluster analysis on a dataset with both numeric and categorical features. 
</p><p>If you need to cluster categorical data (which is the norm), my <strong><a href="https://www.daveondata.com/cluster-analysis-with-python-info">Cluster Analysis with Python</a></strong> online course will teach you how in as little as a weekend.</p></div><h3>Clustering the Data</h3><p>With the data inspected and found fit for use with the <em>AgglomerativeClustering</em> class, you can perform the actual clustering with just a few lines of code:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!R5av!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!R5av!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 424w, https://substackcdn.com/image/fetch/$s_!R5av!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 848w, https://substackcdn.com/image/fetch/$s_!R5av!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 1272w, https://substackcdn.com/image/fetch/$s_!R5av!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!R5av!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png" width="728" height="209.5" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:419,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:94396,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!R5av!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 424w, https://substackcdn.com/image/fetch/$s_!R5av!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 848w, https://substackcdn.com/image/fetch/$s_!R5av!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 1272w, https://substackcdn.com/image/fetch/$s_!R5av!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81e06133-abdd-485b-9ee4-2157fc2d1e30_1945x560.png 
1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>And the code for your workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;35e23a94-c598-4c2d-b6d8-43298c5af663&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from sklearn.cluster import AgglomerativeClustering

# Let the algorithm find as many clusters as it can
agg_clustering = AgglomerativeClustering(n_clusters = None, 
                                         distance_threshold = 0)

clustering = agg_clustering.fit(customer_behavior)</code></pre></div><p>A couple of things I need to mention about the above code:</p><ul><li><p>For this tutorial, I&#8217;ll use the default way the <em>AgglomerativeClustering</em> algorithm evaluates the quality of clusters (i.e., I'll use the default <em>linkage</em>). </p></li><li><p>I'll also tell the algorithm to find as many clusters as it can.</p></li></ul><p>The above parameters are passed to the <em>AgglomerativeClustering</em> constructor, which tells the algorithm to compute the complete taxonomy (i.e., what the <em>scikit-learn</em> documentation calls the "full tree") from the data.</p><div><hr></div><h3>Visualizing the Clusters</h3><p>Unfortunately, the <em>scikit-learn</em> library doesn't provide an easy way to create a dendrogram for hierarchical clusterings. </p><p>However, the <em>scikit-learn</em> online documentation does give the following code for <strong><a href="https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html">a custom plotting function</a>:</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WOnF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WOnF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 424w, https://substackcdn.com/image/fetch/$s_!WOnF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 848w, 
https://substackcdn.com/image/fetch/$s_!WOnF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 1272w, https://substackcdn.com/image/fetch/$s_!WOnF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!WOnF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png" width="1456" height="1058" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1058,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:235722,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!WOnF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 424w, 
https://substackcdn.com/image/fetch/$s_!WOnF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 848w, https://substackcdn.com/image/fetch/$s_!WOnF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 1272w, https://substackcdn.com/image/fetch/$s_!WOnF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F977b07b2-d2ff-4497-8249-7e3d2d9b728c_1755x1275.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;37bad124-cfed-4a45-af02-22df73ba997e&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from scipy.cluster.hierarchy import dendrogram

def plot_dendrogram(model, **kwargs):
    # Create linkage matrix and then plot the dendrogram

    # create the counts of samples under each node
    counts = np.zeros(model.children_.shape[0])
    n_samples = len(model.labels_)
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx &lt; n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]
    ).astype(float)

    # Plot the corresponding dendrogram
    dendrogram(linkage_matrix, **kwargs)</code></pre></div><p>Given that we told the <em>AgglomerativeClustering</em> algorithm to find as many clusters as possible (i.e., compute the entire tree), we'll need a large canvas for the dendrogram. </p><p>The following code sets this up and calls the custom plotting function:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!eHau!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eHau!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 424w, https://substackcdn.com/image/fetch/$s_!eHau!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 848w, https://substackcdn.com/image/fetch/$s_!eHau!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 1272w, https://substackcdn.com/image/fetch/$s_!eHau!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!eHau!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png" width="1456" height="424" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:424,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:63161,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eHau!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 424w, https://substackcdn.com/image/fetch/$s_!eHau!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 848w, https://substackcdn.com/image/fetch/$s_!eHau!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 1272w, https://substackcdn.com/image/fetch/$s_!eHau!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16de02f0-edc4-4654-b6a1-3eb9536233dc_1750x510.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Clicking <em>PngImageFile &gt;</em> displays the dendrogram:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qLz4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qLz4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 424w, 
https://substackcdn.com/image/fetch/$s_!qLz4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 848w, https://substackcdn.com/image/fetch/$s_!qLz4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 1272w, https://substackcdn.com/image/fetch/$s_!qLz4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qLz4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png" width="1456" height="1054" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1054,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:152941,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/196058145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!qLz4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 424w, https://substackcdn.com/image/fetch/$s_!qLz4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 848w, https://substackcdn.com/image/fetch/$s_!qLz4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 1272w, https://substackcdn.com/image/fetch/$s_!qLz4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91668a3c-8db7-44fe-98d3-5b8cde2fbbb3_1750x1267.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The above dendrogram illustrates a critical idea in real-world cluster analysis.</p><p><strong>To get usable clusters, you must tune the algorithm. This tuning will differ for each dataset you cluster.</strong></p><p>Part 6 of this tutorial series will teach you about the various linkage options and how they can impact the number of clusters found.</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-46f">Check out Part 5 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this tutorial.</p><p>My next newsletter will show you how to tune the agglomerative hierarchical clustering algorithm for better results.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" 
data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9b81fa8e-085a-428c-a4a0-60ed3810c839_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a07fb437-0db6-475e-8b98-77e9a71994ab_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/61c52e17-cd78-4830-8132-4e91ce2c8dc8_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/31ac6368-330e-4ac9-95e6-d7f1b6f6dd4e_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">The DIY Data Scientist is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Hierarchical Clustering with Python Part 3: Calculating Distance]]></title><description><![CDATA[Honestly, you don't need to be a math genius to understand this.]]></description><link>https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-3b8</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-3b8</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Tue, 28 Apr 2026 16:36:49 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/364f00a3-4a55-42e9-909b-ad7762a3e1e0_1200x630.png" length="0" type="image/png"/><content:encoded><![CDATA[<p>Are you new to this tutorial series? <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python">Check out Part 1 here</a></strong>.</p><p>Every clustering algorithm that you commonly use in DIY data science needs some way to calculate the distance (i.e., similarity) between two rows of data.</p><p><em>Euclidean distance</em> is the default method for calculating distance in these algorithms.</p><p><strong>Don&#8217;t panic! 
Euclidean distance is a fancy name for something you learned in elementary or middle school - the Pythagorean theorem.</strong></p><p>This tutorial will demonstrate how this works using a simple step-by-step graphical approach.</p><p>Honestly, you don&#8217;t need to be a math genius to understand how this all works.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>Setting the Context</h3><p>To jog your memory, here&#8217;s the hypothetical dataset from <a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-bb8">the last tutorial</a>:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FAfG!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FAfG!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 424w, https://substackcdn.com/image/fetch/$s_!FAfG!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 848w, 
https://substackcdn.com/image/fetch/$s_!FAfG!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 1272w, https://substackcdn.com/image/fetch/$s_!FAfG!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FAfG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png" width="168" height="230.70422535211267" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/474039de-0196-4128-90af-454ee541f51f_284x390.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:390,&quot;width&quot;:284,&quot;resizeWidth&quot;:168,&quot;bytes&quot;:38758,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FAfG!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 424w, 
https://substackcdn.com/image/fetch/$s_!FAfG!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 848w, https://substackcdn.com/image/fetch/$s_!FAfG!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 1272w, https://substackcdn.com/image/fetch/$s_!FAfG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F474039de-0196-4128-90af-454ee541f51f_284x390.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>And the scatter plot of the dataset:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7yKH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7yKH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 424w, https://substackcdn.com/image/fetch/$s_!7yKH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 848w, https://substackcdn.com/image/fetch/$s_!7yKH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 1272w, 
https://substackcdn.com/image/fetch/$s_!7yKH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7yKH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png" width="600" height="445.8204334365325" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:960,&quot;width&quot;:1292,&quot;resizeWidth&quot;:600,&quot;bytes&quot;:24935,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7yKH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 424w, https://substackcdn.com/image/fetch/$s_!7yKH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 848w, 
https://substackcdn.com/image/fetch/$s_!7yKH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 1272w, https://substackcdn.com/image/fetch/$s_!7yKH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e094b5-ce35-4064-acce-0b9460dee34a_1292x960.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>As discussed in the last tutorial, the agglomerative hierarchical clustering algorithm calculates distances between all the pairs of 
data points in the dataset.</p><p>The algorithm finds that the following two data points (i.e., rows) are closest and clusters them:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!amXI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!amXI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!amXI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!amXI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!amXI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!amXI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png" width="618" height="460.2552504038772" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1238,&quot;resizeWidth&quot;:618,&quot;bytes&quot;:83525,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!amXI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!amXI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!amXI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!amXI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f21da1-edcd-472f-a55b-a38e5457f0de_1238x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The tutorial will use these two data points to explain Euclidean distance.</p><p>Intuitively, we know the distance between these two data points is represented by the line below:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!PO6b!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!PO6b!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 424w, 
https://substackcdn.com/image/fetch/$s_!PO6b!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 848w, https://substackcdn.com/image/fetch/$s_!PO6b!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 1272w, https://substackcdn.com/image/fetch/$s_!PO6b!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!PO6b!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png" width="644" height="481.345080763583" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1018,&quot;width&quot;:1362,&quot;resizeWidth&quot;:644,&quot;bytes&quot;:94925,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!PO6b!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 424w, https://substackcdn.com/image/fetch/$s_!PO6b!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 848w, https://substackcdn.com/image/fetch/$s_!PO6b!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 1272w, https://substackcdn.com/image/fetch/$s_!PO6b!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01e06eeb-c409-412f-9e5f-c0c3bde6265c_1362x1018.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>The Power of the Pythagorean Theorem</h3><p>Zooming in on these two data points, we can also imagine them as being part of a triangle with each side of the triangle labeled <em>a</em>, <em>b</em>, and <em>c</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Gkoz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Gkoz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 424w, https://substackcdn.com/image/fetch/$s_!Gkoz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 848w, https://substackcdn.com/image/fetch/$s_!Gkoz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 1272w, https://substackcdn.com/image/fetch/$s_!Gkoz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Gkoz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png" width="288" height="104.64864864864865" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/abc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:242,&quot;width&quot;:666,&quot;resizeWidth&quot;:288,&quot;bytes&quot;:14968,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Gkoz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 424w, https://substackcdn.com/image/fetch/$s_!Gkoz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 848w, https://substackcdn.com/image/fetch/$s_!Gkoz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 1272w, https://substackcdn.com/image/fetch/$s_!Gkoz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabc26014-4d7e-42b5-bb41-f42f1d5e39eb_666x242.png 
1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>As you might recall from school, the Pythagorean theorem gives the mathematical relationship between the lengths of the sides of the above triangle:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YIOf!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YIOf!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 424w, https://substackcdn.com/image/fetch/$s_!YIOf!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 848w, https://substackcdn.com/image/fetch/$s_!YIOf!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 1272w, https://substackcdn.com/image/fetch/$s_!YIOf!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YIOf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png" width="280" height="165.1086956521739" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:434,&quot;width&quot;:736,&quot;resizeWidth&quot;:280,&quot;bytes&quot;:22716,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YIOf!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 424w, https://substackcdn.com/image/fetch/$s_!YIOf!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 848w, https://substackcdn.com/image/fetch/$s_!YIOf!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 1272w, https://substackcdn.com/image/fetch/$s_!YIOf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5963a89f-bcdd-422d-ace5-c5b546376926_736x434.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>From the above images, we now have a way to calculate the distance between the two data points - it&#8217;s the length of side <em>c</em>!</p><p>Plugging the data points into the 
Pythagorean theorem will calculate the distance. To do this, we&#8217;ll need the data values:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!z3LF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!z3LF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 424w, https://substackcdn.com/image/fetch/$s_!z3LF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 848w, https://substackcdn.com/image/fetch/$s_!z3LF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 1272w, https://substackcdn.com/image/fetch/$s_!z3LF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!z3LF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png" width="393" height="141.2077922077922" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:332,&quot;width&quot;:924,&quot;resizeWidth&quot;:393,&quot;bytes&quot;:32155,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!z3LF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 424w, https://substackcdn.com/image/fetch/$s_!z3LF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 848w, https://substackcdn.com/image/fetch/$s_!z3LF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 1272w, https://substackcdn.com/image/fetch/$s_!z3LF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F689400f1-d18f-4c90-b3a6-e95690aeca6c_924x332.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The above image shows which dataset rows correspond with the data points in the scatter plot. 
The image also displays the data values for each row.</p><p>The next step is to think about these data values in terms of the x-axis and y-axis coordinates:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!q9xF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!q9xF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 424w, https://substackcdn.com/image/fetch/$s_!q9xF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 848w, https://substackcdn.com/image/fetch/$s_!q9xF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 1272w, https://substackcdn.com/image/fetch/$s_!q9xF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!q9xF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png" width="410" height="184.5887445887446" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:416,&quot;width&quot;:924,&quot;resizeWidth&quot;:410,&quot;bytes&quot;:35280,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!q9xF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 424w, https://substackcdn.com/image/fetch/$s_!q9xF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 848w, https://substackcdn.com/image/fetch/$s_!q9xF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 1272w, https://substackcdn.com/image/fetch/$s_!q9xF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40e5c792-9fba-4cb7-87c8-521ae20dd8d1_924x416.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>With the coordinates in place, the lengths of the sides of the triangle are just the differences in the x-axis and y-axis coordinates:</p><div 
class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rCsJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!rCsJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 424w, https://substackcdn.com/image/fetch/$s_!rCsJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 848w, https://substackcdn.com/image/fetch/$s_!rCsJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 1272w, https://substackcdn.com/image/fetch/$s_!rCsJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!rCsJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png" width="468" height="195.07456978967494" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:436,&quot;width&quot;:1046,&quot;resizeWidth&quot;:468,&quot;bytes&quot;:43361,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!rCsJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 424w, https://substackcdn.com/image/fetch/$s_!rCsJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 848w, https://substackcdn.com/image/fetch/$s_!rCsJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 1272w, https://substackcdn.com/image/fetch/$s_!rCsJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8724907c-ea85-4547-ad8c-01cdd02eb830_1046x436.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The rest is simple plug-and-chug math:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!f-s8!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!f-s8!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 424w, https://substackcdn.com/image/fetch/$s_!f-s8!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 848w, https://substackcdn.com/image/fetch/$s_!f-s8!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 1272w, https://substackcdn.com/image/fetch/$s_!f-s8!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!f-s8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png" width="1420" height="436" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:436,&quot;width&quot;:1420,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:49920,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!f-s8!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 424w, https://substackcdn.com/image/fetch/$s_!f-s8!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 848w, https://substackcdn.com/image/fetch/$s_!f-s8!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 1272w, https://substackcdn.com/image/fetch/$s_!f-s8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F603f1ce3-fbd1-42ef-ab3a-dc838da3d0e5_1420x436.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Sa5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2Sa5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 424w, 
https://substackcdn.com/image/fetch/$s_!2Sa5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 848w, https://substackcdn.com/image/fetch/$s_!2Sa5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 1272w, https://substackcdn.com/image/fetch/$s_!2Sa5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Sa5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png" width="319" height="158.87450980392157" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:508,&quot;width&quot;:1020,&quot;resizeWidth&quot;:319,&quot;bytes&quot;:34793,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!2Sa5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 424w, https://substackcdn.com/image/fetch/$s_!2Sa5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 848w, https://substackcdn.com/image/fetch/$s_!2Sa5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 1272w, https://substackcdn.com/image/fetch/$s_!2Sa5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56e65240-8a66-44e3-aca7-e3c3963d1a65_1020x508.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The distance between the Row 2 and Row 5 data points is 0.102.</p><p>Easy peasy!</p><div><hr></div><h3>Handling Real-World Datasets</h3><p>Now, here&#8217;s the magic of the Pythagorean theorem.</p><p><strong>The Pythagorean theorem </strong><em><strong>is</strong></em><strong> Euclidean distance in two dimensions (e.g., a triangle is a 2D shape). The same ideas behind the Pythagorean theorem scale to more dimensions.</strong></p><p>For example, all the visualizations above are 2D. They have an x-axis and a y-axis. Consider a 3D visualization. This adds a z-axis.</p><p>As shown above, the calculation uses the differences between the x and y values for the data points. 
In 3D, this would include a difference for the z values as a third term:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9K46!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9K46!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 424w, https://substackcdn.com/image/fetch/$s_!9K46!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 848w, https://substackcdn.com/image/fetch/$s_!9K46!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 1272w, https://substackcdn.com/image/fetch/$s_!9K46!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9K46!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png" width="1242" height="114" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:114,&quot;width&quot;:1242,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:31551,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195768029?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9K46!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 424w, https://substackcdn.com/image/fetch/$s_!9K46!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 848w, https://substackcdn.com/image/fetch/$s_!9K46!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 1272w, https://substackcdn.com/image/fetch/$s_!9K46!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cdb6714-d423-424d-a7b1-b5bd1ac17588_1242x114.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p><strong>The above pattern scales beyond 3D. 
In other words, if you have 15 columns in your dataset, there would be 15 different terms under the square root symbol.</strong></p><p>The good news is that Python will calculate all this for you automagically.</p><p>The important part for you is the intuition of what&#8217;s going on with the distance calculation. This is where the Pythagorean theorem helps build your intuition of how clustering determines similarity.</p><p>Now you know how agglomerative hierarchical clustering works behind the scenes.</p><p>The algorithm iteratively calculates the Euclidean distances between data points and/or clusters to build the taxonomy.</p><p>As you might imagine, when your datasets have many columns and rows, your laptop has to do a lot of work performing all the calculations.</p><p>The larger your datasets, the longer the algorithm will take to run. Worse, the running time increases extremely quickly as you add rows/columns (i.e., there&#8217;s a nonlinear relationship between dataset size and running time).</p><p><strong>This means that agglomerative hierarchical clustering doesn&#8217;t scale well to large datasets. I will cover scaling clustering to very large datasets in a future <a href="https://thediydatascientist.substack.com/p/live-crash-courses">live crash course</a>.</strong></p><p>&#128073; Ready to learn more? 
<strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-66a">Check out Part 4 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this tutorial.</p><p>My next newsletter will continue this tutorial series by teaching you the Python code to perform your own hierarchical clustering.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/98fd63cb-6ddf-4969-a6de-569d4371efb9_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/77651caf-bc43-4021-a18b-8b5b694bc8c2_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/588b239b-469c-40dd-a762-3455aa0d8fbe_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ed1b5189-12df-41d8-aadb-4ffbb5edeeae_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" 
href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">The DIY Data Scientist is a reader-supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Market Basket Analysis Part 6: Partnering with AI]]></title><description><![CDATA[This is more than a prompt. It's how you make impact at work - fast.]]></description><link>https://thediydatascientist.substack.com/p/market-basket-analysis-part-6-partnering</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/market-basket-analysis-part-6-partnering</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Mon, 27 Apr 2026 22:22:36 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/ad83472c-d471-4cd1-9bf7-3e4d9b7e3ed3_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Are you new to the tutorial series? 
<strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-1-introduction">Check out Part 1 here</a></strong>.</p><p>While it&#8217;s tempting to want to feed your data to AI and trust that the output is correct, this is a bad idea for two reasons:</p><ol><li><p>AI tools like ChatGPT, Claude, and Copilot often make mistakes in analytics. For example, they often make incorrect assumptions and never tell you about them.</p></li><li><p>You&#8217;re accountable for the quality of your analytics. No executive I&#8217;ve ever worked with will buy the argument, &#8220;Don&#8217;t blame me! It&#8217;s the AI&#8217;s fault.&#8221;</p></li></ol><p><strong>That being said, once you&#8217;ve developed fundamental skills in data analysis, you can partner with AI tools to accelerate the impact you can make at work.</strong></p><div><hr></div><h4>Introducing Your AI Partner</h4><p>Here are some screenshots of me partnering with ChatGPT to conduct a market basket analysis using a healthcare dataset:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TOOk!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TOOk!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 424w, https://substackcdn.com/image/fetch/$s_!TOOk!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 848w, 
https://substackcdn.com/image/fetch/$s_!TOOk!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!TOOk!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!TOOk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png" width="1228" height="1048" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/eec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1048,&quot;width&quot;:1228,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:167352,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195567257?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TOOk!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 424w, 
https://substackcdn.com/image/fetch/$s_!TOOk!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 848w, https://substackcdn.com/image/fetch/$s_!TOOk!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!TOOk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feec270be-69af-45b5-861e-732b6a2d1f40_1228x1048.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>NOTE -</strong> Because of the random nature of AI tools like ChatGPT, your experience might not be 100% the same as what you will see in these images. </p><p>Also, the prompt is designed to work with LLM-based chat tools like ChatGPT, Claude, Copilot, etc.</p><div><hr></div><h4>It&#8217;s Python in Excel-Friendly</h4><p>I&#8217;m a huge fan of Python in Excel because it is the easiest, fastest way for business professionals to have more impact at work using analytics.</p><p>So, the prompt is designed to help you perform market basket analyses if you&#8217;re using Python in Excel:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tDvV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tDvV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 424w, https://substackcdn.com/image/fetch/$s_!tDvV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 848w, https://substackcdn.com/image/fetch/$s_!tDvV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 1272w, 
https://substackcdn.com/image/fetch/$s_!tDvV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!tDvV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png" width="1128" height="1084" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1084,&quot;width&quot;:1128,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:245094,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195567257?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tDvV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 424w, https://substackcdn.com/image/fetch/$s_!tDvV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 848w, 
https://substackcdn.com/image/fetch/$s_!tDvV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 1272w, https://substackcdn.com/image/fetch/$s_!tDvV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84af2abd-a523-43e0-ba64-3de9e5585e60_1128x1084.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>NOTE -</strong> Because 99+% of Python code is the same whether you use Excel, VS Code, or Jupyter Notebooks, the prompt 
will help you no matter what tool you use.</p><div><hr></div><h4>Partner with AI Step-by-Step</h4><p>Because I selected &#8220;beginner&#8221; mode for the prompt, I received detailed step-by-step instructions on how to perform the market basket analysis:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MOvq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MOvq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 424w, https://substackcdn.com/image/fetch/$s_!MOvq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 848w, https://substackcdn.com/image/fetch/$s_!MOvq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 1272w, https://substackcdn.com/image/fetch/$s_!MOvq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!MOvq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png" width="1188" height="1084" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1084,&quot;width&quot;:1188,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:210330,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195567257?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MOvq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 424w, https://substackcdn.com/image/fetch/$s_!MOvq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 848w, https://substackcdn.com/image/fetch/$s_!MOvq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 1272w, https://substackcdn.com/image/fetch/$s_!MOvq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ec9f845-3d4b-498c-a670-b7583ac840f3_1188x1084.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>NOTE -</strong> While the screenshots above use a healthcare-specific dataset with ChatGPT, you can use the following prompt with ChatGPT, Claude, Copilot, etc., for any dataset from any industry.</p><p>Are you ready to get started? </p><p>Let&#8217;s start with tips for using the prompt.</p>
      <p>
          <a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-6-partnering">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Hierarchical Clustering with Python Part 2: The Intuition]]></title><description><![CDATA[Yes. You can learn cluster analysis even if you don't have a math or technical background. Honestly.]]></description><link>https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-bb8</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-bb8</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Fri, 24 Apr 2026 17:26:27 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/aa5dbc3c-cf74-4150-b9dd-f5233035f96a_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Are you new to this tutorial series? <a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python">Check out Part 1 here</a>.</p><p>You can think of a cluster as nothing more than a grouping of data points (e.g., rows of data in a table). At a high level, the goal of creating clusters is two-fold:</p><ul><li><p>Find groups of data points that are very much alike.</p></li><li><p>Ensure the groups are as different from each other as possible.</p></li></ul><p>BTW - I will use <em>groups</em> and <em>clusters</em> interchangeably in the tutorial series.</p><p>There are two strategies commonly used to perform hierarchical clustering:</p><ul><li><p><em>Divisive</em> is a top-down approach.</p></li><li><p><em>Agglomerative</em> is a bottom-up approach.</p></li></ul><p>In real-world DIY data science, <em>agglomerative hierarchical clustering</em> is overwhelmingly used. 
So, this will be the focus of the tutorial series.</p><p>This tutorial will use the same <a href="https://www.daveondata.com/data-science-consulting-info">intuitive approach I use with my clients</a> to help you understand how this bottom-up approach works - no math required.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><p></p><h4>The Starting Intuition</h4><p>Imagine you work for a company that sells physical products (e.g., retail), and you want to build a taxonomy from your product data (e.g., a product catalog).</p><p>To keep the examples easy to understand, let&#8217;s assume your product data consists only of length and width measurements.</p><p>Yep. Only two columns (i.e., <em>features</em>). 
</p><p>Don&#8217;t worry.</p><p>Everything you will learn in this tutorial series applies whether you have two features or 100.</p><p>Also, since I will use visualizations in this tutorial, let&#8217;s assume the product catalog consists of only six products.</p><p>Again, this is OK because what you will learn scales to much larger datasets (e.g., 1,000,000 rows).</p><p>Here&#8217;s the data:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nhLb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nhLb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 424w, https://substackcdn.com/image/fetch/$s_!nhLb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 848w, https://substackcdn.com/image/fetch/$s_!nhLb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 1272w, https://substackcdn.com/image/fetch/$s_!nhLb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!nhLb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png" width="176" 
height="241.69014084507043" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:390,&quot;width&quot;:284,&quot;resizeWidth&quot;:176,&quot;bytes&quot;:38758,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!nhLb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 424w, https://substackcdn.com/image/fetch/$s_!nhLb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 848w, https://substackcdn.com/image/fetch/$s_!nhLb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 1272w, https://substackcdn.com/image/fetch/$s_!nhLb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e7e6895-66ec-47cd-b15e-b4fe83af82ba_284x390.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Learning how clustering techniques (i.e., <em>algorithms</em>) work is much easier using visualizations. 
Here&#8217;s the above data visualized as a <em>scatter plot</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uKca!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uKca!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 424w, https://substackcdn.com/image/fetch/$s_!uKca!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 848w, https://substackcdn.com/image/fetch/$s_!uKca!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 1272w, https://substackcdn.com/image/fetch/$s_!uKca!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!uKca!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png" width="561" height="416.8421052631579" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:960,&quot;width&quot;:1292,&quot;resizeWidth&quot;:561,&quot;bytes&quot;:24935,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uKca!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 424w, https://substackcdn.com/image/fetch/$s_!uKca!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 848w, https://substackcdn.com/image/fetch/$s_!uKca!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 1272w, https://substackcdn.com/image/fetch/$s_!uKca!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94205c04-2a8e-4535-9cc0-30b396d3dd02_1292x960.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Agglomerative hierarchical clustering works by starting with each data point (e.g., row of data) and identifying which other data points are most similar.</p><p><strong>As with most clustering algorithms, &#8220;similar&#8221; is defined as the distance between points:</strong></p><ol><li><p><strong>Closer data points are more similar.</strong></p></li><li><p><strong>Distant data points are less similar.</strong></p></li></ol><p>Consider the point in the top right of the visualization. 
The algorithm calculates the distance (i.e., similarity) between this point and all the other points (I didn&#8217;t draw all the lines to reduce clutter):</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!EYHP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!EYHP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!EYHP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!EYHP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!EYHP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!EYHP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png" width="586" height="436.4232633279483" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1238,&quot;resizeWidth&quot;:586,&quot;bytes&quot;:110663,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!EYHP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!EYHP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!EYHP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!EYHP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd06fcd6-c6f2-489a-9d77-06e2981f9c85_1238x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Compare the above distances to the distances for the point below:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-Uye!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-Uye!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 424w, 
https://substackcdn.com/image/fetch/$s_!-Uye!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!-Uye!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!-Uye!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-Uye!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png" width="585" height="435.67851373182555" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1238,&quot;resizeWidth&quot;:585,&quot;bytes&quot;:97900,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!-Uye!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!-Uye!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!-Uye!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!-Uye!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b148506-4088-454a-a2a1-0e7eb04a9c29_1238x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And compare the following:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!85-S!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!85-S!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!85-S!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!85-S!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!85-S!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!85-S!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png" width="608" height="452.8077544426494" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1238,&quot;resizeWidth&quot;:608,&quot;bytes&quot;:101162,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!85-S!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!85-S!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!85-S!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!85-S!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8119dc22-4bde-4e8f-b8fb-3a82c59c9119_1238x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The algorithm continues in this fashion, calculating distances, to cluster the most similar data points together. The algorithm is <em>greedy</em>, meaning it chooses the best clustering it can find as soon as it can.</p><p>This is an important idea.</p><p><strong>The clusters found early on may not be the &#8220;best&#8221; later on. This greediness is necessary for the algorithm to have any chance of running fast enough to be useful.</strong></p><p>Greediness is a recurring theme in data science (e.g., decision tree-based machine learning) and works well in practice - so don&#8217;t worry! 
&#128513;</p><div><hr></div><h4>The First Cluster</h4><p>Based on all the distance calculations, the algorithm greedily finds the first cluster:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9iq-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9iq-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!9iq-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!9iq-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!9iq-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9iq-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png" width="603" height="449.0840064620355" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1238,&quot;resizeWidth&quot;:603,&quot;bytes&quot;:83525,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9iq-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!9iq-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!9iq-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!9iq-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b362c4b-dbc9-4476-9e32-e9b06f7a3006_1238x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This distance comparison process continues, and the algorithm greedily finds a second cluster:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2K0M!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2K0M!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 424w, 
https://substackcdn.com/image/fetch/$s_!2K0M!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!2K0M!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!2K0M!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2K0M!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png" width="605" height="450.5735056542811" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1238,&quot;resizeWidth&quot;:605,&quot;bytes&quot;:92250,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!2K0M!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 424w, https://substackcdn.com/image/fetch/$s_!2K0M!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 848w, https://substackcdn.com/image/fetch/$s_!2K0M!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 1272w, https://substackcdn.com/image/fetch/$s_!2K0M!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02664874-8a23-4d16-bfd7-e436bef8f6ea_1238x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>At this stage, the visualization above shows that four products belong to clusters, while two do not, based on the distances.</p><div><hr></div><h4>Clusters as Data Points</h4><p>Now, here&#8217;s the interesting thing.</p><p>Conceptually, the algorithm treats found clusters like individual data points.</p><p>For example, in the visualization above, the distance from the top-right data point will be calculated to each cluster as a whole, not to the individual data points within the clusters.</p><p>A future tutorial will teach you the strategies used to make this happen. But for now, it&#8217;s not necessary.</p><p>Next, the algorithm considers the distances between clusters and any data points that do not belong to clusters.</p><p>This is where something interesting happens.</p><p><strong>The algorithm finds that the clusters are closer to each other than the two data points that don&#8217;t belong to any clusters. 
In response, the algorithm clusters the clusters:</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!hg9m!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!hg9m!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 424w, https://substackcdn.com/image/fetch/$s_!hg9m!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 848w, https://substackcdn.com/image/fetch/$s_!hg9m!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 1272w, https://substackcdn.com/image/fetch/$s_!hg9m!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!hg9m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png" width="614" height="452.8864" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1250,&quot;resizeWidth&quot;:614,&quot;bytes&quot;:97645,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!hg9m!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 424w, https://substackcdn.com/image/fetch/$s_!hg9m!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 848w, https://substackcdn.com/image/fetch/$s_!hg9m!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 1272w, https://substackcdn.com/image/fetch/$s_!hg9m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12aa2d16-c100-4cb0-9290-c67b44909668_1250x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The visualization above demonstrates how the algorithm constructs the hierarchy of nested clusters. 
Looking at the data points provides intuition about what&#8217;s happening:</p><ul><li><p>The two unclustered data points have relatively large or small <em>Length</em> and/or <em>Width</em> values.</p></li><li><p>The cluster of clusters contains the four data points with more typical values of <em>Length</em> and <em>Width</em>.</p></li><li><p>The contained cluster on the left has two data points with relatively low <em>Length</em> values and relatively high <em>Width</em> values.</p></li><li><p>The contained cluster on the right has two data points with relatively high <em>Length</em> values and relatively low <em>Width</em> values.</p></li></ul><p>This is how the algorithm mines a taxonomy (i.e., a <em>hierarchy</em>) directly from the data.</p><p><strong>However, the algorithm isn&#8217;t complete yet because it still needs to cluster all the data. So, the algorithm compares the cluster of clusters to the two remaining data points and finds the following:</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JN2w!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JN2w!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 424w, https://substackcdn.com/image/fetch/$s_!JN2w!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 848w, 
https://substackcdn.com/image/fetch/$s_!JN2w!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 1272w, https://substackcdn.com/image/fetch/$s_!JN2w!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JN2w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png" width="623" height="450.86813186813185" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1274,&quot;resizeWidth&quot;:623,&quot;bytes&quot;:115690,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JN2w!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 424w, 
https://substackcdn.com/image/fetch/$s_!JN2w!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 848w, https://substackcdn.com/image/fetch/$s_!JN2w!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 1272w, https://substackcdn.com/image/fetch/$s_!JN2w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F22bc3f1b-1770-4a7e-888f-0c79362638dc_1274x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>At this point, there's only one last cluster that includes all the data points:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uhuu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uhuu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 424w, https://substackcdn.com/image/fetch/$s_!uhuu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 848w, https://substackcdn.com/image/fetch/$s_!uhuu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 1272w, https://substackcdn.com/image/fetch/$s_!uhuu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!uhuu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png" width="636" height="454.5674418604651" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1290,&quot;resizeWidth&quot;:636,&quot;bytes&quot;:116726,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uhuu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 424w, https://substackcdn.com/image/fetch/$s_!uhuu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 848w, https://substackcdn.com/image/fetch/$s_!uhuu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 1272w, https://substackcdn.com/image/fetch/$s_!uhuu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf3640c2-cdea-4942-82ff-587c8072845a_1290x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In the visualization above, I drew the last cluster using a rectangle simply because it fit better. &#129315;</p><div class="pullquote"><p><strong>Ready to learn more? 
</strong></p><p><strong>My <a href="https://www.daveondata.com/cluster-analysis-with-python-info">Cluster Analysis with Python</a> online course will teach you how to use k-means and DBSCAN clustering in a weekend.</strong></p></div><h4>Mapping It Back to the Data</h4><p>While going through the images above step-by-step helps to build your intuition, the final result is cluttered and hard to read (I added the row numbers from the dataset for context):</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!940n!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!940n!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 424w, https://substackcdn.com/image/fetch/$s_!940n!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 848w, https://substackcdn.com/image/fetch/$s_!940n!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 1272w, https://substackcdn.com/image/fetch/$s_!940n!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!940n!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png" width="645" height="461" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:922,&quot;width&quot;:1290,&quot;resizeWidth&quot;:645,&quot;bytes&quot;:128425,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!940n!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 424w, https://substackcdn.com/image/fetch/$s_!940n!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 848w, https://substackcdn.com/image/fetch/$s_!940n!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 1272w, https://substackcdn.com/image/fetch/$s_!940n!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe28db8b3-83bf-405b-a53d-62d42ac3bee0_1290x922.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This is where a special kind of visualization known as a <em>dendrogram</em> comes in handy:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qZBx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!qZBx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 424w, https://substackcdn.com/image/fetch/$s_!qZBx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 848w, https://substackcdn.com/image/fetch/$s_!qZBx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 1272w, https://substackcdn.com/image/fetch/$s_!qZBx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qZBx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png" width="648" height="476.36756756756756" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:816,&quot;width&quot;:1110,&quot;resizeWidth&quot;:648,&quot;bytes&quot;:30475,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/195261236?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!qZBx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 424w, https://substackcdn.com/image/fetch/$s_!qZBx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 848w, https://substackcdn.com/image/fetch/$s_!qZBx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 1272w, https://substackcdn.com/image/fetch/$s_!qZBx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31b1c8b6-2280-4c73-9cc1-bbb3f8b39ffa_1110x816.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In the above dendrogram, the numbers along the bottom correspond to the rows of the dataset (numbered from 0, because Python starts counting from 0). </p><p>When using real (i.e., much larger) datasets, these numbers typically correspond to the number of rows per cluster.</p><p>Here&#8217;s what the dendrogram shows:</p><ul><li><p>Rows 2 and 5 are a cluster.</p></li><li><p>Rows 1 and 4 are a cluster.</p></li><li><p>There&#8217;s a cluster that contains the previous two clusters.</p></li><li><p>There&#8217;s a cluster that contains all the above clusters and row 3.</p></li><li><p>There&#8217;s one last cluster that contains all the above clusters and row 0.</p></li></ul><p>The heights of the lines in the dendrogram indicate the similarity between clusters. 
For example, the line joining rows 2 and 5 is the shortest.</p><p>This aligns with what you saw in the previous visualizations: these rows are the closest.</p><p>Like most data visualizations, dendrograms break down when the scale becomes large (e.g., many nested clusters). A later tutorial will cover a strategy for optimizing the number of clusters found.</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-3b8">Check out Part 3 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this tutorial.</p><p>My next newsletter will continue this tutorial series by teaching you how distances are calculated in agglomerative hierarchical clustering.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" 
data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e315fb2e-c8cf-4c55-9a5e-c7bf5ea4ef02_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/45da79ca-3c29-4998-92a9-a5b4b76d9830_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3f46bde0-3971-485f-bd8c-34eb8916da92_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86476cde-bc8f-449f-b417-18b7b006ed62_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! 
Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Hierarchical Clustering with Python Part 1: Introduction]]></title><description><![CDATA[Don't make the same mistake I made by ignoring cluster analysis. It's wildly useful for ANY professional!]]></description><link>https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Tue, 21 Apr 2026 16:19:49 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/a899637b-e016-4a11-924c-fd07f1f6b497_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>There&#8217;s an unfortunate reality when it comes to how data science is defined in social media and most organizations:</p><ul><li><p>Data science == machine learning.</p></li><li><p>Machine learning == predictive ML models.</p></li><li><p>Predictive ML models == production deployments.</p></li></ul><p>Before I get a bunch of &#128293; comments and email replies, let me state something for the record.</p><p>When done well, the business value of production ML predictive models can be substantial.</p><p>However, these situations are typically the exception rather than the rule. 
This has been <a href="https://www.daveondata.com/data-science-consulting-info">my hands-on experience with my clients</a> and is also reported in industry data collected by TDWI, Forrester, and Gartner.</p><p><strong>For example, the percentage of ML projects that are intended for production but never make it is very high.</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><p></p><p>This is unfortunate, because what often gets lost in the discussions about data science is that there are two forms of ML commonly used in business analytics:</p><ol><li><p>Supervised Learning: The machine learns from labeled examples.</p></li><li><p>Unsupervised Learning: The machine learns from unlabeled examples.</p></li></ol><p>Supervised Learning is how you craft ML predictive models, such as decision trees and random forests. These models learn from datasets in which each row contains an outcome of interest (i.e., the <em>label</em>).</p><p>For example, you work for a governmental agency and want to craft an ML model to predict claims fraud. Every row of your historical dataset needs a label indicating whether a claim was fraudulent.</p><p>Supervised Learning gets all the love in social media, but there&#8217;s a problem.</p><p><strong>Most of the world&#8217;s data is unlabeled - including the data in your organization.</strong></p><p>So what do you do?</p><p>You use Unsupervised Learning.</p><div><hr></div><h4>Introducing Cluster Analysis</h4><p>More specifically, you use a form of Unsupervised Learning called <em>cluster analysis</em>. 
Here&#8217;s a definition from my favorite machine learning textbook:</p><div class="callout-block" data-callout="true"><p>&#8220;Cluster analysis groups data objects based only on information found in the data that describes the objects and their relationships.</p><p>The goal is that the objects within a group be similar (or related) to one another and different from (or unrelated to) the objects in other groups.</p><p>The greater the similarity (or homogeneity) within a group and the greater the difference between groups, the better or more distinct the clustering.&#8221;</p></div><p><strong>Because so much real-world data is unlabeled, cluster analysis is a widely used tool in analytics for discovering structure and generating new insights.</strong></p><p>While many forms of cluster analysis have been invented over the years, the three clustering algorithms that are most used in business analytics are:</p><ul><li><p>K-means clustering</p></li><li><p>DBSCAN clustering</p></li><li><p>Hierarchical clustering</p></li></ul><p>The third is the subject of this newsletter tutorial series.</p><div class="pullquote"><p><strong>If you&#8217;re serious about building analytics skills, my <a href="https://www.daveondata.com/cluster-analysis-with-python-info">Cluster Analysis with Python</a> online course will teach you how to use k-means and DBSCAN in a weekend.</strong></p></div><h4>Introducing Hierarchical Clustering</h4><p>Based on the above definition, hierarchical clustering mines groupings from unlabeled datasets. 
What distinguishes hierarchical clustering is how the mined groupings are defined.</p><p>The easiest way to intuit how hierarchical clustering works is to see a typical real-world example:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!emtu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!emtu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 424w, https://substackcdn.com/image/fetch/$s_!emtu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 848w, https://substackcdn.com/image/fetch/$s_!emtu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 1272w, https://substackcdn.com/image/fetch/$s_!emtu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!emtu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png" width="1456" height="873" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:873,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:76839,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194923937?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!emtu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 424w, https://substackcdn.com/image/fetch/$s_!emtu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 848w, https://substackcdn.com/image/fetch/$s_!emtu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 1272w, https://substackcdn.com/image/fetch/$s_!emtu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4f7fe629-b032-4920-a335-4828f0f7940f_2163x1297.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The image above is a typical representation of a company - an org chart. This is an example of hierarchical clustering. 
Organizations worldwide cluster employees based on management hierarchies.</p><p>BTW - In machine learning terminology, the diagram above is known as a <em>dendrogram</em> and is commonly used to visualize hierarchical clustering results.</p><p><strong>Hierarchical clustering can take an unlabeled dataset and mine a hierarchical structure (often referred to as a </strong><em><strong>taxonomy</strong></em><strong>) directly from the data.</strong></p><p>You can then analyze the hierarchical clustering to derive new insights based on your business/processes.</p><p>For example, consider the highlighted portion of the dendrogram below:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3bH4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3bH4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 424w, https://substackcdn.com/image/fetch/$s_!3bH4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 848w, https://substackcdn.com/image/fetch/$s_!3bH4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 1272w, https://substackcdn.com/image/fetch/$s_!3bH4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3bH4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png" width="1456" height="873" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/aa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:873,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:76965,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194923937?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3bH4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 424w, https://substackcdn.com/image/fetch/$s_!3bH4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 848w, https://substackcdn.com/image/fetch/$s_!3bH4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 1272w, 
https://substackcdn.com/image/fetch/$s_!3bH4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa9ab086-b284-4e13-9e5c-7f287f520c19_2163x1297.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Let&#8217;s assume you&#8217;re unfamiliar with the above organization and its people. 
You can use hierarchical clustering to derive insights like:</p><ul><li><p>&#8220;The lower left cluster comprises observations (i.e., employees) with titles indicative of supply chain management functions.&#8221;</p></li><li><p>&#8220;The lower right cluster comprises observations with titles indicative of manufacturing functions.&#8221;</p></li><li><p>&#8220;The upper cluster appears to represent the organization&#8217;s manufacturing and supply chain division.&#8221;</p></li></ul><p><strong>While a contrived example to be sure, the above illustrates that cluster analysis is a universally applicable skill:</strong></p><ul><li><p>Marketing: Segmenting customers into groups for more effective campaigns.</p></li><li><p>IT Operations: Anomaly detection in network operations and security.</p></li><li><p>Text Analytics: Grouping documents based on similar content.</p></li><li><p>Healthcare: Mining patient data for groups to improve outcomes.</p></li></ul><p>The list is endless!</p><p><strong>If you&#8217;re serious about having more impact at work using data, you want cluster analysis skills!</strong></p><p>&#128073; Want to learn more? 
<a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python-bb8">Check out Part 2 here</a>.</p><div><hr></div><p>That&#8217;s it for this tutorial.</p><p>My next newsletter will continue this tutorial series by teaching you how the hierarchical clustering technique works.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a7a95cb5-66a9-48a1-884f-5e6e8733fc84_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a8a10035-0e2f-48bd-afaf-2c67478e2ce6_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b3f11a40-0516-4e77-a810-ae2d4f95c5e3_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a69ed1ec-d54f-4c76-a2b5-b84135d573cd_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" 
href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 7: Using External Drivers]]></title><description><![CDATA[This is how you build state-of-the-art forecasts in 2026.]]></description><link>https://thediydatascientist.substack.com/p/forecasting-with-excel-part-7-using</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/forecasting-with-excel-part-7-using</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Fri, 17 Apr 2026 16:07:28 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/949b9b64-2e3c-49de-b70b-e26b856bc2a9_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>If you&#8217;re new to this tutorial series, be sure to <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">get started with Part 1 here</a></strong>.</p><p>&#8203;<a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-6-the">The last tutorial</a> sang the praises of Python in Excel by demonstrating how you can get better 
exponential triple smoothing (ETS) forecasts than can be achieved using Excel&#8217;s native <em>FORECAST.ETS()</em> function.</p><p>This week&#8217;s tutorial will take it up a notch by demonstrating how Python in Excel enables you to create state-of-the-art forecasts using external drivers. </p><p>If you would like to follow along with today&#8217;s tutorial (highly recommended), you will need to download the <em>SalesTimeSeries.xlsx</em> file from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p>You will also need, of course, <a href="https://support.microsoft.com/en-us/office/get-started-with-python-in-excel-a33fbcbe-065b-41d3-82cf-23d05397f53d">access to Python in Excel</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4><strong>Endogenous vs Exogenous Features</strong></h4><p>When it comes to using data to build better forecasts, the features you use fall into one of two categories. While there are technical definitions for the following, for this tutorial series, I will take an intuitive approach:</p><ul><li><p><em>Endogenous</em> means the feature comes from &#8220;inside&#8221; the time series target values. For example, a moving average calculated from the time series target values is endogenous.</p></li><li><p><em>Exogenous</em> means the feature comes from &#8220;outside&#8221; the time series target values. 
For example, monthly marketing spend is an exogenous feature.</p></li></ul><p>Classic time series models like those produced by Excel&#8217;s <em>FORECAST.ETS()</em> function use only endogenous features.</p><p><strong>However, modern business processes are highly complex, with many factors that influence outcomes. These complexities often mean that the best forecasting models will use both endogenous and exogenous features.</strong></p><p>Because of their power, <a href="https://www.daveondata.com/machine-learning-forecasting-consulting-info">I start by brainstorming exogenous features with my clients to build state-of-the-art forecasting</a>. This is where Python in Excel is a game-changer.</p><div><hr></div><h4><strong>Preparing the Data</strong></h4><p>To keep this tutorial from being too long, I will assume you have some familiarity with Python in Excel. If you&#8217;re new to Python in Excel, check out <a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-6-the">Part 6</a> of this tutorial series for detailed step-by-step instructions.</p><p>Using a dedicated worksheet to hold your Python in Excel formulas is a good idea. 
I typically name my worksheet simply <em>Python Code:</em></p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!VfGL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VfGL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 424w, https://substackcdn.com/image/fetch/$s_!VfGL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 848w, https://substackcdn.com/image/fetch/$s_!VfGL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 1272w, https://substackcdn.com/image/fetch/$s_!VfGL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VfGL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png" width="444" height="48" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:80,&quot;width&quot;:740,&quot;resizeWidth&quot;:444,&quot;bytes&quot;:13496,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!VfGL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 424w, https://substackcdn.com/image/fetch/$s_!VfGL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 848w, https://substackcdn.com/image/fetch/$s_!VfGL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 1272w, https://substackcdn.com/image/fetch/$s_!VfGL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F69284a42-8ee7-489e-af0d-fa5a7501a3df_740x80.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Next, I organize my Python formulas vertically in the worksheet, forming a step-by-step execution order from top to bottom. 
I also add comments in the worksheet for each step:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WH7P!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WH7P!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 424w, https://substackcdn.com/image/fetch/$s_!WH7P!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 848w, https://substackcdn.com/image/fetch/$s_!WH7P!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 1272w, https://substackcdn.com/image/fetch/$s_!WH7P!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!WH7P!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png" width="473" height="230.26369168357" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8f08a255-520a-4a49-ba21-7351af497336_986x480.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:480,&quot;width&quot;:986,&quot;resizeWidth&quot;:473,&quot;bytes&quot;:48585,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!WH7P!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 424w, https://substackcdn.com/image/fetch/$s_!WH7P!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 848w, https://substackcdn.com/image/fetch/$s_!WH7P!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 1272w, https://substackcdn.com/image/fetch/$s_!WH7P!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f08a255-520a-4a49-ba21-7351af497336_986x480.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The actual Python formulas I place in column <em>C</em> are aligned to the comments. 
For example, here's the formula for cell <em>C3</em> to load the <em>SalesAndMarketing</em> table into Python:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!g3qJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!g3qJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 424w, https://substackcdn.com/image/fetch/$s_!g3qJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 848w, https://substackcdn.com/image/fetch/$s_!g3qJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 1272w, https://substackcdn.com/image/fetch/$s_!g3qJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!g3qJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png" width="1427" height="337" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4032f206-22cc-4ff7-8374-e86044909489_1427x337.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:337,&quot;width&quot;:1427,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:39119,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!g3qJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 424w, https://substackcdn.com/image/fetch/$s_!g3qJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 848w, https://substackcdn.com/image/fetch/$s_!g3qJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 1272w, https://substackcdn.com/image/fetch/$s_!g3qJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4032f206-22cc-4ff7-8374-e86044909489_1427x337.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>And here&#8217;s the Python code for your workbook:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;5d47d834-3273-4b3a-a38b-f7dee218f0de&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Load the Sales table as a DataFrame
sales = xl("SalesAndMarketing[#All]", headers = True)</code></pre></div><div class="pullquote"><p><strong> If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><p>The <em>SalesAndMarketing</em> table contains an exogenous feature for the monthly marketing spend:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!I0Xn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!I0Xn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 424w, https://substackcdn.com/image/fetch/$s_!I0Xn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 848w, https://substackcdn.com/image/fetch/$s_!I0Xn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 1272w, https://substackcdn.com/image/fetch/$s_!I0Xn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!I0Xn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png" width="391" height="237.09574468085106" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:513,&quot;width&quot;:846,&quot;resizeWidth&quot;:391,&quot;bytes&quot;:140454,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!I0Xn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 424w, https://substackcdn.com/image/fetch/$s_!I0Xn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 848w, https://substackcdn.com/image/fetch/$s_!I0Xn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 1272w, https://substackcdn.com/image/fetch/$s_!I0Xn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b7eab60-8f53-4cf9-804e-b23c6d338ca7_846x513.png 
1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p><strong>As covered in Part 6, Python in Excel includes the mighty </strong><em><strong>statsmodels</strong></em><strong> library, which provides a number of powerful forecasting models. These models prefer </strong><em><strong>DataFrames</strong></em><strong> where the index is the timestamp:</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kfpU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kfpU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 424w, https://substackcdn.com/image/fetch/$s_!kfpU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 848w, https://substackcdn.com/image/fetch/$s_!kfpU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 1272w, https://substackcdn.com/image/fetch/$s_!kfpU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kfpU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png" width="1425" 
height="560" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:560,&quot;width&quot;:1425,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:81824,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kfpU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 424w, https://substackcdn.com/image/fetch/$s_!kfpU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 848w, https://substackcdn.com/image/fetch/$s_!kfpU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 1272w, https://substackcdn.com/image/fetch/$s_!kfpU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84fe5eb2-6494-4d0a-aa58-77b0e7dbfe2e_1425x560.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!EZzc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!EZzc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 424w, 
https://substackcdn.com/image/fetch/$s_!EZzc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 848w, https://substackcdn.com/image/fetch/$s_!EZzc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 1272w, https://substackcdn.com/image/fetch/$s_!EZzc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!EZzc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png" width="356" height="267.5909694555113" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:566,&quot;width&quot;:753,&quot;resizeWidth&quot;:356,&quot;bytes&quot;:134042,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!EZzc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 424w, https://substackcdn.com/image/fetch/$s_!EZzc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 848w, https://substackcdn.com/image/fetch/$s_!EZzc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 1272w, https://substackcdn.com/image/fetch/$s_!EZzc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F76f366db-e6ed-4023-bc23-42b7ba462cf0_753x566.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And the Python code for your workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;14591075-5b57-4d52-9aec-6c7afb280738&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Change the YearMonth column
sales['YearMonth'] = pd.to_datetime(sales['YearMonth'])

# Make YearMonth the index
sales = sales.set_index('YearMonth')
sales = sales.asfreq('MS')
sales = sales.sort_index()</code></pre></div><p>In real-world projects, there is often much more data preparation than you see here (e.g., getting access to the data in the first place).</p><div><hr></div><h4>Forecast KPIs</h4><p>As covered in <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-3a8">Part 3</a>, this tutorial series uses bias and mean absolute error (MAE) to measure forecast accuracy.</p><p>The following Python formula creates a utility function to calculate and display these key performance indicators (KPIs):</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!O97F!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!O97F!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 424w, https://substackcdn.com/image/fetch/$s_!O97F!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 848w, https://substackcdn.com/image/fetch/$s_!O97F!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 1272w, https://substackcdn.com/image/fetch/$s_!O97F!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!O97F!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png" width="1427" height="982" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:982,&quot;width&quot;:1427,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:167526,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!O97F!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 424w, https://substackcdn.com/image/fetch/$s_!O97F!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 848w, https://substackcdn.com/image/fetch/$s_!O97F!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 1272w, https://substackcdn.com/image/fetch/$s_!O97F!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c230cf-5ba2-4eee-85c0-bc21cf9f3b79_1427x982.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And the Python code:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;e347e7c1-cb01-41d9-9bb4-b5198855fe58&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Utility function for our model KPIs
def forecast_kpis(forecasts, targets):
    # Prints two accuracy KPIs for a set of forecasts: bias and mean
    # absolute error (MAE), each as a raw value and as a percentage of
    # the summed targets. Nothing is returned.
    #
    # forecasts: predicted values
    # targets:   actual values for the same periods
    # Both must support element-wise subtraction and NumPy reductions;
    # presumably equal-length pandas Series or NumPy arrays - confirm.
    #
    # NOTE(review): np is not imported in this snippet - presumably it is
    # preloaded by the Python in Excel environment; verify if run elsewhere.

    # Bias calculations
    # Errors are forecast minus target, so a positive bias means over-forecasting
    errors = forecasts - targets 
    
    # Shared denominator for both percentage KPIs; there is no guard for
    # targets that sum to zero
    targets_sum = np.sum(targets)
    bias = np.mean(errors)
    bias_pct = np.sum(errors) / targets_sum 

    # MAE calculations
    # Absolute values stop positive and negative errors from cancelling out
    mae = np.mean(np.abs(errors))
    mae_pct = np.sum(np.abs(errors)) / targets_sum

    # Display
    # Raw values print with 4 decimals, percentages with 2
    print(f'Bias: {bias:0.4f}, {(bias_pct * 100):0.2f}%')
    print(f'MAE: {mae:0.4f}, {(mae_pct * 100):0.2f}%') </code></pre></div><div><hr></div><h4>Splitting the Data</h4><p>As covered in <a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-4-is">Part 4</a> of this tutorial series, the best forecasters test their model predictions for accuracy by using a test holdout set with the following characteristics:</p><ul><li><p>The latest of the time series data.</p></li><li><p>Enough data to cover at least one full cycle of any seasonality.</p></li></ul><p>In the case of the time series used in this tutorial, the last full year of data is used as the test holdout set:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FVI3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FVI3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 424w, https://substackcdn.com/image/fetch/$s_!FVI3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 848w, https://substackcdn.com/image/fetch/$s_!FVI3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 1272w, https://substackcdn.com/image/fetch/$s_!FVI3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FVI3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png" width="1427" height="472" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:472,&quot;width&quot;:1427,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:69612,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FVI3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 424w, https://substackcdn.com/image/fetch/$s_!FVI3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 848w, https://substackcdn.com/image/fetch/$s_!FVI3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 1272w, 
https://substackcdn.com/image/fetch/$s_!FVI3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa61f8f7c-2ffb-4eff-98d8-df4422a4f179_1427x472.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Python code:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;fbadedf8-4d49-4224-94d9-e936f37753e5&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Use the first 4 years of endogenous data to train the model
# NOTE: .loc label slices include the end label, so any row labeled
# 2023-12-01 is part of the training data
sales_train_y = sales.loc[:'2023-12-01', 'Sales']

# Use the last year of endogenous data to test the model
# (every row labeled 2024-01-01 or later)
sales_test_y = sales.loc['2024-01-01':, 'Sales']</code></pre></div><p><strong>As detailed in the code comments above, the </strong><em><strong>Sales</strong></em><strong> column is the target (i.e., </strong><em><strong>endogenous</strong></em><strong>) data for this time series.</strong></p><p><strong>The dataset also contains the </strong><em><strong>MarketingSpend</strong></em><strong> feature, which is </strong><em><strong>exogenous</strong></em><strong> data that must also be split using the same date-based logic:</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!czxS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!czxS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 424w, https://substackcdn.com/image/fetch/$s_!czxS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 848w, https://substackcdn.com/image/fetch/$s_!czxS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 1272w, https://substackcdn.com/image/fetch/$s_!czxS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!czxS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png" width="1425" height="470" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:470,&quot;width&quot;:1425,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:74406,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!czxS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 424w, https://substackcdn.com/image/fetch/$s_!czxS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 848w, https://substackcdn.com/image/fetch/$s_!czxS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 1272w, https://substackcdn.com/image/fetch/$s_!czxS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a8e09db-57db-4128-82a9-669d61118b42_1425x470.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;f62a61f2-7981-439d-9054-41d21c80e0cc&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Get the first 4 years of exogenous data
# Same 2023-12-01 cutoff as the Sales split, so the features and the
# targets cover identical periods
sales_train_X = sales.loc[:'2023-12-01', 'MarketingSpend']

# Use the last year of exogenous data to test the model
# (every row labeled 2024-01-01 or later)
sales_test_X = sales.loc['2024-01-01':, 'MarketingSpend']</code></pre></div><p>With the data split, it's time to discuss the details of using exogenous data for forecasting.</p><div><hr></div><h4><strong>How to Use Exogenous Data</strong></h4><p>When used properly, exogenous data is a powerful way to increase the accuracy of your forecasts. Think of exogenous data as a way to add additional information that isn&#8217;t available from the raw time series targets.</p><p>For example, consider the <em>MarketingSpend</em> feature of the tutorial&#8217;s dataset. When done correctly, the budget spend on marketing activities should have a positive impact on <em>Sales</em>.</p><p>State-of-the-art forecasting models are able to leverage the information contained in exogenous features to create more accurate forecasts than are possible from only using endogenous data.</p><p>So, the best forecasters are always on the lookout for:</p><ul><li><p>Exogenous features that can improve their forecasting models.</p></li><li><p>Forecasting techniques that can make maximum use of exogenous features (e.g., machine learning).</p></li></ul><p>However, there&#8217;s one big requirement for using exogenous data.</p><p><strong>Any exogenous feature you use in your forecasting models must have future data available before you can make a forecast.</strong></p><p>This is a bit abstract, so let me cement it with an example. Let&#8217;s say you use the <em>MarketingSpend</em> exogenous feature to build a forecasting model using all the data available (i.e., through the end of 2024).</p><p>In order to make forecasts for 2025, you would need <em>MarketingSpend</em> values for each month of 2025 that you are trying to forecast. 
In this example, you could get the 2025 marketing budget numbers from the finance team to use in your forecasts.</p><p>Here are some additional examples of exogenous data that are often quite valuable and have future data available:</p><ul><li><p>Capital investments</p></li><li><p>Product launches</p></li><li><p>Store openings</p></li><li><p>Weather</p></li></ul><p><strong>NOTE -</strong> Using weather in your forecasting models typically only works for relatively short forecasting horizons (e.g., up to a week).</p><div><hr></div><h4><strong>Training a SARIMAX Model</strong></h4><p>Unfortunately, the options for building forecasting models that use exogenous features are limited using native Excel features (e.g., multiple linear regression using the Analysis ToolPak).</p><p>Luckily, the mighty <em>statsmodels</em> package provides the Seasonal AutoRegressive Integrated Moving Average with eXogenous regressors (SARIMAX) model that you can use with exogenous data.</p><p><strong>Due to space considerations, I can&#8217;t cover how SARIMAX works behind the scenes. 
However, the SARIMAX model is an extension of the popular ARIMA model, and an upcoming tutorial series will cover ARIMA using Python in Excel.</strong></p><p>For the purposes of this tutorial, here&#8217;s a SARIMAX model that works well with the <em>Sales</em> and <em>MarketingSpend</em> features:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ocBL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ocBL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 424w, https://substackcdn.com/image/fetch/$s_!ocBL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 848w, https://substackcdn.com/image/fetch/$s_!ocBL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 1272w, https://substackcdn.com/image/fetch/$s_!ocBL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ocBL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png" width="1432" height="885" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:885,&quot;width&quot;:1432,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:141294,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ocBL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 424w, https://substackcdn.com/image/fetch/$s_!ocBL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 848w, https://substackcdn.com/image/fetch/$s_!ocBL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 1272w, https://substackcdn.com/image/fetch/$s_!ocBL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F045c869c-79b2-4a52-9cbc-c5148b431d00_1432x885.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;84712fac-b3c1-471f-ae17-4fe3d8cbd123&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from statsmodels.tsa.statespace.sarimax import SARIMAX

# Specify a SARIMAX model using exogenous features
# order is the non-seasonal (p, d, q): 2 AR terms, 1 difference, 2 MA terms
# seasonal_order is (P, D, Q, s): one seasonal difference, no seasonal
# AR or MA terms, and a season length of 12 periods
sales_model = SARIMAX(sales_train_y, exog = sales_train_X,
                      order = (2, 1, 2), 
                      seasonal_order = (0, 1, 0, 12))

# Train the model on the data
# NOTE(review): method = 'bfgs' presumably selects the BFGS optimizer in
# place of the statsmodels default - confirm against the fit documentation
sales_model_results = sales_model.fit(method = 'bfgs')</code></pre></div><p>And the Python formula to evaluate the SARIMAX model's predictions against the test holdout:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!oUO_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!oUO_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 424w, https://substackcdn.com/image/fetch/$s_!oUO_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 848w, https://substackcdn.com/image/fetch/$s_!oUO_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 1272w, https://substackcdn.com/image/fetch/$s_!oUO_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!oUO_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png" width="1435" height="607" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:607,&quot;width&quot;:1435,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:92789,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194457175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!oUO_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 424w, https://substackcdn.com/image/fetch/$s_!oUO_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 848w, https://substackcdn.com/image/fetch/$s_!oUO_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 1272w, https://substackcdn.com/image/fetch/$s_!oUO_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff275764b-0f23-4e8b-ac3e-dbd66125c3ab_1435x607.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;f7d6dd23-06c2-4af4-85d0-2010e80291d1&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Make 12 months of forecasts using exogenous features
# exog carries the MarketingSpend values for the periods being forecast;
# presumably it needs one row per step (12 here) - confirm against the
# statsmodels documentation
forecasts = sales_model_results.forecast(steps = 12, 
                                         exog = sales_test_X)

# What is the model's forecast accuracy?
# Compares the forecasts to the held-out Sales values
forecast_kpis(forecasts, sales_test_y)</code></pre></div><p>The first thing to note about the code above is that to make any forecasts, you must pass in the future exogenous data using the <em>exog</em> parameter.</p><p>The second thing to note about the code above is that the SARIMAX model&#8217;s MAE is better than the ETS model's from Part 6 of this tutorial series.</p><p>I know that this tutorial has been a whirlwind, but I wanted to illustrate that if you&#8217;re serious about better forecasts, you can&#8217;t go wrong building skills with Python in Excel and <em>statsmodels</em>.</p><p>&#128073; <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-cd5">Are you ready to partner with AI to build your Excel forecasts?</a></strong></p><div><hr></div><p>That&#8217;s it for this tutorial series.</p><p>My next newsletter will kick off <a href="https://thediydatascientist.substack.com/p/hierarchical-clustering-with-python">a new tutorial series</a> that teaches real-world machine learning skills with hierarchical clustering.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" 
data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/62e58c66-ad56-4571-8993-70fcc911204b_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5903d45b-05ef-4423-9810-68b1b2cd22c5_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b325e7e9-c8a0-4761-9fd9-b8fa1e55de75_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ee63b547-146c-48e0-badd-2883f2082710_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! 
Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 6: 
The Power of Python]]></title><description><![CDATA[Learn the Microsoft Excel forecasting skills that should be (but are not) taught to professionals]]></description><link>https://thediydatascientist.substack.com/p/forecasting-with-excel-part-6-the</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/forecasting-with-excel-part-6-the</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Tue, 14 Apr 2026 18:29:14 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/0f3f69d3-cdaf-479b-9395-876cefc12d37_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>If you&#8217;re new to this tutorial series, be sure to <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">get started with Part 1 here</a></strong>.</p><p>If you&#8217;re serious about building better forecasts using Microsoft Excel, then you should be excited about the biggest upgrade in Excel&#8217;s history - <strong>the ability to run formulas written in Python.</strong></p><p>Python in Excel is a game-changer for millions of professionals wanting to unleash the power of their data.</p><p>Not only does access to Python provide you with powerful ways to build better forecasts, but you also get access to a host of advanced analytics, like:</p><ul><li><p>Machine learning predictive models</p></li><li><p>Mining free-form text</p></li><li><p>Cluster analysis</p></li></ul><p>This week&#8217;s tutorial shows you just a small sample of what&#8217;s possible for your forecasting when you embrace Python in Excel.</p><p>If you would like to follow along with today&#8217;s tutorial (highly recommended), you will need to download the <em>SalesTimeSeries.xlsx</em> file from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p>You will also need, of course, <a 
href="https://support.microsoft.com/en-us/office/get-started-with-python-in-excel-a33fbcbe-065b-41d3-82cf-23d05397f53d">access to Python in Excel</a>.</p><p><strong>NOTE -</strong> Even though I&#8217;m using Python in Excel for this tutorial, 99+% of the code is exactly the same whether you use Excel, Jupyter Notebook, or VS Code.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>Preparing the Data</h4><p>When using Python in Excel, it&#8217;s best to organize your Python formulas (i.e., code) to make writing and maintaining your data goodness easier.</p><p>I&#8217;m a big fan of putting all my Python formulas in a single worksheet, with the code laid out step-by-step vertically. 
The first step is to add a worksheet:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!PBhU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!PBhU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 424w, https://substackcdn.com/image/fetch/$s_!PBhU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 848w, https://substackcdn.com/image/fetch/$s_!PBhU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 1272w, https://substackcdn.com/image/fetch/$s_!PBhU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!PBhU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png" width="414" height="44.75675675675676" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:80,&quot;width&quot;:740,&quot;resizeWidth&quot;:414,&quot;bytes&quot;:13496,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!PBhU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 424w, https://substackcdn.com/image/fetch/$s_!PBhU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 848w, https://substackcdn.com/image/fetch/$s_!PBhU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 1272w, https://substackcdn.com/image/fetch/$s_!PBhU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e884531-1c47-41d2-84dc-7dbec5021576_740x80.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Next, I add comments to my Python worksheets for my own long-term sanity:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!awj_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!awj_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 424w, https://substackcdn.com/image/fetch/$s_!awj_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 848w, https://substackcdn.com/image/fetch/$s_!awj_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 1272w, https://substackcdn.com/image/fetch/$s_!awj_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!awj_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png" width="432" height="83.38604651162791" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c94e5447-3ccf-469c-b305-07d460e54693_860x166.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:166,&quot;width&quot;:860,&quot;resizeWidth&quot;:432,&quot;bytes&quot;:22263,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!awj_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 424w, https://substackcdn.com/image/fetch/$s_!awj_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 848w, https://substackcdn.com/image/fetch/$s_!awj_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 1272w, https://substackcdn.com/image/fetch/$s_!awj_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc94e5447-3ccf-469c-b305-07d460e54693_860x166.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>I then place my Python formulas immediately to the right of each comment. 
In this case, I would click cell C2 to hold the Python formula that loads the <em>Sales</em> table.</p><p>The best way to write Python formulas is to use the new Python Editor, which you can access from the Ribbon:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZaQg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZaQg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 424w, https://substackcdn.com/image/fetch/$s_!ZaQg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 848w, https://substackcdn.com/image/fetch/$s_!ZaQg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 1272w, https://substackcdn.com/image/fetch/$s_!ZaQg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZaQg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png" width="571" height="248.24061990212073" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:533,&quot;width&quot;:1226,&quot;resizeWidth&quot;:571,&quot;bytes&quot;:84885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZaQg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 424w, https://substackcdn.com/image/fetch/$s_!ZaQg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 848w, https://substackcdn.com/image/fetch/$s_!ZaQg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 1272w, https://substackcdn.com/image/fetch/$s_!ZaQg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5df4549a-6f82-40a2-9b31-1316cdbbd4ac_1226x533.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Clicking the Python Editor opens a new pane:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9Ur-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9Ur-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 424w, 
https://substackcdn.com/image/fetch/$s_!9Ur-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 848w, https://substackcdn.com/image/fetch/$s_!9Ur-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 1272w, https://substackcdn.com/image/fetch/$s_!9Ur-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9Ur-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png" width="271" height="381.33078880407123" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1106,&quot;width&quot;:786,&quot;resizeWidth&quot;:271,&quot;bytes&quot;:230007,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!9Ur-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 424w, https://substackcdn.com/image/fetch/$s_!9Ur-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 848w, https://substackcdn.com/image/fetch/$s_!9Ur-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 1272w, https://substackcdn.com/image/fetch/$s_!9Ur-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa5710273-3558-42b9-ac6a-3a4053e2c4ca_786x1106.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Notice how the button at the bottom of the Python Editor references cell <em>C2</em>? Clicking the button inserts a new Python formula in that cell where you write your code:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WkAP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WkAP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 424w, https://substackcdn.com/image/fetch/$s_!WkAP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 848w, https://substackcdn.com/image/fetch/$s_!WkAP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 1272w, https://substackcdn.com/image/fetch/$s_!WkAP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!WkAP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png" width="728" height="176.0884588804423" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:350,&quot;width&quot;:1447,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:34478,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!WkAP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 424w, https://substackcdn.com/image/fetch/$s_!WkAP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 848w, https://substackcdn.com/image/fetch/$s_!WkAP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 1272w, 
https://substackcdn.com/image/fetch/$s_!WkAP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea03e98-914a-4636-b32e-c3ab0b4b460c_1447x350.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The code in the image above loads the <em>Sales</em> table from Excel into a Python <em>DataFrame </em>(i.e., how Python represents entire data tables).</p><p>Clicking the disk icon will save (i.e., run) the Python formula:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wST8!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wST8!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 424w, https://substackcdn.com/image/fetch/$s_!wST8!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 848w, https://substackcdn.com/image/fetch/$s_!wST8!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 1272w, https://substackcdn.com/image/fetch/$s_!wST8!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!wST8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png" width="728" height="176.72827586206895" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:352,&quot;width&quot;:1450,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:36446,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!wST8!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 424w, https://substackcdn.com/image/fetch/$s_!wST8!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 848w, https://substackcdn.com/image/fetch/$s_!wST8!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 1272w, 
https://substackcdn.com/image/fetch/$s_!wST8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb688b118-d325-4c8d-a31f-a75fc2023383_1450x352.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>If you enter the code correctly, you should get a new <em>DataFrame</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!g3RE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!g3RE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 424w, https://substackcdn.com/image/fetch/$s_!g3RE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 848w, https://substackcdn.com/image/fetch/$s_!g3RE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 1272w, https://substackcdn.com/image/fetch/$s_!g3RE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!g3RE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png" width="1450" height="332" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:332,&quot;width&quot;:1450,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:35395,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!g3RE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 424w, https://substackcdn.com/image/fetch/$s_!g3RE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 848w, https://substackcdn.com/image/fetch/$s_!g3RE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 1272w, https://substackcdn.com/image/fetch/$s_!g3RE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f90ea8f-7cdd-47ce-862a-442f10edd8bd_1450x332.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Clicking on the <em>&gt;</em> in the Python Editor gives you a preview of the <em>DataFrame</em>:</p><div class="captioned-image-container"><figure><a class="image-link 
image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j0pt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!j0pt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 424w, https://substackcdn.com/image/fetch/$s_!j0pt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 848w, https://substackcdn.com/image/fetch/$s_!j0pt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 1272w, https://substackcdn.com/image/fetch/$s_!j0pt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!j0pt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png" width="1452" height="785" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:785,&quot;width&quot;:1452,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:75205,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!j0pt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 424w, https://substackcdn.com/image/fetch/$s_!j0pt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 848w, https://substackcdn.com/image/fetch/$s_!j0pt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 1272w, https://substackcdn.com/image/fetch/$s_!j0pt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd600ec0-df47-4a35-945a-2cfc0793d847_1452x785.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The output above shows 60 rows of data (i.e., 5 years of monthly sales) and two columns: <em>YearMonth</em> and <em>Sales</em>.</p><p>Here&#8217;s the Python code for you to use in your Excel workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;e093dc22-79c1-45b0-8599-7b678f50f45b&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Load the Sales table as a DataFrame
sales = xl("Sales[#All]", headers = True)</code></pre></div><div class="pullquote"><p><strong>BTW - The code above is the only Python in this tutorial that is specific to Microsoft Excel. </strong></p><p><strong>If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><p><strong>When forecasting with Python in Excel, you usually need to format your </strong><em><strong>DataFrames</strong></em><strong> in a particular way:</strong></p><ul><li><p><strong>Your timestamps have to be recognized by Python.</strong></p></li><li><p><strong>The </strong><em><strong>DataFrame</strong></em><strong> is indexed by the timestamp.</strong></p></li></ul><p>The <em>YearMonth</em> column isn&#8217;t recognized by Python as a timestamp because Excel didn&#8217;t recognize it as a timestamp. However, this is easily fixed by adding another Python formula to the worksheet:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!EiGt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!EiGt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 424w, https://substackcdn.com/image/fetch/$s_!EiGt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 848w, 
https://substackcdn.com/image/fetch/$s_!EiGt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 1272w, https://substackcdn.com/image/fetch/$s_!EiGt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!EiGt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png" width="520" height="119.16666666666667" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:220,&quot;width&quot;:960,&quot;resizeWidth&quot;:520,&quot;bytes&quot;:26820,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!EiGt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 424w, 
https://substackcdn.com/image/fetch/$s_!EiGt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 848w, https://substackcdn.com/image/fetch/$s_!EiGt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 1272w, https://substackcdn.com/image/fetch/$s_!EiGt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f2b173b-5a14-43eb-a0f2-d25e95f7821e_960x220.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CNy1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CNy1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 424w, https://substackcdn.com/image/fetch/$s_!CNy1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 848w, https://substackcdn.com/image/fetch/$s_!CNy1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 1272w, 
https://substackcdn.com/image/fetch/$s_!CNy1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!CNy1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png" width="1450" height="557" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:557,&quot;width&quot;:1450,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:82129,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!CNy1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 424w, https://substackcdn.com/image/fetch/$s_!CNy1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 848w, 
https://substackcdn.com/image/fetch/$s_!CNy1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 1272w, https://substackcdn.com/image/fetch/$s_!CNy1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4e0c818-4bff-4c2d-898c-64cceca9cb8f_1450x557.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Conceptually, the Python formula in cell <em>C3</em> sets up the <em>DataFrame</em> to have an index based on the <em>YearMonth</em> 
column, where each value corresponds to the first day of each month.</p><p>Here&#8217;s the Python code to use in your own workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;1ad8da66-3360-4fd3-ad5f-7a66b7c50205&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Convert the YearMonth column to datetime values
sales['YearMonth'] = pd.to_datetime(sales['YearMonth'])

# Make YearMonth the index
sales = sales.set_index('YearMonth')
# 'MS' is the pandas month-start frequency: one row per month, dated the 1st
sales = sales.asfreq('MS')
sales = sales.sort_index()</code></pre></div><p>You can think of a <em>DataFrame</em> index as providing a unique identifier for each row in the table. You can see this in action by hovering over the <em>[PY]</em> in cell <em>C3</em> and clicking on the card:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!brs9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!brs9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 424w, https://substackcdn.com/image/fetch/$s_!brs9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 848w, https://substackcdn.com/image/fetch/$s_!brs9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 1272w, https://substackcdn.com/image/fetch/$s_!brs9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!brs9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png" width="627" height="164.02767857142857" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:293,&quot;width&quot;:1120,&quot;resizeWidth&quot;:627,&quot;bytes&quot;:51821,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!brs9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 424w, https://substackcdn.com/image/fetch/$s_!brs9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 848w, https://substackcdn.com/image/fetch/$s_!brs9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 1272w, https://substackcdn.com/image/fetch/$s_!brs9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F77134d8b-ce06-4364-bced-088eeb458d3b_1120x293.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!jLQZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jLQZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 424w, https://substackcdn.com/image/fetch/$s_!jLQZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 848w, https://substackcdn.com/image/fetch/$s_!jLQZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 1272w, https://substackcdn.com/image/fetch/$s_!jLQZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jLQZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png" width="309" height="463.27510917030565" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1030,&quot;width&quot;:687,&quot;resizeWidth&quot;:309,&quot;bytes&quot;:60774,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jLQZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 424w, https://substackcdn.com/image/fetch/$s_!jLQZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 848w, https://substackcdn.com/image/fetch/$s_!jLQZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 1272w, https://substackcdn.com/image/fetch/$s_!jLQZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd041f35-e010-44a4-84b8-80896b14c8c7_687x1030.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Excellent!</p><p>With the data now prepared for time series forecasting, here&#8217;s the first thing you&#8217;ll want to do - check to see if there&#8217;s a trend and/or seasonality in the data.</p><div><hr></div><h4>Detecting Trend and Seasonality</h4><p>As you learned in <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-f38">Part 2</a> of this tutorial series, detecting trend and seasonality is a critical aspect of building better forecasts. You also learned some simple techniques for utilizing native Excel features to accomplish this.</p><p>Python in Excel offers you something so much more powerful: <em>Season-Trend decomposition using Loess (STL)</em>.</p><p><strong>Python in Excel comes with the mighty </strong><em><strong>statsmodels</strong></em><strong> library. 
Think of </strong><em><strong>statsmodels</strong></em><strong> as a collection of functions that you can use to access powerful tools to build better forecasts. In most cases, these tools let you build better forecasts than you can with native Excel features.</strong></p><p><a href="https://www.daveondata.com/machine-learning-forecasting-consulting-info">This is one of many reasons why many of my clients are embracing Python in Excel.</a></p><p>Accessing the power of STL is simple using Python in Excel:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3X_d!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3X_d!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 424w, https://substackcdn.com/image/fetch/$s_!3X_d!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 848w, https://substackcdn.com/image/fetch/$s_!3X_d!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 1272w, https://substackcdn.com/image/fetch/$s_!3X_d!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!3X_d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png" width="393" height="142.61664392905865" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:266,&quot;width&quot;:733,&quot;resizeWidth&quot;:393,&quot;bytes&quot;:48832,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3X_d!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 424w, https://substackcdn.com/image/fetch/$s_!3X_d!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 848w, https://substackcdn.com/image/fetch/$s_!3X_d!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 1272w, https://substackcdn.com/image/fetch/$s_!3X_d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4cbde2c9-2c76-411e-8962-48544c06e9bc_733x266.png 
1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!aTxH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!aTxH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 424w, https://substackcdn.com/image/fetch/$s_!aTxH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 848w, https://substackcdn.com/image/fetch/$s_!aTxH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 1272w, https://substackcdn.com/image/fetch/$s_!aTxH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!aTxH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png" width="1447" height="647" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:647,&quot;width&quot;:1447,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:82979,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!aTxH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 424w, https://substackcdn.com/image/fetch/$s_!aTxH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 848w, https://substackcdn.com/image/fetch/$s_!aTxH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 1272w, https://substackcdn.com/image/fetch/$s_!aTxH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc611c241-ace7-4815-9f39-1fa19baaf011_1447x647.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The Python formula in cell <em>C4</em> tells <em>STL</em> to analyze the <em>Sales</em> column and that the data is monthly (i.e., <em>period = 12</em>). 
Also, <em>robust = True</em> tells STL to use a robust version that is resistant to some forms of outliers.</p><p>The formula also creates a visualization (i.e., plot) of the results for ease of analysis, specifying a visual that is 10 inches wide and 6 inches high.</p><p>Clicking the <em>&gt;</em> in the Python Editor provides the visual:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Bo1X!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Bo1X!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 424w, https://substackcdn.com/image/fetch/$s_!Bo1X!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 848w, https://substackcdn.com/image/fetch/$s_!Bo1X!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 1272w, https://substackcdn.com/image/fetch/$s_!Bo1X!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Bo1X!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png" width="1450" height="987" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:987,&quot;width&quot;:1450,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:214942,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Bo1X!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 424w, https://substackcdn.com/image/fetch/$s_!Bo1X!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 848w, https://substackcdn.com/image/fetch/$s_!Bo1X!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 1272w, https://substackcdn.com/image/fetch/$s_!Bo1X!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b49db39-ddd9-4d7c-87ee-0dd146620afd_1450x987.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The above plots clearly show that the time series has a steady upward trend and pronounced seasonality (e.g., a dip in sales every January).</p><p>If you&#8217;re serious about building better forecasts, you want STL in your tool belt.</p><p>If that wasn&#8217;t enough, the <em>statsmodels</em> library has a lot more reasons why you want to be using Python in Excel to build your forecasts.</p><p>And here&#8217;s the code for your own Excel workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;fdf99afc-b7e5-4d59-b127-6891fbe372c9&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from statsmodels.tsa.seasonal import STL

# Fit a robust STL decomposition to the Sales column using a period of 12
res = STL(
    sales['Sales'],
    period = 12,
    robust = True,
).fit()

# Draw the decomposition plots, then resize the figure to 10 by 6 inches
fig = res.plot()
fig.set_size_inches(10, 6)
plt.show()</code></pre></div><div><hr></div><h4>Forecast KPIs</h4><p>You learned about measuring the accuracy of your forecasts in <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-3a8">Part 3</a> of this tutorial series using the <em>bias</em> and <em>MAE</em> calculations.</p><p>When using Python in Excel, it&#8217;s handy to have a function to calculate and display these KPIs for you:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Q-Fo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 424w, https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 848w, https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 1272w, https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png" 
width="459" height="170.7906976744186" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e3883157-9233-4221-9544-beeb933bcbd6_860x320.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:320,&quot;width&quot;:860,&quot;resizeWidth&quot;:459,&quot;bytes&quot;:85129,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 424w, https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 848w, https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 1272w, https://substackcdn.com/image/fetch/$s_!Q-Fo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3883157-9233-4221-9544-beeb933bcbd6_860x320.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!xEyb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xEyb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 424w, https://substackcdn.com/image/fetch/$s_!xEyb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 848w, https://substackcdn.com/image/fetch/$s_!xEyb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 1272w, https://substackcdn.com/image/fetch/$s_!xEyb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xEyb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png" width="1447" height="980" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:980,&quot;width&quot;:1447,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:165816,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xEyb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 424w, https://substackcdn.com/image/fetch/$s_!xEyb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 848w, https://substackcdn.com/image/fetch/$s_!xEyb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 1272w, https://substackcdn.com/image/fetch/$s_!xEyb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84933f90-35cd-4c50-822c-061ae23b492e_1447x980.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>BTW - I only write the above utility function once and then copy and paste it into my Excel workbooks as needed.</p><p>Here&#8217;s the code for your own Excel workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;d5e797e4-3a7d-4c78-b9b9-b1451e8cbe6a&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Utility function for our model KPIs
def forecast_kpis(forecasts, targets):
    """Print the bias and MAE of a set of forecasts, as raw values and as percentages.

    forecasts: the predicted values (a pandas Series, NumPy array, or list).
    targets:   the actual values, in the same order as the forecasts.

    Nothing is returned - the KPIs are printed.
    """
    # Compare the values by position. Subtracting two pandas Series matches
    # rows by index label, so forecasts and targets whose indexes differ
    # would otherwise produce NaN errors and misleading KPIs.
    forecasts = np.asarray(forecasts, dtype = float)
    targets = np.asarray(targets, dtype = float)

    # Bias calculations
    errors = forecasts - targets

    targets_sum = np.sum(targets)
    bias = np.mean(errors)
    bias_pct = np.sum(errors) / targets_sum

    # MAE calculations
    abs_errors = np.abs(errors)
    mae = np.mean(abs_errors)
    mae_pct = np.sum(abs_errors) / targets_sum

    # Display
    print(f'Bias: {bias:0.4f}, {(bias_pct * 100):0.2f}%')
    print(f'MAE: {mae:0.4f}, {(mae_pct * 100):0.2f}%') </code></pre></div><div><hr></div><h4>Better Exponential Smoothing</h4><p>In <a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-5-using">the last tutorial</a>, you learned about Excel&#8217;s <em>FORECAST.ETS()</em> function.</p><p>The <em>ETS</em> in the function name stands for <em>exponential triple smoothing</em> and it should come as no surprise that the mighty <em>statsmodels</em> library also supports <em>ETS</em>.</p><p><strong>It should also not be surprising that using </strong><em><strong>statsmodels</strong></em><strong> provides you with more opportunities for better </strong><em><strong>ETS</strong></em><strong> forecasts than what is provided by Excel.</strong></p><p>By using Python in Excel, you get more fine-grained control over the forecasting model, as you will see in this tutorial. First up, as you learned in <a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-4-is">Part 4</a>, the data needs to be split:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_BnW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_BnW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 424w, https://substackcdn.com/image/fetch/$s_!_BnW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 848w, 
https://substackcdn.com/image/fetch/$s_!_BnW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 1272w, https://substackcdn.com/image/fetch/$s_!_BnW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_BnW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png" width="451" height="199.27906976744185" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:380,&quot;width&quot;:860,&quot;resizeWidth&quot;:451,&quot;bytes&quot;:68152,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_BnW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 424w, 
https://substackcdn.com/image/fetch/$s_!_BnW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 848w, https://substackcdn.com/image/fetch/$s_!_BnW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 1272w, https://substackcdn.com/image/fetch/$s_!_BnW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b227394-2e53-44d8-bddf-418056f26ae4_860x380.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6Qo3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6Qo3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 424w, https://substackcdn.com/image/fetch/$s_!6Qo3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 848w, https://substackcdn.com/image/fetch/$s_!6Qo3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 1272w, 
https://substackcdn.com/image/fetch/$s_!6Qo3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6Qo3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png" width="1450" height="467" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/79431385-349f-4ea0-b927-348a0746e24f_1450x467.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:467,&quot;width&quot;:1450,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:65346,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!6Qo3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 424w, https://substackcdn.com/image/fetch/$s_!6Qo3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 848w, 
https://substackcdn.com/image/fetch/$s_!6Qo3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 1272w, https://substackcdn.com/image/fetch/$s_!6Qo3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79431385-349f-4ea0-b927-348a0746e24f_1450x467.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Here&#8217;s the code for your Excel workbook:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;a0c28662-5331-4f4a-9070-d81b485e9529&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Use the first 4 years of data to train the model
# NOTE: label-based .loc slices include the end label, so a row labeled
# 2023-12-01 stays in the training data (assumes sales is indexed by
# date - confirm against the workbook)
sales_train = sales.loc[:'2023-12-01', 'Sales']

# Use the last year of data to test the model
sales_test = sales.loc['2024-01-01':, 'Sales']</code></pre></div><p>With the data split, building an exponential triple smoothing model is straightforward:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FKeV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FKeV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 424w, https://substackcdn.com/image/fetch/$s_!FKeV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 848w, https://substackcdn.com/image/fetch/$s_!FKeV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 1272w, https://substackcdn.com/image/fetch/$s_!FKeV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FKeV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png" width="430" height="211.72332942555687" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:853,&quot;resizeWidth&quot;:430,&quot;bytes&quot;:115308,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FKeV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 424w, https://substackcdn.com/image/fetch/$s_!FKeV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 848w, https://substackcdn.com/image/fetch/$s_!FKeV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 1272w, https://substackcdn.com/image/fetch/$s_!FKeV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a59906f-d6ac-4a91-b79b-6756286552c2_853x420.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!9gOT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9gOT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 424w, https://substackcdn.com/image/fetch/$s_!9gOT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 848w, https://substackcdn.com/image/fetch/$s_!9gOT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 1272w, https://substackcdn.com/image/fetch/$s_!9gOT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9gOT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png" width="1450" height="867" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:867,&quot;width&quot;:1450,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:134885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9gOT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 424w, https://substackcdn.com/image/fetch/$s_!9gOT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 848w, https://substackcdn.com/image/fetch/$s_!9gOT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 1272w, https://substackcdn.com/image/fetch/$s_!9gOT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e8391ba-9591-4a72-b147-7595ae936f6c_1450x867.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The Python code for your Excel workbook:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;580fc380-99de-4382-88ac-1be89d16a706&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Build and fit the first exponential triple smoothing model:
# additive trend, additive seasonality, 12 periods per seasonal cycle
fit1 = ExponentialSmoothing(
    sales_train,
    trend = 'add',
    seasonal = 'add',
    seasonal_periods = 12,
    use_boxcox = True,
).fit()

# Forecast 12 steps past the end of the training data for the KPIs
pred = fit1.forecast(steps = 12)
forecast_kpis(pred, sales_test)</code></pre></div><p>At a high level, there are three things to note about the above code:</p><ul><li><p>The code uses <em>trend = 'add'</em> and <em>seasonal = 'add'</em> to align with Excel&#8217;s <em>FORECAST.ETS()</em> function.</p></li><li><p>The <em>seasonal_periods = 12</em> tells the model that the seasonal pattern repeats every 12 observations (i.e., once a year, because the data is at the monthly grain).</p></li><li><p>The <em>use_boxcox = True</em> applies a Box-Cox transformation to the data before the model is built. <em>FORECAST.ETS()</em> is more forgiving than <em>statsmodels</em>, so this is required.</p></li></ul><p>As detailed in <a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-5-using">Part 5</a>, the above KPIs are an improvement over using Excel&#8217;s <em>FORECAST.ETS()</em> function:</p><ul><li><p>The magnitude of the bias improved from -1.06% to 0.27%.</p></li><li><p>The MAE improved from 4.68% to 4.06%.</p></li></ul><p>Excel&#8217;s <em>FORECAST.ETS()</em> function uses <em>additive</em> trends and seasonality with exponential smoothing.</p><p>However, exponential smoothing also supports <em>multiplicative</em> trends and seasonality. 
In some scenarios, multiplicative trends and seasonality produce better forecasts:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!XxyH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!XxyH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 424w, https://substackcdn.com/image/fetch/$s_!XxyH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 848w, https://substackcdn.com/image/fetch/$s_!XxyH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 1272w, https://substackcdn.com/image/fetch/$s_!XxyH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!XxyH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png" width="509" height="279.95" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:473,&quot;width&quot;:860,&quot;resizeWidth&quot;:509,&quot;bytes&quot;:63246,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!XxyH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 424w, https://substackcdn.com/image/fetch/$s_!XxyH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 848w, https://substackcdn.com/image/fetch/$s_!XxyH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 1272w, https://substackcdn.com/image/fetch/$s_!XxyH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa402420d-7079-4d75-8a33-d8b04d374b6b_860x473.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!hGwj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!hGwj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 424w, 
https://substackcdn.com/image/fetch/$s_!hGwj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 848w, https://substackcdn.com/image/fetch/$s_!hGwj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 1272w, https://substackcdn.com/image/fetch/$s_!hGwj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!hGwj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png" width="1445" height="780" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:780,&quot;width&quot;:1445,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:116113,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/194090187?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!hGwj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 424w, https://substackcdn.com/image/fetch/$s_!hGwj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 848w, https://substackcdn.com/image/fetch/$s_!hGwj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 1272w, https://substackcdn.com/image/fetch/$s_!hGwj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36c5b30-9b6a-45dc-9406-040cffbfb4ea_1445x780.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>As shown above, the multiplicative exponential smoothing model is a slight improvement in MAE compared to the additive model.</p><p>Here&#8217;s the code:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;d3a22051-2357-46ea-a466-8efff6bec951&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Multiplicative exponential triple smoothing model
fit2 = ExponentialSmoothing(sales_train,
                            trend = 'mul',
                            seasonal = 'mul',
                            seasonal_periods = 12,
                            use_boxcox = True).fit()

# Get the forecasts and KPIs
pred = fit2.forecast(steps = 12)
forecast_kpis(pred, sales_test)</code></pre></div><div><hr></div><p>That&#8217;s it for this tutorial.</p><p>My next newsletter will show you another reason you want to build better forecasts using Python in Excel - incorporating external drivers.</p><p>Stay healthy and happy data sleuthing!</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-7-using">Check out Part 7 here</a></strong>.</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f8036bf8-2d33-45c3-ba05-ddb529566752_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/24cf5d95-6a67-4492-b2fc-51cd81bc57b7_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6db15b39-4f58-4777-a735-b16a920e0786_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fff9a0ce-c856-44cb-9523-0386dc67ae82_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn 
More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 5: Using FORECAST.ETS()]]></title><description><![CDATA[Learn the Microsoft Excel forecasting skills that should be (but are not) taught to professionals]]></description><link>https://thediydatascientist.substack.com/p/forecasting-with-excel-part-5-using</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/forecasting-with-excel-part-5-using</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Thu, 09 Apr 2026 13:53:39 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/4545fc75-f660-4a25-b439-cc5b3b59f66b_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>If you&#8217;re new to this tutorial series, be sure to <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">get started with Part 1 here</a></strong>.</p><p>Through this tutorial 
series, you&#8217;ve been learning the fundamentals of time series forecasting using Microsoft Excel.</p><p>In this tutorial, you will learn the premier way to build data-driven forecasts using native Excel features - the <em>FORECAST.ETS()</em> function.</p><p>If you would like to follow along with today&#8217;s tutorial (highly recommended), you will need to download the <em>SalesTimeSeries.xlsx</em> file from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><h4>Preparing the Data</h4><p>The <em>FORECAST.ETS()</em> function requires the use of Excel-formatted dates. 
The <em>Sales</em> table in the tutorial workbook represents a common situation where Excel doesn't like the format:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Wgth!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Wgth!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 424w, https://substackcdn.com/image/fetch/$s_!Wgth!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 848w, https://substackcdn.com/image/fetch/$s_!Wgth!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 1272w, https://substackcdn.com/image/fetch/$s_!Wgth!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Wgth!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png" width="144" height="279.6" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:466,&quot;width&quot;:240,&quot;resizeWidth&quot;:144,&quot;bytes&quot;:34611,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Wgth!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 424w, https://substackcdn.com/image/fetch/$s_!Wgth!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 848w, https://substackcdn.com/image/fetch/$s_!Wgth!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 1272w, https://substackcdn.com/image/fetch/$s_!Wgth!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1839033-78c4-49ce-8702-0ea0dba2bf80_240x466.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>As covered in <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-f38">Part 2</a> of this tutorial series, the answer to this problem is to insert a <em>Date</em> column using the following formula to convert the <em>YearMonth</em> data into an Excel-friendly format:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6cJB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!6cJB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 424w, https://substackcdn.com/image/fetch/$s_!6cJB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 848w, https://substackcdn.com/image/fetch/$s_!6cJB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 1272w, https://substackcdn.com/image/fetch/$s_!6cJB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6cJB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png" width="636" height="47.15172413793103" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:86,&quot;width&quot;:1160,&quot;resizeWidth&quot;:636,&quot;bytes&quot;:37619,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!6cJB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 424w, https://substackcdn.com/image/fetch/$s_!6cJB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 848w, https://substackcdn.com/image/fetch/$s_!6cJB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 1272w, https://substackcdn.com/image/fetch/$s_!6cJB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9b743d1-7e69-48fb-91f2-c9a1f8a65ff0_1160x86.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!pmn4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!pmn4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 424w, https://substackcdn.com/image/fetch/$s_!pmn4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 848w, 
https://substackcdn.com/image/fetch/$s_!pmn4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 1272w, https://substackcdn.com/image/fetch/$s_!pmn4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!pmn4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png" width="244" height="267.23809523809524" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/21bfd398-328e-4608-8995-f42da930baa0_420x460.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:460,&quot;width&quot;:420,&quot;resizeWidth&quot;:244,&quot;bytes&quot;:37965,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!pmn4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 424w, 
https://substackcdn.com/image/fetch/$s_!pmn4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 848w, https://substackcdn.com/image/fetch/$s_!pmn4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 1272w, https://substackcdn.com/image/fetch/$s_!pmn4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21bfd398-328e-4608-8995-f42da930baa0_420x460.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>NOTE - You may need to explicitly format column <em>C</em> as Excel dates if your table doesn't look like the image above.</p><h4>Establishing the Baseline</h4><p>As covered in <a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-4-is">Part 4</a> of the tutorial series, establishing a baseline is a critical step in crafting the best forecasting models. Unfortunately, not doing this is <a href="https://www.daveondata.com/machine-learning-forecasting-consulting-info">one of the most common mistakes my clients make</a>. The baseline for this tutorial consists of the following:</p><ul><li><p>All data through December 2023 for training the forecasting baseline.</p></li><li><p>All data for 2024 for testing the forecast baseline.</p></li><li><p>A simple 6-month moving average as the baseline model.</p></li></ul><p>The first step is to calculate the 6-month moving average for December 2023 by adding an <em>MA(6) Forecast</em> column and entering the following in cell <em>E50</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wMDa!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wMDa!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 424w, https://substackcdn.com/image/fetch/$s_!wMDa!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 848w, 
https://substackcdn.com/image/fetch/$s_!wMDa!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 1272w, https://substackcdn.com/image/fetch/$s_!wMDa!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!wMDa!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png" width="308" height="249.4448669201521" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/abe31e0f-6111-4826-8438-0053453d6fa6_526x426.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:426,&quot;width&quot;:526,&quot;resizeWidth&quot;:308,&quot;bytes&quot;:57101,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!wMDa!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 424w, 
https://substackcdn.com/image/fetch/$s_!wMDa!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 848w, https://substackcdn.com/image/fetch/$s_!wMDa!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 1272w, https://substackcdn.com/image/fetch/$s_!wMDa!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabe31e0f-6111-4826-8438-0053453d6fa6_526x426.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Next, the forecasts for 2024 are simply the forecasted value for December 2023:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!28s9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!28s9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 424w, https://substackcdn.com/image/fetch/$s_!28s9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 848w, https://substackcdn.com/image/fetch/$s_!28s9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 1272w, https://substackcdn.com/image/fetch/$s_!28s9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!28s9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png" width="377" height="440.3001485884101" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:786,&quot;width&quot;:673,&quot;resizeWidth&quot;:377,&quot;bytes&quot;:341418,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!28s9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 424w, https://substackcdn.com/image/fetch/$s_!28s9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 848w, https://substackcdn.com/image/fetch/$s_!28s9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 1272w, https://substackcdn.com/image/fetch/$s_!28s9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5678729-5b9b-4ca4-932e-43d3087fe1c4_673x786.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>As covered in <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-3a8">Part 3</a>, next up is calculating the MA(6) errors in a new column:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kSZY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kSZY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 424w, 
https://substackcdn.com/image/fetch/$s_!kSZY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 848w, https://substackcdn.com/image/fetch/$s_!kSZY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 1272w, https://substackcdn.com/image/fetch/$s_!kSZY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kSZY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png" width="495" height="407.5827814569536" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:746,&quot;width&quot;:906,&quot;resizeWidth&quot;:495,&quot;bytes&quot;:100868,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!kSZY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 424w, https://substackcdn.com/image/fetch/$s_!kSZY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 848w, https://substackcdn.com/image/fetch/$s_!kSZY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 1272w, https://substackcdn.com/image/fetch/$s_!kSZY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1ee9b10-a44a-481c-9d69-7b6cf0871a7f_906x746.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And lastly, as detailed in Part 3 and Part 4, calculating the key performance indicators (KPIs) for the MA(6) baseline model for the test dataset:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1Rrr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1Rrr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 424w, https://substackcdn.com/image/fetch/$s_!1Rrr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 848w, https://substackcdn.com/image/fetch/$s_!1Rrr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 1272w, https://substackcdn.com/image/fetch/$s_!1Rrr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!1Rrr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png" width="320" height="155.89743589743588" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:266,&quot;width&quot;:546,&quot;resizeWidth&quot;:320,&quot;bytes&quot;:30285,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1Rrr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 424w, https://substackcdn.com/image/fetch/$s_!1Rrr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 848w, https://substackcdn.com/image/fetch/$s_!1Rrr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 1272w, https://substackcdn.com/image/fetch/$s_!1Rrr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ab86f22-dd5d-48e2-a8af-8594179e6643_546x266.png 
1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Here&#8217;s what the KPIs tell you:</p><ul><li><p>The MA(6) baseline typically forecasts values too low for the test dataset.</p></li><li><p>On average, the MA(6) baseline is 4.08% too low.</p></li><li><p>The typical MA(6) baseline forecast is off by $74,530.</p></li><li><p>On average, the MA(6) baseline is off by 6.58%.</p></li></ul><p>With the baseline established, it&#8217;s now time to see if the FORECAST.ETS() function can provide uplift (i.e., beat the baseline KPIs).</p><h4>FORECAST.ETS() Intuition</h4><p>The <em>FORECAST.ETS()</em> function was added in Microsoft Excel 2016 and empowers millions of professionals to build better data-driven forecasts.</p><p>The <em>ETS</em> portion of the function name stands for <em>exponential triple smoothing</em>, a battle-tested forecasting model in use for more than 60 years. For this tutorial series, an intuitive understanding of ETS is enough.</p><p>You can think of ETS as learning the following from time series data:</p><ol><li><p>The average target of the time series, which is known as the <em>level</em>.</p></li><li><p>The average amount of fluctuation between two consecutive targets in the time series, which is known as the <em>trend</em>.</p></li><li><p>Patterns that appear at regular intervals (e.g., weekly or monthly) and impact the level, which are known as the <em>seasonality</em>.</p></li></ol><p>Using ETS, each forecast can be thought of as being:</p><ul><li><p>Forecast = Level + Trend + Seasonality</p></li></ul><p>Now for the intuition around <em>exponential smoothing</em>.</p><p>Consider item 1 in the list above. Conceptually, this is similar to a moving average forecasting model. However, a moving average model treats each data point equally in the calculation.</p><p>ETS introduced the innovation of placing greater weight on recent data points than on older ones. 
In some cases, ETS will severely penalize older data points to improve forecasting (i.e., <em>exponential smoothing</em>).</p><p>If this seems overwhelming, then you&#8217;ll probably find the following refreshing.</p><p>The <em>FORECAST.ETS()</em> function does all this for you automagically.</p><h4>Using FORECAST.ETS()</h4><p>With the intuition established, it&#8217;s time to apply ETS to the Sales time series dataset.</p><p>The <em>FORECAST.ETS()</em> function has some requirements that you need to be aware of. From Microsoft&#8217;s website:</p><p><em>&#8220;This function requires the timeline to be organized with a constant step between the different points. For example, that could be a monthly timeline with values on the 1st of every month, a yearly timeline, or a timeline of numerical indices.&#8221;</em></p><p>This is just a fancy way of saying that your dataset must have a constant grain (e.g., monthly sales) and can&#8217;t have any gaps in the dates (e.g., January 2022 is missing).</p><p>The easiest way to get started with <em>FORECAST.ETS()</em> is to use only its three required arguments:</p><ul><li><p>The date for which the forecast is created.</p></li><li><p>The historical targets.</p></li><li><p>The historical dates.</p></li></ul><p>First, a <em>FORECAST.ETS()</em> column will hold the forecasts. For the tutorial dataset, the first forecast for the test dataset is January 2024 (i.e., cell <em>C51</em>). 
Here&#8217;s the first step of setting up the formula:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!emHP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!emHP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 424w, https://substackcdn.com/image/fetch/$s_!emHP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 848w, https://substackcdn.com/image/fetch/$s_!emHP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 1272w, https://substackcdn.com/image/fetch/$s_!emHP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!emHP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png" width="380" height="101.0126582278481" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:168,&quot;width&quot;:632,&quot;resizeWidth&quot;:380,&quot;bytes&quot;:17576,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!emHP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 424w, https://substackcdn.com/image/fetch/$s_!emHP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 848w, https://substackcdn.com/image/fetch/$s_!emHP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 1272w, https://substackcdn.com/image/fetch/$s_!emHP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F777ca844-62e4-4ea5-84a6-d2f79f01ddcd_632x168.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Next is adding the historical time series values (i.e., all the <em>Sales</em> values through December 2023):</p><div class="captioned-image-container"><figure><a class="image-link 
image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TZhX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TZhX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 424w, https://substackcdn.com/image/fetch/$s_!TZhX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 848w, https://substackcdn.com/image/fetch/$s_!TZhX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 1272w, https://substackcdn.com/image/fetch/$s_!TZhX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!TZhX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png" width="398" height="103.23125" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:166,&quot;width&quot;:640,&quot;resizeWidth&quot;:398,&quot;bytes&quot;:38415,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TZhX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 424w, https://substackcdn.com/image/fetch/$s_!TZhX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 848w, https://substackcdn.com/image/fetch/$s_!TZhX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 1272w, https://substackcdn.com/image/fetch/$s_!TZhX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F426b129b-5a0b-4091-97ca-c5c680d4b70f_640x166.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Last, adding the historical dates:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!6_AJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6_AJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 424w, https://substackcdn.com/image/fetch/$s_!6_AJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 848w, https://substackcdn.com/image/fetch/$s_!6_AJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 1272w, https://substackcdn.com/image/fetch/$s_!6_AJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6_AJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png" width="480" height="113.6842105263158" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/abca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:180,&quot;width&quot;:760,&quot;resizeWidth&quot;:480,&quot;bytes&quot;:44507,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!6_AJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 424w, https://substackcdn.com/image/fetch/$s_!6_AJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 848w, https://substackcdn.com/image/fetch/$s_!6_AJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 1272w, https://substackcdn.com/image/fetch/$s_!6_AJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabca2ca9-1acb-4856-b2f2-484f8c76bc1e_760x180.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Hitting <em>&lt;enter&gt;</em> gives the first forecast for the test dataset. 
The rest of the 2024 forecasts are simply a matter of dragging the formula down:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!J4g1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!J4g1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 424w, https://substackcdn.com/image/fetch/$s_!J4g1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 848w, https://substackcdn.com/image/fetch/$s_!J4g1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 1272w, https://substackcdn.com/image/fetch/$s_!J4g1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!J4g1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png" width="174" height="355.4723926380368" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:666,&quot;width&quot;:326,&quot;resizeWidth&quot;:174,&quot;bytes&quot;:95495,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!J4g1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 424w, https://substackcdn.com/image/fetch/$s_!J4g1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 848w, https://substackcdn.com/image/fetch/$s_!J4g1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 1272w, https://substackcdn.com/image/fetch/$s_!J4g1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F329ce694-5789-41c1-ad77-1665a3be7a96_326x666.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>As you can see in the image above, ETS is far more varied in its forecasts compared to using a simple MA(6) model.</p><p>However, are the ETS forecasts better?</p><h4>FORECAST.ETS() KPIs</h4><p>As you've learned, calculating forecast errors is the first step in evaluating your forecasts:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_Hin!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!_Hin!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 424w, https://substackcdn.com/image/fetch/$s_!_Hin!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 848w, https://substackcdn.com/image/fetch/$s_!_Hin!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 1272w, https://substackcdn.com/image/fetch/$s_!_Hin!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_Hin!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png" width="266" height="88.66666666666667" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:140,&quot;width&quot;:420,&quot;resizeWidth&quot;:266,&quot;bytes&quot;:13646,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_Hin!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 424w, https://substackcdn.com/image/fetch/$s_!_Hin!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 848w, https://substackcdn.com/image/fetch/$s_!_Hin!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 1272w, https://substackcdn.com/image/fetch/$s_!_Hin!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9f236f9-2eff-4e7d-97fb-ab42677cab2e_420x140.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The image above shows how to add a new <em>FORECAST.ETS()</em> Error column to the table with the error formula in cell <em>F51</em>. 
Hitting <em>&lt;enter&gt;</em> and dragging the formula down:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Sqz9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Sqz9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 424w, https://substackcdn.com/image/fetch/$s_!Sqz9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 848w, https://substackcdn.com/image/fetch/$s_!Sqz9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 1272w, https://substackcdn.com/image/fetch/$s_!Sqz9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Sqz9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png" width="208" height="366.42487046632124" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:680,&quot;width&quot;:386,&quot;resizeWidth&quot;:208,&quot;bytes&quot;:41078,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Sqz9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 424w, https://substackcdn.com/image/fetch/$s_!Sqz9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 848w, https://substackcdn.com/image/fetch/$s_!Sqz9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 1272w, https://substackcdn.com/image/fetch/$s_!Sqz9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fecca9bb1-6dc8-47cc-8184-1758e5570582_386x680.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And the KPIs for the ETS forecasts:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FLnZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FLnZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 424w, 
https://substackcdn.com/image/fetch/$s_!FLnZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 848w, https://substackcdn.com/image/fetch/$s_!FLnZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 1272w, https://substackcdn.com/image/fetch/$s_!FLnZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FLnZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png" width="680" height="146.69910786699108" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:266,&quot;width&quot;:1233,&quot;resizeWidth&quot;:680,&quot;bytes&quot;:49498,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193601182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!FLnZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 424w, https://substackcdn.com/image/fetch/$s_!FLnZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 848w, https://substackcdn.com/image/fetch/$s_!FLnZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 1272w, https://substackcdn.com/image/fetch/$s_!FLnZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4963e4c2-4a91-4d6d-95a1-b0cfd2b5d542_1233x266.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Yesssss!</p><p>Moving to a more complex model has improved the forecast KPIs.</p><p>&#128073; Ready to learn more? 
<strong><a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-6-the">Check out Part 6 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this week.</p><p>My next newsletter will show you the biggest thing ever to happen to forecasting with Microsoft Excel - Python formulas.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/18f0dfd4-51ac-4b4c-a47a-4df1f5ccb62d_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/df3ebe1f-9422-4511-a187-aed0a74ccd4e_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c59357cd-4114-4a01-99c7-f433ed74ff64_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/51e2a178-4640-40db-b6b8-4bf4feda5b6d_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" 
href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p></p><p></p>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 4: Is Your Model Any Good?]]></title><description><![CDATA[Learn the Microsoft Excel forecasting skills that should be (but are not) taught to professionals.]]></description><link>https://thediydatascientist.substack.com/p/forecasting-with-excel-part-4-is</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/forecasting-with-excel-part-4-is</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Mon, 06 Apr 2026 13:41:46 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/7261405f-6f5c-4644-a7ea-4b1ccd3597c1_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>If you&#8217;re new to this tutorial series, be sure to <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">get started with Part 1 here</a></strong>.</p><p>Forecasting is about predicting the future. But you don&#8217;t have a time machine. 
So, how can you trust that your forecasts are accurate?</p><p>In this week&#8217;s tutorial, you will learn how to test your models by simulating the future.</p><p>Unfortunately, most professionals (e.g., Finance pros) are not taught how to do this. This is your opportunity to stand out.</p><p>If you would like to follow along with today&#8217;s tutorial (highly recommended), you will need to download the <em>SalesTimeSeries.xlsx</em> file from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>Simulating the Future</h4><p>To build better forecasts, you must use your data to simulate the future. You do this by splitting your data into two sets:</p><ol><li><p>The oldest data becomes what you use to build your forecasts.</p></li><li><p>The newest data becomes what you use to test forecast accuracy.</p></li></ol><p><a href="https://www.daveondata.com/machine-learning-forecasting-consulting-info">As I help my clients understand</a>, this strategy is critical to their forecasting success.</p><p>First, by using only the oldest data to build your forecasts, you are hoping to discover the underlying patterns that stand the test of time.</p><p>Imagine that you&#8217;re forecasting sales and have 6 years of monthly data. You use the first 4 years to build and the last 2 years to test.</p><p>If your forecasts are accurate, you&#8217;ve found these patterns. 
But that&#8217;s not all.</p><p>Second, consider a forecasting model that received the first 4 years of sales data.</p><p>From the model&#8217;s perspective, the future starts with the first month of the test data (i.e., January of year five). This is how you simulate the future.</p><p>Is this simulation perfect?</p><p>Absolutely not. But it&#8217;s the best you can do.</p><p>And, as I mentioned above, most professionals haven&#8217;t been taught how to do it.</p><div><hr></div><h4>Splitting the Data</h4><p>The <em>Sales</em> table of the tutorial Excel workbook has 5 years of monthly sales data.</p><p>While there is no magical equation that tells you how much data should be split between building your forecasting model (i.e., the <em>training dataset</em>) vs. the <em>test dataset</em>, here are some guidelines:</p><ul><li><p>Each dataset should include, at a minimum, one complete cycle of seasonality.</p></li><li><p>More data is better than less data.</p></li></ul><p>As covered in <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-f38">Part 2</a>, the <em>Sales</em> data exhibits yearly seasonality. Therefore, here are two reasonable approaches to splitting the data:</p><ol><li><p>Use 3 years of data for training and 2 years for testing.</p></li><li><p>Use 4 years of data for training and 1 year for testing.</p></li></ol><p>I will cover option 2 in this tutorial series. Try out option 1 and compare the results.</p><div><hr></div><h4>A Simple Moving Average Model</h4><p>In <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-3a8">Part 3</a> of this tutorial series, I introduced a simple 6-month moving average as a forecasting model. 
I will continue to use this MA(6) model as a baseline in this tutorial.</p><p>Setting up the MA(6) model in Microsoft Excel is straightforward:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9FDt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9FDt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 424w, https://substackcdn.com/image/fetch/$s_!9FDt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 848w, https://substackcdn.com/image/fetch/$s_!9FDt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 1272w, https://substackcdn.com/image/fetch/$s_!9FDt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9FDt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png" width="425" height="241.21621621621622" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:740,&quot;resizeWidth&quot;:425,&quot;bytes&quot;:47526,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9FDt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 424w, https://substackcdn.com/image/fetch/$s_!9FDt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 848w, https://substackcdn.com/image/fetch/$s_!9FDt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 1272w, https://substackcdn.com/image/fetch/$s_!9FDt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a4b2724-2c5a-4b68-8884-31ee42de251e_740x420.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Given my choice of reserving the last year of data for testing (often referred to as the <em>test holdout</em>), I drag the MA(6) formula down the length of the table and stop at the end of 2023:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qlh6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qlh6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 424w, 
https://substackcdn.com/image/fetch/$s_!qlh6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 848w, https://substackcdn.com/image/fetch/$s_!qlh6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 1272w, https://substackcdn.com/image/fetch/$s_!qlh6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qlh6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png" width="350" height="237.09677419354838" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ebb61865-cf09-472d-8991-a6df25c815d1_620x420.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:620,&quot;resizeWidth&quot;:350,&quot;bytes&quot;:101244,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!qlh6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 424w, https://substackcdn.com/image/fetch/$s_!qlh6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 848w, https://substackcdn.com/image/fetch/$s_!qlh6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 1272w, https://substackcdn.com/image/fetch/$s_!qlh6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febb61865-cf09-472d-8991-a6df25c815d1_620x420.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>As covered in Part 3, you can certainly measure the accuracy of the MA(6) model&#8217;s forecasts against the historical data.</p><p><strong>However, we typically care most about forecast accuracy on the test dataset.</strong></p><p>The simplest form of an MA(6) forecasting model will use its last calculated value as the forecast for the entire test dataset. 
In this case, the value from cell <em>D50</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Lze4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Lze4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 424w, https://substackcdn.com/image/fetch/$s_!Lze4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 848w, https://substackcdn.com/image/fetch/$s_!Lze4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 1272w, https://substackcdn.com/image/fetch/$s_!Lze4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Lze4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png" width="346" height="397.9" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:713,&quot;width&quot;:620,&quot;resizeWidth&quot;:346,&quot;bytes&quot;:308250,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Lze4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 424w, https://substackcdn.com/image/fetch/$s_!Lze4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 848w, https://substackcdn.com/image/fetch/$s_!Lze4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 1272w, https://substackcdn.com/image/fetch/$s_!Lze4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4afa6a4c-72ef-4f33-a307-2771675a73a2_620x713.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>To put the test dataset forecasts in context, a quick line chart is a very handy tool.</p><p>First, select the data for 2024:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!d-jz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!d-jz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 424w, 
https://substackcdn.com/image/fetch/$s_!d-jz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 848w, https://substackcdn.com/image/fetch/$s_!d-jz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 1272w, https://substackcdn.com/image/fetch/$s_!d-jz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!d-jz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png" width="376" height="436.64516129032256" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:720,&quot;width&quot;:620,&quot;resizeWidth&quot;:376,&quot;bytes&quot;:266459,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!d-jz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 424w, https://substackcdn.com/image/fetch/$s_!d-jz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 848w, https://substackcdn.com/image/fetch/$s_!d-jz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 1272w, https://substackcdn.com/image/fetch/$s_!d-jz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb67e9de7-f42a-489b-ac21-bbff6b494ff7_620x720.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And then insert a line chart using the Ribbon:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!il8U!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!il8U!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 424w, https://substackcdn.com/image/fetch/$s_!il8U!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 848w, https://substackcdn.com/image/fetch/$s_!il8U!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 1272w, https://substackcdn.com/image/fetch/$s_!il8U!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!il8U!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png" width="260" height="292.8327645051194" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:660,&quot;width&quot;:586,&quot;resizeWidth&quot;:260,&quot;bytes&quot;:114680,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!il8U!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 424w, https://substackcdn.com/image/fetch/$s_!il8U!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 848w, https://substackcdn.com/image/fetch/$s_!il8U!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 1272w, https://substackcdn.com/image/fetch/$s_!il8U!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28b916cc-ee75-4153-a3c7-bff066c7b849_586x660.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!hyU5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!hyU5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 424w, 
https://substackcdn.com/image/fetch/$s_!hyU5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 848w, https://substackcdn.com/image/fetch/$s_!hyU5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 1272w, https://substackcdn.com/image/fetch/$s_!hyU5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!hyU5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png" width="631" height="345.40315934065933" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:797,&quot;width&quot;:1456,&quot;resizeWidth&quot;:631,&quot;bytes&quot;:34209,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!hyU5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 424w, https://substackcdn.com/image/fetch/$s_!hyU5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 848w, https://substackcdn.com/image/fetch/$s_!hyU5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 1272w, https://substackcdn.com/image/fetch/$s_!hyU5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07f9d032-8e1a-4aff-83dd-6cc7b65c260e_1815x993.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The straight orange line in the visualization above shows the MA(6) forecasted values.</p><p>There are a few things worth noting in the line chart:</p><ul><li><p>The MA(6) forecasts are too high for Jan, Mar, and Apr.</p></li><li><p>The MA(6) forecasts are very close for May through Sep.</p></li><li><p>The MA(6) forecasts are too low for Oct, Nov, and Dec.</p></li></ul><p>Overall, this doesn&#8217;t look too bad based on the visual. However, you don&#8217;t want to rely solely on eyeballing the data.</p><p>That&#8217;s where the KPIs come into the picture.</p><div><hr></div><h4>Simple MA(6) KPIs</h4><p>I cover the bias and mean absolute error (MAE) KPIs in Part 3 of the tutorial series, so check out the tutorial if you&#8217;re not familiar with these calculations.</p><p>First, calculate the MA(6) forecasting errors in column <em>E</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CPI8!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CPI8!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 424w, 
https://substackcdn.com/image/fetch/$s_!CPI8!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 848w, https://substackcdn.com/image/fetch/$s_!CPI8!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 1272w, https://substackcdn.com/image/fetch/$s_!CPI8!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!CPI8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png" width="450" height="242.30769230769232" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:780,&quot;resizeWidth&quot;:450,&quot;bytes&quot;:140078,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!CPI8!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 424w, https://substackcdn.com/image/fetch/$s_!CPI8!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 848w, https://substackcdn.com/image/fetch/$s_!CPI8!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 1272w, https://substackcdn.com/image/fetch/$s_!CPI8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbc0701e9-81fd-4dbb-aa3d-a4d950720988_780x420.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Y5wR!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Y5wR!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 424w, https://substackcdn.com/image/fetch/$s_!Y5wR!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 848w, https://substackcdn.com/image/fetch/$s_!Y5wR!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 1272w, https://substackcdn.com/image/fetch/$s_!Y5wR!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Y5wR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png" width="441" height="403.96946564885496" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:720,&quot;width&quot;:786,&quot;resizeWidth&quot;:441,&quot;bytes&quot;:391910,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Y5wR!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 424w, https://substackcdn.com/image/fetch/$s_!Y5wR!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 848w, https://substackcdn.com/image/fetch/$s_!Y5wR!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 1272w, https://substackcdn.com/image/fetch/$s_!Y5wR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60594632-1e36-4954-82bf-8ef0f61837d8_786x720.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>And the KPIs for just the test dataset (i.e., only for 2024):</strong></p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9DRg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9DRg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 424w, 
https://substackcdn.com/image/fetch/$s_!9DRg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 848w, https://substackcdn.com/image/fetch/$s_!9DRg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 1272w, https://substackcdn.com/image/fetch/$s_!9DRg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9DRg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png" width="428" height="78.40771349862258" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:133,&quot;width&quot;:726,&quot;resizeWidth&quot;:428,&quot;bytes&quot;:38573,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!9DRg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 424w, https://substackcdn.com/image/fetch/$s_!9DRg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 848w, https://substackcdn.com/image/fetch/$s_!9DRg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 1272w, https://substackcdn.com/image/fetch/$s_!9DRg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dafa5e6-c614-4efa-a112-b1eb6c1d05ed_726x133.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!VIKP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VIKP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 424w, https://substackcdn.com/image/fetch/$s_!VIKP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 848w, 
https://substackcdn.com/image/fetch/$s_!VIKP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 1272w, https://substackcdn.com/image/fetch/$s_!VIKP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VIKP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png" width="418" height="99.12571428571428" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:166,&quot;width&quot;:700,&quot;resizeWidth&quot;:418,&quot;bytes&quot;:50511,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!VIKP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 424w, 
https://substackcdn.com/image/fetch/$s_!VIKP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 848w, https://substackcdn.com/image/fetch/$s_!VIKP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 1272w, https://substackcdn.com/image/fetch/$s_!VIKP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89e49aa1-309e-4041-9267-f970ebb281b2_700x166.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!p1m5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!p1m5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 424w, https://substackcdn.com/image/fetch/$s_!p1m5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 848w, https://substackcdn.com/image/fetch/$s_!p1m5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 1272w, 
https://substackcdn.com/image/fetch/$s_!p1m5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!p1m5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png" width="424" height="72.88403819918145" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:126,&quot;width&quot;:733,&quot;resizeWidth&quot;:424,&quot;bytes&quot;:42075,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!p1m5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 424w, https://substackcdn.com/image/fetch/$s_!p1m5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 848w, 
https://substackcdn.com/image/fetch/$s_!p1m5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 1272w, https://substackcdn.com/image/fetch/$s_!p1m5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff07b4456-ec5f-4638-8760-23fbf2a5c6f2_733x126.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!77Ne!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!77Ne!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 424w, https://substackcdn.com/image/fetch/$s_!77Ne!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 848w, https://substackcdn.com/image/fetch/$s_!77Ne!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 1272w, https://substackcdn.com/image/fetch/$s_!77Ne!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!77Ne!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png" width="418" height="87.78" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fcddffde-1294-4cb8-a241-cc84193723fa_600x126.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:126,&quot;width&quot;:600,&quot;resizeWidth&quot;:418,&quot;bytes&quot;:13726,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!77Ne!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 424w, https://substackcdn.com/image/fetch/$s_!77Ne!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 848w, https://substackcdn.com/image/fetch/$s_!77Ne!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 1272w, https://substackcdn.com/image/fetch/$s_!77Ne!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffcddffde-1294-4cb8-a241-cc84193723fa_600x126.png 1456w" 
sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ULg-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ULg-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 424w, https://substackcdn.com/image/fetch/$s_!ULg-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 848w, https://substackcdn.com/image/fetch/$s_!ULg-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 1272w, https://substackcdn.com/image/fetch/$s_!ULg-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ULg-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png" width="264" height="201.14285714285714" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f258a67d-8d51-41be-82d4-1d48290b0242_420x320.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:320,&quot;width&quot;:420,&quot;resizeWidth&quot;:264,&quot;bytes&quot;:39576,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ULg-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 424w, https://substackcdn.com/image/fetch/$s_!ULg-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 848w, https://substackcdn.com/image/fetch/$s_!ULg-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 1272w, https://substackcdn.com/image/fetch/$s_!ULg-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff258a67d-8d51-41be-82d4-1d48290b0242_420x320.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Assuming that the test dataset is a reasonable simulation of the future, here&#8217;s what the KPIs tell us about the MA(6) model:</p><ul><li><p>The forecasts will be, on average, 
4.25% too low.</p></li><li><p>The forecasts will be, on average, off by $74,530.</p></li><li><p>The forecasts will be, on average, off by 6.86%.</p></li></ul><div><hr></div><h4>A Revised MA(6) Model</h4><p>What you&#8217;ve seen so far isn&#8217;t the only way moving averages can be used as forecasting models.</p><p>For example, the moving average calculation doesn&#8217;t have to stop with the end of the training dataset. You can use the model predictions in later calculations.</p><p>Conceptually, the model uses its previous predictions to create new predictions.</p><p>This is a bit abstract, so here&#8217;s the formula to demonstrate:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nPta!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nPta!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 424w, https://substackcdn.com/image/fetch/$s_!nPta!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 848w, https://substackcdn.com/image/fetch/$s_!nPta!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 1272w, https://substackcdn.com/image/fetch/$s_!nPta!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!nPta!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp" width="446" height="226.77966101694915" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:826,&quot;resizeWidth&quot;:446,&quot;bytes&quot;:81922,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!nPta!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 424w, https://substackcdn.com/image/fetch/$s_!nPta!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 848w, https://substackcdn.com/image/fetch/$s_!nPta!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!nPta!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37a85073-fe4c-41a2-8db0-7b203d439c5c_826x420.webp 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Notice in the formula above that the moving average is now calculated from the MA(6) model predictions. Dragging the formula down the length of the table changes the forecasts, errors, and KPIs:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MJg0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MJg0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 424w, https://substackcdn.com/image/fetch/$s_!MJg0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 848w, https://substackcdn.com/image/fetch/$s_!MJg0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 1272w, https://substackcdn.com/image/fetch/$s_!MJg0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!MJg0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png" width="454" height="555.9183673469388" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/acaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1020,&quot;width&quot;:833,&quot;resizeWidth&quot;:454,&quot;bytes&quot;:400582,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/193200140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MJg0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 424w, https://substackcdn.com/image/fetch/$s_!MJg0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 848w, https://substackcdn.com/image/fetch/$s_!MJg0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 1272w, 
https://substackcdn.com/image/fetch/$s_!MJg0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facaf7ca1-7884-4f23-9526-cee83be3f83a_833x1020.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The KPIs tell us that the revised MA(6) model performs worse than the simple MA(6) model.</p><p>However, don&#8217;t let this deter you from the powerful idea of forecasting models using previous predictions to make future ones.</p><p>This approach is used in a large number of forecasting techniques (e.g., Excel&#8217;s FORECAST.ETS() function) and is quite 
powerful - even if it didn&#8217;t work out so well in this particular case.</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-5-using">Check out Part 5 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this week.</p><p>My next newsletter will build upon everything you&#8217;ve learned so far to use Microsoft Excel&#8217;s premier built-in forecasting technique - the FORECAST.ETS() function.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses</strong> that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9e22a4ba-c1d1-4b5e-b8f6-6992f1aca4c5_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/21495cf8-b0e0-4646-aa04-47cd8610ae85_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0df56216-89d8-4f92-a116-89efc1244aa3_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/976d2a8a-59f8-479e-ad60-a3bc6e0236db_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" 
data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 3: Measuring Accuracy]]></title><description><![CDATA[Learn the Microsoft Excel forecasting skills that should be (but are not) taught to professionals.]]></description><link>https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-3a8</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-3a8</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Fri, 03 Apr 2026 13:55:34 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/50c2e5d9-04f9-4e3b-a6d2-1a359e30c8c6_1200x630.png" length="0" type="image/png"/><content:encoded><![CDATA[<p>If you&#8217;re new to this tutorial series, be sure to <strong><a 
href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">get started with Part 1 here</a></strong>.</p><p>Forecasts are used to make some of the biggest decisions within organizations. Often, these decisions involve many millions of dollars of spending:</p><ul><li><p>Should we build a new factory?</p></li><li><p>Should we open new stores?</p></li><li><p>Should we cut budgets?</p></li></ul><p>The list goes on and on.</p><p>The last thing you want to do is base these critical decisions on a forecast that isn&#8217;t accurate.</p><p>In this week&#8217;s tutorial, you will learn about two simple (but powerful) measures for assessing the accuracy (i.e., quality) of your forecasting models.</p><p>If you would like to follow along with today&#8217;s tutorial (highly recommended), you will need to download the <em>SalesTimeSeries.xlsx</em> file from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>A Simple Model</h4><p>As covered in <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-f38">the last tutorial</a> of this series, I&#8217;m going to use a 6-month moving average model to provide the baseline forecasts for the rest of this tutorial series.</p><p>Here&#8217;s how to calculate the forecasts using the <em>Sales</em> table from the tutorial&#8217;s Excel workbook:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!OJd0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!OJd0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 424w, https://substackcdn.com/image/fetch/$s_!OJd0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 848w, https://substackcdn.com/image/fetch/$s_!OJd0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 1272w, https://substackcdn.com/image/fetch/$s_!OJd0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!OJd0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png" width="452" height="239.39470365699873" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:793,&quot;resizeWidth&quot;:452,&quot;bytes&quot;:98204,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!OJd0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 424w, https://substackcdn.com/image/fetch/$s_!OJd0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 848w, https://substackcdn.com/image/fetch/$s_!OJd0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 1272w, https://substackcdn.com/image/fetch/$s_!OJd0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02a36d68-ff3e-466b-b17d-38f6db32320b_793x420.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Dragging the formula down the length of the table populates all the forecasts:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" 
target="_blank" href="https://substackcdn.com/image/fetch/$s_!-BVj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-BVj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 424w, https://substackcdn.com/image/fetch/$s_!-BVj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 848w, https://substackcdn.com/image/fetch/$s_!-BVj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 1272w, https://substackcdn.com/image/fetch/$s_!-BVj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-BVj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png" width="345" height="283.4123222748815" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:520,&quot;width&quot;:633,&quot;resizeWidth&quot;:345,&quot;bytes&quot;:156168,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-BVj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 424w, https://substackcdn.com/image/fetch/$s_!-BVj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 848w, https://substackcdn.com/image/fetch/$s_!-BVj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 1272w, https://substackcdn.com/image/fetch/$s_!-BVj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd13917eb-07b1-4808-9e1c-62d553d13a07_633x520.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>I&#8217;m going to use the above forecasts in the accuracy measure calculations in this tutorial.</p><div><hr></div><h4>Your Model&#8217;s Bias</h4><p>When considering the accuracy of a forecasting model, here&#8217;s a fundamental question that needs to be answered:</p><p>&#8220;Are the forecasts typically too high or too low?&#8221;</p><p>This question concerns the model&#8217;s <em>bias</em>, and there is a calculation that can tell you how biased your model is, on average.</p><p>Not surprisingly, the first thing you need to assess the bias of a forecast model is the forecast <em>errors</em>.</p><p>BTW - Other terms for the <em>error</em> are <em>residual</em> and <em>noise</em>.</p><p>Regardless of which term you use, calculating errors is simple. 
You subtract the target from the forecast:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Qub!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2Qub!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 424w, https://substackcdn.com/image/fetch/$s_!2Qub!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 848w, https://substackcdn.com/image/fetch/$s_!2Qub!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 1272w, https://substackcdn.com/image/fetch/$s_!2Qub!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Qub!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png" width="455" height="222.2093023255814" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:860,&quot;resizeWidth&quot;:455,&quot;bytes&quot;:141871,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2Qub!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 424w, https://substackcdn.com/image/fetch/$s_!2Qub!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 848w, https://substackcdn.com/image/fetch/$s_!2Qub!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 1272w, https://substackcdn.com/image/fetch/$s_!2Qub!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81b1b5db-7de6-4e6d-ae1c-5ce6b4376979_860x420.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>And dragging the calculation down the length of the table:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!R6ro!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!R6ro!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 424w, https://substackcdn.com/image/fetch/$s_!R6ro!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 848w, https://substackcdn.com/image/fetch/$s_!R6ro!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 1272w, https://substackcdn.com/image/fetch/$s_!R6ro!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!R6ro!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png" width="454" height="270.81627906976746" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:513,&quot;width&quot;:860,&quot;resizeWidth&quot;:454,&quot;bytes&quot;:261034,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!R6ro!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 424w, https://substackcdn.com/image/fetch/$s_!R6ro!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 848w, https://substackcdn.com/image/fetch/$s_!R6ro!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 1272w, https://substackcdn.com/image/fetch/$s_!R6ro!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc9a6bba-ae97-47ad-8dcb-93ae5a729ea9_860x513.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>As you can see in the image above, the forecasts are both too high (e.g., $36,823 in March 2024) and too low (e.g., -$111,719 in December 2024).</p><p>While you can examine/analyze the individual error values, the bias calculation aims to provide a key performance indicator (KPI) for a forecast model&#8217;s predictive performance.</p><p>Intuitively, the bias KPI is just the average error:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!udKL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!udKL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 424w, https://substackcdn.com/image/fetch/$s_!udKL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 848w, https://substackcdn.com/image/fetch/$s_!udKL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 1272w, https://substackcdn.com/image/fetch/$s_!udKL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!udKL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png" width="380" height="267.02702702702703" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:520,&quot;width&quot;:740,&quot;resizeWidth&quot;:380,&quot;bytes&quot;:106155,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!udKL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 424w, https://substackcdn.com/image/fetch/$s_!udKL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 848w, https://substackcdn.com/image/fetch/$s_!udKL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 1272w, https://substackcdn.com/image/fetch/$s_!udKL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6526c71-33c2-477e-8a62-e8bc0da4aab8_740x520.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Think of the calculation in cell <em>E64</em> of the <em>Sales</em> worksheet as combining all the forecasts and telling you, on average, if the forecasts are too low or too high:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uWLh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uWLh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 424w, https://substackcdn.com/image/fetch/$s_!uWLh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 848w, https://substackcdn.com/image/fetch/$s_!uWLh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 1272w, https://substackcdn.com/image/fetch/$s_!uWLh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!uWLh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png" width="485" height="262.80232558139534" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:466,&quot;width&quot;:860,&quot;resizeWidth&quot;:485,&quot;bytes&quot;:193254,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uWLh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 424w, https://substackcdn.com/image/fetch/$s_!uWLh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 848w, https://substackcdn.com/image/fetch/$s_!uWLh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 1272w, https://substackcdn.com/image/fetch/$s_!uWLh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9659cbc-86c1-4f9a-a024-94b79857ad72_860x466.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The bias calculation shows that the MA(6) forecast model tends to make forecasts that are, on average, too low.</p><p>Unfortunately, the value of <em>-$28,539</em> isn&#8217;t super informative by itself.</p><p>For example, <em>-$28,539</em> doesn&#8217;t tell you if the bias is very large compared to the targets in your time series.</p><p>What can be more informative is calculating the bias percentage (what is often called the <em>scaled bias</em>):</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!cmeu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!cmeu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 424w, https://substackcdn.com/image/fetch/$s_!cmeu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 848w, https://substackcdn.com/image/fetch/$s_!cmeu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 1272w, https://substackcdn.com/image/fetch/$s_!cmeu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!cmeu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png" width="345" height="74.8531810766721" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:133,&quot;width&quot;:613,&quot;resizeWidth&quot;:345,&quot;bytes&quot;:32738,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!cmeu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 424w, https://substackcdn.com/image/fetch/$s_!cmeu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 848w, https://substackcdn.com/image/fetch/$s_!cmeu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 1272w, https://substackcdn.com/image/fetch/$s_!cmeu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F646edaa4-b5a8-45ae-9ae9-7f79fd92de24_613x133.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Here&#8217;s what&#8217;s going on in the bias % calculation - the average error is being divided by the average value of all the targets.</p><p>Here&#8217;s the bias % for the MA(6) 
forecasting model:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LQyF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LQyF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 424w, https://substackcdn.com/image/fetch/$s_!LQyF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 848w, https://substackcdn.com/image/fetch/$s_!LQyF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 1272w, https://substackcdn.com/image/fetch/$s_!LQyF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LQyF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png" width="489" height="262.05787781350483" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:500,&quot;width&quot;:933,&quot;resizeWidth&quot;:489,&quot;bytes&quot;:140955,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LQyF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 424w, https://substackcdn.com/image/fetch/$s_!LQyF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 848w, https://substackcdn.com/image/fetch/$s_!LQyF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 1272w, https://substackcdn.com/image/fetch/$s_!LQyF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0cdf5c4-ccd0-4fcd-b0de-bdfce27bbc33_933x500.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The bias % provides a better insight into how a model&#8217;s bias relates to the magnitude of the targets being forecasted.</p><p>In this case, the MA(6) model forecasts are, on average, 2.85% too low.</p><div><hr></div><h4>Your Model&#8217;s &#8220;Accuracy&#8221;</h4><p>It might have occurred to you that the bias calculation doesn&#8217;t really align with the idea of accuracy.</p><p>And you&#8217;d be right to think that.</p><p>By way of analogy, consider what &#8220;accuracy&#8221; means when throwing darts at a dartboard when the target is the bullseye.</p><p>In this scenario, accuracy can be measured as the distance from the bullseye to where a dart landed on the dartboard (e.g., the distance would be zero when you hit the bullseye).</p><p>When thought of this way, it doesn&#8217;t matter if the dart is too high or too low 
relative to the bullseye. All that matters is the distance.</p><p>Using this analogy, the <em>mean absolute error (MAE)</em> represents how far, on average, the darts are from the bullseye.</p><p>This makes MAE a very intuitive way to assess the quality of your forecasts, easily understood by business stakeholders.</p><p>Here&#8217;s the MAE for the MA(6) forecasts:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1znQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1znQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 424w, https://substackcdn.com/image/fetch/$s_!1znQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 848w, https://substackcdn.com/image/fetch/$s_!1znQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 1272w, https://substackcdn.com/image/fetch/$s_!1znQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1znQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png" width="298" height="291.6072607260726" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:593,&quot;width&quot;:606,&quot;resizeWidth&quot;:298,&quot;bytes&quot;:86897,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1znQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 424w, https://substackcdn.com/image/fetch/$s_!1znQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 848w, https://substackcdn.com/image/fetch/$s_!1znQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 1272w, https://substackcdn.com/image/fetch/$s_!1znQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef0630dc-02de-4c51-ae2a-207b7488663a_606x593.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>If you're unfamiliar, Excel's <em>ABS()</em> function stands for <em>absolute value</em>. 
Basically, think of this function as turning the negative errors into positive errors and leaving positive errors unchanged:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6FG5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6FG5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 424w, https://substackcdn.com/image/fetch/$s_!6FG5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 848w, https://substackcdn.com/image/fetch/$s_!6FG5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 1272w, https://substackcdn.com/image/fetch/$s_!6FG5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6FG5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png" width="465" height="297.78077753779695" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:593,&quot;width&quot;:926,&quot;resizeWidth&quot;:465,&quot;bytes&quot;:211135,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!6FG5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 424w, https://substackcdn.com/image/fetch/$s_!6FG5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 848w, https://substackcdn.com/image/fetch/$s_!6FG5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 1272w, https://substackcdn.com/image/fetch/$s_!6FG5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F633ac366-40c5-4bc5-b7bb-9f07e7946f2e_926x593.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>What the MAE calculation says about the MA(6) model is that the forecasts are off by $60,656 on average.</p><p>Unlike the bias calculation, the MAE can be quite informative on its own. 
For example, MAE can be used as a criterion for determining whether a model will be useful for decision-making.</p><p>It can also be super useful to calculate the MAE % (i.e., <em>scaled MAE</em>):</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ldNh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ldNh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 424w, https://substackcdn.com/image/fetch/$s_!ldNh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 848w, https://substackcdn.com/image/fetch/$s_!ldNh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 1272w, https://substackcdn.com/image/fetch/$s_!ldNh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ldNh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png" width="323" height="67.44535073409462" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:128,&quot;width&quot;:613,&quot;resizeWidth&quot;:323,&quot;bytes&quot;:13178,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ldNh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 424w, https://substackcdn.com/image/fetch/$s_!ldNh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 848w, https://substackcdn.com/image/fetch/$s_!ldNh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 1272w, https://substackcdn.com/image/fetch/$s_!ldNh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa68b54d6-1724-4e78-b85d-bee8f4d5d545_613x128.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!bv8d!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bv8d!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 424w, https://substackcdn.com/image/fetch/$s_!bv8d!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 848w, https://substackcdn.com/image/fetch/$s_!bv8d!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 1272w, https://substackcdn.com/image/fetch/$s_!bv8d!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!bv8d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png" width="506" height="347.0953912111468" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2d81837c-d079-4be2-9546-852cc97cca88_933x640.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:640,&quot;width&quot;:933,&quot;resizeWidth&quot;:506,&quot;bytes&quot;:221785,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bv8d!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 424w, https://substackcdn.com/image/fetch/$s_!bv8d!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 848w, https://substackcdn.com/image/fetch/$s_!bv8d!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 1272w, https://substackcdn.com/image/fetch/$s_!bv8d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d81837c-d079-4be2-9546-852cc97cca88_933x640.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The MAE % is the average absolute error divided by the average target value and tells us that the forecasts are off by 6.07% on average.</p><p>However, MAE doesn&#8217;t tell us if the forecasts are typically too high or too low.</p><p>That&#8217;s what the bias tells us.</p><p>So, like chocolate and peanut butter, bias and MAE are better together when evaluating your forecasting models.</p><div><hr></div><h4>What About RMSE?</h4><p>One last thing I will mention is that I also use another metric to evaluate forecasting models: <em>root mean square error (RMSE)</em>.</p><p>The RMSE calculation is a little more complicated than the bias and MAE calculations. 
So, I rarely communicate this KPI to business stakeholders.</p><p>The first step is to calculate the <em>squared errors</em> of the forecasts:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6RNg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6RNg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 424w, https://substackcdn.com/image/fetch/$s_!6RNg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 848w, https://substackcdn.com/image/fetch/$s_!6RNg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 1272w, https://substackcdn.com/image/fetch/$s_!6RNg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6RNg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png" width="274" height="270.8745247148289" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:520,&quot;width&quot;:526,&quot;resizeWidth&quot;:274,&quot;bytes&quot;:24254,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!6RNg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 424w, https://substackcdn.com/image/fetch/$s_!6RNg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 848w, https://substackcdn.com/image/fetch/$s_!6RNg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 1272w, https://substackcdn.com/image/fetch/$s_!6RNg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd00d34f6-a5db-4d03-ae91-dde6f4d737cb_526x520.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!55P2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!55P2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 424w, 
https://substackcdn.com/image/fetch/$s_!55P2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 848w, https://substackcdn.com/image/fetch/$s_!55P2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 1272w, https://substackcdn.com/image/fetch/$s_!55P2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!55P2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png" width="613" height="264.31177446102816" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:520,&quot;width&quot;:1206,&quot;resizeWidth&quot;:613,&quot;bytes&quot;:286131,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!55P2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 424w, https://substackcdn.com/image/fetch/$s_!55P2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 848w, https://substackcdn.com/image/fetch/$s_!55P2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 1272w, https://substackcdn.com/image/fetch/$s_!55P2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cf8d085-a359-489e-8a6e-0dfb7a770380_1206x520.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Notice how the squared errors can get really large?</p><p>RMSE is designed to penalize larger forecast errors more than MAE does.</p><p>Imagine you have one forecast that is way off (i.e., high error) and all the others are spot-on.</p><p>Because the MAE calculation averages errors, it can give a misleading picture of what&#8217;s happening in these hypothetical forecasts.</p><p>Since RMSE penalizes large errors more than small ones, that one large miss will be reflected in the RMSE KPI.</p><p>Next up are the final steps of the RMSE calculation:</p><ul><li><p>Taking the average of the squared errors.</p></li><li><p>Taking the square root of the average (i.e., mean).</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!w8Ps!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!w8Ps!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 424w, https://substackcdn.com/image/fetch/$s_!w8Ps!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 848w, 
https://substackcdn.com/image/fetch/$s_!w8Ps!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 1272w, https://substackcdn.com/image/fetch/$s_!w8Ps!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!w8Ps!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png" width="440" height="299.8139534883721" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c82957bb-d08c-473b-82b7-39817c995f0d_860x586.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:586,&quot;width&quot;:860,&quot;resizeWidth&quot;:440,&quot;bytes&quot;:119056,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!w8Ps!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 424w, 
https://substackcdn.com/image/fetch/$s_!w8Ps!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 848w, https://substackcdn.com/image/fetch/$s_!w8Ps!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 1272w, https://substackcdn.com/image/fetch/$s_!w8Ps!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc82957bb-d08c-473b-82b7-39817c995f0d_860x586.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Y5pj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Y5pj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 424w, https://substackcdn.com/image/fetch/$s_!Y5pj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 848w, https://substackcdn.com/image/fetch/$s_!Y5pj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 1272w, https://substackcdn.com/image/fetch/$s_!Y5pj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Y5pj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png" width="437" height="300.21336459554516" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:586,&quot;width&quot;:853,&quot;resizeWidth&quot;:437,&quot;bytes&quot;:128700,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Y5pj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 424w, https://substackcdn.com/image/fetch/$s_!Y5pj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 848w, https://substackcdn.com/image/fetch/$s_!Y5pj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 1272w, https://substackcdn.com/image/fetch/$s_!Y5pj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13d555f2-40e1-459a-89c1-c285d38de35a_853x586.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And scaling RMSE:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!H2GJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!H2GJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 424w, 
https://substackcdn.com/image/fetch/$s_!H2GJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 848w, https://substackcdn.com/image/fetch/$s_!H2GJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 1272w, https://substackcdn.com/image/fetch/$s_!H2GJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!H2GJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png" width="429" height="321.8757327080891" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/afe94f87-7505-4894-bae0-68d843581b24_853x640.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:640,&quot;width&quot;:853,&quot;resizeWidth&quot;:429,&quot;bytes&quot;:115450,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!H2GJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 424w, https://substackcdn.com/image/fetch/$s_!H2GJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 848w, https://substackcdn.com/image/fetch/$s_!H2GJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 1272w, https://substackcdn.com/image/fetch/$s_!H2GJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe94f87-7505-4894-bae0-68d843581b24_853x640.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_S2-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_S2-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 424w, https://substackcdn.com/image/fetch/$s_!_S2-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 848w, https://substackcdn.com/image/fetch/$s_!_S2-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 1272w, https://substackcdn.com/image/fetch/$s_!_S2-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_S2-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png" width="432" height="321.48837209302326" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:640,&quot;width&quot;:860,&quot;resizeWidth&quot;:432,&quot;bytes&quot;:52972,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192965836?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_S2-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 424w, https://substackcdn.com/image/fetch/$s_!_S2-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 848w, https://substackcdn.com/image/fetch/$s_!_S2-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 1272w, https://substackcdn.com/image/fetch/$s_!_S2-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe5dcbbdf-75ac-486e-8948-9a7ce4d634d9_860x640.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Since the interpretation of RMSE is somewhat mathematical, I&#8217;m going to discuss using RMSE from a practical perspective.</p><p>You can think of RMSE as measuring a different aspect of a model&#8217;s forecasts than MAE: because each error is squared before averaging, RMSE penalizes large misses more heavily.</p><p>Unlike MAE, there isn&#8217;t an intuitive, direct understanding of RMSE. RMSE is most useful when comparing one forecasting model to another.</p><p>I will demonstrate using RMSE this way in a later tutorial.</p><p>Stay tuned.</p><p>&#128073; Ready to learn more? 
<strong><a href="https://thediydatascientist.substack.com/p/forecasting-with-excel-part-4-is">Check out Part 4 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this week.</p><p>My next newsletter will discuss a critical aspect of building better forecasts - evaluating whether your forecasting model is any good.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses</strong> that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b0d486d9-5ceb-44b9-b8e7-596e8fa6d3c6_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/150639c0-b1d1-4d6e-ab03-3afac296003e_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8642d001-1a7f-4525-b498-4e648d5320fb_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/df3ec7b4-289d-4d55-848d-209a45d1c099_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" 
href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 2: Detecting Trend & Seasonality]]></title><description><![CDATA[Learn the Microsoft Excel forecasting skills that should be (but are not) taught to professionals.]]></description><link>https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-f38</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-f38</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Wed, 01 Apr 2026 16:42:04 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/a7454b0c-ad88-43d8-bd42-420d521df7a2_1200x630.png" length="0" type="image/png"/><content:encoded><![CDATA[<p>If you&#8217;re new to this tutorial series, be sure to <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">check out Part 1 here</a></strong>.</p><p>The topic of this week's tutorial is detecting trend and seasonality in a time series using out-of-the-box Excel features.</p><p>If you would like 
to follow along with today's tutorial (highly recommended), you will need to download the <em>SalesTimeSeries.xlsx</em> file from the newsletter's <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>Your First Time Series Model</h4><p>The best way to learn data-driven forecasting is to start with simple models and gradually add complexity.</p><p>Simple forecasting models are also critical for establishing a baseline you use to assess whether a more complex model improves decision-making.</p><p>In this tutorial, you will learn about one of the simplest baseline forecasting models - the simple moving average.</p><p>Here&#8217;s how a moving average works:</p><ul><li><p>Forecasts are made by taking the average of a fixed number of previous target values.</p></li><li><p>As the forecaster, you pick how many previous target values to use in the moving average.</p></li></ul><p>Moving averages, I find, are quite intuitive when you see them in action.</p><p>Let&#8217;s assume a 6-month simple moving average. 
Using the data from the <em>Sales</em> worksheet of the tutorial&#8217;s Excel workbook:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CVXE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CVXE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 424w, https://substackcdn.com/image/fetch/$s_!CVXE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 848w, https://substackcdn.com/image/fetch/$s_!CVXE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 1272w, https://substackcdn.com/image/fetch/$s_!CVXE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!CVXE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png" width="434" height="253.16666666666666" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:420,&quot;width&quot;:720,&quot;resizeWidth&quot;:434,&quot;bytes&quot;:135832,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!CVXE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 424w, https://substackcdn.com/image/fetch/$s_!CVXE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 848w, https://substackcdn.com/image/fetch/$s_!CVXE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 1272w, https://substackcdn.com/image/fetch/$s_!CVXE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a836bbf-e2e4-40c8-824e-33ec0f78a3ca_720x420.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Hitting the <em>&lt;enter&gt;</em> key will create a new column, which can be renamed <em>Forecast</em>. 
Dragging the formula down the length of the column produces a collection of forecasts:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!UAMQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!UAMQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 424w, https://substackcdn.com/image/fetch/$s_!UAMQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 848w, https://substackcdn.com/image/fetch/$s_!UAMQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 1272w, https://substackcdn.com/image/fetch/$s_!UAMQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!UAMQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png" width="366" height="358.3993610223642" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:613,&quot;width&quot;:626,&quot;resizeWidth&quot;:366,&quot;bytes&quot;:193901,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!UAMQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 424w, https://substackcdn.com/image/fetch/$s_!UAMQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 848w, https://substackcdn.com/image/fetch/$s_!UAMQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 1272w, https://substackcdn.com/image/fetch/$s_!UAMQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42f241b4-70b4-4ce7-8e4d-ad16678829ee_626x613.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The next tutorial in this series will evaluate the predictive performance of a moving-average model. In this tutorial, we&#8217;ll use a moving average to determine the trend of a time series.</p><p>A common shorthand notation for moving-average models is MA(X), where X denotes the number of previous targets used in the average.</p><p>The above 6-month moving-average model is abbreviated as <em>MA(6)</em>.</p><div><hr></div><h4>Detecting Trend</h4><p>In this tutorial, you will learn a simple way of detecting a trend in a time series using an <em>MA(12)</em> model. While this technique is easy to implement using out-of-the-box Excel features, it has some issues.</p><p>Luckily, since there&#8217;s Python in Excel, you have access to more powerful tools. 
A later tutorial will demonstrate why <a href="https://www.daveondata.com/machine-learning-forecasting-consulting-info">my clients are moving to Python in Excel</a> so they can use tools like the mighty Python <em>statsmodels</em> package.</p><p>In the <em>Sales</em> worksheet, delete column <em>D</em> and repeat the moving average modeling process above, but using 12 data points instead:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!V6kR!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!V6kR!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 424w, https://substackcdn.com/image/fetch/$s_!V6kR!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 848w, https://substackcdn.com/image/fetch/$s_!V6kR!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 1272w, https://substackcdn.com/image/fetch/$s_!V6kR!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!V6kR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png" width="422" 
height="410.5945945945946" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:720,&quot;width&quot;:740,&quot;resizeWidth&quot;:422,&quot;bytes&quot;:228168,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!V6kR!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 424w, https://substackcdn.com/image/fetch/$s_!V6kR!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 848w, https://substackcdn.com/image/fetch/$s_!V6kR!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 1272w, https://substackcdn.com/image/fetch/$s_!V6kR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc79ec58c-c4dc-45bd-9ab4-e0ed869f84de_740x720.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Then set up the column by renaming it to <em>MA(12)</em> and dragging the formula all the way down the length of the table:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!GFDU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!GFDU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 424w, 
https://substackcdn.com/image/fetch/$s_!GFDU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 848w, https://substackcdn.com/image/fetch/$s_!GFDU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 1272w, https://substackcdn.com/image/fetch/$s_!GFDU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!GFDU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png" width="363" height="470.2369668246445" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:820,&quot;width&quot;:633,&quot;resizeWidth&quot;:363,&quot;bytes&quot;:233124,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!GFDU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 424w, https://substackcdn.com/image/fetch/$s_!GFDU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 848w, https://substackcdn.com/image/fetch/$s_!GFDU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 1272w, https://substackcdn.com/image/fetch/$s_!GFDU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0717c8fe-354b-4ba1-bf12-380e0de47215_633x820.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>To detect a trend, you now need a line chart of all three columns. First, click on any cell in the <em>Sales</em> table. Then insert a line chart using the Ribbon:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!IZ0i!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!IZ0i!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 424w, https://substackcdn.com/image/fetch/$s_!IZ0i!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 848w, https://substackcdn.com/image/fetch/$s_!IZ0i!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 1272w, https://substackcdn.com/image/fetch/$s_!IZ0i!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!IZ0i!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png" width="675" height="349.55357142857144" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:754,&quot;width&quot;:1456,&quot;resizeWidth&quot;:675,&quot;bytes&quot;:125886,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!IZ0i!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 424w, https://substackcdn.com/image/fetch/$s_!IZ0i!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 848w, https://substackcdn.com/image/fetch/$s_!IZ0i!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 1272w, 
https://substackcdn.com/image/fetch/$s_!IZ0i!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c0c6d3b-d2c2-4681-9619-d3232d895920_1506x780.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And the line chart tells the story of the trend in the time series:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!eDdx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eDdx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 424w, https://substackcdn.com/image/fetch/$s_!eDdx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 848w, https://substackcdn.com/image/fetch/$s_!eDdx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 1272w, https://substackcdn.com/image/fetch/$s_!eDdx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!eDdx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png" width="1456" height="568" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:568,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:160016,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eDdx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 424w, https://substackcdn.com/image/fetch/$s_!eDdx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 848w, https://substackcdn.com/image/fetch/$s_!eDdx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 1272w, https://substackcdn.com/image/fetch/$s_!eDdx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09bccf17-fe34-4289-a404-9c4cba5b4815_2546x993.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Voila!</p><p>The orange <em>MA(12)</em> line clearly has an upward trend.</p><div><hr></div><h4>Detecting Seasonality</h4><p>As with trends, Python in Excel provides a more sophisticated way of detecting seasonality, which I will cover in a later tutorial.</p><p>This week&#8217;s tutorial will use out-of-the-box features to help you detect if seasonality is present in a time series.</p><p>Here&#8217;s the intuition.</p><p>The <em>Sales</em> time series data is at the monthly grain, so the goal is to detect whether certain months tend to have higher target values than others and whether there is a pattern among these higher months.</p><p>A <em>Box and Whisker</em> plot (i.e., box plot) can be used to visualize this, but the <em>Sales</em> table isn&#8217;t set up correctly for Excel box plots. 
The good news is that setting this up is simple.</p><p>First, we need to create a column that Excel recognizes as a date column. Insert a new column named <em>Date</em> between <em>YearMonth</em> and <em>Sales</em> and enter the following formula:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JUq5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JUq5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 424w, https://substackcdn.com/image/fetch/$s_!JUq5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 848w, https://substackcdn.com/image/fetch/$s_!JUq5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 1272w, https://substackcdn.com/image/fetch/$s_!JUq5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JUq5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png" width="600" height="104.51206715634838" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:166,&quot;width&quot;:953,&quot;resizeWidth&quot;:600,&quot;bytes&quot;:66841,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JUq5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 424w, https://substackcdn.com/image/fetch/$s_!JUq5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 848w, https://substackcdn.com/image/fetch/$s_!JUq5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 1272w, https://substackcdn.com/image/fetch/$s_!JUq5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1cb6c2ac-6742-48f8-88eb-22a0d46564b3_953x166.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Hitting <em>&lt;enter&gt;</em> populates the column, but you will need to tell Excel to format the column as a date explicitly:</p><div class="captioned-image-container"><figure><a 
class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!XSY8!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!XSY8!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 424w, https://substackcdn.com/image/fetch/$s_!XSY8!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 848w, https://substackcdn.com/image/fetch/$s_!XSY8!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 1272w, https://substackcdn.com/image/fetch/$s_!XSY8!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!XSY8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png" width="479" height="203.82978723404256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4ea00376-248d-4513-a903-db3e7326ff54_846x360.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:360,&quot;width&quot;:846,&quot;resizeWidth&quot;:479,&quot;bytes&quot;:59095,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!XSY8!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 424w, https://substackcdn.com/image/fetch/$s_!XSY8!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 848w, https://substackcdn.com/image/fetch/$s_!XSY8!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 1272w, https://substackcdn.com/image/fetch/$s_!XSY8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ea00376-248d-4513-a903-db3e7326ff54_846x360.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>With the <em>Date</em> column created, you can extract the month abbreviations by inserting a <em>Month</em> column and entering the following formula:</p><div 
class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gNer!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gNer!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 424w, https://substackcdn.com/image/fetch/$s_!gNer!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 848w, https://substackcdn.com/image/fetch/$s_!gNer!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 1272w, https://substackcdn.com/image/fetch/$s_!gNer!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gNer!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png" width="288" height="112.22535211267606" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:166,&quot;width&quot;:426,&quot;resizeWidth&quot;:288,&quot;bytes&quot;:15011,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!gNer!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 424w, https://substackcdn.com/image/fetch/$s_!gNer!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 848w, https://substackcdn.com/image/fetch/$s_!gNer!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 1272w, https://substackcdn.com/image/fetch/$s_!gNer!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb21f90ed-64f8-4ba5-9c6a-f327b8a62cf3_426x166.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Hitting <em>&lt;enter&gt;</em> populates the column:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!u2OA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!u2OA!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 424w, https://substackcdn.com/image/fetch/$s_!u2OA!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 848w, https://substackcdn.com/image/fetch/$s_!u2OA!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 1272w, https://substackcdn.com/image/fetch/$s_!u2OA!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!u2OA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png" width="587" height="196.94315004659833" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:360,&quot;width&quot;:1073,&quot;resizeWidth&quot;:587,&quot;bytes&quot;:147321,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!u2OA!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 424w, https://substackcdn.com/image/fetch/$s_!u2OA!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 848w, https://substackcdn.com/image/fetch/$s_!u2OA!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 1272w, https://substackcdn.com/image/fetch/$s_!u2OA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314f89f7-b194-493c-83b6-0de2641c4000_1073x360.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Now, insert a box plot from the Ribbon:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!Wf1N!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Wf1N!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 424w, https://substackcdn.com/image/fetch/$s_!Wf1N!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 848w, https://substackcdn.com/image/fetch/$s_!Wf1N!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 1272w, https://substackcdn.com/image/fetch/$s_!Wf1N!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Wf1N!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png" width="340" height="487.3720136518771" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/55136313-db88-45ca-8f84-7318124e8ed1_586x840.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:840,&quot;width&quot;:586,&quot;resizeWidth&quot;:340,&quot;bytes&quot;:84487,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Wf1N!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 424w, https://substackcdn.com/image/fetch/$s_!Wf1N!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 848w, https://substackcdn.com/image/fetch/$s_!Wf1N!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 1272w, https://substackcdn.com/image/fetch/$s_!Wf1N!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55136313-db88-45ca-8f84-7318124e8ed1_586x840.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And here's the resulting visualization:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nCfd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nCfd!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 424w, 
https://substackcdn.com/image/fetch/$s_!nCfd!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 848w, https://substackcdn.com/image/fetch/$s_!nCfd!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 1272w, https://substackcdn.com/image/fetch/$s_!nCfd!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!nCfd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png" width="1456" height="967" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:967,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:30665,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192857660?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!nCfd!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 424w, https://substackcdn.com/image/fetch/$s_!nCfd!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 848w, https://substackcdn.com/image/fetch/$s_!nCfd!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 1272w, https://substackcdn.com/image/fetch/$s_!nCfd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5f5e19a-2add-455b-9144-5708ac734b71_1700x1129.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The box plot shows the range (i.e., <em>distribution</em>) of sales values for each month.</p><p>For more information on box plots, <a href="https://en.wikipedia.org/wiki/Box_plot">check out the Wikipedia article</a>.</p><p>The box plot indicates that sales are typically higher in the last three months of each year, with a noticeable decline in January. This is likely due to seasonality in the time series.</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-3a8">Check out Part 3 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this newsletter.</p><p>The next newsletter will introduce you to a critical step for better forecasting in Excel: establishing a forecasting baseline.</p><p>Stay healthy, and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" 
data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/09d75716-60ba-48ec-8785-8aba9696311c_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3d2bdd6e-45c4-4ed0-8818-861fd9889fdd_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/58f469ae-7442-4b99-939c-d0b38e857f97_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c23f5e4f-0760-4978-9619-2c672e08dbd7_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! 
Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Better Forecasting with Excel Part 1: The Fundamentals]]></title><description><![CDATA[Learn the Microsoft Excel forecasting skills that should be (but are not) taught to professionals.]]></description><link>https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Mon, 30 Mar 2026 13:46:08 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/87ea2a3f-eea0-4a34-9c7b-8fa528a56133_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>I&#8217;ve been doing analytics for 14+ years. And I&#8217;m a Microsoft Excel MVP. </p><p>So, it probably doesn&#8217;t shock you to know that I&#8217;ve taken online courses designed for Financial Planning &amp; Analysis (FP&amp;A) professionals. My goal in taking these courses was to understand how business professionals learn about data.</p><p>Quite frankly, what I saw in these courses shocked me.</p><p><strong>For example, I took an online Excel forecasting course designed for FP&amp;A professionals, and I was appalled by what wasn&#8217;t included. </strong></p><p>In essence, FP&amp;A professionals did not develop the most important skills for building high-quality forecasts. And this was a course designed to help professionals earn an FP&amp;A certification!</p><p>So, I decided to do something about it. 
Enter this tutorial series.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>Tutorial Materials</h4><p>This week&#8217;s tutorial starts off with fundamental concepts in forecasting. This tutorial is designed for <em><strong>any</strong></em> professional.</p><p>The goal is to provide an intuitive introduction to data-driven forecasting using Excel. Therefore, I will limit the math coverage.</p><p>However, what you will learn is legit techniques for creating data-driven forecasts using native Excel features.</p><p>If you would like to follow along with today&#8217;s tutorial (highly recommended), you will need to download the <em>SalesTimeSeries.xlsx</em> file from the newsletter&#8217;s <a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a>.</p><div><hr></div><h4>What is Forecasting?</h4><p>Here&#8217;s a definition of forecasting from Wikipedia:</p><p><em>&#8220;Forecasting is the process of making predictions based on past and present data. Later, these can be compared with what actually happens.&#8221;</em></p><p>Based on this definition, you would be right to think that any form of predictive analytics is the same as forecasting.</p><p><strong>However, the common interpretation of </strong><em><strong>forecasting</strong></em><strong> is creating predictions from time-series data.</strong></p><p>You can think of this time series data as being like a crystal ball that allows you to predict what&#8217;s going to happen in the future.</p><p>For example, predicting monthly sales for the next twelve months using historical data.</p><p>Time series forecasting is a universal skill. 
Here are some examples from various industries and business functions:</p><ul><li><p>[Supply Chain] - Forecasting product demand.</p></li><li><p>[Customer Service] - Forecasting call volume.</p></li><li><p>[Healthcare] - Forecasting hospital supplies.</p></li><li><p>[IT] - Forecasting server/cloud usage.</p></li><li><p>[Finance] - Forecasting revenue.</p></li></ul><p>And the list goes on!</p><p><strong>What you will learn in this tutorial series is skills that can be applied to any forecasting problem, regardless of domain.</strong></p><p>Therefore, the exact nature of the dataset used in the tutorial series doesn&#8217;t matter.</p><div><hr></div><h4>What is a Time Series?</h4><p>In these tutorials, a time series is defined as a set of measurements collected at regular time intervals. These measurements can include anything, such as sales, demand, advertising spend, claims, etc.</p><p>The combination of measurement and a regular time interval is referred to as the <em>grain</em> of the data.</p><p>Consider the following snippet of data from the <em>Sales</em> worksheet of the tutorial Excel workbook:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!for_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!for_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 424w, https://substackcdn.com/image/fetch/$s_!for_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 848w, 
https://substackcdn.com/image/fetch/$s_!for_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 1272w, https://substackcdn.com/image/fetch/$s_!for_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!for_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png" width="232" height="174.50434782608696" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:346,&quot;width&quot;:460,&quot;resizeWidth&quot;:232,&quot;bytes&quot;:95661,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192528328?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!for_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 424w, 
https://substackcdn.com/image/fetch/$s_!for_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 848w, https://substackcdn.com/image/fetch/$s_!for_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 1272w, https://substackcdn.com/image/fetch/$s_!for_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa842f499-102c-4e0d-b649-74dfd4de53f3_460x346.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>This is a time series dataset at the grain of <em>sales per month</em>.</p><p>Most classic (i.e., statistical) time series forecasting techniques work with a single measurement at a time, like the time series pictured above.</p><p>However, a time series can also include additional data. 
For example:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zvRR!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zvRR!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 424w, https://substackcdn.com/image/fetch/$s_!zvRR!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 848w, https://substackcdn.com/image/fetch/$s_!zvRR!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 1272w, https://substackcdn.com/image/fetch/$s_!zvRR!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!zvRR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png" width="417" height="178.2094430992736" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:353,&quot;width&quot;:826,&quot;resizeWidth&quot;:417,&quot;bytes&quot;:148007,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192528328?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zvRR!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 424w, https://substackcdn.com/image/fetch/$s_!zvRR!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 848w, https://substackcdn.com/image/fetch/$s_!zvRR!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 1272w, https://substackcdn.com/image/fetch/$s_!zvRR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe39ddfa-9038-4286-bf1f-c738e12fbcf6_826x353.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p><strong>In 2026, <a href="https://www.daveondata.com/machine-learning-forecasting-consulting-info">my clients want state-of-the-art forecasting</a> (e.g., machine learning) that 
leverages a wide range of additional data (e.g., promotions, weather, planned expansions, etc.) to improve decision-making.</strong></p><p>For example, in the image above, incorporating <em>MarketingSpend</em> data might produce better forecasts than using <em>Sales</em> data alone.</p><p>To keep all of this data straight, I&#8217;m going to use the following terminology in this tutorial series:</p><ul><li><p><strong>Timestamps</strong> denote the time period for which the data is applicable. In the image above, <em>YearMonth</em> is the timestamp for the data.</p></li><li><p><strong>Targets</strong> are the measurement values to be forecasted. In the image above, the <em>Sales</em> column contains the targets.</p></li><li><p><strong>Features</strong> are additional measurements/data that can be used to improve target forecasts. In the example above, <em>MarketingSpend</em> is a feature.</p></li></ul><div><hr></div><h4>Time Series Forecasting Models</h4><p>To craft data-driven forecasts, we use <em>models</em>. Just in case you&#8217;re new to the concept of a model, here&#8217;s a definition from Wikipedia:</p><p><em>&#8220;A model is an informative representation of an object, person, or system.&#8221;</em></p><p>This definition is great for this tutorial series, because we&#8217;re looking to build an informative representation of the system that produces the targets (e.g., sales).</p><p>Notice how the definition uses <em>informative</em> and not <em>definitive</em>?</p><p>This is a critical idea embodied by the following famous quote from George Box:</p><p><em>&#8220;All models are wrong, but some are useful.&#8221;</em></p><p><strong>Make no mistake. Your forecasting models will always be wrong to some degree.</strong></p><p>The question isn&#8217;t whether your forecasting model is correct. 
It&#8217;s whether your forecasting model produces &#8220;good enough&#8221; predictions so that effective decisions can be made.</p><p><strong>Ideally, your model is better than the one being used, so better decisions can be made, and you look like a hero at work.</strong></p><div><hr></div><h4>Time Series Characteristics</h4><p>When thinking about what makes a good forecasting model, we can decompose target values over time into three characteristics:</p><ol><li><p><strong>Trend</strong> is the tendency for target values to increase/decrease over time.</p></li><li><p><strong>Seasonality</strong> is a pattern in the targets that appears at regular intervals (e.g., higher retail sales starting with &#8220;Black Friday&#8221; each year).</p></li><li><p>The <strong>remainder</strong> is what is left over from the target values once trend and seasonality are accounted for (i.e., removed from the original target values).</p></li></ol><p>Before I discuss the first two in the context of the tutorial time series, I want to provide some additional context for the remainder.</p><p>The remainder is also known by other names:</p><ul><li><p>Residual</p></li><li><p>Noise</p></li><li><p>Error</p></li></ul><p>Think of the remainder of a time series as being the result of your model being wrong to some degree (i.e., if your model&#8217;s forecasts were perfect, there wouldn&#8217;t be any remainder).</p><p>OK. 
The easiest way to build an intuition about trend and seasonality is to use a data visualization.</p><p>In the tutorial Excel workbook, select the <em>Sales</em> worksheet and click on any cell within the table:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!RSmw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!RSmw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 424w, https://substackcdn.com/image/fetch/$s_!RSmw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 848w, https://substackcdn.com/image/fetch/$s_!RSmw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 1272w, https://substackcdn.com/image/fetch/$s_!RSmw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!RSmw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png" width="252" height="208.73819742489272" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:386,&quot;width&quot;:466,&quot;resizeWidth&quot;:252,&quot;bytes&quot;:30383,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192528328?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!RSmw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 424w, https://substackcdn.com/image/fetch/$s_!RSmw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 848w, https://substackcdn.com/image/fetch/$s_!RSmw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 1272w, https://substackcdn.com/image/fetch/$s_!RSmw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F093842de-6c3b-4b9c-8c0e-57bcc308815a_466x386.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Now, use the Ribbon to insert a mighty line chart:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!dc7w!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!dc7w!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 424w, https://substackcdn.com/image/fetch/$s_!dc7w!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 848w, https://substackcdn.com/image/fetch/$s_!dc7w!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 1272w, https://substackcdn.com/image/fetch/$s_!dc7w!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!dc7w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png" width="523" height="351.5245901639344" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:820,&quot;width&quot;:1220,&quot;resizeWidth&quot;:523,&quot;bytes&quot;:116589,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192528328?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!dc7w!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 424w, https://substackcdn.com/image/fetch/$s_!dc7w!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 848w, https://substackcdn.com/image/fetch/$s_!dc7w!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 1272w, https://substackcdn.com/image/fetch/$s_!dc7w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb99bdb74-bda8-4044-a8e0-6786088b4b7b_1220x820.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>First up, the line chart gives us a strong indication that there is a trend in the time series:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lEmb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lEmb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 424w, 
https://substackcdn.com/image/fetch/$s_!lEmb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 848w, https://substackcdn.com/image/fetch/$s_!lEmb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 1272w, https://substackcdn.com/image/fetch/$s_!lEmb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lEmb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png" width="1456" height="616" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:616,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:63381,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192528328?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!lEmb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 424w, https://substackcdn.com/image/fetch/$s_!lEmb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 848w, https://substackcdn.com/image/fetch/$s_!lEmb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 1272w, https://substackcdn.com/image/fetch/$s_!lEmb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54ed5fc4-2e0d-456d-990e-a5456e8359ad_2799x1185.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Seasonality is a bit more challenging to spot, requiring hovering over the various data points in the line chart and checking the values:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!fvns!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!fvns!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 424w, https://substackcdn.com/image/fetch/$s_!fvns!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 848w, https://substackcdn.com/image/fetch/$s_!fvns!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 1272w, https://substackcdn.com/image/fetch/$s_!fvns!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!fvns!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png" width="1456" height="620" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3026cb63-5992-4261-8614-dfae9feea753_2037x867.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:620,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:74425,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192528328?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!fvns!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 424w, https://substackcdn.com/image/fetch/$s_!fvns!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 848w, https://substackcdn.com/image/fetch/$s_!fvns!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 1272w, https://substackcdn.com/image/fetch/$s_!fvns!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3026cb63-5992-4261-8614-dfae9feea753_2037x867.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>If you inspect the data points, you typically see more sales in the last months of the year than in the other months.</p><p>As with so many real-world situations, this is not a cut-and-dried pattern in the data.</p><p>Using the tutorial&#8217;s time series as a frame of reference, a useful forecasting model will need to accurately predict both the upward trend and the apparent seasonality in the data.</p><p>While simultaneously minimizing the remainder as much as possible.</p><p>&#128073; Ready to learn more? 
<strong><a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part-f38">Check out Part 2 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this newsletter.</p><p>The next newsletter will cover more robust techniques for characterizing <em>trend</em> and <em>seasonality</em> by introducing simple models.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d84b444c-0480-40de-9ee8-b090d71ebd39_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d4c4050a-a8bb-4f16-98a7-420b03c54ae4_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ed23df78-61d2-46eb-9395-a08850588805_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d272ac8d-a168-4a5c-b3fa-9efb3836d4db_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button 
primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Market Basket Analysis Part 5: Feature Engineering]]></title><description><![CDATA[This week's tutorial focuses on engineering features for market basket analysis (MBA).]]></description><link>https://thediydatascientist.substack.com/p/market-basket-analysis-part-5-feature</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/market-basket-analysis-part-5-feature</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Fri, 27 Mar 2026 13:43:52 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/21f0dda5-966a-4b5d-9802-c51a9356dcb5_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>This week's tutorial focuses on engineering features for market basket analysis (MBA). 
If you&#8217;re new to this tutorial series and need to catch up, here are the previous posts:</p><ul><li><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-1-introduction">Part 1: Introduction</a></p></li><li><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-2-communicating">Part 2: Communicating Results</a></p></li><li><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-3">Part 3: Python Code</a></p></li><li><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-4-targeting">Part 4: Targeting Specific Outcomes</a></p></li></ul><p>As the best way to learn DIY data science skills is through practice, I highly recommend following along with this tutorial in your preferred tool (e.g., Python in Excel).</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>Don&#8217;t Make This Mistake</h4><p>When professionals first learn MBA, a common reaction is something like this:</p><p><em>&#8220;Wait a minute, Dave. You mean I can only use true/false (i.e., binary) features with MBA? That seems like a huge problem for my analysis/domain/business problem!&#8221;</em></p><p>As it turns out, the requirement to use binary features is quite common in do-it-yourself (DIY) data science. 
Here are two examples:</p><ul><li><p>Linear/Logistic regression requires transforming categorical data into binary features.</p></li><li><p>Many machine learning algorithms offered by Python&#8217;s <em>scikit-learn</em> library require transforming categorical data into binary features.</p></li></ul><p><strong>So, don&#8217;t underestimate the power of MBA simply because you need to use binary features!</strong></p><p>As you build your DIY data science skills, a common theme across the techniques you use will be spending more time working with the data (e.g., cleaning) than you do writing code to perform the actual analysis.</p><p>Many years ago, research on data mining projects (think of data mining as what data science was called back then) was conducted. The research showed that between 60% and 80% of project effort was devoted to tasks like:</p><ul><li><p>Getting access to the data.</p></li><li><p>Understanding the data.</p></li><li><p>Cleaning the data.</p></li></ul><p>Based on my experience as an <a href="https://www.daveondata.com/data-science-consulting-info">analytics consultant</a>, nothing has changed in 2026.</p><p>These days, it&#8217;s common to refer to all of these data-related activities as <em>data wrangling</em>.</p><p>While all of these activities are critical to success, the most important aspect of data wrangling is <em>feature engineering</em>. 
Feature engineering is the process of crafting the best data possible for your analysis.</p><p>In this regard, the successful use of MBA is no different than logistic regression or random forest predictive models:</p><p><strong>The best analytics results come from the best features.</strong></p><p>In this tutorial, you are going to learn the following two patterns that I&#8217;ve used with MBA to provide insights that have delighted business stakeholders:</p><ul><li><p>Presence</p></li><li><p>Magnitude</p></li></ul><p>Let&#8217;s dive in.</p><div><hr></div><h4>Presence Features</h4><p>This MBA feature pattern is what you&#8217;re already familiar with from the tutorials in this series.</p><p>BTW - <em>Feature</em> is just another name for <em>column</em>, <em>variable</em>, or <em>attribute</em>.</p><p>Using the grocery store example from previous tutorials, consider whether a purchase (i.e., a transaction) includes <em>whole milk</em>.</p><p>The following <em>DataFrame</em> illustrates a hypothetical grocery store dataset built around presence features:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qilK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qilK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 424w, https://substackcdn.com/image/fetch/$s_!qilK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 848w, 
https://substackcdn.com/image/fetch/$s_!qilK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 1272w, https://substackcdn.com/image/fetch/$s_!qilK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qilK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png" width="420" height="220.26058631921825" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:644,&quot;width&quot;:1228,&quot;resizeWidth&quot;:420,&quot;bytes&quot;:72964,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!qilK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 424w, 
https://substackcdn.com/image/fetch/$s_!qilK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 848w, https://substackcdn.com/image/fetch/$s_!qilK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 1272w, https://substackcdn.com/image/fetch/$s_!qilK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c6f52d-3b7f-4ee4-a11c-a1d3d00d2f0e_1228x644.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>While the above dataset uses <em>1s</em> and <em>0s</em> to indicate presence, <em>True</em>/<em>False</em> values could be used instead, as Python treats them as equivalent.</p><p><em>One-hot encoding</em> is a common technique for transforming categorical features into a binary representation (e.g., for use with the <em>scikit-learn</em> library).</p><p>Imagine you have a dataset tracking the results of the Olympic Games with a <em>Medal</em> feature to track each athlete&#8217;s results, like the following:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YHSS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YHSS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 424w, 
https://substackcdn.com/image/fetch/$s_!YHSS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 848w, https://substackcdn.com/image/fetch/$s_!YHSS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 1272w, https://substackcdn.com/image/fetch/$s_!YHSS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YHSS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png" width="122" height="199.1264367816092" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:568,&quot;width&quot;:348,&quot;resizeWidth&quot;:122,&quot;bytes&quot;:23553,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!YHSS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 424w, https://substackcdn.com/image/fetch/$s_!YHSS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 848w, https://substackcdn.com/image/fetch/$s_!YHSS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 1272w, https://substackcdn.com/image/fetch/$s_!YHSS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199e723-ec3d-415c-9319-81c9ca68c02a_348x568.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>After one-hot encoding the <em>Medal</em> feature, the resulting <em>DataFrame</em> would look something like this:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7nfv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7nfv!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 424w, https://substackcdn.com/image/fetch/$s_!7nfv!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 848w, 
https://substackcdn.com/image/fetch/$s_!7nfv!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 1272w, https://substackcdn.com/image/fetch/$s_!7nfv!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7nfv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png" width="304" height="191.16157205240174" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:576,&quot;width&quot;:916,&quot;resizeWidth&quot;:304,&quot;bytes&quot;:35660,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7nfv!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 424w, 
https://substackcdn.com/image/fetch/$s_!7nfv!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 848w, https://substackcdn.com/image/fetch/$s_!7nfv!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 1272w, https://substackcdn.com/image/fetch/$s_!7nfv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9771622b-ef85-4eb4-a8ca-bd827dfef1a5_916x576.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>The result of one-hot encoding is a presence indicator (e.g., 1/0) and can be used with MBA.</p><p>The following code demonstrates how to use one-hot encoding on categorical columns using the <em>Adult.xlsx</em> Excel workbook available in the newsletter&#8217;s <a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a>, which contains data about US residents (e.g., age, occupation, income level, etc.).</p><p>Here&#8217;s the Python in Excel code:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3aas!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3aas!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 424w, 
https://substackcdn.com/image/fetch/$s_!3aas!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 848w, https://substackcdn.com/image/fetch/$s_!3aas!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 1272w, https://substackcdn.com/image/fetch/$s_!3aas!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3aas!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png" width="1452" height="880" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:880,&quot;width&quot;:1452,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:144736,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!3aas!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 424w, https://substackcdn.com/image/fetch/$s_!3aas!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 848w, https://substackcdn.com/image/fetch/$s_!3aas!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 1272w, https://substackcdn.com/image/fetch/$s_!3aas!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F664ae5a3-209b-4cbd-ae92-ac2d3d06d9a7_1452x880.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="pullquote"><p><strong>If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><p>And here&#8217;s the Python code that you can cut and paste:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;e83fdc0c-9b8f-4fe8-a48c-3b271338e2ac&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from sklearn.preprocessing import OneHotEncoder

# Load the Excel table
adult_train = xl("AdultTrain[#All]", headers = True)

# Features to one-hot encode
cat_features = ['Education', 'Occupation']

# Instantiate the encoder
encoder = OneHotEncoder(sparse_output = False)
encoder.set_output(transform = 'pandas')

# Learn encodings and transform data
train_cat = encoder.fit_transform(adult_train[cat_features])</code></pre></div><p>While you can use any Python technology (e.g., Jupyter Notebook) for this tutorial series, many professionals new to analytics find Excel much less intimidating. </p><p>In the code above, please note the following:</p><ul><li><p>The <em>OneHotEncoder</em> object is coded to output a <em>DataFrame</em> - this is usually what you want.</p></li><li><p>The <em>OneHotEncoder</em> object is only encoding the two features specified by <em>cat_features</em>.</p></li><li><p>The one-hot encoded data is returned and stored as <em>train_cat</em>.</p></li></ul><p>Using Python in Excel, you can quickly look at the resulting<em> DataFrame</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!a1wZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!a1wZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 424w, https://substackcdn.com/image/fetch/$s_!a1wZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 848w, https://substackcdn.com/image/fetch/$s_!a1wZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 1272w, 
https://substackcdn.com/image/fetch/$s_!a1wZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!a1wZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png" width="1456" height="908" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:908,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:54095,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!a1wZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 424w, https://substackcdn.com/image/fetch/$s_!a1wZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 848w, 
https://substackcdn.com/image/fetch/$s_!a1wZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 1272w, https://substackcdn.com/image/fetch/$s_!a1wZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc15871b1-7d67-4209-ae70-2b127a62bce7_1670x1042.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>As shown in the card above, the horizontal scroll bar indicates that there are many more one-hot encoded columns to the right, which 
are not depicted in the image.</p><p>Also note the <em>Education_</em> prefix for each feature name. This naming convention indicates which original feature served as the source of the one-hot encoding (i.e., the <em>Education_10th</em> feature represents the value of <em>10th</em> coming from the <em>Education</em> feature).</p><p>But that&#8217;s not all.</p><div><hr></div><h4>Engineering Numeric Presence Features for MBA</h4><p>There are many instances where you have a numeric feature that can easily be engineered into presence features for MBA. The running grocery store example of this tutorial series is a prime example. </p><p>Regardless of the count of a particular product within a transaction (e.g., three bottles of mustard), a simple binary indicator is all that is needed.</p><p>There are also times when numeric features do not represent counts, but a simple binary indicator of non-zero status is more than sufficient for MBA.</p><p>A great example of this situation is the <em>CapitalGain</em> feature found in the <em>Adult.xlsx</em> Excel workbook. 
Since the distribution of this feature is highly skewed (i.e., most values are 0), a simple presence feature is highly useful for MBA.</p><p>The following Python in Excel code demonstrates how to perform this transformation: </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tFjQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tFjQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 424w, https://substackcdn.com/image/fetch/$s_!tFjQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 848w, https://substackcdn.com/image/fetch/$s_!tFjQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 1272w, https://substackcdn.com/image/fetch/$s_!tFjQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!tFjQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png" width="1452" height="727" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:727,&quot;width&quot;:1452,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:79028,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tFjQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 424w, https://substackcdn.com/image/fetch/$s_!tFjQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 848w, https://substackcdn.com/image/fetch/$s_!tFjQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 1272w, https://substackcdn.com/image/fetch/$s_!tFjQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef641af6-15e8-4101-b9c9-a7accdb7fc35_1452x727.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And here&#8217;s the Python code that you can cut and paste:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;784a5814-f5db-41d0-b387-f3fa54f98968&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Create a presence feature for CapitalGain
has_capital_gain = adult_train['CapitalGain'].gt(0)

# Get the counts of True/False
has_capital_gain.value_counts()</code></pre></div><p>As you might imagine, the two examples covered here are not the only ways to create presence features. The possibilities are endless. Here are two more to spark your creativity:</p><ul><li><p>Detecting the presence of a substring in text data.</p></li><li><p>Detecting if a behavior occurred within X amount of time.</p></li></ul><div><hr></div><h4>Magnitude Features</h4><p>Your MBA can often be more effective if the magnitudes of numeric features can be included as binary indicators. Making this happen is the result of a two-step process:</p><ul><li><p>Transforming the values of numeric features into categories.</p></li><li><p>One-hot encoding the categories.</p></li></ul><p>The first step is more common than you might imagine in analytics and is often referred to as <em>binning</em>. Here are some examples:</p><ul><li><p>Creating histograms requires defining bins for numeric feature data.</p></li><li><p>Decision tree-based machine learning algorithms bin numeric features as part of their learning process.</p></li><li><p>Using <em>deciles</em> to bin numeric feature data for RFM analysis.</p></li></ul><p>I&#8217;ll use the last example of RFM analysis to demonstrate a useful technique for creating magnitude features for MBA. 
First up, RFM stands for:</p><ul><li><p>(R)ecency</p></li><li><p>(F)requency</p></li><li><p>(M)onetary</p></li></ul><p>RFM analysis is an old-school marketing technique that classifies customers based on their RFM scores.</p><p>The most common (but not only) way to calculate the RFM score is by applying the following logic to each of the three RFM features:</p><ul><li><p>Classify each value of each feature in terms of deciles (i.e., bin the values into 10 distinct groups).</p></li><li><p>The top 10% of values receive a score of 9.</p></li><li><p>The next top 10% of values receive a score of 8.</p></li><li><p>And so on, until the bottom 10% of values receive a score of 0.</p></li></ul><p>To cement this type of feature engineering, I will demonstrate using the <em>HoursPerWeek</em> feature from the <em>Adult.xlsx</em> Excel workbook.</p><p>The <em>HoursPerWeek</em> feature illustrates a common problem when using deciles with business data: there are often many duplicates.</p><p>For example, given the business nature of the feature, many of the <em>HoursPerWeek</em> values are <em>40</em> (i.e., the standard US work week). When there are many duplicate values, creating deciles becomes impossible.</p><p>The following code demonstrates using the <em>rank()</em> method first to transform the data so it can be cleanly binned into deciles.</p><p>The code tells <em>rank()</em> to assign a value based on position. For example:</p><ul><li><p>The first value of 40 found would be assigned a rank of 1.</p></li><li><p>The second value of 40 found would be assigned a rank of 2.</p></li><li><p>And so on.</p></li></ul><p>The code then calls the <em>pandas qcut()</em> function to divide the rankings into deciles (i.e., <em>q = 10</em>). 
The following shows the code and the results:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uPjc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uPjc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 424w, https://substackcdn.com/image/fetch/$s_!uPjc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 848w, https://substackcdn.com/image/fetch/$s_!uPjc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 1272w, https://substackcdn.com/image/fetch/$s_!uPjc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!uPjc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png" width="1453" height="1170" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1170,&quot;width&quot;:1453,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:116367,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uPjc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 424w, https://substackcdn.com/image/fetch/$s_!uPjc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 848w, https://substackcdn.com/image/fetch/$s_!uPjc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 1272w, https://substackcdn.com/image/fetch/$s_!uPjc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cb5f58e-177b-41bb-9cb0-198f86cb8de4_1453x1170.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And the Python code for cut and paste:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;1d8e73c6-81a6-41f6-a9d5-d8f6adec32fd&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Handle all the duplicate values
hours_week_rank = adult_train['HoursPerWeek'].rank(method = 'first')

# Transform the ranks into deciles
hours_week_decile = pd.qcut(hours_week_rank, q = 10, labels = False)
hours_week_decile.value_counts().sort_index()</code></pre></div><p>After transforming the <em>HoursPerWeek</em> feature into categorical magnitudes, the next step is to use a <em>OneHotEncoder</em> to encode the data.</p><p>The following code calls the <em>to_frame()</em> method because the <em>OneHotEncoder</em> expects a <em>DataFrame</em> object:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!k7Dl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!k7Dl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 424w, https://substackcdn.com/image/fetch/$s_!k7Dl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 848w, https://substackcdn.com/image/fetch/$s_!k7Dl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 1272w, https://substackcdn.com/image/fetch/$s_!k7Dl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!k7Dl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png" width="1453" height="301" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:301,&quot;width&quot;:1453,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:42708,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!k7Dl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 424w, https://substackcdn.com/image/fetch/$s_!k7Dl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 848w, https://substackcdn.com/image/fetch/$s_!k7Dl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 1272w, https://substackcdn.com/image/fetch/$s_!k7Dl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3b86a0b-0171-4d4d-aa41-2711ed1a4770_1453x301.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Here&#8217;s the Python code for cut and paste:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;8ccd4fae-fbb6-4541-abab-d703c7925813&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Reuse the encoder and make the Series a DataFrame
hours_week_cat = encoder.fit_transform(hours_week_decile.to_frame())</code></pre></div><p>The Python in Excel card gives you a preview of the <em>DataFrame</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!4xj0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!4xj0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 424w, https://substackcdn.com/image/fetch/$s_!4xj0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 848w, https://substackcdn.com/image/fetch/$s_!4xj0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 1272w, https://substackcdn.com/image/fetch/$s_!4xj0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!4xj0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png" width="1251" height="927" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:927,&quot;width&quot;:1251,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:44773,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192141828?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!4xj0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 424w, https://substackcdn.com/image/fetch/$s_!4xj0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 848w, https://substackcdn.com/image/fetch/$s_!4xj0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 1272w, https://substackcdn.com/image/fetch/$s_!4xj0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31094243-62d8-4e8d-8e89-4fb2e7f732a4_1251x927.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Voila! Magnitudes are formatted for your MBA.</p><p>As with presence features, there are many possibilities for engineering magnitude features for MBA. Once again, however, I&#8217;ve found that using the combination of <em>rank()</em> and <em>qcut()</em> is what I&#8217;ve used most often in my analyses.</p><p>I hope this tutorial has gotten you excited about the possibilities of using MBA in your own work. 
As I mentioned in a previous tutorial, I&#8217;ve found over the years that insights born of MBA resonate very well with business stakeholders.</p><p>&#128073; Ready to <strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-6-partnering">partner with AI to do market basket analysis</a></strong>?</p><div><hr></div><p>That&#8217;s it for this week.</p><p>Next week&#8217;s newsletter will start <a href="https://thediydatascientist.substack.com/p/better-forecasting-with-excel-part">a new tutorial series</a> on building better forecasts using Microsoft Excel.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5c4ddc1a-53d1-4f1c-aaa2-85396b365bbd_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fb56331e-1e96-430b-b8e8-07fb0555c19b_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/419d21f5-5e07-4dfb-81cc-eaa18b920b82_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4e577385-5c24-47ac-a91a-8e62c2838b3a_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p 
class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[Market Basket Analysis Part 4: Targeting Specific Outcomes]]></title><description><![CDATA[This week&#8217;s tutorial focuses on targeting a specific outcome of interest (e.g., paid conversion) using market basket analysis (MBA).]]></description><link>https://thediydatascientist.substack.com/p/market-basket-analysis-part-4-targeting</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/market-basket-analysis-part-4-targeting</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Wed, 25 Mar 2026 12:59:28 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/c1ec0db6-f302-4b0e-bb79-17e2792d387d_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>This week&#8217;s tutorial 
focuses on targeting a specific outcome of interest (e.g., paid conversion) using market basket analysis (MBA). If you&#8217;re new to the newsletter and need to catch up, check out Parts 1-3 of this tutorial series:</p><ul><li><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-1-introduction">Part 1: Introduction</a></p></li><li><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-2-communicating">Part 2: Communicating Results</a></p></li><li><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-3">Part 3: Python Code</a></p></li></ul><p>As the best way to learn DIY data science skills is through practice, I highly recommend downloading the data from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong> and writing the code.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>Market Basket Analysis Mines Your Data</h4><p><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-3">Part 3</a> of this tutorial series ended with 225 <em>association rules</em>. When you look at these rules, one thing becomes very clear.</p><p>MBA is very much a data mining technique that discovers all manner of new and interesting patterns in your data.</p><p>This is why grocery store data is a classic example for learning MBA. 
It&#8217;s easy to imagine a grocery store chain looking for new ways to boost revenue, such as creating endcaps in-store or offering coupons.</p><p>However, I need to be crystal clear on the following point.</p><p>It&#8217;s not only retail businesses (e.g., grocery stores) that benefit from mining data for new and interesting patterns using market basket analysis. It&#8217;s applicable in healthcare, government, non-profit, and all for-profit industries.</p><p><strong>**BUT**</strong></p><p>You&#8217;re not limited to using MBA just to mine these patterns. You can also use MBA for targeted analysis.</p><p>Specifically, you can search any mined <em>association rules</em> for a specific <em>consequent</em> (i.e., the right side of an <em>association rule</em>). While I will again use grocery store data as an example in this tutorial, the general pattern applies to any data you can think of.</p><p>BTW - The next tutorial will discuss general feature engineering patterns for MBA that can be applied to any domain.</p><div><hr></div><h4>Targeted Association Rules</h4><p>While the correct term is <em>consequent</em>, I use a more approachable term when using MBA and explaining the results to my business stakeholders - <em>outcome of interest</em>.</p><p>Let&#8217;s say our grocery store bought a large quantity of sausage at a low price. To ensure this purchase is profitable, we need to move the product quickly, as our store&#8217;s shelf space is limited.</p><p>We can use MBA to help us identify opportunities to make this happen by treating sausage purchases as the outcome of interest. In this example, the <em>association rule</em> takes the following general form:</p><ul><li><p>{<em>antecedent</em>} &#8594; {sausage}</p></li></ul><p>The technical term for the left side of an <em>association rule</em> is <em>antecedent</em>. At this stage of the analysis, we don&#8217;t care about the exact antecedents. 
The goal is to mine the rules, evaluate them, and then make a decision.</p><p>NOTE - For brevity, I&#8217;m going to assume you&#8217;ve completed <a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-3">Part 3</a> of this tutorial, so I don&#8217;t have to repeat the code here.</p><p>The following code demonstrates getting only the association rules for a targeted outcome of interest using Python in Excel:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!GKF6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!GKF6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 424w, https://substackcdn.com/image/fetch/$s_!GKF6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 848w, https://substackcdn.com/image/fetch/$s_!GKF6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 1272w, https://substackcdn.com/image/fetch/$s_!GKF6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!GKF6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png" width="708" height="225.1743119266055" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:208,&quot;width&quot;:654,&quot;resizeWidth&quot;:708,&quot;bytes&quot;:15273,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192034460?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!GKF6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 424w, https://substackcdn.com/image/fetch/$s_!GKF6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 848w, https://substackcdn.com/image/fetch/$s_!GKF6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 1272w, https://substackcdn.com/image/fetch/$s_!GKF6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff02340d3-4d2a-47c9-ba67-f515f73be9f6_654x208.png 
1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>And here&#8217;s the Python code that you can cut and paste:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;316a75b1-db49-463b-974e-8cad2fe3a11f&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Create a mask (i.e., filter) for the outcome of interest
# The comparison value is a set literal, so this should keep only rules whose
# consequent is exactly {'sausage'} (presumably compared row by row - confirm
# against the rules DataFrame built in Part 3)
targeted_mask = rules['consequents'] == {'sausage'}

# Filter the rules down to the rows selected by the mask
targeted_rules = rules[targeted_mask]
targeted_rules</code></pre></div><p>While you can use any Python technology (e.g., Jupyter Notebook) for this tutorial series, many professionals new to analytics find Excel much less intimidating. </p><div class="pullquote"><p><strong>If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><p>Getting a large number of rules for the outcome of interest is common when using MBA. The following code sorts the list in descending order based on the <em>lift</em> metric and converts the rules to strings for ease of reading:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZBa7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZBa7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 424w, https://substackcdn.com/image/fetch/$s_!ZBa7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 848w, https://substackcdn.com/image/fetch/$s_!ZBa7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ZBa7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZBa7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png" width="1456" height="690" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:690,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:132817,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192034460?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZBa7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 424w, https://substackcdn.com/image/fetch/$s_!ZBa7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 848w, 
https://substackcdn.com/image/fetch/$s_!ZBa7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 1272w, https://substackcdn.com/image/fetch/$s_!ZBa7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F802bad8e-1177-46a0-9a88-5dda87c3b78e_1460x692.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And the code for copy and paste:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;7adae590-15bb-4367-835e-68a9e76597fc&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Sort the rules for the outcome of interest
# NOTE: sort_values() returns a new DataFrame instead of sorting in place,
# so the result must be assigned back for the sort to take effect.
# Ties on lift are broken by confidence, then by support.
targeted_rules = targeted_rules.sort_values(['lift', 'confidence', 'support'],
                                            ascending = False)

# Convert rules to strings for ease of viewing
# (each itemset is sorted first so the item order is stable, then comma-joined)
targeted_rules["antecedents"] = targeted_rules["antecedents"].apply(lambda x: ", ".join(sorted(x)))
targeted_rules["consequents"] = targeted_rules["consequents"].apply(lambda x: ", ".join(sorted(x)))
targeted_rules</code></pre></div><p>When using Python in Excel, you can change the <em>Python Output</em> to write a <em>DataFrame</em> to the worksheet like so:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!vNrF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!vNrF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 424w, https://substackcdn.com/image/fetch/$s_!vNrF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 848w, https://substackcdn.com/image/fetch/$s_!vNrF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 1272w, https://substackcdn.com/image/fetch/$s_!vNrF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!vNrF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png" width="1455" height="685" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:685,&quot;width&quot;:1455,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:138516,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192034460?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!vNrF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 424w, https://substackcdn.com/image/fetch/$s_!vNrF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 848w, https://substackcdn.com/image/fetch/$s_!vNrF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 1272w, https://substackcdn.com/image/fetch/$s_!vNrF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4a972537-2c1f-4f1d-a066-916377fcb510_1455x685.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This makes reading the <em>association rules</em> much easier:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!S6bE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!S6bE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 424w, 
https://substackcdn.com/image/fetch/$s_!S6bE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 848w, https://substackcdn.com/image/fetch/$s_!S6bE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 1272w, https://substackcdn.com/image/fetch/$s_!S6bE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!S6bE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png" width="1456" height="666" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:666,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:884208,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/192034460?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!S6bE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 424w, https://substackcdn.com/image/fetch/$s_!S6bE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 848w, https://substackcdn.com/image/fetch/$s_!S6bE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 1272w, https://substackcdn.com/image/fetch/$s_!S6bE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6d98edab-5c69-417a-977a-8b8b466b1c30_1880x860.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h4>Evaluating Association Rules</h4><p>When evaluating association rules, here&#8217;s the process to follow:</p><ol><li><p>Prioritize <em>lift</em>.</p></li><li><p>Look for high levels of <em>confidence</em>.</p></li><li><p>Confirm there&#8217;s adequate <em>support</em>.</p></li></ol><p>These metrics were covered in detail in <a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-2-communicating">Part 2</a>. Here&#8217;s a summary:</p><ul><li><p><em>Lift</em> tells you how much more likely the outcome of interest is given the <em>antecedent</em>.</p></li><li><p><em>Confidence</em> gives you the reliability of the <em>association rule</em>.</p></li><li><p><em>Support</em> tells you how frequently the <em>itemset</em> occurs.</p></li></ul><p>Breaking this down for the first rule:</p><ol><li><p>Customers who purchase <em>whole milk</em> and <em>yogurt</em> together are 2.18 times more likely to buy <em>sausage</em> in the same transaction.</p></li><li><p>13.17% of customers who purchase <em>whole milk</em> and <em>yogurt</em> together also purchase <em>sausage</em>.</p></li><li><p>The <em>itemset</em> of {whole milk, yogurt, sausage} represents 0.15% of all transactions.</p></li></ol><p>Here&#8217;s the intuition for successfully applying MBA: It&#8217;s a balancing act between two opposing forces.</p><p>First, higher <em>lift</em> is typically the result of rarer purchases/interactions/behaviors/etc.</p><p>Second, higher <em>support</em> is usually associated (no pun intended) with lower <em>lift</em>.</p><p>For example, consider the following association rule from the list above:</p><ul><li><p>{soda} 
&#8594; {sausage}</p></li></ul><p>This association rule has a <em>support</em> metric 4 times higher than the rule with the highest <em>lift</em>. However, this rule shows barely any <em>lift</em>, scoring only 1.014975.</p><p>This tradeoff shows how knowledge of business drivers is so essential for the successful application of MBA.</p><p>For example, recommending that the store add a refrigerated endcap featuring whole milk, yogurt, and sausage solely based on the highest lift score is unlikely to significantly increase sausage sales.</p><p>However, running ads/specials featuring beverages and sausage might produce better results, despite a lower <em>lift</em>, because it&#8217;s more practical from a business perspective.</p><p>This is just another example of the common theme in DIY data science.</p><p>It&#8217;s all about combining data, technique (e.g., MBA), and business knowledge. That&#8217;s how you stand out at work.</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-5-feature">Check out Part 5 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this week.</p><p>Next week&#8217;s newsletter will continue the tutorial series by discussing engineering features for MBA.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" 
data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1a5fd369-ee9b-44a6-9b6c-ad00286b5eb4_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/947d8881-e442-4861-9439-3f60534670ba_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/85a14a85-6d82-4fb6-bb67-af9e534c3c5d_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8138cdf3-bb92-414b-8025-ad1eea1f51ad_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! 
Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p></p>]]></content:encoded></item><item><title><![CDATA[Market Basket Analysis Part 3: Python Code]]></title><description><![CDATA[This tutorial focuses on the Python code for market basket analysis.]]></description><link>https://thediydatascientist.substack.com/p/market-basket-analysis-part-3</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/market-basket-analysis-part-3</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Mon, 23 Mar 2026 14:35:42 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/672b82a8-5989-47c6-98e6-e89c40195855_1200x630.png" length="0" type="image/png"/><content:encoded><![CDATA[<p>This tutorial focuses on the Python code for market basket analysis. 
Be sure to check out <strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-1-introduction">Part 1</a></strong> and <strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-2-communicating">Part 2</a></strong> of this tutorial series if you&#8217;re new.</p><p>As the best way to learn DIY data science is through practice, I highly suggest you download the data from the newsletter&#8217;s <strong><a href="https://github.com/DaveOnData/DIYDataScientistDatasets">GitHub repository</a></strong> in the <em>MarketBasketPurchases.xlsx </em>file and follow along.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>Loading the Dataset</h4><p>It&#8217;s common for raw transactional data used in market basket analysis to be in the wrong format. The data used in this tutorial demonstrates this. 
Here&#8217;s what the data looks like:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7Nzv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7Nzv!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 424w, https://substackcdn.com/image/fetch/$s_!7Nzv!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 848w, https://substackcdn.com/image/fetch/$s_!7Nzv!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 1272w, https://substackcdn.com/image/fetch/$s_!7Nzv!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7Nzv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png" width="464" height="166.09855072463768" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:247,&quot;width&quot;:690,&quot;resizeWidth&quot;:464,&quot;bytes&quot;:40939,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191772369?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7Nzv!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 424w, https://substackcdn.com/image/fetch/$s_!7Nzv!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 848w, https://substackcdn.com/image/fetch/$s_!7Nzv!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 1272w, https://substackcdn.com/image/fetch/$s_!7Nzv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851679c6-b58d-40f4-be08-75b3f70f25cb_690x247.png 1456w" sizes="100vw" fetchpriority="high"></picture><div></div></div></a></figure></div><p>If you&#8217;re using a technology like Jupyter Notebook, you can get a CSV version of the data from GitHub and load it like this:</p><div class="highlighted_code_block" 
data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;0348aef4-5b1e-4a31-81d5-509cc0369fdf&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">import pandas as pd

# Load the raw purchase data
purchases_raw = pd.read_csv('MarketBasketPurchases.csv')</code></pre></div><p>However, if you&#8217;re just getting started with Python for analytics, I&#8217;m a big fan of using Python in Excel for a number of reasons:</p><ul><li><p>Python in Excel is included with Microsoft 365 subscriptions.</p></li><li><p>Python in Excel runs in the cloud, so you don&#8217;t need to mess with a local installation.</p></li><li><p>Python in Excel includes battle-tested libraries like <em>pandas</em>, <em>mlxtend</em>, <em>statsmodels</em>, and <em>scikit-learn</em> for powerful analytics.</p></li><li><p>99+% of Python in Excel code is exactly the same as using any other Python technology (e.g., Jupyter Notebooks).</p></li></ul><div class="pullquote"><p><strong>If you&#8217;re new to Python, my <a href="https://www.daveondata.com/python-in-excel-accelerator-info">Python in Excel Accelerator</a> online course will teach you the fundamentals you need for analytics in a weekend.</strong></p></div><h4>Getting Started with Python in Excel</h4><p>I want to be crystal clear on this point. 
You don&#8217;t need to use Python in Excel for any of my tutorials, since the Python code is the same.</p><p>However, with my 14+ years of analytics experience, I believe that Python in Excel is the easiest way for any professional to start building real-world analytics skills to stand out at work.</p><p>The first step in getting started with Python in Excel is to navigate to the <em>Formulas</em> section of the Ribbon and click on the <em>Python Editor</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Tzqf!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Tzqf!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 424w, https://substackcdn.com/image/fetch/$s_!Tzqf!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 848w, https://substackcdn.com/image/fetch/$s_!Tzqf!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 1272w, https://substackcdn.com/image/fetch/$s_!Tzqf!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Tzqf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png" width="365" height="276.0095238095238" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/afe3e150-a493-427b-bb78-e35508a81413_525x397.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:397,&quot;width&quot;:525,&quot;resizeWidth&quot;:365,&quot;bytes&quot;:27209,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191772369?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Tzqf!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 424w, https://substackcdn.com/image/fetch/$s_!Tzqf!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 848w, https://substackcdn.com/image/fetch/$s_!Tzqf!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 1272w, https://substackcdn.com/image/fetch/$s_!Tzqf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafe3e150-a493-427b-bb78-e35508a81413_525x397.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This will open the <em>Python Editor</em> pane from the right side of Microsoft Excel:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!L6X9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!L6X9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 424w, https://substackcdn.com/image/fetch/$s_!L6X9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 848w, https://substackcdn.com/image/fetch/$s_!L6X9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 1272w, https://substackcdn.com/image/fetch/$s_!L6X9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!L6X9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png" width="397" height="446.80993788819876" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:906,&quot;width&quot;:805,&quot;resizeWidth&quot;:397,&quot;bytes&quot;:177438,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191772369?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!L6X9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 424w, https://substackcdn.com/image/fetch/$s_!L6X9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 848w, https://substackcdn.com/image/fetch/$s_!L6X9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 1272w, https://substackcdn.com/image/fetch/$s_!L6X9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8bdf105d-b75c-4449-b40b-685e87864ec1_805x906.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Clicking the <em>Add Python cell&#8230;</em> button creates a new kind of Excel formula that will run Python code:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zUm0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zUm0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 424w, https://substackcdn.com/image/fetch/$s_!zUm0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 848w, https://substackcdn.com/image/fetch/$s_!zUm0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 1272w, https://substackcdn.com/image/fetch/$s_!zUm0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!zUm0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png" width="453" height="249.93103448275863" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:512,&quot;width&quot;:928,&quot;resizeWidth&quot;:453,&quot;bytes&quot;:47260,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191772369?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zUm0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 424w, https://substackcdn.com/image/fetch/$s_!zUm0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 848w, https://substackcdn.com/image/fetch/$s_!zUm0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 1272w, https://substackcdn.com/image/fetch/$s_!zUm0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2af0dab-557a-465e-9f64-6620269cd1b6_928x512.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h4>Transforming the Data</h4><p>As shown in the first image above, the dataset is in <em>long</em> format. That is, each row of data is a single item purchased by a particular customer on a particular day. The combination of <em>MemberID</em> and <em>PurchaseDate</em> identifies all the items purchased in a single transaction (or <em>basket</em>).</p><p>Python in Excel comes with the <em>mlxtend</em> library for performing market basket analysis. 
However, the <em>mlxtend</em> library expects the data to be in a wide format of complete baskets, like the following:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!pvcQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!pvcQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 424w, https://substackcdn.com/image/fetch/$s_!pvcQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 848w, https://substackcdn.com/image/fetch/$s_!pvcQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 1272w, https://substackcdn.com/image/fetch/$s_!pvcQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!pvcQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png" width="572" height="130.42345276872965" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:280,&quot;width&quot;:1228,&quot;resizeWidth&quot;:572,&quot;bytes&quot;:80759,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191772369?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!pvcQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 424w, https://substackcdn.com/image/fetch/$s_!pvcQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 848w, https://substackcdn.com/image/fetch/$s_!pvcQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 1272w, https://substackcdn.com/image/fetch/$s_!pvcQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2fb9087e-f3a0-4b40-8a3a-ac3f6cca7cc6_1228x280.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>In the image immediately above, the first <em>basket</em> contains 4 items: <em>sausage</em>, <em>whole milk</em>, <em>semi-finished bread</em>, and <em>yogurt</em>. 
</p><p>The following Python in Excel formula loads the raw data from the <em>Purchases</em> table stored in the workbook and transforms the data into a list of lists:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!iOCB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!iOCB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 424w, https://substackcdn.com/image/fetch/$s_!iOCB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 848w, https://substackcdn.com/image/fetch/$s_!iOCB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 1272w, https://substackcdn.com/image/fetch/$s_!iOCB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!iOCB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png" width="1246" height="620" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:620,&quot;width&quot;:1246,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:133021,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191772369?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!iOCB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 424w, https://substackcdn.com/image/fetch/$s_!iOCB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 848w, https://substackcdn.com/image/fetch/$s_!iOCB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 1272w, https://substackcdn.com/image/fetch/$s_!iOCB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff27b9f97-7cef-4e37-a1db-027ae94f94c9_1246x620.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Here&#8217;s the code so you can copy and paste it into Python in Excel:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;91b27394-5b54-451a-9cf5-3d70e4e206e8&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Load data from Excel table
purchases_raw = xl("Purchases[#All]", headers = True)

# Combine MemberID and PurchaseDate into a unique transaction ID
purchases_raw = (purchases_raw
                     .assign(TransactionID = lambda df_: df_['MemberID'].astype(str) + "_" + df_['PurchaseDate'].astype(str))
                )

# Group items by transaction as a list of lists
transactions = (purchases_raw.groupby('TransactionID')
                    ['Item']
                    .agg(list)
                    .values
                    .tolist()
               )</code></pre></div><p><strong>NOTE -</strong> The first line of code is specific to Python in Excel, so you don&#8217;t need it if you&#8217;re using a technology like Jupyter Notebook.</p><p>A list of lists is a good way to store market basket analysis (MBA) data because each basket contains different counts of items. However, lists of lists don&#8217;t work with the most popular MBA algorithm (i.e., <em>apriori</em>) in <em>mlxtend</em>.</p><p>Luckily, the <em>mlxtend</em> library can convert the list of lists into a tabular format suitable for MBA with the <em>apriori</em> algorithm:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!K0Wr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!K0Wr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 424w, https://substackcdn.com/image/fetch/$s_!K0Wr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 848w, https://substackcdn.com/image/fetch/$s_!K0Wr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 1272w, https://substackcdn.com/image/fetch/$s_!K0Wr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!K0Wr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png" width="1222" height="718" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:718,&quot;width&quot;:1222,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:192580,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191772369?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!K0Wr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 424w, https://substackcdn.com/image/fetch/$s_!K0Wr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 848w, https://substackcdn.com/image/fetch/$s_!K0Wr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 1272w, https://substackcdn.com/image/fetch/$s_!K0Wr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37e3441d-a79f-4280-90ae-b61c54458b7d_1222x718.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And here&#8217;s the code so you can copy and paste it into Python in Excel:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;feb0f3da-3dff-484b-b894-3fb64c7db1af&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the transactions
transaction_encoder = TransactionEncoder()

transactions_encoded = (transaction_encoder
                            .fit(transactions)
                            .transform(transactions)
                       )

transactions_df = pd.DataFrame(transactions_encoded, 
                               columns = transaction_encoder.columns_)</code></pre></div><p>Clicking the disk icon in the <em>Python Editor</em> saves and runs the Python code. As you will see in the output from cell <em>B3</em>, the data is transformed into a table (i.e., a <em>pandas DataFrame</em>) with 14963 rows (i.e., <em>baskets</em>) and 167 columns (i.e., <em>products</em>). </p><p><strong>BTW -</strong> In analytics, the term <em>matrix</em> is synonymous with table.</p><p>The <em>DataFrame</em> generated by the code in cell <em>B3</em> is technically a <em>sparse matrix</em>. Here&#8217;s why:</p><ul><li><p>Every item that is purchased in any transaction gets its own column in the <em>DataFrame</em>.</p></li><li><p>As most transactions will only contain a small subset of the possible items, most of the <em>DataFrame</em> cells have the value of <em>False</em> (i.e., 0).</p></li><li><p>As the <em>Trues</em> (i.e., the 1s) are the information, the <em>DataFrame</em> is sparsely populated with information.</p></li></ul><p>Sparse matrices are very common in real-world DIY data science (e.g., MBA &amp; text analytics) and can be very large (i.e., your laptop needs a lot of memory to create and use them).</p><p>So, when using Python in Excel, it&#8217;s quite easy to create sparse matrices that are too large to be handled by the secured cloud container. </p><p>When this happens, moving to Jupyter Notebooks on your local machine is usually required. The good news is that the code is exactly the same.</p><p>However, the dataset in this tutorial can be handled just fine with Python in Excel.</p><div><hr></div><h3>Mining Frequent Itemsets</h3><p>The next step is to mine the data for <em>frequent itemsets</em>. Intuitively, these are just collections of items that appear frequently across all transactions.</p><p>Using <em>mlxtend</em>, we can specify the size of the <em>itemsets</em> we wish to find. 
For example, we might find the following are <em>frequent itemsets</em> in the dataset:</p><ul><li><p>{UHT-milk, tropical fruit}</p></li><li><p>{whole milk, yogurt, sausage}</p></li><li><p>{beef, rolls/buns, citrus fruit, specialty chocolate}</p></li></ul><p>As covered in the previous tutorial, we typically don&#8217;t want all of the <em>frequent itemsets</em> for our analyses.</p><p>We want only those <em>frequent itemsets</em> that meet the minimum <em>support</em> threshold (e.g., appearing in at least 0.1% of the transactions). Setting a minimum <em>support</em> threshold helps to:</p><ul><li><p>Analyze commonly occurring <em>itemsets</em>.</p></li><li><p>Reduce the number of <em>association rules</em> produced from MBA.</p></li></ul><p>Here&#8217;s the code for doing this (for brevity, I&#8217;m not going to include Python in Excel code screenshots for the rest of this tutorial):</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;8a3ca2b9-c14f-4d71-ba8a-6f4574f358e3&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from mlxtend.frequent_patterns import apriori

# Use a low support threshold due to the small dataset
frequent_itemsets = apriori(transactions_df, min_support = 0.001, 
                            use_colnames = True)</code></pre></div><p>As covered in previous tutorials, MBA is an iterative process. Experimenting with various <em>support</em> thresholds optimizes your results.</p><p><em>Support</em> is a critical measure for the successful use of MBA, because <em>itemsets</em> and <em>association rules</em> with low support may occur purely by chance. However, don&#8217;t make the mistake of only considering <em>itemsets</em> and <em>association rules</em> with high support.</p><p>For example, you may find that an <em>association rule</em> with a <em>support</em> of 0.1% can lead to very profitable changes to a business process or save your organization a lot of money. Unfortunately, low <em>support</em> thresholds often yield very large numbers of association rules to analyze.</p><p>Consequently, when using MBA, a best practice is to start with a relatively low <em>support</em> threshold and then iteratively increase it to reduce the number of association rules you need to analyze.</p><div><hr></div><h4>Mining Association Rules</h4><p>With the <em>frequent itemsets</em> found, we can now mine them for any <em>association rules</em>. The following code uses a minimum <em>lift</em> threshold of 1.0. As covered in the previous tutorial, <em>lift</em> scores less than 1.0 mean that the right side of the <em>association rule</em> is less likely to happen because of what&#8217;s on the left side:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;bdbb2f97-efb2-45c1-ae05-8ea700f898a1&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from mlxtend.frequent_patterns import association_rules

# Mine the association rules with a lift threshold
rules = association_rules(frequent_itemsets, metric = 'lift',
                          min_threshold = 1.0)

# Convert rules to strings for ease of viewing
rules["antecedents"] = rules["antecedents"].apply(lambda x: ", ".join(sorted(x)))
rules["consequents"] = rules["consequents"].apply(lambda x: ", ".join(sorted(x)))
rules.head()</code></pre></div><p>Running the above code produces 240 <em>association rules</em> mined from the <em>frequent itemsets</em> using a <em>lift</em> threshold of 1.0. A higher <em>lift</em> threshold would reduce the number of rules, while a lower one would increase it. This is one aspect of MBA that requires experimentation to find an optimal value.</p><p>The above code deserves some explanation:</p><ul><li><p>The <em>antecedent</em> is the left side of an <em>association rule</em>.</p></li><li><p>The <em>consequent</em> is the right side of an <em>association rule</em>.</p></li></ul><p>Typically, it's most useful to sort the <em>association rules</em> in descending order based on <em>lift</em>:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;b80a648a-8546-46cb-bc22-5c29c3f068a1&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Get the rules in descending order by lift
sorted_rules = rules.sort_values(['lift', 'confidence', 'support'],
                                 ascending = False)
sorted_rules.head()</code></pre></div><p>The sorted rules show the following are the top 4 rules:</p><ul><li><p>{whole milk, yogurt} &#8594; {sausage} with a <em>lift</em> of 2.1829</p></li><li><p>{sausage} &#8594; {whole milk, yogurt} with a <em>lift</em> of 2.1829</p></li><li><p>{whole milk, sausage} &#8594; {yogurt} with a <em>lift</em> of 1.912</p></li><li><p>{yogurt} &#8594; {whole milk, sausage} with a <em>lift</em> of 1.912</p></li></ul><p>Consider the first rule. The <em>lift</em> metric shows that customers who purchase {whole milk, yogurt} together are 2.18 times more likely to purchase {sausage} than when purchasing {whole milk}, {yogurt}, and {sausage} in separate combinations.</p><p>This is why, in my experience, business stakeholders love MBA - they can easily understand the insights and form actionable plans based on the insights.</p><p>Now, you might have noticed that the 2nd rule is a mirror image of the first:</p><ul><li><p>{whole milk, yogurt} &#8594; {sausage}</p></li><li><p>{sausage} &#8594; {whole milk, yogurt}</p></li></ul><p>This is an expected outcome of MBA, but not particularly useful. In general, using MBA effectively means having a single item on the right side of the rules.</p><p>The following code makes this happen:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;af6c95be-ec36-462a-ae72-7e9f57316400&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Get rules where only 1 item is on the right side
mask = [c.count(',') == 0 for c in rules['consequents']]
rules_single_consequent = rules[mask]

# Sort the single association rules
rules_single_consequent.sort_values(['lift', 'confidence', 'support'],
                                     ascending = False)</code></pre></div><p>The above code reduces the number of rules from 240 to 225 and produces rules that are much more understandable to business stakeholders.</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-4-targeting">Check out Part 4 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this newsletter.</p><p>Next week&#8217;s newsletter will continue the tutorial series by demonstrating how to use MBA for more targeted analyses.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/70b9da1d-3860-4fe2-ae87-73b6f9bc09b3_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5e90404a-5f04-4008-9f86-2afc0fd2a5ed_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b990bd60-f673-4b5a-85fc-3197c1fece4f_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f2c4550f-a260-4ec0-8054-b293a200f407_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" 
data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p></p>]]></content:encoded></item><item><title><![CDATA[Market Basket Analysis Part 2: Communicating Results]]></title><description><![CDATA[If you want to be a successful DIY data scientist, don&#8217;t make the same mistake I made early in my data science journey:]]></description><link>https://thediydatascientist.substack.com/p/market-basket-analysis-part-2-communicating</link><guid isPermaLink="false">https://thediydatascientist.substack.com/p/market-basket-analysis-part-2-communicating</guid><dc:creator><![CDATA[David Langer]]></dc:creator><pubDate>Sat, 21 Mar 2026 14:34:53 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/b15539af-bd54-47ea-89ee-a79b26d48d23_1200x630.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>BTW - If you&#8217;re new to this tutorial series, 
<strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-1-introduction">check out Part 1 here</a></strong>.&#8203;</p><p>If you want to be a successful DIY data scientist, don&#8217;t make the same mistake I made early in my data science journey:</p><p><strong>Failing to realize that technical skills are a means to an end, not the end itself.</strong></p><p>Market basket analysis is a prime example of this.</p><p>Like so many who get into data science, my focus in my early days was the &#8220;cool stuff&#8221;:</p><ul><li><p>Decision trees</p></li><li><p>Random forests</p></li><li><p>K-means clustering</p></li><li><p>Linear/logistic regression</p></li></ul><p>Don&#8217;t get me wrong, all of these are powerful tools useful to any DIY data scientist.</p><p>But you know who typically doesn&#8217;t care about how you got to the insights?</p><p>Your business stakeholders. Here&#8217;s what I had to learn the hard way.</p><p>The best analytics are those that resonate with business stakeholders and drive changes to business processes - not necessarily the &#8220;cool stuff.&#8221;</p><p>This is where market basket analysis can be so effective.</p><p><strong>The results of market basket analysis can be summarized using metrics easily understood by business stakeholders, and the insights are usually perceived as actionable.</strong></p><p>This week&#8217;s tutorial is all about these metrics and how to use them to communicate your insights with business stakeholders.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4><strong>Setting Stakeholder Expectations</strong></h4><p>We first need to discuss a 
critical aspect of a successful market basket analysis (MBA): setting stakeholder expectations. I will teach you exactly <a href="https://www.daveondata.com/data-science-consulting-info">how I set expectations with my clients</a> in this section.</p><p>Like most analytics techniques you will use as a DIY data scientist, MBA mines your data for interesting historical patterns.</p><p>In the case of MBA, these patterns take the form of <em>association rules</em> like the following:</p><ul><li><p>If a patient has condition A, they&#8217;re more likely to develop condition B.</p></li><li><p>If someone buys a printer, they often buy ink.</p></li><li><p>People who buy peanut butter often buy jelly.</p></li></ul><p>It&#8217;s critical to understand what these association rules actually mean. While there are mathematical definitions for this, all we need is an intuitive understanding.</p><p>Take the second bullet above as an example. Using natural language, we could explain this association rule like so:</p><p><em>&#8220;We mined our historical data and found that customers who buy a printer also tend to buy ink as part of the same transaction.</em></p><p><em>However, it&#8217;s important to note that not every customer buys printers and ink in the same transaction.</em></p><p><em>Lastly, these insights are based on historical data. There&#8217;s no guarantee that this behavior will continue in the future.&#8221;</em></p><p><strong>The above summarizes that association rules are not </strong><em><strong>causal relationships</strong></em><strong>. 
Association rules embody </strong><em><strong>likely</strong></em><strong> outcomes based on your historical data.</strong></p><p>Continuing the example, the following are possible customer behaviors:</p><ol><li><p>The customer buys a printer and ink in the same transaction.</p></li><li><p>The customer buys only a printer.</p></li></ol><p>What the association rule tells you is that, historically, #1 is more likely than #2.</p><p>While every business stakeholder dreams of a causal relationship (e.g., for every ad dollar we spend, we make $1.50 in sales no matter what), these are exceedingly rare in real-world analytics.</p><p>Instead, data-driven organizations mine their data for associations that can be exploited for economic gain (e.g., more sales or lower costs).</p><p>MBA is a powerful tool for mining these associations.</p><div><hr></div><h4>An Example Dataset</h4><p>Before we dive into the MBA metrics for explaining insights to business stakeholders, we need an example dataset:</p><ul><li><p>{Bread, Milk}</p></li><li><p>{Bread, Diapers, Beer, Eggs}</p></li><li><p>{Milk, Diapers, Beer, Cola}</p></li><li><p>{Bread, Milk, Diapers, Beer}</p></li><li><p>{Bread, Milk, Diapers, Cola}</p></li></ul><p>As mentioned in <a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-1-introduction">Part 1</a> of this tutorial series, MBA works with any kind of data. For example, I&#8217;ve successfully used MBA with <a href="https://www.daveondata.com/data-science-consulting-info">clients in marketing and product management</a>. However, I will use a grocery store example as it&#8217;s a classic way to teach MBA.</p><p>As was also mentioned in Part 1, quantities don&#8217;t matter. 
We're looking only for the presence of items in the transaction.</p><p>This means we can represent the above dataset using a <em>pandas DataFrame</em>:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!meCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!meCo!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 424w, https://substackcdn.com/image/fetch/$s_!meCo!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 848w, https://substackcdn.com/image/fetch/$s_!meCo!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 1272w, https://substackcdn.com/image/fetch/$s_!meCo!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!meCo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png" width="472" height="247.53094462540716" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:644,&quot;width&quot;:1228,&quot;resizeWidth&quot;:472,&quot;bytes&quot;:72964,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://thediydatascientist.substack.com/i/191675662?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!meCo!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 424w, https://substackcdn.com/image/fetch/$s_!meCo!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 848w, https://substackcdn.com/image/fetch/$s_!meCo!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 1272w, https://substackcdn.com/image/fetch/$s_!meCo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66190ae1-6639-41e0-bf14-54fdb56da077_1228x644.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h4>How Frequent is the Pattern?</h4><p>The first MBA metric is <em>support</em>. 
The support metric tells you how frequently an <em>itemset</em> appears in the dataset.</p><p>Using our example data, the following is an itemset that appears in 4 out of 5 transactions:</p><ul><li><p>{Milk}</p></li></ul><p>The <em>support</em> for this itemset is 4 / 5 = 0.8 or 80%.</p><p>Now, consider this itemset:</p><ul><li><p>{Milk, Diapers, Beer}</p></li></ul><p>The support for this itemset is 2 / 5 = 0.4 or 40%.</p><p><strong>Support is a critical measure for the successful use of MBA, because itemsets and association rules with low support may occur purely by chance.</strong></p><p>However, don&#8217;t make the mistake of only considering itemsets and association rules with high support (i.e., using a high support threshold).</p><p>For example, you may find that an association rule with a support of 10% can lead to very profitable changes to a business process.</p><p>Unfortunately, low support thresholds often yield very large numbers of association rules to analyze.</p><p><strong>Consequently, when using MBA, a best practice is to start with a relatively low support threshold (e.g., 5%) and then iteratively increase it to reduce the number of association rules you need to analyze.</strong></p><div><hr></div><h4>What&#8217;s the Reliability of the Association?</h4><p>The second MBA metric is <em>confidence</em>. 
The confidence metric gives you an estimate of the reliability of an association rule given your historical data.</p><p>Consider the following association rule:</p><ul><li><p>{Milk, Diapers} &#8594; {Beer}</p></li></ul><p>In natural language, the above rule can be explained to a stakeholder as follows:</p><p>&#8220;Customers who purchase Milk and Diapers also tend to purchase Beer in the same transaction.&#8221;</p><p>The confidence metric is calculated by finding the support for all items in the rule:</p><ul><li><p>The support for {Milk, Diapers, Beer} is 2 / 5 = 0.4 or 40%</p></li></ul><p>Divided by the support for the left side of the rule:</p><ul><li><p>The support for {Milk, Diapers} is 3 / 5 = 0.6 or 60%</p></li></ul><p>Therefore, the confidence for this rule is 0.4 / 0.6 = 0.667 or 66.7%</p><p><strong>Here&#8217;s how to combine support and confidence using natural language for your business stakeholders:</strong></p><p><strong>&#8220;60% of our transactions contain both Milk and Diapers. Of these transactions, 66.7% also contain Beer.&#8221;</strong></p><p>The first sentence explains the support (or frequency) of the left side of the rule. The second sentence explains the confidence (or conditional frequency) of the right side of the rule.</p><p>There&#8217;s an urban legend that a grocery store chain uncovered an association rule between Diapers and Beer. In response, the chain created a display in its stores containing Diapers and Beer to increase sales.</p><p>While certainly an amusing story, it does underline how MBA can be understood and used by business stakeholders to improve processes.</p><div><hr></div><h4>The Magic of Lift</h4><p>The third MBA metric is <em>lift</em>. The lift metric addresses a shortcoming of relying only on the support and confidence metrics. 
</p><p>If I&#8217;m being completely honest, the lift metric is so magical, it&#8217;s usually the primary way <a href="https://www.daveondata.com/data-science-consulting-info">I communicate analysis results with my clients</a>.</p><p>The problem with the confidence metric is that it doesn&#8217;t account for how often the right-hand side of an association rule appears in the dataset.</p><p>For example, consider this association rule:</p><ul><li><p>{Milk, Diapers} &#8594; {Cola}</p></li></ul><p>Let&#8217;s calculate the confidence for this association rule:</p><ul><li><p>The support for {Milk, Diapers, Cola} is 2 / 5 = 0.4</p></li><li><p>The support for {Milk, Diapers} is 3 / 5 = 0.6</p></li><li><p>The confidence is 0.4 / 0.6 = 0.667</p></li></ul><p>So, this association rule looks very similar to the previous one.</p><p><strong>**BUT**</strong></p><p>Cola purchases are rarer compared to Beer purchases. This is where the lift metric becomes useful.</p><p>The lift metric is calculated by dividing the association rule&#8217;s confidence by the support for the right side of the rule:</p><ul><li><p>The support for {Cola} is 2 / 5 = 0.4</p></li><li><p>So, the lift for the association rule is 0.667 / 0.4 = 1.67</p></li></ul><p>Now, compare this to the lift for the first association rule:</p><ul><li><p>Support for {Beer} is 3 / 5 = 0.6</p></li><li><p>Lift is 0.667 / 0.6 = 1.11</p></li></ul><p><strong>And here&#8217;s where the magic of the lift metric comes into play when explaining your MBA insights to business stakeholders:</strong></p><p><em>&#8220;The analysis shows that customers who purchase Milk and Diapers together are about 1.7 times more likely to purchase Cola compared to purchasing items independently.</em></p><p><em>Additionally, the analysis shows that customers who purchase Milk and Diapers together are only about 1.1 times more likely to purchase Beer than those who purchase items independently, a much weaker association.</em></p><p><em>Based on the analysis, we could experiment with providing coupons for 
Cola to customers who regularly purchase Milk and Diapers together.&#8221;</em></p><p>Now that&#8217;s what I call using data to drive impact!</p><p>&#128073; Ready to learn more? <strong><a href="https://thediydatascientist.substack.com/p/market-basket-analysis-part-3">Check out Part 3 here</a></strong>.</p><div><hr></div><p>That&#8217;s it for this newsletter.</p><p>Next week&#8217;s newsletter will continue the tutorial series on market basket analysis and teach you how to perform MBA using Python code.</p><p>Stay healthy and happy data sleuthing!</p><div><hr></div><h3>&#128105;&#8205;&#127979; Ready to Learn More Analytics Skills?</h3><p>My paid subscribers have access to <strong>exclusive monthly live crash courses </strong>that include:</p><ul><li><p>PDFs of all slides.</p></li><li><p>Excel workbooks, code, and data.</p></li><li><p>Recordings so you can learn on your schedule.</p></li></ul><p>Here are some examples of my live crash courses:</p><div class="image-gallery-embed" data-attrs="{&quot;gallery&quot;:{&quot;images&quot;:[{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/986b640b-d71f-45d1-8b19-bc888d1bbc20_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0ac556b2-cdc4-4d2f-b5f2-2a4c8e6eb4a2_1024x576.png&quot;},{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/00b58ec7-1ac5-4b5f-9f21-36982c33a7f9_1440x700.png&quot;}],&quot;caption&quot;:&quot;&quot;,&quot;alt&quot;:&quot;&quot;,&quot;staticGalleryImage&quot;:{&quot;type&quot;:&quot;image/png&quot;,&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ffa1c129-3af8-495d-95e4-d6a26fcb101e_1456x474.png&quot;}},&quot;isEditorNode&quot;:true}"></div><p class="button-wrapper" 
data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/p/become-a-member&quot;,&quot;text&quot;:&quot;Learn More&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://thediydatascientist.substack.com/p/become-a-member"><span>Learn More</span></a></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://thediydatascientist.substack.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading The DIY Data Scientist! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p></p>]]></content:encoded></item></channel></rss>