<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>OCR on Jamie Ede</title>
    <link>https://www.jamieede.com/tags/ocr/</link>
    <description>Recent content in OCR on Jamie Ede</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <lastBuildDate>Sat, 30 May 2026 10:00:00 +0000</lastBuildDate>
    <atom:link href="https://www.jamieede.com/tags/ocr/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>Measuring OCR accuracy for a 1994 service manual RAG</title>
      <link>https://www.jamieede.com/posts/measuring-ocr-accuracy-1994-service-manual-rag/</link>
      <pubDate>Sat, 30 May 2026 10:00:00 +0000</pubDate>
      <guid>https://www.jamieede.com/posts/measuring-ocr-accuracy-1994-service-manual-rag/</guid>
      <description>&lt;p&gt;A torque figure with a missing digit is not a typo. It is a mechanic over-tightening a brake bolt. When you turn a scanned service manual into a RAG chatbot, OCR fidelity stops being a quality nicety and becomes a safety property.&lt;/p&gt;&#xA;&lt;p&gt;The chatbot in question reads a 1994 Yamaha XV250 Virago service manual: 291 pages, scanned to an image-only PDF, no text layer at all. It is live here: &lt;a href=&#34;https://virago.edestudio.us/&#34; target=&#34;_blank&#34;&gt;virago.edestudio.us&lt;/a&gt;. Ask it about valve clearances or jet sizes and it answers from the OCR&amp;rsquo;d corpus, reading the retrieved chunk text verbatim. Whatever the OCR got wrong is what the rider is told.&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
