<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!--
  RSS 2.0 feed for the "Embeddings" tag listing on www.jamieede.com.
  NOTE(review): the generator element below names Hugo, so this file is
  presumably build output; confirm, and if so change the site templates or
  content rather than editing this file by hand.
-->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata: human-readable title, the HTML page this feed mirrors, and a short summary. -->
    <title>Embeddings on Jamie Ede</title>
    <link>https://www.jamieede.com/tags/embeddings/</link>
    <description>Recent content in Embeddings on Jamie Ede</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <!-- Same timestamp as the pubDate of the only item in this feed. -->
    <lastBuildDate>Fri, 22 May 2026 10:00:00 +0000</lastBuildDate>
    <!-- Self link: the URL of this feed document. This is the only element that uses the atom prefix declared on the root. -->
    <atom:link href="https://www.jamieede.com/tags/embeddings/index.xml" rel="self" type="application/rss+xml" />
    <!-- One item per post carrying the tag; this feed currently holds a single post. -->
    <item>
      <title>Chunking and embedding technical documentation for RAG</title>
      <link>https://www.jamieede.com/posts/chunking-embedding-technical-docs-rag/</link>
      <pubDate>Fri, 22 May 2026 10:00:00 +0000</pubDate>
      <!-- The guid repeats the link URL and carries no isPermaLink attribute. -->
      <guid>https://www.jamieede.com/posts/chunking-embedding-technical-docs-rag/</guid>
      <!--
        The description is an HTML excerpt stored as entity-escaped text, not as child elements:
          * &lt; and &gt; are the escaped angle brackets of the HTML tags;
          * &#34; is a double quote and &#xA; is a line feed (numeric character references);
          * &amp;ldquo;, &amp;rdquo; and &amp;nbsp; are escaped once more on purpose, so that after XML
            parsing the text holds the HTML entities themselves.
        Keep this element on one line and do not re-indent it: whitespace inside it is part of the data.
        NOTE(review): the excerpt stops right after a colon, so it looks like a truncated summary
        rather than the full post; confirm that this is intended.
      -->
      <description>&lt;p&gt;&amp;ldquo;271 pages&amp;rdquo; is not &amp;ldquo;271 vectors.&amp;rdquo; Split settings and embedding model choice determine whether &lt;a href=&#34;https://www.jamieede.com/astra-chat&#34; &gt;Astra Docs Chat&lt;/a&gt;&#xA; retrieves the right paragraph when you ask about PCU groups, hybrid search, or collection APIs.&lt;/p&gt;&#xA;&lt;p&gt;Context: &lt;a href=&#34;https://www.jamieede.com/posts/building-astra-docs-chat-rag-over-datastax-on-langflow-and-cloudflare-pages/&#34; &gt;Building Astra Docs Chat&lt;/a&gt;&#xA; · &lt;a href=&#34;https://www.jamieede.com/posts/langflow-rag-astra-db-ingest-and-chat-flows/&#34; &gt;Langflow ingest flow&lt;/a&gt;&#xA; · &lt;a href=&#34;https://www.jamieede.com/posts/batch-ingest-markdown-langflow-api/&#34; &gt;Batch ingest&lt;/a&gt;&#xA;&lt;/p&gt;&#xA;&lt;p&gt;Try retrieval in production: &lt;a href=&#34;https://www.jamieede.com/astra-chat&#34; &gt;Astra Docs Chat&lt;/a&gt;&#xA;&lt;/p&gt;&#xA;&lt;hr&gt;&#xA;&#xA;&lt;h2 id=&#34;shape-of-the-corpus&#34; class=&#34;anchor-link&#34;&gt;&lt;a href=&#34;#shape-of-the-corpus&#34;&gt;Shape of the corpus&lt;span class=&#34;pilcrow&#34;&gt;&amp;nbsp;¶&lt;/span&gt;&lt;/a&gt;&lt;/h2&gt;&#xA;&lt;p&gt;Each file in my local &lt;code&gt;pages/&lt;/code&gt; export is trafilatura-extracted markdown from &lt;a href=&#34;https://docs.datastax.com/en/astra-db-serverless/&#34; target=&#34;_blank&#34; &gt;Astra DB Serverless docs&lt;/a&gt;&#xA;:&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
