<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!-- RSS 2.0 feed for the "llm-inference" tag listing on kmarble.dev. -->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata. -->
    <title>Llm-Inference on kmarble.dev</title>
    <link>https://kmarble.dev/tags/llm-inference/</link>
    <description>Recent content in Llm-Inference on kmarble.dev</description>
    <generator>Hugo -- gohugo.io</generator>
    <language>en</language>
    <copyright>© 2026</copyright>
    <lastBuildDate>Fri, 05 Jun 2026 15:55:14 -0500</lastBuildDate>
    <!-- Self-referencing link: points at this feed's own URL. -->
    <atom:link href="https://kmarble.dev/tags/llm-inference/index.xml"
               rel="self"
               type="application/rss+xml"/>
    <!-- Feed entries. -->
    <item>
      <title>Strix Halo LLM Serving: 25 tok/s at 151k Context Under 100W</title>
      <link>https://kmarble.dev/posts/strix-halo-llm-inference-show-and-tell/</link>
      <pubDate>Sun, 26 Apr 2026 00:00:00 +0000</pubDate>
      <!-- The guid carries the same URL as the item's link. -->
      <guid>https://kmarble.dev/posts/strix-halo-llm-inference-show-and-tell/</guid>
      <!-- The item description is intentionally left as-is: empty. -->
      <description/>
    </item>
  </channel>
</rss>