<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sitemap for https://xihuai18.github.io/ (sitemaps.org protocol 0.9).
  Each <url> lists one page to be crawled; <lastmod> is optional and is
  only present on the dated article pages.

  The site-verification file (googlea3a90baf4191fbfd.html) is deliberately
  not listed here: it is not a content page. It must still be served by the
  site; only its sitemap entry is omitted.
  NOTE(review): if this file is produced by a site generator, exclude the
  verification file in the generator's configuration as well, otherwise the
  entry will reappear on the next build. Confirm against the build setup.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">

  <!-- Dated article pages; each topic has an -en and a -zh variant. -->
  <url>
    <loc>https://xihuai18.github.io/reinforcement-learning/2025/11/15/three-policy-en.html</loc>
    <lastmod>2025-11-15T00:00:00+00:00</lastmod>
  </url>
  <url>
    <loc>https://xihuai18.github.io/reinforcement-learning/2025/11/15/three-policy-zh.html</loc>
    <lastmod>2025-11-15T00:00:00+00:00</lastmod>
  </url>
  <url>
    <loc>https://xihuai18.github.io/reinforcement-learning/2025/12/01/kl-estimators-en.html</loc>
    <lastmod>2025-12-01T00:00:00+00:00</lastmod>
  </url>
  <url>
    <loc>https://xihuai18.github.io/reinforcement-learning/2025/12/01/kl-estimators-zh.html</loc>
    <lastmod>2025-12-01T00:00:00+00:00</lastmod>
  </url>
  <url>
    <loc>https://xihuai18.github.io/reinforcement-learning/2025/12/17/offpolicy-en.html</loc>
    <lastmod>2025-12-17T00:00:00+00:00</lastmod>
  </url>
  <url>
    <loc>https://xihuai18.github.io/reinforcement-learning/2025/12/17/offpolicy-zh.html</loc>
    <lastmod>2025-12-17T00:00:00+00:00</lastmod>
  </url>

  <!-- Top-level site sections (no <lastmod> recorded). -->
  <url>
    <loc>https://xihuai18.github.io/</loc>
  </url>
  <url>
    <loc>https://xihuai18.github.io/blog/</loc>
  </url>
  <url>
    <loc>https://xihuai18.github.io/cv/</loc>
  </url>
  <url>
    <loc>https://xihuai18.github.io/news/</loc>
  </url>
  <url>
    <loc>https://xihuai18.github.io/projects/</loc>
  </url>
  <url>
    <loc>https://xihuai18.github.io/publications/</loc>
  </url>

  <!-- Blog index pages by category and by year (no <lastmod> recorded). -->
  <url>
    <loc>https://xihuai18.github.io/blog/category/reinforcement-learning/</loc>
  </url>
  <url>
    <loc>https://xihuai18.github.io/blog/2025/</loc>
  </url>
</urlset>
