[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-llm-fine-tuning":3},{"tag":4,"articles":11,"peer_article_count":50},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":10},"93aa15ea-c3f0-4f7d-a7c2-b22a81051ec1","LLM fine-tuning","llm-fine-tuning",3,"LLM 微調指的是在既有基礎模型上，透過監督式資料或強化學習調整模型行為，讓它更貼近特定任務與領域。這個主題涵蓋資料準備、訓練穩定性、評估與部署，例如 PPO 的替代方法、BPO\u002FGBPO，以及用 S3、SageMaker 和 MLflow 加速實作。","LLM fine-tuning covers the methods used to adapt a base model to a specific task or domain, from supervised training to RL-based alignment. It matters because stability, data pipelines, and tooling shape real outcomes; examples include BPO\u002FGBPO as PPO alternatives and AWS workflows with S3, SageMaker, and MLflow.",[12,21,29,36,43],{"id":13,"slug":14,"title":15,"summary":16,"category":17,"image_url":18,"cover_image":18,"language":19,"created_at":20},"186b266a-5b45-4bd4-85a4-5fa62fcc50dc","google-openrl-llm-fine-tuning-kubernetes-zh","Google OpenRL 把 RL 細調搬上 Kubernetes","Google OpenRL 讓團隊在自己的 Kubernetes 叢集上做 LLM post-training 與 fine-tuning，重點是把研究流程和基礎架構拆開，減少 RL 迭代時的雜務。","model-release","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1782572576166-gzxw.png","zh","2026-06-27T15:02:27.036919+00:00",{"id":22,"slug":23,"title":24,"summary":25,"category":26,"image_url":27,"cover_image":27,"language":19,"created_at":28},"7c4c30b3-b2a8-48a7-b2ea-96c40c16ae19","llm-fine-tuning-turns-generic-models-into-domain-tools-zh","LLM 微調把通用模型變專用工具","我把企業 LLM 微調拆成一套可直接抄的流程：先判斷該不該微調，再做資料清理、模型選擇、評估與上線。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1782569910494-nhtn.png","2026-06-27T14:17:56.614064+00:00",{"id":30,"slug":31,"title":32,"summary":33,"category":26,"image_url":34,"cover_image":34,"language":19,"created_at":35},"a4cb421e-464e-4933-9e1c-6371d3cd1503","prevent-catastrophic-forgetting-llm-fine-tuning-zh","如何防止 LLM 微調災難性遺忘","用 Anchored Weight Decay 在 LLM 微調時降低舊任務漂移，保住原有能力並檢查模型是否回復。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1780730281470-8i97.png","2026-06-06T07:17:28.426709+00:00",{"id":37,"slug":38,"title":39,"summary":40,"category":26,"image_url":41,"cover_image":41,"language":19,"created_at":42},"923bb0c4-95f3-49a0-8e01-5cdd6bcd2e32","fixing-llm-forgetting-es-fine-tuning-zh","ES 微調忘記問題有解了","這篇論文指出，LLM 用 evolution strategies 微調時的「忘記」多半是可回復的漂移，靠 anchored weight decay 就能壓住。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1780604276240-arx4.png","2026-06-04T20:17:25.720929+00:00",{"id":44,"slug":45,"title":46,"summary":47,"category":26,"image_url":48,"cover_image":48,"language":19,"created_at":49},"7a04d752-3f1a-4df7-b7c5-8bcb1e69c565","bounded-ratio-reinforcement-learning-ppo-zh","BRRL 取代 PPO 剪裁：BPO 與 GBPO 的穩定性升級","BRRL 把 PPO 的剪裁目標改寫成有界比例框架，推出 BPO 與 GBPO，主打更穩定的更新與更清楚的理論基礎。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776751794578-t5j7.png","2026-04-21T06:09:39.661696+00:00",17]