[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-policy-arbitration":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"e362a565-bb88-40a9-ba90-31cea19a000c","policy arbitration","policy-arbitration",0,null,[11],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"75bcc569-5e89-45c8-b809-6f169e929f4b","rl-training-hands-off-control-gradually-zh","RL 先接管再放手","這篇論文證明，RL 可以先借用可用的基線策略，再把控制權逐步交給學到的新策略。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1780986786312-03yo.png","zh","2026-06-09T06:32:32.849589+00:00"]