[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-goal-reaching":3},{"tag":4,"articles":9},{"id":5,"name":6,"slug":6,"article_count":7,"description_zh":8,"description_en":8},"5d04c7f6-dfc5-45e7-8dfe-dc2cc87d8ac4","goal-reaching",0,null,[10],{"id":11,"slug":12,"title":13,"summary":14,"category":15,"image_url":16,"cover_image":16,"language":17,"created_at":18},"75bcc569-5e89-45c8-b809-6f169e929f4b","rl-training-hands-off-control-gradually-zh","RL 先接管再放手","這篇論文證明，RL 可以先借用可用的基線策略，再把控制權逐步交給學到的新策略。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1780986786312-03yo.png","zh","2026-06-09T06:32:32.849589+00:00"]