[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-模型邏輯":3},{"tag":4,"articles":9},{"id":5,"name":6,"slug":6,"article_count":7,"description_zh":8,"description_en":8},"9b08bb26-84e6-4e23-97a5-3572123ef38b","模型邏輯",0,null,[10],{"id":11,"slug":12,"title":13,"summary":14,"category":15,"image_url":16,"cover_image":16,"language":17,"created_at":18},"a4cf24e5-b958-4f91-bdca-2f1a57e81aef","why-benchmark-leaderboards-are-wrong-about-model-logic-zh","為什麼基準排行榜看錯了模型邏輯","排行榜的月度波動放大了進步感，卻掩蓋了模型邏輯仍然脆弱的事實。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1780673571153-x7yi.png","zh","2026-06-05T15:32:23.043639+00:00"]