[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-gemini-3-flash":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"77c722a6-bd70-4c97-a3e1-43366bf70059","Gemini 3 Flash","gemini-3-flash",0,null,[11],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"1848b0d4-2c8a-4c24-928b-46f0ddb4dbb2","why-benchmark-leaderboards-are-wrong-about-model-logic-en","Why benchmark leaderboards are wrong about model logic","Leaderboard churn overstates progress and hides how weak model logic still is.","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1780673573292-rj31.png","en","2026-06-05T15:32:23.511842+00:00"]