[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-moe":3},{"tag":4,"articles":11,"peer_article_count":102},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":10},"dc48d588-7063-4e5e-9689-757acf80390d","MoE","moe",9,"MoE（Mixture of Experts）是一種讓模型按需啟用部分專家的架構，常用來在總參數、推理成本與效果之間取得平衡。從開源寫碼模型到長上下文 agent 系統，MoE 正成為大模型工程化的重要路線。","MoE, or Mixture of Experts, is an architecture that activates only a subset of experts per token or task, balancing total parameter count, inference cost, and quality. It shows up in open coding models, long-context agents, and other systems built for efficient scaling.",[12,21,29,36,43,50,58,65,73,80,87,95],{"id":13,"slug":14,"title":15,"summary":16,"category":17,"image_url":18,"cover_image":18,"language":19,"created_at":20},"976800ba-7c59-4890-a17c-866a751f4f61","self-host-minimax-m3-gpu-cloud-zh","MiniMax M3 自架 GPU 雲部署分析","MiniMax M3 有 229.9B MoE 權重、1M context 和多模態輸出，但要自架就得準備很大的 GPU 記憶體與成本。","model-release","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1781716686607-r9gm.png","zh","2026-06-17T17:17:35.332244+00:00",{"id":22,"slug":23,"title":24,"summary":25,"category":26,"image_url":27,"cover_image":27,"language":19,"created_at":28},"7cd85acd-4f96-43b7-a980-db5092ece240","variable-width-transformers-cut-wasted-capacity-zh","可變寬度 Transformer 省算力","這篇論文證明，Transformer 不必每一層都同寬；把前後層加寬、中央層縮窄，可以在維持表現下減少計算與記憶體。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1781677978302-5pqn.png","2026-06-17T06:32:32.22399+00:00",{"id":30,"slug":31,"title":32,"summary":33,"category":26,"image_url":34,"cover_image":34,"language":19,"created_at":35},"072a2114-1f7f-4d61-99f7-be82c686c286","unipool-shared-expert-pool-moe-zh","UniPool：共享 MoE 專家池","UniPool 把 MoE 的分層專家改成全域共享池，減少重複參數，並在五個 LLaMA 規模模型上改善驗證損失。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1778221269156-lam7.png","2026-05-08T06:20:40.070989+00:00",{"id":37,"slug":38,"title":39,"summary":40,"category":26,"image_url":41,"cover_image":41,"language":19,"created_at":42},"a2761ec3-eb6a-4982-b95c-0400b46b33f5","tide-cross-architecture-diffusion-llm-distillation-zh","TIDE 讓跨架構蒸餾可行","TIDE 針對 diffusion LLM 的跨架構蒸餾，加入噪聲感知權重與 tokenizer 感知目標，讓 0.6B 學生模型更接近大模型表現。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1777529462046-z8hb.png","2026-04-30T06:10:31.730141+00:00",{"id":44,"slug":45,"title":46,"summary":47,"category":17,"image_url":48,"cover_image":48,"language":19,"created_at":49},"1c99e395-4b38-4793-9604-1de54b9f2897","qwen36-35b-a3b-open-source-coding-model-zh","Qwen3.6-35B-A3B 打開開源寫碼新路線","Qwen3.6-35B-A3B 以 35B 總參數、3B 啟用參數和 Anthropic API 相容性，直接瞄準 Claude Code 工作流。這款開源 MoE 模型想把效能、成本和工具整合一次做到位。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776643431808-tti7.png","2026-04-20T00:03:37.398827+00:00",{"id":51,"slug":52,"title":53,"summary":54,"category":55,"image_url":56,"cover_image":56,"language":19,"created_at":57},"4e82e9ad-4f0d-449f-b769-aa7035d4ffd4","april-2026-open-source-ai-projects-watch-zh","2026年4月值得追的開源 AI 專案","2026 年 4 月的開源 AI 很熱鬧。GitHub 的 agent 工具、Hugging Face 的模型下載數都很猛，這篇整理最值得看的專案、數據和實際影響。","industry","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776211622123-luij.png","2026-04-15T00:06:44.711318+00:00",{"id":59,"slug":60,"title":61,"summary":62,"category":17,"image_url":63,"cover_image":63,"language":19,"created_at":64},"d4969444-3b01-40cb-8411-c422b535cdf1","kimi-k25-moonshot-open-model-elite-zh","Kimi K2.5 上線：開源模型打進第一梯隊","Moonshot AI 的 Kimi K2.5 在 2026\u002F1\u002F27 上線，主打 256K context、Agent Swarm、MIT 授權，還把多項 benchmark 拉到和 GPT、Claude 同一張桌子。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1775272373330-xkt6.png","2026-04-04T03:12:36.705829+00:00",{"id":66,"slug":67,"title":68,"summary":69,"category":70,"image_url":71,"cover_image":71,"language":19,"created_at":72},"868034d7-415b-49bd-8f25-4dbd602e7094","unsloth-qwen35-partial-fine-tuning-zh","Unsloth 讓 Qwen3.5 可分層微調","Unsloth 新增 Qwen3.5 視覺模型分層微調，能只訓練 vision、language、attention 或 MLP。VRAM 更省，訓練也更快，對多模態團隊很實用。","tools","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1775218014686-wj6q.png","2026-04-03T12:06:38.523525+00:00",{"id":74,"slug":75,"title":76,"summary":77,"category":17,"image_url":78,"cover_image":78,"language":19,"created_at":79},"e1694231-0e5e-4476-ab60-b48d674a3f3b","kimi-k25-brings-vision-code-and-swarm-agents-zh","Kimi K2.5 把視覺、程式碼和 Agent 放一起","Moonshot AI 推出 Kimi K2.5，主打 256K context、原生視覺和 Agent Swarm。這篇拆解它對開發者、團隊與自動化流程的實際影響。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1775156810921-zcn3.png","2026-04-02T19:06:36.581036+00:00",{"id":81,"slug":82,"title":83,"summary":84,"category":17,"image_url":85,"cover_image":85,"language":19,"created_at":86},"c679b51f-194a-463b-87fc-7695256ff752","mimo-v2-pro-vs-omni-vs-flash-2026-zh","MiMo V2 Pro、Omni、Flash 怎麼選","MiMo 2026 三款模型分工很清楚：Flash 主打開源與 coding，Pro 提供 1M context，Omni 則處理圖像、音訊與影片。這篇直接比 benchmark、價格與適用場景。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1775092816164-mhzf.png","2026-04-02T01:18:43.576128+00:00",{"id":88,"slug":89,"title":90,"summary":91,"category":17,"image_url":92,"cover_image":93,"language":19,"created_at":94},"9e1044b4-946d-47fe-9e2a-c2ee032e1164","xiaomi-mimo-v2-pro-1t-moe-agents-zh","小米 MiMo-V2-Pro 登場：1T MoE 模型","小米推出 MiMo-V2-Pro，總參數超過 1T、每 token 啟用 42B，還有 1M context。SWE-bench 成績逼近 Claude Sonnet 4.6，價格卻低很多。",null,"https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1774597397882-x4i9.png","2026-03-28T03:06:19.002353+00:00",{"id":96,"slug":97,"title":98,"summary":99,"category":17,"image_url":100,"cover_image":100,"language":19,"created_at":101},"58b64033-7eb6-49b9-9aab-01cf8ae1b2f2","nvidia-rubin-six-chips-one-ai-supercomputer-zh","NVIDIA Rubin 把六顆晶片塞進 AI 機櫃","NVIDIA 在 CES 2026 端出 Rubin 平台，主打推論 Token 成本最高可比 Blackwell 低 10 倍，MoE 訓練可少用 4 倍 GPU。重點不只是一顆新 GPU，而是把 CPU、網路、DPU、交換器整包賣成機櫃級 AI 系統。","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1774516131100-fn1e.png","2026-03-26T07:18:45.861277+00:00",2]