{"id":3379,"date":"2025-02-08T20:50:01","date_gmt":"2025-02-08T12:50:01","guid":{"rendered":"https:\/\/fbilab.org\/MetAIoT\/?p=3379"},"modified":"2025-03-16T20:50:38","modified_gmt":"2025-03-16T12:50:38","slug":"%e6%8a%80%e8%a1%93%e5%a0%b1%e5%91%8a-deepseek-v3-technical-report","status":"publish","type":"post","link":"https:\/\/fbilab.org\/MetAIoT\/?p=3379","title":{"rendered":"[\u6280\u8853\u5831\u544a] DeepSeek-V3 Technical Report"},"content":{"rendered":"<p>\u8ad6\u6587\u51fa\u8655\uff1b<a href=\"https:\/\/arxiv.org\/abs\/2412.19437\">https:\/\/arxiv.org\/abs\/2412.19437<\/a><\/p>\n<p>\u6211\u5011\u63d0\u51fa\u4e86 DeepSeek-V3\uff0c\u4e00\u500b\u5f37\u5927\u7684 Mixture-of-Experts (MoE) \u8a9e\u8a00\u6a21\u578b\uff0c\u64c1\u6709 671B \u7e3d\u53c3\u6578\uff0c\u6bcf\u500b\u7b26\u865f\u6709 37B \u555f\u52d5\u53c3\u6578\u3002\u70ba\u4e86\u5be6\u73fe\u9ad8\u6548\u63a8\u7406\u548c\u4f4e\u6210\u672c\u8a13\u7df4\uff0cDeepSeek-V3 \u63a1\u7528\u4e86\u591a\u982d\u6f5b\u5728\u6ce8\u610f\u529b\uff08MLA\uff09\u548c DeepSeekMoE \u67b6\u69cb\uff0c\u9019\u4e9b\u67b6\u69cb\u5728 DeepSeek-V2 \u4e2d\u5f97\u5230\u4e86\u5145\u5206\u9a57\u8b49\u3002\u6b64\u5916\uff0cDeepSeek-V3 \u7387\u5148\u63a1\u7528\u4e86\u7121\u8f14\u52a9\u640d\u5931\u7684\u8ca0\u8f09\u5e73\u8861\u7b56\u7565\uff0c\u4e26\u8a2d\u5b9a\u4e86\u591a\u6a19\u8a18\u9810\u6e2c\u8a13\u7df4\u76ee\u6a19\uff0c\u4ee5\u7372\u5f97\u66f4\u5f37\u7684\u6027\u80fd\u3002\u6211\u5011\u5728 14.8 \u842c\u5104\u500b\u4e0d\u540c\u7684\u9ad8\u54c1\u8cea\u4ee3\u5e63\u4e0a\u9810\u5148\u8a13\u7df4 DeepSeek-V3\uff0c\u63a5\u8457\u9032\u884c\u76e3\u7763\u5fae\u8abf\u548c\u5f37\u5316\u5b78\u7fd2\u968e\u6bb5\uff0c\u4ee5\u5145\u5206\u767c\u63ee\u5176\u80fd\u529b\u3002\u7d9c\u5408\u8a55\u4f30\u986f\u793a\uff0cDeepSeek-V3 \u7684\u8868\u73fe\u512a\u65bc\u5176\u4ed6\u958b\u653e\u539f\u59cb\u78bc\u6a21\u578b\uff0c\u4e26\u9054\u5230\u8207\u9818\u5148\u7684\u5c01\u9589\u5f0f\u539f\u59cb\u78bc\u6a21\u578b\u76f8\u5ab2\u7f8e\u7684\u6548\u80fd\u3002\u5118\u7ba1\u6027\u80fd\u512a\u7570\uff0cDeepSeek-V3 \u7684\u5b8c\u6574\u8a13\u7df4\u50c5\u9700 2.788M H800 GPU \u5c0f\u6642\u3002\u6b64\u5916\uff0c\u5176\u8a13\u7df4\u904e\u7a0b\u975e\u5e38\u7a69\u5b9a\u3002\u5728\u6574\u500b\u8a13\u7df4\u904e\u7a0b\u4e2d\uff0c\u6211\u5011\u6c92\u6709\u9047\u5230\u4efb\u4f55\u7121\u6cd5\u6062\u5fa9\u7684\u640d\u5931\u5cf0\u503c\uff0c\u4e5f\u6c92\u6709\u57f7\u884c\u4efb\u4f55\u56de\u6efe\u3002\u6a21\u578b\u6aa2\u67e5\u9ede\u4f4d\u65bc \u539f\u59cb\u78bc\uff1a<a href=\"https:\/\/github.com\/deepseek-ai\/DeepSeek-V3\">https:\/\/github.com\/deepseek-ai\/DeepSeek-V3<\/a><\/p>\n<p>\u900f\u904e DeepL.com\uff08\u514d\u8cbb\u7248\uff09\u7ffb\u8b6f<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" class=\"alignnone size-full wp-image-3380\" src=\"https:\/\/fbilab.org\/MetAIoT\/wp-content\/uploads\/2025\/03\/\u87a2\u5e55\u64f7\u53d6\u756b\u9762-2025-03-16-204820.png\" alt=\"\" width=\"549\" height=\"370\" srcset=\"https:\/\/fbilab.org\/MetAIoT\/wp-content\/uploads\/2025\/03\/\u87a2\u5e55\u64f7\u53d6\u756b\u9762-2025-03-16-204820.png 549w, https:\/\/fbilab.org\/MetAIoT\/wp-content\/uploads\/2025\/03\/\u87a2\u5e55\u64f7\u53d6\u756b\u9762-2025-03-16-204820-300x202.png 300w\" sizes=\"auto, (max-width: 549px) 100vw, 549px\" \/><\/p>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u8ad6\u6587\u51fa\u8655\uff1bhttps:\/\/arxiv.org\/abs\/2412.19437 \u6211\u5011\u63d0\u51fa\u4e86 DeepSeek-V3 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":3380,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[99,86,56],"tags":[],"class_list":["post-3379","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-ai","category-news","category-research"],"acf":[],"_links":{"self":[{"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=\/wp\/v2\/posts\/3379","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3379"}],"version-history":[{"count":1,"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=\/wp\/v2\/posts\/3379\/revisions"}],"predecessor-version":[{"id":3381,"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=\/wp\/v2\/posts\/3379\/revisions\/3381"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=\/wp\/v2\/media\/3380"}],"wp:attachment":[{"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3379"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3379"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/fbilab.org\/MetAIoT\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3379"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}