{"id":64602,"date":"2024-06-01T02:59:30","date_gmt":"2024-05-31T23:29:30","guid":{"rendered":"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/"},"modified":"2024-06-01T02:59:30","modified_gmt":"2024-05-31T23:29:30","slug":"%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch","status":"publish","type":"post","link":"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/","title":{"rendered":"\u062f\u0631\u06a9 GPT: \u0646\u062d\u0648\u0647 \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u06cc\u06a9 \u0645\u062f\u0644 \u0633\u0627\u062f\u0647 GPT \u0628\u0627 PyTorch"},"content":{"rendered":"<p><\/p>\n<div data-article-id=\"1872511\" id=\"article-body\">\n<p><em>\u062f\u0631 \u0627\u0628\u062a\u062f\u0627 \u062f\u0631 14\/5\/2024 \u062f\u0631 emangini.com \u0627\u0631\u0633\u0627\u0644 \u0634\u062f\u0647 \u0627\u0633\u062a<\/em><\/p>\n<p>\u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u06cc \u062c\u0627\u0645\u0639 \u062a\u0648\u0636\u06cc\u062d \u0645\u0641\u0635\u0644\u06cc \u062f\u0631\u0628\u0627\u0631\u0647 \u0646\u062d\u0648\u0647 \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc \u06cc\u06a9 \u0645\u062f\u0644 \u0633\u0627\u062f\u0647 GPT (\u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0627\u0632 \u067e\u06cc\u0634 \u0622\u0645\u0648\u0632\u0634\u200c\u062f\u06cc\u062f\u0647) \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 PyTorch \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc\u200c\u062f\u0647\u062f.  
\u0645\u0627 \u0627\u062c\u0632\u0627\u06cc \u0644\u0627\u0632\u0645\u060c \u0646\u062d\u0648\u0647 \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644 \u0648 \u0646\u062d\u0648\u0647 \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0631\u0627 \u067e\u0648\u0634\u0634 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u0627\u062f. <\/p>\n<p>\u0628\u0631\u0627\u06cc \u06a9\u0633\u0627\u0646\u06cc \u0627\u0632 \u0634\u0645\u0627 \u06a9\u0647 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u0646\u0628\u0627\u0644 \u06a9\u0646\u06cc\u062f\u060c \u06cc\u06a9 \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646 \u0648 \u0647\u0645\u0686\u0646\u06cc\u0646 \u06cc\u06a9 \u0646\u0648\u062a \u0628\u0648\u06a9 Jupyter \u062f\u0631 UnderstandingGPT (GitHub) \u0648\u062c\u0648\u062f \u062f\u0627\u0631\u062f.<\/p>\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_85 counter-hierarchy ez-toc-counter-rtl ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0641\u0647\u0631\u0633\u062a \u0645\u0637\u0627\u0644\u0628<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" 
baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D9%85%D8%B9%D8%B1%D9%81%DB%8C\" >\u0645\u0639\u0631\u0641\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D8%A8%D8%B1%D9%BE%D8%A7%DB%8C%DB%8C\" >\u0628\u0631\u067e\u0627\u06cc\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D8%A7%DB%8C%D8%AC%D8%A7%D8%AF_%D9%85%D8%AC%D9%85%D9%88%D8%B9%D9%87_%D8%AF%D8%A7%D8%AF%D9%87\" >\u0627\u06cc\u062c\u0627\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" 
href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D8%B3%D8%A7%D8%AE%D8%AA_%D9%85%D8%AF%D9%84_GPT\" >\u0633\u0627\u062e\u062a \u0645\u062f\u0644 GPT<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D8%A2%D9%85%D9%88%D8%B2%D8%B4_%D9%85%D8%AF%D9%84\" >\u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D8%AA%D9%88%D9%84%DB%8C%D8%AF_%D9%85%D8%AA%D9%86\" >\u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D9%86%D8%AA%DB%8C%D8%AC%D9%87\" >\u0646\u062a\u06cc\u062c\u0647<\/a><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-8\" 
href=\"https:\/\/nabfollower.com\/blog\/%d8%af%d8%b1%da%a9-gpt-%d9%86%d8%ad%d9%88%d9%87-%d9%be%db%8c%d8%a7%d8%af%d9%87-%d8%b3%d8%a7%d8%b2%db%8c-%db%8c%da%a9-%d9%85%d8%af%d9%84-%d8%b3%d8%a7%d8%af%d9%87-gpt-%d8%a8%d8%a7-pytorch\/#%D9%85%D9%86%D8%A7%D8%A8%D8%B9\" >\u0645\u0646\u0627\u0628\u0639:<\/a><\/li><\/ul><\/li><\/ul><\/li><\/ul><\/nav><\/div>\n<h2><span class=\"ez-toc-section\" id=\"%D9%85%D8%B9%D8%B1%D9%81%DB%8C\"><\/span>\n<p>  \u0645\u0639\u0631\u0641\u06cc<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0645\u062f\u0644 GPT \u06cc\u06a9 \u0645\u0639\u0645\u0627\u0631\u06cc \u0645\u0628\u062a\u0646\u06cc \u0628\u0631 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0627\u0633\u062a \u06a9\u0647 \u0628\u0631\u0627\u06cc \u0648\u0638\u0627\u06cc\u0641 \u067e\u0631\u062f\u0627\u0632\u0634 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc (NLP) \u0645\u0627\u0646\u0646\u062f \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0637\u0631\u0627\u062d\u06cc \u0634\u062f\u0647 \u0627\u0633\u062a.  \u0645\u062f\u0644 \u0647\u0627\u06cc \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631\u060c \u0645\u0639\u0631\u0641\u06cc \u0634\u062f\u0647 \u062a\u0648\u0633\u0637 Vaswani \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646.  
(2017)\u060c \u0627\u0632 \u0645\u06a9\u0627\u0646\u06cc\u0633\u0645\u200c\u0647\u0627\u06cc \u062a\u0648\u062c\u0647 \u0628\u0647 \u062e\u0648\u062f \u0628\u0631\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u062a\u0648\u0627\u0644\u06cc \u062f\u0627\u062f\u0647\u200c\u0647\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u200c\u06a9\u0646\u062f \u0648 \u0628\u0647 \u0622\u0646\u200c\u0647\u0627 \u0627\u062c\u0627\u0632\u0647 \u0645\u06cc\u200c\u062f\u0647\u062f \u0648\u0627\u0628\u0633\u062a\u06af\u06cc\u200c\u0647\u0627\u06cc \u062f\u0648\u0631\u0628\u0631\u062f \u0631\u0627 \u0628\u0647 \u0637\u0648\u0631 \u0645\u0648\u062b\u0631\u062a\u0631\u06cc \u0646\u0633\u0628\u062a \u0628\u0647 \u0634\u0628\u06a9\u0647\u200c\u0647\u0627\u06cc \u0639\u0635\u0628\u06cc \u0628\u0627\u0632\u06af\u0634\u062a\u06cc \u0633\u0646\u062a\u06cc (RNNs) \u062b\u0628\u062a \u06a9\u0646\u0646\u062f.  \u0645\u0639\u0645\u0627\u0631\u06cc GPT\u060c \u0628\u0647 \u0637\u0648\u0631 \u062e\u0627\u0635\u060c \u06cc\u06a9 \u0645\u062f\u0644 \u0627\u062a\u0648\u0631\u06af\u0631\u0633\u06cc\u0648 \u0627\u0633\u062a \u06a9\u0647 \u0645\u062a\u0646 \u0631\u0627 \u0628\u0627 \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc \u06a9\u0644\u0645\u0647 \u0628\u0639\u062f\u06cc \u062f\u0631 \u06cc\u06a9 \u062f\u0646\u0628\u0627\u0644\u0647 \u062a\u0648\u0644\u06cc\u062f \u0645\u06cc\u200c\u06a9\u0646\u062f \u0648 \u0622\u0646 \u0631\u0627 \u0628\u0631\u0627\u06cc \u06a9\u0627\u0631\u0647\u0627\u06cc\u06cc \u0645\u0627\u0646\u0646\u062f \u062a\u06a9\u0645\u06cc\u0644 \u0645\u062a\u0646\u060c \u062a\u0631\u062c\u0645\u0647 \u0648 \u062e\u0644\u0627\u0635\u0647\u200c\u0633\u0627\u0632\u06cc \u0642\u062f\u0631\u062a\u0645\u0646\u062f \u0645\u06cc\u200c\u06a9\u0646\u062f.  
\u0627\u06cc\u0646 \u0622\u0645\u0648\u0632\u0634 \u0634\u0645\u0627 \u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u0646\u0633\u062e\u0647 \u0633\u0627\u062f\u0647 \u0634\u062f\u0647 \u0627\u0632 GPT\u060c \u0622\u0645\u0648\u0632\u0634 \u0622\u0646 \u0628\u0631 \u0631\u0648\u06cc \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u06a9\u0648\u0686\u06a9 \u0648 \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u06cc\u06cc \u0645\u06cc \u06a9\u0646\u062f.  \u0645\u0627 \u0627\u0632 PyTorch \u0648 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Hugging Face Transformers \u0628\u0631\u0627\u06cc \u0633\u0627\u062e\u062a \u0648 \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u062e\u0648\u0627\u0647\u06cc\u0645 \u06a9\u0631\u062f.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D8%A8%D8%B1%D9%BE%D8%A7%DB%8C%DB%8C\"><\/span>\n<p>  \u0628\u0631\u067e\u0627\u06cc\u06cc<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0642\u0628\u0644 \u0627\u0632 \u0634\u0631\u0648\u0639\u060c \u0645\u0637\u0645\u0626\u0646 \u0634\u0648\u06cc\u062f \u06a9\u0647 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627\u06cc \u0645\u0648\u0631\u062f \u0646\u06cc\u0627\u0632 \u0631\u0627 \u0646\u0635\u0628 \u06a9\u0631\u062f\u0647 \u0627\u06cc\u062f.  
\u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f \u0622\u0646\u0647\u0627 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 pip \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f:<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight shell\"><code>pip <span class=\"nb\">install <\/span>torch transformers\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u0627\u06cc\u0646 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u0647\u0627 \u0628\u0631\u0627\u06cc \u0633\u0627\u062e\u062a \u0648 \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644 GPT \u0645\u0627 \u0627\u0633\u0627\u0633\u06cc \u0647\u0633\u062a\u0646\u062f.  
PyTorch \u06cc\u06a9 \u0686\u0627\u0631\u0686\u0648\u0628 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0627\u0633\u062a \u06a9\u0647 \u0627\u0646\u0639\u0637\u0627\u0641\u200c\u067e\u0630\u06cc\u0631\u06cc \u0648 \u0633\u0631\u0639\u062a \u0631\u0627 \u0641\u0631\u0627\u0647\u0645 \u0645\u06cc\u200c\u06a9\u0646\u062f\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 Transformers \u062a\u0648\u0633\u0637 Hugging Face \u0645\u062f\u0644\u200c\u0647\u0627 \u0648 \u0646\u0634\u0627\u0646\u0647\u200c\u0647\u0627\u06cc \u0627\u0632 \u067e\u06cc\u0634 \u0622\u0645\u0648\u0632\u0634\u200c\u062f\u06cc\u062f\u0647\u200c\u0634\u062f\u0647 \u0627\u0632 \u062c\u0645\u0644\u0647 GPT-2 \u0631\u0627 \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc\u200c\u062f\u0647\u062f.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D8%A7%DB%8C%D8%AC%D8%A7%D8%AF_%D9%85%D8%AC%D9%85%D9%88%D8%B9%D9%87_%D8%AF%D8%A7%D8%AF%D9%87\"><\/span>\n<p>  \u0627\u06cc\u062c\u0627\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0628\u0631\u0627\u06cc \u0622\u0645\u0648\u0632\u0634 \u0645\u0648\u062b\u0631 \u06cc\u06a9 \u0645\u062f\u0644 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646\u06cc \u0645\u0627\u0646\u0646\u062f GPT\u060c \u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u0648 \u0622\u0645\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0635\u062d\u06cc\u062d \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0645\u062a\u0646\u06cc \u0628\u0633\u06cc\u0627\u0631 \u0645\u0647\u0645 \u0627\u0633\u062a.  
\u0627\u06cc\u0646 \u0641\u0631\u0622\u06cc\u0646\u062f \u0628\u0627 \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u06a9\u0644\u0627\u0633 \u062f\u0627\u062f\u0647 \u0633\u0641\u0627\u0631\u0634\u06cc \u0622\u063a\u0627\u0632 \u0645\u06cc \u0634\u0648\u062f \u06a9\u0647 \u0648\u0631\u0648\u062f\u06cc \u0647\u0627\u06cc \u0645\u062a\u0646 \u0648 \u062a\u0648\u06a9\u0646 \u0633\u0627\u0632\u06cc \u0631\u0627 \u0645\u062f\u06cc\u0631\u06cc\u062a \u0645\u06cc \u06a9\u0646\u062f.  \u062a\u0648\u06a9\u0646\u200c\u0633\u0627\u0632\u06cc \u0641\u0631\u0622\u06cc\u0646\u062f \u062a\u0628\u062f\u06cc\u0644 \u0645\u062a\u0646 \u062e\u0627\u0645 \u0628\u0647 \u0646\u0645\u0627\u06cc\u0634\u200c\u0647\u0627\u06cc \u0639\u062f\u062f\u06cc (\u0634\u0646\u0627\u0633\u0647\u200c\u0647\u0627\u06cc \u0631\u0645\u0632) \u0627\u0633\u062a \u06a9\u0647 \u0645\u062f\u0644 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u062f \u0622\u0646 \u0631\u0627 \u062f\u0631\u06a9 \u06a9\u0646\u062f (\u062f\u0648\u0644\u06cc\u0646 \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646\u060c 2019).  
\u0642\u0637\u0639\u0647 \u06a9\u062f \u0627\u0631\u0627\u0626\u0647 \u0634\u062f\u0647 \u0628\u0627 \u062a\u0639\u0631\u06cc\u0641 \u06a9\u0644\u0627\u0633\u06cc \u0628\u0647 \u0646\u0627\u0645 <strong>SimpleDataset<\/strong> \u0627\u06cc\u0646 \u06a9\u0627\u0631 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc \u062f\u0647\u062f\u060c \u06a9\u0647 \u0627\u0632 \u062a\u0648\u06a9\u0646\u0627\u06cc\u0632\u0631 GPT-2 \u0628\u0631\u0627\u06cc \u0631\u0645\u0632\u06af\u0630\u0627\u0631\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0645\u062a\u0646\u06cc \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u06a9\u0644\u0627\u0633 <strong>SimpleDataset<\/strong> \u0627\u0632 <em>torch.utils.data.Dataset<\/em> \u0627\u0631\u062b \u0645\u06cc \u0628\u0631\u062f \u0648 \u0631\u0648\u0634 \u0647\u0627\u06cc \u0644\u0627\u0632\u0645 \u0631\u0627 \u0628\u0631\u0627\u06cc \u062a\u0639\u0627\u0645\u0644 \u06cc\u06a9\u067e\u0627\u0631\u0686\u0647 \u0628\u0627 DataLoader \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0645\u06cc \u06a9\u0646\u062f.  \u0627\u06cc\u0646 \u06a9\u0644\u0627\u0633 \u0633\u0647 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0631\u0627 \u062f\u0631 \u0645\u0642\u062f\u0627\u0631\u062f\u0647\u06cc \u0627\u0648\u0644\u06cc\u0647 \u062e\u0648\u062f \u0645\u06cc \u06af\u06cc\u0631\u062f: \u0644\u06cc\u0633\u062a \u0645\u062a\u0648\u0646\u060c \u062a\u0648\u06a9\u0646\u0627\u06cc\u0632\u0631 \u0648 \u062d\u062f\u0627\u06a9\u062b\u0631 \u0637\u0648\u0644 \u062f\u0646\u0628\u0627\u0644\u0647 \u0647\u0627.  
\u0645\u062a\u062f <strong>__len__<\/strong> \u062a\u0639\u062f\u0627\u062f \u0645\u062a\u0648\u0646 \u0645\u0648\u062c\u0648\u062f \u062f\u0631 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 \u0645\u062a\u062f <strong>__getitem__<\/strong> \u06cc\u06a9 \u0645\u062a\u0646 \u062e\u0627\u0635 \u0631\u0627 \u062f\u0631 \u0646\u0645\u0627\u06cc\u0647 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0628\u0627\u0632\u06cc\u0627\u0628\u06cc \u0648 \u0631\u0645\u0632\u06af\u0630\u0627\u0631\u06cc \u0645\u06cc \u06a9\u0646\u062f.  \u0641\u0631\u0622\u06cc\u0646\u062f \u0631\u0645\u0632\u06af\u0630\u0627\u0631\u06cc \u0634\u0627\u0645\u0644 \u062a\u0628\u062f\u06cc\u0644 \u0645\u062a\u0646 \u0628\u0647 \u0646\u0645\u0627\u06cc\u0634\u200c\u0647\u0627\u06cc \u0639\u062f\u062f\u06cc \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u062a\u0648\u06a9\u0646\u0627\u06cc\u0632\u0631 \u0648 \u0627\u0636\u0627\u0641\u0647 \u06a9\u0631\u062f\u0646 \u062a\u0648\u0627\u0644\u06cc\u200c\u0647\u0627 \u0628\u0647 \u062d\u062f\u0627\u06a9\u062b\u0631 \u0637\u0648\u0644 \u0645\u0634\u062e\u0635 \u0628\u0631\u0627\u06cc \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u0627\u0632 \u06cc\u06a9\u0646\u0648\u0627\u062e\u062a\u06cc \u0627\u0633\u062a.  
Padding \u0628\u0647 \u0645\u0639\u0646\u0627\u06cc \u0627\u0636\u0627\u0641\u0647 \u06a9\u0631\u062f\u0646 \u0646\u0634\u0627\u0646\u0647\u200c\u0647\u0627\u06cc \u0627\u0636\u0627\u0641\u06cc \u0628\u0647 \u062f\u0646\u0628\u0627\u0644\u0647\u200c\u0647\u0627 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u0637\u0648\u0644 \u06cc\u06a9\u0633\u0627\u0646 \u0627\u0633\u062a \u06a9\u0647 \u0628\u0631\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0633\u062a\u0647\u200c\u0627\u06cc \u062f\u0631 \u0634\u0628\u06a9\u0647\u200c\u0647\u0627\u06cc \u0639\u0635\u0628\u06cc \u0645\u0647\u0645 \u0627\u0633\u062a.  \u0627\u06cc\u0646 \u0645\u062a\u062f \u0634\u0646\u0627\u0633\u0647\u200c\u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0648 \u0645\u0627\u0633\u06a9\u200c\u0647\u0627\u06cc \u062a\u0648\u062c\u0647 \u0631\u0627 \u0628\u0631\u0645\u06cc\u200c\u06af\u0631\u062f\u0627\u0646\u062f\u060c \u062c\u0627\u06cc\u06cc \u06a9\u0647 \u0645\u0627\u0633\u06a9 \u062a\u0648\u062c\u0647 \u06cc\u06a9 \u0645\u0627\u0633\u06a9 \u0628\u0627\u06cc\u0646\u0631\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0646\u0634\u0627\u0646 \u0645\u06cc\u200c\u062f\u0647\u062f \u06a9\u062f\u0627\u0645 \u0646\u0634\u0627\u0646\u0647\u200c\u0647\u0627 \u06a9\u0644\u0645\u0627\u062a \u0648\u0627\u0642\u0639\u06cc \u0647\u0633\u062a\u0646\u062f \u0648 \u06a9\u062f\u0627\u0645\u200c\u0647\u0627 padding \u0647\u0633\u062a\u0646\u062f.  
\u0627\u06cc\u0646 \u0628\u0647 \u0645\u062f\u0644 \u06a9\u0645\u06a9 \u0645\u06cc \u06a9\u0646\u062f \u062a\u0627 \u062a\u0648\u06a9\u0646 \u0647\u0627\u06cc padding \u0631\u0627 \u062f\u0631 \u0637\u0648\u0644 \u0622\u0645\u0648\u0632\u0634 \u0646\u0627\u062f\u06cc\u062f\u0647 \u0628\u06af\u06cc\u0631\u062f.<\/p>\n<p>\u0627\u06cc\u0646 \u06a9\u062f \u0628\u0631\u0627\u06cc \u0645\u0631\u062c\u0639 \u0627\u0633\u062a:<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"kn\">import<\/span> <span class=\"n\">torch<\/span>\n<span class=\"kn\">from<\/span> <span class=\"n\">torch.utils.data<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">Dataset<\/span><span class=\"p\">,<\/span> <span class=\"n\">DataLoader<\/span>\n<span class=\"kn\">from<\/span> <span class=\"n\">transformers<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">GPT2Tokenizer<\/span>\n\n<span class=\"k\">class<\/span> <span class=\"nc\">SimpleDataset<\/span><span class=\"p\">(<\/span><span class=\"n\">Dataset<\/span><span class=\"p\">):<\/span>\n    <span class=\"k\">def<\/span> <span class=\"nf\">__init__<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">texts<\/span><span class=\"p\">,<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">,<\/span> <span class=\"n\">max_length<\/span><span class=\"p\">):<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">texts<\/span> <span class=\"o\">=<\/span> <span class=\"n\">texts<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">tokenizer<\/span> <span class=\"o\">=<\/span> <span class=\"n\">tokenizer<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">max_length<\/span> <span class=\"o\">=<\/span> <span class=\"n\">max_length<\/span>\n\n    <span class=\"k\">def<\/span> <span class=\"nf\">__len__<\/span><span 
class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">):<\/span>\n        <span class=\"k\">return<\/span> <span class=\"nf\">len<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">texts<\/span><span class=\"p\">)<\/span>\n\n    <span class=\"k\">def<\/span> <span class=\"nf\">__getitem__<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">idx<\/span><span class=\"p\">):<\/span>\n        <span class=\"n\">text<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">texts<\/span><span class=\"p\">[<\/span><span class=\"n\">idx<\/span><span class=\"p\">]<\/span>\n        <span class=\"n\">encoding<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">tokenizer<\/span><span class=\"p\">(<\/span><span class=\"n\">text<\/span><span class=\"p\">,<\/span> <span class=\"n\">return_tensors<\/span><span class=\"o\">=<\/span><span class=\"sh\">'<\/span><span class=\"s\">pt<\/span><span class=\"sh\">'<\/span><span class=\"p\">,<\/span> <span class=\"n\">padding<\/span><span class=\"o\">=<\/span><span class=\"sh\">'<\/span><span class=\"s\">max_length<\/span><span class=\"sh\">'<\/span><span class=\"p\">,<\/span> <span class=\"n\">truncation<\/span><span class=\"o\">=<\/span><span class=\"bp\">True<\/span><span class=\"p\">,<\/span> <span class=\"n\">max_length<\/span><span class=\"o\">=<\/span><span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">max_length<\/span><span class=\"p\">)<\/span>\n        <span class=\"k\">return<\/span> <span class=\"n\">encoding<\/span><span class=\"p\">[<\/span><span class=\"sh\">'<\/span><span class=\"s\">input_ids<\/span><span class=\"sh\">'<\/span><span class=\"p\">].<\/span><span class=\"nf\">squeeze<\/span><span class=\"p\">(),<\/span> <span class=\"n\">encoding<\/span><span 
class=\"p\">[<\/span><span class=\"sh\">'<\/span><span class=\"s\">attention_mask<\/span><span class=\"sh\">'<\/span><span class=\"p\">].<\/span><span class=\"nf\">squeeze<\/span><span class=\"p\">()<\/span>\n\n<span class=\"n\">texts<\/span> <span class=\"o\">=<\/span> <span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">Hello, how are you?<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">I am fine, thank you.<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">What about you?<\/span><span class=\"sh\">\"<\/span><span class=\"p\">]<\/span>\n<span class=\"n\">tokenizer<\/span> <span class=\"o\">=<\/span> <span class=\"n\">GPT2Tokenizer<\/span><span class=\"p\">.<\/span><span class=\"nf\">from_pretrained<\/span><span class=\"p\">(<\/span><span class=\"sh\">'<\/span><span class=\"s\">gpt2<\/span><span class=\"sh\">'<\/span><span class=\"p\">)<\/span>\n<span class=\"n\">tokenizer<\/span><span class=\"p\">.<\/span><span class=\"n\">pad_token<\/span> <span class=\"o\">=<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">.<\/span><span class=\"n\">eos_token<\/span>\n<span class=\"n\">dataset<\/span> <span class=\"o\">=<\/span> <span class=\"nc\">SimpleDataset<\/span><span class=\"p\">(<\/span><span class=\"n\">texts<\/span><span class=\"p\">,<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">,<\/span> <span class=\"n\">max_length<\/span><span class=\"o\">=<\/span><span class=\"mi\">20<\/span><span class=\"p\">)<\/span>\n<span class=\"n\">dataloader<\/span> <span class=\"o\">=<\/span> <span class=\"nc\">DataLoader<\/span><span class=\"p\">(<\/span><span class=\"n\">dataset<\/span><span class=\"p\">,<\/span> <span class=\"n\">batch_size<\/span><span class=\"o\">=<\/span><span class=\"mi\">2<\/span><span class=\"p\">,<\/span> <span class=\"n\">shuffle<\/span><span class=\"o\">=<\/span><span 
class=\"bp\">True<\/span><span class=\"p\">)<\/span>\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u06a9\u062f\u060c \u06a9\u0644\u0627\u0633 SimpleDataset\u060c \u0631\u0645\u0632\u06af\u0630\u0627\u0631\u06cc \u0645\u062a\u0648\u0646 \u0648\u0631\u0648\u062f\u06cc \u0631\u0627 \u0645\u062f\u06cc\u0631\u06cc\u062a \u0645\u06cc \u06a9\u0646\u062f \u0648 \u0634\u0646\u0627\u0633\u0647 \u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0631\u0645\u0632\u06af\u0630\u0627\u0631\u06cc \u0634\u062f\u0647 \u0648 \u0645\u0627\u0633\u06a9 \u0647\u0627\u06cc \u062a\u0648\u062c\u0647 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f.  \u0633\u067e\u0633 DataLoader \u062f\u0627\u062f\u0647 \u0647\u0627 \u0631\u0627 \u0628\u0631\u0627\u06cc \u0622\u0645\u0648\u0632\u0634 \u06a9\u0627\u0631\u0622\u0645\u062f \u062f\u0633\u062a\u0647 \u0628\u0646\u062f\u06cc \u0648 \u0628\u0647 \u0647\u0645 \u0645\u06cc \u0631\u06cc\u0632\u062f.  
\u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0633\u062a\u0647\u200c\u0627\u06cc\u060c \u06a9\u0647 \u0634\u0627\u0645\u0644 \u062a\u0642\u0633\u06cc\u0645 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647\u200c\u0647\u0627 \u0628\u0647 \u062f\u0633\u062a\u0647\u200c\u0647\u0627\u06cc \u06a9\u0648\u0686\u06a9\u200c\u062a\u0631 \u0627\u0633\u062a\u060c \u0628\u0647 \u0645\u062f\u0644 \u0627\u062c\u0627\u0632\u0647 \u0645\u06cc\u200c\u062f\u0647\u062f \u062a\u0627 \u0648\u0632\u0646\u200c\u0647\u0627\u06cc \u062e\u0648\u062f \u0631\u0627 \u0628\u06cc\u0634\u062a\u0631 \u0628\u0647\u200c\u0631\u0648\u0632\u0631\u0633\u0627\u0646\u06cc \u06a9\u0646\u062f \u0648 \u0645\u0646\u062c\u0631 \u0628\u0647 \u0647\u0645\u06af\u0631\u0627\u06cc\u06cc \u0633\u0631\u06cc\u0639\u200c\u062a\u0631 \u0634\u0648\u062f.  \u0645\u062e\u0644\u0648\u0637 \u06a9\u0631\u062f\u0646 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0628\u0647 \u0634\u06a9\u0633\u062a\u0646 \u0647\u0631 \u0646\u0638\u0645 \u0630\u0627\u062a\u06cc \u062f\u0631 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0622\u0645\u0648\u0632\u0634\u06cc \u06a9\u0645\u06a9 \u0645\u06cc \u06a9\u0646\u062f \u0648 \u062a\u0639\u0645\u06cc\u0645 \u0645\u062f\u0644 \u0631\u0627 \u0628\u0647\u0628\u0648\u062f \u0645\u06cc \u0628\u062e\u0634\u062f.<\/p>\n<p>\u0628\u0627 \u062a\u0646\u0638\u06cc\u0645 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0628\u0647 \u0627\u06cc\u0646 \u0631\u0648\u0634\u060c \u0645\u0627 \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u062d\u0627\u0635\u0644 \u0645\u06cc \u06a9\u0646\u06cc\u0645 \u06a9\u0647 \u0645\u062f\u0644 \u062f\u0646\u0628\u0627\u0644\u0647 \u0647\u0627\u06cc\u06cc \u0628\u0627 \u0637\u0648\u0644 \u06cc\u06a9\u0646\u0648\u0627\u062e\u062a \u0628\u0631\u0627\u06cc \u0622\u0645\u0648\u0632\u0634 \u062f\u0631\u06cc\u0627\u0641\u062a \u0645\u06cc \u06a9\u0646\u062f.  
\u0627\u06cc\u0646 \u0631\u0648\u06cc\u06a9\u0631\u062f \u0647\u0645\u0686\u0646\u06cc\u0646 \u0645\u062f\u06cc\u0631\u06cc\u062a \u0648\u0631\u0648\u062f\u06cc\u200c\u0647\u0627\u06cc \u0628\u0627 \u0637\u0648\u0644 \u0645\u062a\u063a\u06cc\u0631 \u0631\u0627 \u0622\u0633\u0627\u0646\u200c\u062a\u0631 \u0645\u06cc\u200c\u06a9\u0646\u062f \u0648 \u062f\u0631 \u0639\u06cc\u0646 \u062d\u0627\u0644 \u062a\u0636\u0645\u06cc\u0646 \u0645\u06cc\u200c\u06a9\u0646\u062f \u06a9\u0647 \u0646\u0634\u0627\u0646\u0647\u200c\u0647\u0627\u06cc padding \u062f\u0631 \u0641\u0631\u0622\u06cc\u0646\u062f \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u062f\u0644 \u062a\u062f\u0627\u062e\u0644\u06cc \u0627\u06cc\u062c\u0627\u062f \u0646\u0645\u06cc\u200c\u06a9\u0646\u0646\u062f.  \u0627\u06cc\u0646 \u0645\u0631\u062d\u0644\u0647 \u067e\u06cc\u0634 \u067e\u0631\u062f\u0627\u0632\u0634 \u062c\u0627\u0645\u0639 \u0628\u0631\u0627\u06cc \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644 \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646 \u0645\u0648\u062b\u0631 \u0648 \u06a9\u0627\u0631\u0622\u0645\u062f \u0628\u0633\u06cc\u0627\u0631 \u0645\u0647\u0645 \u0627\u0633\u062a (\u0628\u0631\u0627\u0648\u0646 \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646\u060c 2020).<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D8%B3%D8%A7%D8%AE%D8%AA_%D9%85%D8%AF%D9%84_GPT\"><\/span>\n<p>  \u0633\u0627\u062e\u062a \u0645\u062f\u0644 GPT<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0628\u0631\u0627\u06cc \u0633\u0627\u062e\u062a \u06cc\u06a9 \u0645\u062f\u0644 GPT \u0645\u0648\u062b\u0631\u060c \u0628\u0627 \u062a\u0639\u0631\u06cc\u0641 \u0645\u0639\u0645\u0627\u0631\u06cc \u0622\u0646 \u0634\u0631\u0648\u0639 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  
\u0627\u06cc\u0646 \u0645\u062f\u0644 \u0627\u0632 \u062f\u0648 \u06a9\u0644\u0627\u0633 \u0627\u0635\u0644\u06cc \u062a\u0634\u06a9\u06cc\u0644 \u0634\u062f\u0647 \u0627\u0633\u062a: <strong>GPTBlock<\/strong> \u0648 <strong>SimpleGPT<\/strong>.  \u0627\u06cc\u0646 <strong>GPTBlock<\/strong> \u06a9\u0644\u0627\u0633 \u0646\u0634\u0627\u0646 \u062f\u0647\u0646\u062f\u0647 \u06cc\u06a9 \u0628\u0644\u0648\u06a9 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0645\u0646\u0641\u0631\u062f \u0627\u0633\u062a\u060c \u062f\u0631 \u062d\u0627\u0644\u06cc \u06a9\u0647 <strong>SimpleGPT<\/strong> \u06a9\u0644\u0627\u0633 \u0686\u0646\u062f\u06cc\u0646 \u0628\u0644\u0648\u06a9 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0631\u0627 \u0628\u0631\u0627\u06cc \u0627\u06cc\u062c\u0627\u062f \u0645\u062f\u0644 \u06a9\u0627\u0645\u0644 \u067e\u0634\u062a\u0647 \u0645\u06cc \u06a9\u0646\u062f (\u0648\u0627\u0633\u0648\u0627\u0646\u06cc \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646\u060c 2017).<\/p>\n<p>\u062f\u0631 <strong>GPTBlock<\/strong> \u06a9\u0644\u0627\u0633\u060c \u0645\u0627 \u0627\u062c\u0632\u0627\u06cc \u0636\u0631\u0648\u0631\u06cc \u06cc\u06a9 \u0628\u0644\u0648\u06a9 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0631\u0627 \u06a9\u067e\u0633\u0648\u0644\u0647 \u0645\u06cc \u06a9\u0646\u06cc\u0645.  \u0627\u06cc\u0646\u0647\u0627 \u0634\u0627\u0645\u0644 \u0646\u0631\u0645\u0627\u0644 \u0633\u0627\u0632\u06cc \u0644\u0627\u06cc\u0647\u060c \u062a\u0648\u062c\u0647 \u0686\u0646\u062f \u0633\u0631 \u0648 \u0634\u0628\u06a9\u0647 \u0639\u0635\u0628\u06cc \u067e\u06cc\u0634\u062e\u0648\u0631 \u0628\u0627 \u0641\u0639\u0627\u0644 \u0633\u0627\u0632\u06cc GELU \u0627\u0633\u062a.  
\u0646\u0631\u0645\u0627\u0644 \u0633\u0627\u0632\u06cc \u0644\u0627\u06cc\u0647 \u0647\u0627\u060c \u0648\u0631\u0648\u062f\u06cc \u0647\u0627\u06cc \u0647\u0631 \u0632\u06cc\u0631 \u0644\u0627\u06cc\u0647 \u0631\u0627 \u0627\u0633\u062a\u0627\u0646\u062f\u0627\u0631\u062f \u0645\u06cc \u06a9\u0646\u062f \u0648 \u062b\u0628\u0627\u062a \u0648 \u0647\u0645\u06af\u0631\u0627\u06cc\u06cc \u0641\u0631\u0622\u06cc\u0646\u062f \u0622\u0645\u0648\u0632\u0634 \u0631\u0627 \u0628\u0647\u0628\u0648\u062f \u0645\u06cc \u0628\u062e\u0634\u062f.  \u0645\u06a9\u0627\u0646\u06cc\u0633\u0645 \u062a\u0648\u062c\u0647 \u0686\u0646\u062f \u0633\u0631 \u0645\u062f\u0644 \u0631\u0627 \u0642\u0627\u062f\u0631 \u0645\u06cc\u200c\u0633\u0627\u0632\u062f \u062a\u0627 \u0628\u0631 \u0631\u0648\u06cc \u0628\u062e\u0634\u200c\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u062f\u0646\u0628\u0627\u0644\u0647 \u0648\u0631\u0648\u062f\u06cc \u0628\u0647 \u0637\u0648\u0631 \u0647\u0645\u0632\u0645\u0627\u0646 \u062a\u0645\u0631\u06a9\u0632 \u06a9\u0646\u062f \u0648 \u062a\u0648\u0627\u0646\u0627\u06cc\u06cc \u0622\u0646 \u0631\u0627 \u0628\u0631\u0627\u06cc \u06af\u0631\u0641\u062a\u0646 \u0648\u0627\u0628\u0633\u062a\u06af\u06cc\u200c\u0647\u0627\u06cc \u067e\u06cc\u0686\u06cc\u062f\u0647 \u062f\u0631\u0648\u0646 \u062f\u0627\u062f\u0647\u200c\u0647\u0627 \u0627\u0641\u0632\u0627\u06cc\u0634 \u062f\u0647\u062f (\u0648\u0627\u0633\u0648\u0627\u0646\u06cc \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646\u060c 2017).  
\u0634\u0628\u06a9\u0647 \u0639\u0635\u0628\u06cc \u067e\u06cc\u0634\u062e\u0648\u0631\u060c \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <strong>GELU (\u0648\u0627\u062d\u062f \u062e\u0637\u06cc \u062e\u0637\u0627\u06cc \u06af\u0627\u0648\u0633\u06cc)<\/strong> \u0641\u0639\u0627\u0644 \u0633\u0627\u0632\u06cc\u060c \u063a\u06cc\u0631\u062e\u0637\u06cc \u0628\u0648\u062f\u0646 \u0631\u0627 \u0645\u0639\u0631\u0641\u06cc \u0645\u06cc \u06a9\u0646\u062f \u0648 \u0638\u0631\u0641\u06cc\u062a \u0645\u062f\u0644 \u0631\u0627 \u0628\u0631\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0627\u0644\u06af\u0648\u0647\u0627\u06cc \u067e\u06cc\u0686\u06cc\u062f\u0647 \u0627\u0641\u0632\u0627\u06cc\u0634 \u0645\u06cc \u062f\u0647\u062f.  GELU \u06cc\u06a9 \u062a\u0627\u0628\u0639 \u0641\u0639\u0627\u0644 \u0633\u0627\u0632\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0628\u0647 \u0631\u0627\u062d\u062a\u06cc \u0628\u0647 \u0622\u0646 \u062a\u0642\u0631\u06cc\u0628 \u0645\u06cc \u0632\u0646\u062f <strong>ReLU (\u0648\u0627\u062d\u062f \u062e\u0637\u06cc \u0627\u0635\u0644\u0627\u062d \u0634\u062f\u0647)<\/strong> \u0639\u0645\u0644\u06a9\u0631\u062f \u0648 \u0627\u063a\u0644\u0628 \u062f\u0631 \u0639\u0645\u0644 \u0628\u0647\u062a\u0631 \u0639\u0645\u0644 \u0645\u06cc \u06a9\u0646\u062f (\u0647\u0646\u062f\u0631\u06cc\u06a9\u0633 \u0648 \u06af\u06cc\u0645\u067e\u0644\u060c 2016).<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u06a9\u062f \u062a\u0639\u0631\u06cc\u0641 \u0627\u06cc\u0646 \u06a9\u0644\u0627\u0633 \u0647\u0627 \u0622\u0645\u062f\u0647 \u0627\u0633\u062a:<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"kn\">import<\/span> <span class=\"n\">torch.nn<\/span> <span class=\"k\">as<\/span> <span class=\"n\">nn<\/span>\n\n<span class=\"k\">class<\/span> <span class=\"nc\">GPTBlock<\/span><span class=\"p\">(<\/span><span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span 
class=\"n\">Module<\/span><span class=\"p\">):<\/span>\n    <span class=\"k\">def<\/span> <span class=\"nf\">__init__<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">config<\/span><span class=\"p\">):<\/span>\n        <span class=\"nf\">super<\/span><span class=\"p\">(<\/span><span class=\"n\">GPTBlock<\/span><span class=\"p\">,<\/span> <span class=\"n\">self<\/span><span class=\"p\">).<\/span><span class=\"nf\">__init__<\/span><span class=\"p\">()<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">ln_1<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">LayerNorm<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">attn<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">MultiheadAttention<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">,<\/span> <span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_head<\/span><span class=\"p\">,<\/span> <span class=\"n\">dropout<\/span><span class=\"o\">=<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">attn_pdrop<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">ln_2<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">LayerNorm<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span 
class=\"n\">mlp<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Sequential<\/span><span class=\"p\">(<\/span>\n            <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Linear<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">,<\/span> <span class=\"mi\">4<\/span> <span class=\"o\">*<\/span> <span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">),<\/span>\n            <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">GELU<\/span><span class=\"p\">(),<\/span>\n            <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Linear<\/span><span class=\"p\">(<\/span><span class=\"mi\">4<\/span> <span class=\"o\">*<\/span> <span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">,<\/span> <span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">),<\/span>\n            <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Dropout<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">resid_pdrop<\/span><span class=\"p\">)<\/span>\n        <span class=\"p\">)<\/span>\n\n    <span class=\"k\">def<\/span> <span class=\"nf\">forward<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">x<\/span><span class=\"p\">,<\/span> <span class=\"n\">attention_mask<\/span><span class=\"o\">=<\/span><span class=\"bp\">None<\/span><span class=\"p\">):<\/span>\n        <span class=\"n\">attn_output<\/span><span class=\"p\">,<\/span> <span class=\"n\">_<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span 
class=\"nf\">attn<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">,<\/span> <span class=\"n\">x<\/span><span class=\"p\">,<\/span> <span class=\"n\">x<\/span><span class=\"p\">,<\/span> <span class=\"n\">attn_mask<\/span><span class=\"o\">=<\/span><span class=\"n\">attention_mask<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">x<\/span> <span class=\"o\">+<\/span> <span class=\"n\">attn_output<\/span>\n        <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">ln_1<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">mlp_output<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">mlp<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">x<\/span> <span class=\"o\">+<\/span> <span class=\"n\">mlp_output<\/span>\n        <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">ln_2<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">)<\/span>\n        <span class=\"k\">return<\/span> <span class=\"n\">x<\/span>\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 
19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u06a9\u0644\u0627\u0633 <strong>SimpleGPT<\/strong> \u0686\u0646\u062f\u06cc\u0646 \u0646\u0645\u0648\u0646\u0647 \u0627\u0632 <strong>GPTBlock<\/strong> \u0631\u0627 \u0628\u0631\u0627\u06cc \u062a\u0634\u06a9\u06cc\u0644 \u0645\u062f\u0644 \u06a9\u0627\u0645\u0644 \u067e\u0634\u062a\u0647 \u0645\u06cc \u06a9\u0646\u062f.  \u0627\u06cc\u0646 \u06a9\u0644\u0627\u0633 \u0634\u0627\u0645\u0644 \u062c\u0627\u0633\u0627\u0632\u06cc \u0647\u0627\u06cc \u062a\u0648\u06a9\u0646 \u0648 \u0645\u0648\u0642\u0639\u06cc\u062a\u060c Dropout \u0628\u0631\u0627\u06cc \u0645\u0646\u0638\u0645 \u0633\u0627\u0632\u06cc\u060c \u0648 \u06cc\u06a9 \u0644\u0627\u06cc\u0647 \u062e\u0637\u06cc \u0628\u0631\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0644\u062c\u06cc\u062a \u0647\u0627\u06cc \u062e\u0631\u0648\u062c\u06cc \u0627\u0633\u062a.  
\u062a\u0639\u0628\u06cc\u0647\u200c\u0647\u0627\u06cc \u0631\u0645\u0632\u060c \u0634\u0646\u0627\u0633\u0647\u200c\u0647\u0627\u06cc \u0646\u0634\u0627\u0646\u0647 \u0648\u0631\u0648\u062f\u06cc \u0631\u0627 \u0628\u0647 \u0628\u0631\u062f\u0627\u0631\u0647\u0627\u06cc \u0645\u062a\u0631\u0627\u06a9\u0645 \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f \u0648 \u0628\u0647 \u0645\u062f\u0644 \u0627\u062c\u0627\u0632\u0647 \u0645\u06cc\u200c\u062f\u0647\u0646\u062f \u0628\u0627 \u0646\u0645\u0627\u06cc\u0634\u200c\u0647\u0627\u06cc \u0639\u062f\u062f\u06cc \u06a9\u0644\u0645\u0627\u062a \u06a9\u0627\u0631 \u06a9\u0646\u062f.  \u062c\u0627\u0633\u0627\u0632\u06cc\u200c\u0647\u0627\u06cc \u0645\u0648\u0642\u0639\u06cc\u062a \u0627\u0637\u0644\u0627\u0639\u0627\u062a\u06cc \u0631\u0627 \u062f\u0631\u0628\u0627\u0631\u0647 \u0645\u0648\u0642\u0639\u06cc\u062a \u0647\u0631 \u0646\u0634\u0627\u0646\u0647 \u062f\u0631 \u062f\u0646\u0628\u0627\u0644\u0647 \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc\u200c\u062f\u0647\u0646\u062f \u06a9\u0647 \u0628\u0631\u0627\u06cc \u0645\u062f\u0644 \u0628\u0631\u0627\u06cc \u062f\u0631\u06a9 \u062a\u0631\u062a\u06cc\u0628 \u06a9\u0644\u0645\u0627\u062a \u0628\u0633\u06cc\u0627\u0631 \u0645\u0647\u0645 \u0627\u0633\u062a.  
\u062a\u0631\u06a9 \u062a\u062d\u0635\u06cc\u0644 \u06cc\u06a9 \u062a\u06a9\u0646\u06cc\u06a9 \u0645\u0646\u0638\u0645\u200c\u0633\u0627\u0632\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0628\u0647\u200c\u0637\u0648\u0631 \u062a\u0635\u0627\u062f\u0641\u06cc \u0628\u0631\u062e\u06cc \u0627\u0632 \u0646\u0648\u0631\u0648\u0646\u200c\u0647\u0627 \u0631\u0627 \u062f\u0631 \u062d\u06cc\u0646 \u062a\u0645\u0631\u06cc\u0646 \u0635\u0641\u0631 \u0645\u06cc\u200c\u06a9\u0646\u062f \u0648 \u0628\u0647 \u062c\u0644\u0648\u06af\u06cc\u0631\u06cc \u0627\u0632 \u0628\u0631\u0627\u0632\u0634 \u0628\u06cc\u0634 \u0627\u0632 \u062d\u062f \u06a9\u0645\u06a9 \u0645\u06cc\u200c\u06a9\u0646\u062f (Srivastava et al., 2014).  \u0644\u0627\u06cc\u0647 \u062e\u0637\u06cc \u0646\u0647\u0627\u06cc\u06cc \u062d\u0627\u0644\u062a \u0647\u0627\u06cc \u067e\u0646\u0647\u0627\u0646 \u0631\u0627 \u0628\u0647 logit \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc \u06a9\u0646\u062f \u06a9\u0647 \u0628\u0631\u0627\u06cc \u067e\u06cc\u0634 \u0628\u06cc\u0646\u06cc \u0646\u0634\u0627\u0646\u0647 \u0628\u0639\u062f\u06cc \u062f\u0631 \u062f\u0646\u0628\u0627\u0644\u0647 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"k\">class<\/span> <span class=\"nc\">SimpleGPT<\/span><span class=\"p\">(<\/span><span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"n\">Module<\/span><span class=\"p\">):<\/span>\n    <span class=\"k\">def<\/span> <span class=\"nf\">__init__<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">config<\/span><span class=\"p\">):<\/span>\n        <span class=\"nf\">super<\/span><span class=\"p\">(<\/span><span class=\"n\">SimpleGPT<\/span><span class=\"p\">,<\/span> <span class=\"n\">self<\/span><span class=\"p\">).<\/span><span class=\"nf\">__init__<\/span><span class=\"p\">()<\/span>\n        
<span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">token_embedding<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Embedding<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">vocab_size<\/span><span class=\"p\">,<\/span> <span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">position_embedding<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Embedding<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_positions<\/span><span class=\"p\">,<\/span> <span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">drop<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Dropout<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">embd_pdrop<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">blocks<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">ModuleList<\/span><span class=\"p\">([<\/span><span class=\"nc\">GPTBlock<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">)<\/span> <span class=\"k\">for<\/span> <span class=\"n\">_<\/span> <span class=\"ow\">in<\/span> <span class=\"nf\">range<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_layer<\/span><span class=\"p\">)])<\/span>\n        
<span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">ln_f<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">LayerNorm<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">head<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">Linear<\/span><span class=\"p\">(<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_embd<\/span><span class=\"p\">,<\/span> <span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">vocab_size<\/span><span class=\"p\">,<\/span> <span class=\"n\">bias<\/span><span class=\"o\">=<\/span><span class=\"bp\">False<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">config<\/span> <span class=\"o\">=<\/span> <span class=\"n\">config<\/span>\n\n    <span class=\"k\">def<\/span> <span class=\"nf\">forward<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">input_ids<\/span><span class=\"p\">,<\/span> <span class=\"n\">attention_mask<\/span><span class=\"o\">=<\/span><span class=\"bp\">None<\/span><span class=\"p\">):<\/span>\n        <span class=\"n\">positions<\/span> <span class=\"o\">=<\/span> <span class=\"n\">torch<\/span><span class=\"p\">.<\/span><span class=\"nf\">arange<\/span><span class=\"p\">(<\/span><span class=\"mi\">0<\/span><span class=\"p\">,<\/span> <span class=\"n\">input_ids<\/span><span class=\"p\">.<\/span><span class=\"nf\">size<\/span><span class=\"p\">(<\/span><span class=\"mi\">1<\/span><span class=\"p\">),<\/span> <span class=\"n\">device<\/span><span class=\"o\">=<\/span><span class=\"n\">input_ids<\/span><span 
class=\"p\">.<\/span><span class=\"n\">device<\/span><span class=\"p\">).<\/span><span class=\"nf\">unsqueeze<\/span><span class=\"p\">(<\/span><span class=\"mi\">0<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">token_embedding<\/span><span class=\"p\">(<\/span><span class=\"n\">input_ids<\/span><span class=\"p\">)<\/span> <span class=\"o\">+<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">position_embedding<\/span><span class=\"p\">(<\/span><span class=\"n\">positions<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">drop<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">)<\/span>\n\n        <span class=\"k\">if<\/span> <span class=\"n\">attention_mask<\/span> <span class=\"ow\">is<\/span> <span class=\"ow\">not<\/span> <span class=\"bp\">None<\/span><span class=\"p\">:<\/span>\n            <span class=\"n\">attention_mask<\/span> <span class=\"o\">=<\/span> <span class=\"n\">attention_mask<\/span><span class=\"p\">.<\/span><span class=\"nf\">unsqueeze<\/span><span class=\"p\">(<\/span><span class=\"mi\">1<\/span><span class=\"p\">).<\/span><span class=\"nf\">repeat<\/span><span class=\"p\">(<\/span><span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">config<\/span><span class=\"p\">.<\/span><span class=\"n\">n_head<\/span><span class=\"p\">,<\/span> <span class=\"n\">attention_mask<\/span><span class=\"p\">.<\/span><span class=\"nf\">size<\/span><span class=\"p\">(<\/span><span class=\"mi\">1<\/span><span class=\"p\">),<\/span> <span class=\"mi\">1<\/span><span class=\"p\">)<\/span>\n            <span class=\"n\">attention_mask<\/span> <span class=\"o\">=<\/span> <span class=\"n\">attention_mask<\/span><span 
class=\"p\">.<\/span><span class=\"nf\">to<\/span><span class=\"p\">(<\/span><span class=\"n\">dtype<\/span><span class=\"o\">=<\/span><span class=\"n\">torch<\/span><span class=\"p\">.<\/span><span class=\"n\">float32<\/span><span class=\"p\">)<\/span>\n            <span class=\"n\">attention_mask<\/span> <span class=\"o\">=<\/span> <span class=\"p\">(<\/span><span class=\"mf\">1.0<\/span> <span class=\"o\">-<\/span> <span class=\"n\">attention_mask<\/span><span class=\"p\">)<\/span> <span class=\"o\">*<\/span> <span class=\"o\">-<\/span><span class=\"mf\">10000.0<\/span>\n\n        <span class=\"k\">for<\/span> <span class=\"n\">block<\/span> <span class=\"ow\">in<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">blocks<\/span><span class=\"p\">:<\/span>\n            <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"nf\">block<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">.<\/span><span class=\"nf\">transpose<\/span><span class=\"p\">(<\/span><span class=\"mi\">0<\/span><span class=\"p\">,<\/span> <span class=\"mi\">1<\/span><span class=\"p\">),<\/span> <span class=\"n\">attention_mask<\/span><span class=\"p\">)<\/span>\n            <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">x<\/span><span class=\"p\">.<\/span><span class=\"nf\">transpose<\/span><span class=\"p\">(<\/span><span class=\"mi\">0<\/span><span class=\"p\">,<\/span> <span class=\"mi\">1<\/span><span class=\"p\">)<\/span>\n\n        <span class=\"n\">x<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">ln_f<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">logits<\/span> <span class=\"o\">=<\/span> <span class=\"n\">self<\/span><span class=\"p\">.<\/span><span class=\"nf\">head<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span 
class=\"p\">)<\/span>\n        <span class=\"k\">return<\/span> <span class=\"n\">logits<\/span>\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u0633\u067e\u0633 \u0645\u062f\u0644 \u0631\u0627 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0622\u0646 \u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc \u0645\u06cc \u06a9\u0646\u06cc\u0645 <strong>GPT2Config<\/strong> \u06a9\u0644\u0627\u0633 \u0627\u0632 \u06a9\u062a\u0627\u0628\u062e\u0627\u0646\u0647 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631\u0647\u0627\u060c \u06a9\u0647 \u0641\u0631\u0627\u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u06cc \u0645\u0627\u0646\u0646\u062f \u0627\u0646\u062f\u0627\u0632\u0647 \u0648\u0627\u0698\u06af\u0627\u0646\u060c \u062a\u0639\u062f\u0627\u062f \u0645\u0648\u0642\u0639\u06cc\u062a\u200c\u0647\u0627\u060c \u0627\u0628\u0639\u0627\u062f \u062c\u0627\u0633\u0627\u0632\u06cc\u060c 
\u062a\u0639\u062f\u0627\u062f \u0644\u0627\u06cc\u0647\u200c\u0647\u0627\u060c \u062a\u0639\u062f\u0627\u062f \u0633\u0631\u0647\u0627\u06cc \u062a\u0648\u062c\u0647 \u0648 \u0646\u0631\u062e Dropout \u0631\u0627 \u062a\u0646\u0638\u06cc\u0645 \u0645\u06cc\u200c\u06a9\u0646\u062f.  \u0627\u06cc\u0646 \u062a\u0646\u0638\u06cc\u0645\u0627\u062a \u0628\u0631\u0627\u06cc \u062a\u0639\u0631\u06cc\u0641 \u0645\u0639\u0645\u0627\u0631\u06cc \u0648 \u0631\u0641\u062a\u0627\u0631 \u0645\u062f\u0644 \u062f\u0631 \u0637\u0648\u0644 \u0622\u0645\u0648\u0632\u0634 \u0636\u0631\u0648\u0631\u06cc \u0647\u0633\u062a\u0646\u062f.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D8%A2%D9%85%D9%88%D8%B2%D8%B4_%D9%85%D8%AF%D9%84\"><\/span>\n<p>  \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062a\u0627\u0628\u0639 <strong>train<\/strong> \u06cc\u06a9 \u062c\u0632\u0621 \u062d\u06cc\u0627\u062a\u06cc \u062f\u0631 \u0641\u0631\u0622\u06cc\u0646\u062f \u0622\u0645\u0648\u0632\u0634 \u06cc\u06a9 \u0645\u062f\u0644 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0627\u0632 \u067e\u06cc\u0634 \u0622\u0645\u0648\u0632\u0634 \u062f\u06cc\u062f\u0647 (GPT) \u0627\u0633\u062a.  
\u0627\u06cc\u0646 \u062a\u0627\u0628\u0639 \u06a9\u0644 \u062d\u0644\u0642\u0647 \u0622\u0645\u0648\u0632\u0634\u06cc \u0631\u0627 \u0647\u0645\u0627\u0647\u0646\u06af \u0645\u06cc\u200c\u06a9\u0646\u062f \u0648 \u0645\u0631\u0627\u062d\u0644 \u06a9\u0644\u06cc\u062f\u06cc \u0645\u0627\u0646\u0646\u062f \u067e\u0627\u0633 \u0631\u0648 \u0628\u0647 \u062c\u0644\u0648\u060c \u0645\u062d\u0627\u0633\u0628\u0647 \u0636\u0631\u0631\u060c \u067e\u0633\u200c\u0627\u0646\u062a\u0634\u0627\u0631 \u0648 \u0628\u0647\u06cc\u0646\u0647\u200c\u0633\u0627\u0632\u06cc \u0631\u0627 \u062f\u0631 \u0628\u0631 \u0645\u06cc\u200c\u06af\u06cc\u0631\u062f.  \u0647\u0631 \u06cc\u06a9 \u0627\u0632 \u0627\u06cc\u0646 \u0645\u0631\u0627\u062d\u0644 \u0646\u0642\u0634 \u062d\u06cc\u0627\u062a\u06cc \u062f\u0631 \u067e\u0627\u0644\u0627\u06cc\u0634 \u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc \u0645\u062f\u0644 \u0628\u0631 \u0627\u0633\u0627\u0633 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0627\u06cc\u0641\u0627 \u0645\u06cc \u06a9\u0646\u062f \u0648 \u062f\u0631 \u0646\u0647\u0627\u06cc\u062a \u062a\u0648\u0627\u0646\u0627\u06cc\u06cc \u0645\u062f\u0644 \u0631\u0627 \u0628\u0631\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0645\u0646\u0633\u062c\u0645 \u0648 \u0645\u0631\u062a\u0628\u0637 \u0628\u0627 \u0632\u0645\u06cc\u0646\u0647 \u0628\u0647\u0628\u0648\u062f \u0645\u06cc \u0628\u062e\u0634\u062f.<\/p>\n<p>\u0641\u0631\u0622\u06cc\u0646\u062f \u0622\u0645\u0648\u0632\u0634 \u0628\u0627 \u062a\u0646\u0638\u06cc\u0645 \u0645\u062f\u0644 \u0628\u0647 \u062d\u0627\u0644\u062a \u0622\u0645\u0648\u0632\u0634 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0645\u062a\u062f <em>model.train()<\/em> \u0622\u063a\u0627\u0632 \u0645\u06cc \u0634\u0648\u062f.  
\u0627\u06cc\u0646 \u062d\u0627\u0644\u062a \u0644\u0627\u06cc\u0647\u200c\u0647\u0627\u06cc \u062e\u0627\u0635\u06cc \u0645\u0627\u0646\u0646\u062f Dropout \u0648 \u0646\u0631\u0645\u0627\u0644\u200c\u0633\u0627\u0632\u06cc \u062f\u0633\u062a\u0647\u200c\u0627\u06cc \u0631\u0627 \u0642\u0627\u062f\u0631 \u0645\u06cc\u200c\u0633\u0627\u0632\u062f \u062a\u0627 \u062f\u0631 \u0637\u0648\u0644 \u0622\u0645\u0648\u0632\u0634 \u0628\u0647 \u062f\u0631\u0633\u062a\u06cc \u0639\u0645\u0644 \u06a9\u0646\u0646\u062f \u0648 \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u062d\u0627\u0635\u0644 \u0634\u0648\u062f \u06a9\u0647 \u0628\u0647 \u0642\u0627\u0628\u0644\u06cc\u062a\u200c\u0647\u0627\u06cc \u062a\u0639\u0645\u06cc\u0645 \u0645\u062f\u0644 \u06a9\u0645\u06a9 \u0645\u06cc\u200c\u06a9\u0646\u0646\u062f (Goodfellow et al., 2016).  \u0633\u067e\u0633 \u062d\u0644\u0642\u0647 \u0622\u0645\u0648\u0632\u0634\u06cc \u0628\u0631\u0627\u06cc \u062a\u0639\u062f\u0627\u062f \u0645\u0639\u06cc\u0646\u06cc \u0627\u0632 \u062f\u0648\u0631\u0647 \u0647\u0627 \u0631\u0648\u06cc \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u062a\u06a9\u0631\u0627\u0631 \u0645\u06cc \u0634\u0648\u062f.  
\u06cc\u06a9 \u062f\u0648\u0631\u0647 \u0646\u0634\u0627\u0646 \u062f\u0647\u0646\u062f\u0647 \u06cc\u06a9 \u06af\u0630\u0631 \u06a9\u0627\u0645\u0644 \u0627\u0632 \u06a9\u0644 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0622\u0645\u0648\u0632\u0634\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0628\u0647 \u0645\u062f\u0644 \u0627\u062c\u0627\u0632\u0647 \u0645\u06cc \u062f\u0647\u062f \u0627\u0632 \u062a\u0645\u0627\u0645 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0645\u0648\u062c\u0648\u062f \u0628\u06cc\u0627\u0645\u0648\u0632\u062f.<\/p>\n<p>\u0628\u0631\u0627\u06cc \u0647\u0631 \u062f\u0648\u0631\u0647\u060c \u062a\u0627\u0628\u0639 \u062f\u0633\u062a\u0647\u200c\u0627\u06cc \u0627\u0632 \u062f\u0627\u062f\u0647\u200c\u0647\u0627\u06cc \u0627\u0631\u0627\u0626\u0647 \u0634\u062f\u0647 \u062a\u0648\u0633\u0637 \u0622\u0646 \u0631\u0627 \u067e\u0631\u062f\u0627\u0632\u0634 \u0645\u06cc\u200c\u06a9\u0646\u062f <strong>DataLoader<\/strong>\u060c \u06a9\u0647 \u062f\u0633\u062a\u0647 \u0628\u0646\u062f\u06cc \u0648 \u062a\u0631\u06a9\u06cc\u0628 \u06a9\u0627\u0631\u0622\u0645\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0631\u0627 \u0645\u062f\u06cc\u0631\u06cc\u062a \u0645\u06cc \u06a9\u0646\u062f.  \u062f\u0633\u062a\u0647\u200c\u0628\u0646\u062f\u06cc\u060c \u062a\u0648\u0627\u0644\u06cc\u200c\u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0645\u062a\u0639\u062f\u062f \u0631\u0627 \u062f\u0631 \u06cc\u06a9 \u062f\u0633\u062a\u0647 \u06af\u0631\u0648\u0647\u200c\u0628\u0646\u062f\u06cc \u0645\u06cc\u200c\u06a9\u0646\u062f\u060c \u06a9\u0647 \u067e\u0631\u062f\u0627\u0632\u0634 \u0645\u0648\u0627\u0632\u06cc \u0648 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0627\u0631\u0622\u0645\u062f \u0627\u0632 \u0645\u0646\u0627\u0628\u0639 \u0645\u062d\u0627\u0633\u0628\u0627\u062a\u06cc \u0631\u0627 \u0645\u0645\u06a9\u0646 \u0645\u06cc\u200c\u0633\u0627\u0632\u062f.  
\u0645\u062e\u0644\u0648\u0637 \u06a9\u0631\u062f\u0646 \u062f\u0627\u062f\u0647 \u0647\u0627 \u0628\u0647 \u06a9\u0627\u0647\u0634 \u0628\u06cc\u0634 \u0627\u0632 \u062d\u062f \u0628\u0631\u0627\u0632\u0634 \u0645\u062f\u0644 \u0628\u0647 \u062a\u0631\u062a\u06cc\u0628 \u0646\u0645\u0648\u0646\u0647 \u0647\u0627\u06cc \u062f\u0627\u062f\u0647 \u06a9\u0645\u06a9 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u062f\u0631 \u0647\u0631 \u062f\u0633\u062a\u0647\u060c \u0634\u0646\u0627\u0633\u0647\u200c\u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0648 \u0645\u0627\u0633\u06a9\u200c\u0647\u0627\u06cc \u062a\u0648\u062c\u0647 \u0628\u0647 \u062f\u0633\u062a\u06af\u0627\u0647 \u0645\u0634\u062e\u0635 \u0634\u062f\u0647 (CPU \u06cc\u0627 GPU) \u0645\u0646\u062a\u0642\u0644 \u0645\u06cc\u200c\u0634\u0648\u0646\u062f \u062a\u0627 \u0642\u062f\u0631\u062a \u0645\u062d\u0627\u0633\u0628\u0627\u062a\u06cc \u0633\u062e\u062a\u200c\u0627\u0641\u0632\u0627\u0631 \u0631\u0627 \u0627\u0641\u0632\u0627\u06cc\u0634 \u062f\u0647\u0646\u062f.  \u067e\u0627\u0633 \u0631\u0648 \u0628\u0647 \u062c\u0644\u0648 \u0634\u0627\u0645\u0644 \u0639\u0628\u0648\u0631 \u0634\u0646\u0627\u0633\u0647 \u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0627\u0632 \u0637\u0631\u06cc\u0642 \u0645\u062f\u0644 \u0628\u0631\u0627\u06cc \u0628\u0647 \u062f\u0633\u062a \u0622\u0648\u0631\u062f\u0646 \u0644\u062c\u06cc\u062a \u0647\u0627\u06cc \u062e\u0631\u0648\u062c\u06cc \u0627\u0633\u062a \u06a9\u0647 \u067e\u06cc\u0634 \u0628\u06cc\u0646\u06cc \u0647\u0627\u06cc \u062e\u0627\u0645 \u0648 \u063a\u06cc\u0631 \u0639\u0627\u062f\u06cc \u0645\u062f\u0644 \u0647\u0633\u062a\u0646\u062f.  
\u0628\u0631\u0627\u06cc \u062a\u0631\u0627\u0632 \u06a9\u0631\u062f\u0646 \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc\u200c\u0647\u0627 \u0628\u0627 \u0627\u0647\u062f\u0627\u0641\u060c \u0644\u0627\u062c\u06cc\u062a\u200c\u0647\u0627 \u062c\u0627\u0628\u0647\u200c\u062c\u0627 \u0645\u06cc\u200c\u0634\u0648\u0646\u062f: shift_logits \u0622\u062e\u0631\u06cc\u0646 \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc \u062a\u0648\u06a9\u0646 \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc\u200c\u06a9\u0646\u062f \u0648 shift_labels \u0627\u0648\u0644\u06cc\u0646 \u0646\u0634\u0627\u0646\u0647 \u0631\u0627 \u062d\u0630\u0641 \u0645\u06cc\u200c\u06a9\u0646\u062f\u060c \u0648 \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u062d\u0627\u0635\u0644 \u0645\u06cc\u200c\u06a9\u0646\u062f \u06a9\u0647 \u062a\u0648\u0627\u0644\u06cc\u200c\u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0648 \u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u062f\u0631\u0633\u062a\u06cc \u0628\u0631\u0627\u06cc \u06a9\u0627\u0631 \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc \u0646\u0634\u0627\u0646\u0647\u200c\u0647\u0627\u06cc \u0628\u0639\u062f\u06cc \u062a\u0631\u0627\u0632 \u0634\u062f\u0647\u200c\u0627\u0646\u062f.<\/p>\n<p>\u0645\u062d\u0627\u0633\u0628\u0647 \u062a\u0644\u0641\u0627\u062a \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u062a\u0627\u0628\u0639 \u062a\u0644\u0641\u0627\u062a \u0645\u062a\u0642\u0627\u0637\u0639 \u0622\u0646\u062a\u0631\u0648\u067e\u06cc\u060c \u06cc\u06a9 \u0645\u0639\u06cc\u0627\u0631 \u0631\u0627\u06cc\u062c \u0628\u0631\u0627\u06cc \u0648\u0638\u0627\u06cc\u0641 \u0637\u0628\u0642\u0647\u200c\u0628\u0646\u062f\u06cc \u06a9\u0647 \u062a\u0641\u0627\u0648\u062a \u0628\u06cc\u0646 \u0627\u062d\u062a\u0645\u0627\u0644\u0627\u062a \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc\u200c\u0634\u062f\u0647 \u0648 \u0645\u0642\u0627\u062f\u06cc\u0631 \u0647\u062f\u0641 \u0648\u0627\u0642\u0639\u06cc \u0631\u0627 
\u0627\u0646\u062f\u0627\u0632\u0647\u200c\u06af\u06cc\u0631\u06cc \u0645\u06cc\u200c\u06a9\u0646\u062f\u060c \u0627\u0646\u062c\u0627\u0645 \u0645\u06cc\u200c\u0634\u0648\u062f.  \u0627\u0632 \u062f\u0633\u062a \u062f\u0627\u062f\u0646 \u0622\u0646\u062a\u0631\u0648\u067e\u06cc \u0645\u062a\u0642\u0627\u0628\u0644 \u0628\u0647 \u0648\u06cc\u0698\u0647 \u0628\u0631\u0627\u06cc \u06a9\u0627\u0631\u0647\u0627\u06cc \u0645\u062f\u0644\u200c\u0633\u0627\u0632\u06cc \u0632\u0628\u0627\u0646 \u06a9\u0647 \u0647\u062f\u0641 \u0622\u0646 \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc \u0646\u0634\u0627\u0646\u0647 \u0628\u0639\u062f\u06cc \u062f\u0631 \u06cc\u06a9 \u062f\u0646\u0628\u0627\u0644\u0647 \u0627\u0633\u062a\u060c \u0645\u0646\u0627\u0633\u0628 \u0627\u0633\u062a (Goodfellow et al., 2016).<\/p>\n<p>\u067e\u0633 \u0627\u0646\u062a\u0634\u0627\u0631\u060c \u0627\u062c\u0631\u0627 \u0627\u0632 \u0637\u0631\u06cc\u0642 <em>loss.backward()<\/em> \u0631\u0648\u0634\u060c \u06af\u0631\u0627\u062f\u06cc\u0627\u0646 \u062a\u0627\u0628\u0639 \u0636\u0631\u0631 \u0631\u0627 \u0628\u0627 \u062a\u0648\u062c\u0647 \u0628\u0647 \u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc \u0645\u062f\u0644 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u06a9\u0646\u062f.  \u0627\u06cc\u0646 \u06af\u0631\u0627\u062f\u06cc\u0627\u0646 \u0647\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u06a9\u0647 \u0647\u0631 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0686\u0642\u062f\u0631 \u0628\u0627\u06cc\u062f \u062a\u063a\u06cc\u06cc\u0631 \u06a9\u0646\u062f \u062a\u0627 \u062a\u0644\u0641\u0627\u062a \u0628\u0647 \u062d\u062f\u0627\u0642\u0644 \u0628\u0631\u0633\u062f.  
\u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632\u060c \u0645\u0634\u062e\u0635 \u0634\u062f\u0647 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 Adam (Kingma &#038; Ba\u060c 2015)\u060c \u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627\u06cc \u0645\u062f\u0644 \u0631\u0627 \u0628\u0631 \u0627\u0633\u0627\u0633 \u0627\u06cc\u0646 \u06af\u0631\u0627\u062f\u06cc\u0627\u0646 \u0647\u0627 \u0628\u0647 \u0631\u0648\u0632 \u0645\u06cc \u06a9\u0646\u062f.  Adam (\u062a\u062e\u0645\u06cc\u0646 \u0644\u062d\u0638\u0647 \u062a\u0637\u0628\u06cc\u0642\u06cc) \u0646\u0648\u0639\u06cc \u0627\u0632 \u0646\u0632\u0648\u0644 \u06af\u0631\u0627\u062f\u06cc\u0627\u0646 \u062a\u0635\u0627\u062f\u0641\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u0647\u0631 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u062a\u0637\u0628\u06cc\u0642 \u0645\u06cc \u062f\u0647\u062f \u0648 \u0622\u0646 \u0631\u0627 \u0628\u0631\u0627\u06cc \u062a\u0648\u0632\u06cc\u0639 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641 \u06a9\u0627\u0631\u0622\u0645\u062f\u062a\u0631 \u0648 \u0642\u0648\u06cc \u062a\u0631 \u0645\u06cc \u06a9\u0646\u062f.  
\u0627\u06cc\u0646 \u06cc\u06a9 \u0627\u0644\u06af\u0648\u0631\u06cc\u062a\u0645 \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632\u06cc \u0645\u062d\u0628\u0648\u0628 \u0627\u0633\u062a \u06a9\u0647 \u0646\u0631\u062e \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u062a\u0637\u0628\u06cc\u0642\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u0647\u0631 \u067e\u0627\u0631\u0627\u0645\u062a\u0631 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u06a9\u0646\u062f.<\/p>\n<p>\u062f\u0631 \u0637\u0648\u0644 \u0647\u0631 \u062f\u0648\u0631\u0647\u060c \u062a\u0644\u0641\u0627\u062a \u06a9\u0644 \u0627\u0646\u0628\u0627\u0634\u062a\u0647 \u0634\u062f\u0647 \u0648 \u062f\u0631 \u062a\u0645\u0627\u0645 \u062f\u0633\u062a\u0647 \u0647\u0627 \u0628\u0647 \u0637\u0648\u0631 \u0645\u06cc\u0627\u0646\u06af\u06cc\u0646 \u0645\u062d\u0627\u0633\u0628\u0647 \u0645\u06cc \u0634\u0648\u062f \u0648 \u0645\u0639\u06cc\u0627\u0631\u06cc \u0627\u0632 \u0639\u0645\u0644\u06a9\u0631\u062f \u0645\u062f\u0644 \u0631\u0627 \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc \u062f\u0647\u062f.  \u0646\u0638\u0627\u0631\u062a \u0628\u0631 \u0636\u0631\u0631 \u062f\u0631 \u062f\u0648\u0631\u0647\u200c\u0647\u0627 \u0628\u0647 \u062f\u0631\u06a9 \u067e\u06cc\u0634\u0631\u0641\u062a \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u062f\u0644 \u0648 \u062a\u0646\u0638\u06cc\u0645 \u0641\u0631\u0627\u067e\u0627\u0631\u0627\u0645\u062a\u0631\u0647\u0627 \u062f\u0631 \u0635\u0648\u0631\u062a \u0644\u0632\u0648\u0645 \u06a9\u0645\u06a9 \u0645\u06cc\u200c\u06a9\u0646\u062f.  
\u0627\u06cc\u0646 \u0641\u0631\u0622\u06cc\u0646\u062f \u0627\u0635\u0644\u0627\u062d \u0645\u062f\u0627\u0648\u0645 \u0628\u0631\u0627\u06cc \u0628\u0647\u0628\u0648\u062f \u062f\u0642\u062a \u0645\u062f\u0644 \u0648 \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u0627\u0632 \u062a\u0648\u0627\u0646\u0627\u06cc\u06cc \u0622\u0646 \u0628\u0631\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0628\u0627 \u06a9\u06cc\u0641\u06cc\u062a \u0628\u0627\u0644\u0627 \u0636\u0631\u0648\u0631\u06cc \u0627\u0633\u062a.<\/p>\n<p>\u0645\u0639\u06cc\u0627\u0631 \u0627\u0632 \u062f\u0633\u062a \u062f\u0627\u062f\u0646 \u0622\u0646\u062a\u0631\u0648\u067e\u06cc \u0645\u062a\u0642\u0627\u0637\u0639 \u0627\u0633\u062a \u06a9\u0647 \u0628\u0631\u0627\u06cc \u0627\u0646\u062f\u0627\u0632\u0647 \u06af\u06cc\u0631\u06cc \u0639\u0645\u0644\u06a9\u0631\u062f \u0645\u062f\u0644 \u0637\u0628\u0642\u0647 \u0628\u0646\u062f\u06cc \u06a9\u0647 \u062e\u0631\u0648\u062c\u06cc \u0622\u0646 \u0645\u0642\u062f\u0627\u0631 \u0627\u062d\u062a\u0645\u0627\u0644 \u0628\u06cc\u0646 0 \u0648 1 \u0627\u0633\u062a \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<p>\u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u0642\u0637\u0639\u0647 \u06a9\u062f \u0628\u0631\u0627\u06cc <strong>\u0642\u0637\u0627\u0631 &#8211; \u062a\u0639\u0644\u06cc\u0645 \u062f\u0627\u062f\u0646<\/strong> \u0639\u0645\u0644\u06a9\u0631\u062f \u0648 \u0627\u062c\u0631\u0627\u06cc \u0622\u0646:<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"kn\">import<\/span> <span class=\"n\">torch.optim<\/span> <span class=\"k\">as<\/span> <span class=\"n\">optim<\/span>\n\n<span class=\"k\">def<\/span> <span class=\"nf\">train<\/span><span class=\"p\">(<\/span><span class=\"n\">model<\/span><span class=\"p\">,<\/span> <span class=\"n\">dataloader<\/span><span class=\"p\">,<\/span> <span class=\"n\">optimizer<\/span><span class=\"p\">,<\/span> <span 
class=\"n\">criterion<\/span><span class=\"p\">,<\/span> <span class=\"n\">epochs<\/span><span class=\"o\">=<\/span><span class=\"mi\">5<\/span><span class=\"p\">,<\/span> <span class=\"n\">device<\/span><span class=\"o\">=<\/span><span class=\"sh\">'<\/span><span class=\"s\">cuda<\/span><span class=\"sh\">'<\/span><span class=\"p\">):<\/span>\n    <span class=\"n\">model<\/span><span class=\"p\">.<\/span><span class=\"nf\">train<\/span><span class=\"p\">()<\/span>\n    <span class=\"k\">for<\/span> <span class=\"n\">epoch<\/span> <span class=\"ow\">in<\/span> <span class=\"nf\">range<\/span><span class=\"p\">(<\/span><span class=\"n\">epochs<\/span><span class=\"p\">):<\/span>\n        <span class=\"n\">total_loss<\/span> <span class=\"o\">=<\/span> <span class=\"mi\">0<\/span>\n        <span class=\"k\">for<\/span> <span class=\"n\">input_ids<\/span><span class=\"p\">,<\/span> <span class=\"n\">attention_mask<\/span> <span class=\"ow\">in<\/span> <span class=\"n\">dataloader<\/span><span class=\"p\">:<\/span>\n            <span class=\"n\">input_ids<\/span><span class=\"p\">,<\/span> <span class=\"n\">attention_mask<\/span> <span class=\"o\">=<\/span> <span class=\"n\">input_ids<\/span><span class=\"p\">.<\/span><span class=\"nf\">to<\/span><span class=\"p\">(<\/span><span class=\"n\">device<\/span><span class=\"p\">),<\/span> <span class=\"n\">attention_mask<\/span><span class=\"p\">.<\/span><span class=\"nf\">to<\/span><span class=\"p\">(<\/span><span class=\"n\">device<\/span><span class=\"p\">)<\/span>\n            <span class=\"n\">optimizer<\/span><span class=\"p\">.<\/span><span class=\"nf\">zero_grad<\/span><span class=\"p\">()<\/span>\n            <span class=\"n\">outputs<\/span> <span class=\"o\">=<\/span> <span class=\"nf\">model<\/span><span class=\"p\">(<\/span><span class=\"n\">input_ids<\/span><span class=\"p\">,<\/span> <span class=\"n\">attention_mask<\/span><span class=\"p\">)<\/span>\n            <span class=\"n\">shift_logits<\/span> <span 
class=\"o\">=<\/span> <span class=\"n\">outputs<\/span><span class=\"p\">[...,<\/span> <span class=\"p\">:<\/span><span class=\"o\">-<\/span><span class=\"mi\">1<\/span><span class=\"p\">,<\/span> <span class=\"p\">:].<\/span><span class=\"nf\">contiguous<\/span><span class=\"p\">()<\/span>\n            <span class=\"n\">shift_labels<\/span> <span class=\"o\">=<\/span> <span class=\"n\">input_ids<\/span><span class=\"p\">[...,<\/span> <span class=\"mi\">1<\/span><span class=\"p\">:].<\/span><span class=\"nf\">contiguous<\/span><span class=\"p\">()<\/span>\n            <span class=\"n\">loss<\/span> <span class=\"o\">=<\/span> <span class=\"nf\">criterion<\/span><span class=\"p\">(<\/span><span class=\"n\">shift_logits<\/span><span class=\"p\">.<\/span><span class=\"nf\">view<\/span><span class=\"p\">(<\/span><span class=\"o\">-<\/span><span class=\"mi\">1<\/span><span class=\"p\">,<\/span> <span class=\"n\">shift_logits<\/span><span class=\"p\">.<\/span><span class=\"nf\">size<\/span><span class=\"p\">(<\/span><span class=\"o\">-<\/span><span class=\"mi\">1<\/span><span class=\"p\">)),<\/span> <span class=\"n\">shift_labels<\/span><span class=\"p\">.<\/span><span class=\"nf\">view<\/span><span class=\"p\">(<\/span><span class=\"o\">-<\/span><span class=\"mi\">1<\/span><span class=\"p\">))<\/span>\n            <span class=\"n\">loss<\/span><span class=\"p\">.<\/span><span class=\"nf\">backward<\/span><span class=\"p\">()<\/span>\n            <span class=\"n\">optimizer<\/span><span class=\"p\">.<\/span><span class=\"nf\">step<\/span><span class=\"p\">()<\/span>\n            <span class=\"n\">total_loss<\/span> <span class=\"o\">+=<\/span> <span class=\"n\">loss<\/span><span class=\"p\">.<\/span><span class=\"nf\">item<\/span><span class=\"p\">()<\/span>\n        <span class=\"nf\">print<\/span><span class=\"p\">(<\/span><span class=\"sa\">f<\/span><span class=\"sh\">\"<\/span><span class=\"s\">Epoch <\/span><span class=\"si\">{<\/span><span class=\"n\">epoch<\/span> 
<span class=\"o\">+<\/span> <span class=\"mi\">1<\/span><span class=\"si\">}<\/span><span class=\"s\">\/<\/span><span class=\"si\">{<\/span><span class=\"n\">epochs<\/span><span class=\"si\">}<\/span><span class=\"s\">, Loss: <\/span><span class=\"si\">{<\/span><span class=\"n\">total_loss<\/span> <span class=\"o\">\/<\/span> <span class=\"nf\">len<\/span><span class=\"p\">(<\/span><span class=\"n\">dataloader<\/span><span class=\"p\">)<\/span><span class=\"si\">}<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\n\n<span class=\"n\">optimizer<\/span> <span class=\"o\">=<\/span> <span class=\"n\">optim<\/span><span class=\"p\">.<\/span><span class=\"nc\">Adam<\/span><span class=\"p\">(<\/span><span class=\"n\">model<\/span><span class=\"p\">.<\/span><span class=\"nf\">parameters<\/span><span class=\"p\">(),<\/span> <span class=\"n\">lr<\/span><span class=\"o\">=<\/span><span class=\"mf\">1e-4<\/span><span class=\"p\">)<\/span>\n<span class=\"n\">criterion<\/span> <span class=\"o\">=<\/span> <span class=\"n\">nn<\/span><span class=\"p\">.<\/span><span class=\"nc\">CrossEntropyLoss<\/span><span class=\"p\">()<\/span>\n<span class=\"nf\">train<\/span><span class=\"p\">(<\/span><span class=\"n\">model<\/span><span class=\"p\">,<\/span> <span class=\"n\">dataloader<\/span><span class=\"p\">,<\/span> <span class=\"n\">optimizer<\/span><span class=\"p\">,<\/span> <span class=\"n\">criterion<\/span><span class=\"p\">,<\/span> <span class=\"n\">epochs<\/span><span class=\"o\">=<\/span><span class=\"mi\">5<\/span><span class=\"p\">,<\/span> <span class=\"n\">device<\/span><span class=\"o\">=<\/span><span class=\"n\">device<\/span><span class=\"p\">)<\/span>\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon 
highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<h2><span class=\"ez-toc-section\" id=\"%D8%AA%D9%88%D9%84%DB%8C%D8%AF_%D9%85%D8%AA%D9%86\"><\/span>\n<p>  \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u0627\u06cc\u0646 <strong>generate_text<\/strong> \u062a\u0627\u0628\u0639 \u062f\u0631 \u06a9\u062f \u0645\u0627 \u0628\u0631\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0627\u0632 \u06cc\u06a9 \u0645\u062f\u0644 GPT \u0622\u0645\u0648\u0632\u0634 \u062f\u06cc\u062f\u0647 \u0628\u0631 \u0627\u0633\u0627\u0633 \u06cc\u06a9 \u062f\u0631\u062e\u0648\u0627\u0633\u062a \u0627\u0648\u0644\u06cc\u0647 \u0637\u0631\u0627\u062d\u06cc \u0634\u062f\u0647 \u0627\u0633\u062a.  
\u0627\u06cc\u0646 \u062a\u0627\u0628\u0639 \u0628\u0631\u0627\u06cc \u0646\u0634\u0627\u0646 \u062f\u0627\u062f\u0646 \u06a9\u0627\u0631\u0628\u0631\u062f \u0639\u0645\u0644\u06cc \u0645\u062f\u0644 \u0622\u0645\u0648\u0632\u0634\u200c\u062f\u06cc\u062f\u0647 \u0636\u0631\u0648\u0631\u06cc \u0627\u0633\u062a \u0648 \u0628\u0647 \u0645\u0627 \u0627\u06cc\u0646 \u0627\u0645\u06a9\u0627\u0646 \u0631\u0627 \u0645\u06cc\u200c\u062f\u0647\u062f \u06a9\u0647 \u0628\u0628\u06cc\u0646\u06cc\u0645 \u0686\u06af\u0648\u0646\u0647 \u0645\u06cc\u200c\u062a\u0648\u0627\u0646\u062f \u0645\u062a\u0646 \u0645\u0646\u0633\u062c\u0645 \u0648 \u0645\u0631\u062a\u0628\u0637 \u0628\u0627 \u0632\u0645\u06cc\u0646\u0647 \u062a\u0648\u0644\u06cc\u062f \u06a9\u0646\u062f.<\/p>\n<p>\u062a\u0627\u0628\u0639 \u0628\u0627 \u062a\u0646\u0638\u06cc\u0645 \u0645\u062f\u0644 \u062f\u0631 \u062d\u0627\u0644\u062a \u0627\u0631\u0632\u06cc\u0627\u0628\u06cc \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <em>model.eval()<\/em> \u0634\u0631\u0648\u0639 \u0645\u06cc \u0634\u0648\u062f.  \u062d\u0627\u0644\u062a \u0627\u0631\u0632\u06cc\u0627\u0628\u06cc \u062a\u0636\u0645\u06cc\u0646 \u0645\u06cc \u06a9\u0646\u062f \u06a9\u0647 \u0644\u0627\u06cc\u0647 \u0647\u0627\u06cc\u06cc \u0645\u0627\u0646\u0646\u062f Dropout \u0628\u0647 \u062f\u0631\u0633\u062a\u06cc \u0631\u0641\u062a\u0627\u0631 \u0645\u06cc \u06a9\u0646\u0646\u062f \u0648 \u0628\u0631 \u0646\u062a\u0627\u06cc\u062c \u067e\u06cc\u0634 \u0628\u06cc\u0646\u06cc \u062a\u0623\u062b\u06cc\u0631 \u0646\u0645\u06cc \u06af\u0630\u0627\u0631\u0646\u062f (Goodfellow et al., 2016).  
\u0633\u067e\u0633 \u062f\u0631\u062e\u0648\u0627\u0633\u062a \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0631\u0648\u0634 \u0631\u0645\u0632\u06af\u0630\u0627\u0631\u06cc \u062a\u0648\u06a9\u0646 \u0633\u0627\u0632 \u0628\u0647 \u0634\u0646\u0627\u0633\u0647 \u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc \u0634\u0648\u062f\u060c \u06a9\u0647 \u0645\u062a\u0646 \u0631\u0627 \u0628\u0647 \u0642\u0627\u0644\u0628\u06cc \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc \u06a9\u0646\u062f \u06a9\u0647 \u0645\u062f\u0644 \u0628\u062a\u0648\u0627\u0646\u062f \u067e\u0631\u062f\u0627\u0632\u0634 \u06a9\u0646\u062f.  \u0627\u06cc\u0646 \u0634\u0646\u0627\u0633\u0647 \u0647\u0627\u06cc \u0648\u0631\u0648\u062f\u06cc \u0628\u0647 \u062f\u0633\u062a\u06af\u0627\u0647 \u0645\u0634\u062e\u0635 \u0634\u062f\u0647 (\u06cc\u0627 CPU \u06cc\u0627 GPU) \u0645\u0646\u062a\u0642\u0644 \u0645\u06cc \u0634\u0648\u0646\u062f \u062a\u0627 \u0642\u062f\u0631\u062a \u0645\u062d\u0627\u0633\u0628\u0627\u062a\u06cc \u0645\u0648\u062c\u0648\u062f \u0631\u0627 \u0627\u0641\u0632\u0627\u06cc\u0634 \u062f\u0647\u0646\u062f.<\/p>\n<p>\u0633\u067e\u0633 \u0627\u06cc\u0646 \u062a\u0627\u0628\u0639 \u0648\u0627\u0631\u062f \u06cc\u06a9 \u062d\u0644\u0642\u0647 \u0645\u06cc \u0634\u0648\u062f \u06a9\u0647 \u062a\u0627 \u0631\u0633\u06cc\u062f\u0646 \u0628\u0647 \u062d\u062f\u0627\u06a9\u062b\u0631 \u0637\u0648\u0644 \u0645\u062a\u0646 \u062a\u0648\u0644\u06cc\u062f \u0634\u062f\u0647 \u06cc\u0627 \u062a\u0648\u0644\u06cc\u062f \u06cc\u06a9 \u0646\u0634\u0627\u0646\u0647 \u067e\u0627\u06cc\u0627\u0646 \u062f\u0646\u0628\u0627\u0644\u0647 (EOS) \u0627\u062f\u0627\u0645\u0647 \u0645\u06cc \u06cc\u0627\u0628\u062f.  
\u062f\u0631 \u0637\u0648\u0644 \u0647\u0631 \u062a\u06a9\u0631\u0627\u0631\u060c \u062f\u0646\u0628\u0627\u0644\u0647 \u0641\u0639\u0644\u06cc \u062a\u0648\u06a9\u0646 \u0647\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0634\u062f\u0647 \u0627\u0632 \u0645\u062f\u0644 \u0639\u0628\u0648\u0631 \u062f\u0627\u062f\u0647 \u0645\u06cc \u0634\u0648\u062f \u062a\u0627 \u0644\u0627\u062c\u06cc\u062a \u0647\u0627\u06cc \u062e\u0631\u0648\u062c\u06cc \u0628\u0647 \u062f\u0633\u062a \u0622\u06cc\u062f.  \u0644\u0627\u062c\u06cc\u062a \u0647\u0627 \u067e\u06cc\u0634 \u0628\u06cc\u0646\u06cc \u0647\u0627\u06cc \u062e\u0627\u0645 \u0648 \u0646\u0631\u0645\u0627\u0644\u200c\u0646\u0634\u062f\u0647 \u0647\u0633\u062a\u0646\u062f \u06a9\u0647 \u0646\u0634\u0627\u0646 \u062f\u0647\u0646\u062f\u0647 \u0627\u0637\u0645\u06cc\u0646\u0627\u0646 \u0645\u062f\u0644 \u0628\u0631\u0627\u06cc \u0647\u0631 \u0646\u0634\u0627\u0646\u0647 \u062f\u0631 \u0648\u0627\u0698\u06af\u0627\u0646 \u0647\u0633\u062a\u0646\u062f.  \u0644\u0627\u062c\u06cc\u062a \u0647\u0627 \u0628\u0631\u0627\u06cc \u0622\u062e\u0631\u06cc\u0646 \u0646\u0634\u0627\u0646\u0647 \u062f\u0631 \u062f\u0646\u0628\u0627\u0644\u0647 \u0627\u0646\u062a\u062e\u0627\u0628 \u0645\u06cc \u0634\u0648\u0646\u062f \u0648 \u062a\u0648\u06a9\u0646 \u0628\u0627 \u0628\u0627\u0644\u0627\u062a\u0631\u06cc\u0646 \u0627\u062d\u062a\u0645\u0627\u0644 (\u0645\u062d\u062a\u0645\u0644 \u062a\u0631\u06cc\u0646 \u0646\u0634\u0627\u0646\u0647 \u0628\u0639\u062f\u06cc) \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 <em>torch.argmax<\/em> \u062a\u0639\u06cc\u06cc\u0646 \u0645\u06cc \u0634\u0648\u062f.  
\u0627\u06cc\u0646 \u0646\u0634\u0627\u0646\u0647 \u0628\u0647 \u062f\u0646\u0628\u0627\u0644\u0647 \u062a\u0648\u0644\u06cc\u062f \u0634\u062f\u0647 \u0627\u0636\u0627\u0641\u0647 \u0645\u06cc \u0634\u0648\u062f.<\/p>\n<p>\u0627\u06af\u0631 \u062a\u0648\u06a9\u0646 \u062a\u0648\u0644\u06cc\u062f \u0634\u062f\u0647\u060c \u0646\u0634\u0627\u0646\u0647 EOS \u0628\u0627\u0634\u062f\u060c \u062d\u0644\u0642\u0647 \u0634\u06a9\u0633\u062a\u0647 \u0645\u06cc\u200c\u0634\u0648\u062f \u0648 \u0646\u0634\u0627\u0646 \u0645\u06cc\u200c\u062f\u0647\u062f \u06a9\u0647 \u0645\u062f\u0644 \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0631\u0627 \u0628\u0647 \u067e\u0627\u06cc\u0627\u0646 \u0631\u0633\u0627\u0646\u062f\u0647 \u0627\u0633\u062a.  \u062f\u0631 \u0646\u0647\u0627\u06cc\u062a\u060c \u062a\u0648\u0627\u0644\u06cc \u062a\u0648\u06a9\u0646\u200c\u0647\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0634\u062f\u0647 \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u0631\u0648\u0634 \u0631\u0645\u0632\u06af\u0634\u0627\u06cc\u06cc \u062a\u0648\u06a9\u0646\u0627\u06cc\u0632\u0631 \u0628\u0647 \u0645\u062a\u0646 \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc\u200c\u0634\u0648\u062f\u060c \u06a9\u0647 \u0646\u0645\u0627\u06cc\u0634\u200c\u0647\u0627\u06cc \u0639\u062f\u062f\u06cc \u0631\u0627 \u0628\u0647 \u0645\u062a\u0646 \u0642\u0627\u0628\u0644 \u062e\u0648\u0627\u0646\u062f\u0646 \u0628\u0631\u0627\u06cc \u0627\u0646\u0633\u0627\u0646 \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc\u200c\u06a9\u0646\u062f\u060c \u0648 \u0627\u0632 \u0647\u0631 \u0646\u0634\u0627\u0646\u0647 \u062e\u0627\u0635\u06cc \u0635\u0631\u0641\u0646\u0638\u0631 \u0645\u06cc\u200c\u06a9\u0646\u062f.<\/p>\n<p>\u0627\u06cc\u0646 \u0641\u0631\u0622\u06cc\u0646\u062f \u062a\u06a9\u0631\u0627\u0631\u06cc \u067e\u06cc\u0634\u200c\u0628\u06cc\u0646\u06cc \u0646\u0634\u0627\u0646\u0647 \u0628\u0639\u062f\u06cc \u0628\u0631 \u0627\u0633\u0627\u0633 \u062a\u0648\u0627\u0644\u06cc 
\u0641\u0639\u0644\u06cc\u060c \u062a\u0648\u0627\u0646\u0627\u06cc\u06cc \u0645\u062f\u0644 \u0631\u0627 \u0628\u0631\u0627\u06cc \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0628\u0647 \u0634\u06cc\u0648\u0647\u200c\u0627\u06cc \u0645\u0631\u062a\u0628\u0637 \u0628\u0627 \u0632\u0645\u06cc\u0646\u0647 \u0646\u0634\u0627\u0646 \u0645\u06cc\u200c\u062f\u0647\u062f\u060c \u06a9\u0647 \u0628\u0631\u0627\u06cc \u06a9\u0627\u0631\u0628\u0631\u062f\u0647\u0627\u06cc\u06cc \u0645\u0627\u0646\u0646\u062f \u062a\u0648\u0644\u06cc\u062f \u062f\u0627\u0633\u062a\u0627\u0646\u060c \u0633\u06cc\u0633\u062a\u0645\u200c\u0647\u0627\u06cc \u06af\u0641\u062a\u06af\u0648 \u0648 \u0633\u0627\u06cc\u0631 \u0648\u0638\u0627\u06cc\u0641 \u067e\u0631\u062f\u0627\u0632\u0634 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u0628\u0633\u06cc\u0627\u0631 \u0645\u0647\u0645 \u0627\u0633\u062a (\u0648\u0627\u0633\u0648\u0627\u0646\u06cc \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646. \u060c 2017).<\/p>\n<p>\u06a9\u062f \u062a\u0627\u0628\u0639 generate_text \u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u0622\u0645\u062f\u0647 \u0627\u0633\u062a:<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"k\">def<\/span> <span class=\"nf\">generate_text<\/span><span class=\"p\">(<\/span><span class=\"n\">model<\/span><span class=\"p\">,<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">,<\/span> <span class=\"n\">prompt<\/span><span class=\"p\">,<\/span> <span class=\"n\">max_length<\/span><span class=\"o\">=<\/span><span class=\"mi\">50<\/span><span class=\"p\">,<\/span> <span class=\"n\">device<\/span><span class=\"o\">=<\/span><span class=\"sh\">'<\/span><span class=\"s\">cuda<\/span><span class=\"sh\">'<\/span><span class=\"p\">):<\/span>\n    <span class=\"n\">model<\/span><span class=\"p\">.<\/span><span class=\"nf\">eval<\/span><span class=\"p\">()<\/span>\n    <span class=\"n\">input_ids<\/span> <span 
class=\"o\">=<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">.<\/span><span class=\"nf\">encode<\/span><span class=\"p\">(<\/span><span class=\"n\">prompt<\/span><span class=\"p\">,<\/span> <span class=\"n\">return_tensors<\/span><span class=\"o\">=<\/span><span class=\"sh\">'<\/span><span class=\"s\">pt<\/span><span class=\"sh\">'<\/span><span class=\"p\">).<\/span><span class=\"nf\">to<\/span><span class=\"p\">(<\/span><span class=\"n\">device<\/span><span class=\"p\">)<\/span>\n    <span class=\"n\">generated<\/span> <span class=\"o\">=<\/span> <span class=\"n\">input_ids<\/span>\n\n    <span class=\"k\">for<\/span> <span class=\"n\">_<\/span> <span class=\"ow\">in<\/span> <span class=\"nf\">range<\/span><span class=\"p\">(<\/span><span class=\"n\">max_length<\/span><span class=\"p\">):<\/span>\n        <span class=\"n\">outputs<\/span> <span class=\"o\">=<\/span> <span class=\"nf\">model<\/span><span class=\"p\">(<\/span><span class=\"n\">generated<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">next_token_logits<\/span> <span class=\"o\">=<\/span> <span class=\"n\">outputs<\/span><span class=\"p\">[:,<\/span> <span class=\"o\">-<\/span><span class=\"mi\">1<\/span><span class=\"p\">,<\/span> <span class=\"p\">:]<\/span>\n        <span class=\"n\">next_token<\/span> <span class=\"o\">=<\/span> <span class=\"n\">torch<\/span><span class=\"p\">.<\/span><span class=\"nf\">argmax<\/span><span class=\"p\">(<\/span><span class=\"n\">next_token_logits<\/span><span class=\"p\">,<\/span> <span class=\"n\">dim<\/span><span class=\"o\">=-<\/span><span class=\"mi\">1<\/span><span class=\"p\">).<\/span><span class=\"nf\">unsqueeze<\/span><span class=\"p\">(<\/span><span class=\"mi\">0<\/span><span class=\"p\">)<\/span>\n        <span class=\"n\">generated<\/span> <span class=\"o\">=<\/span> <span class=\"n\">torch<\/span><span class=\"p\">.<\/span><span class=\"nf\">cat<\/span><span class=\"p\">((<\/span><span class=\"n\">generated<\/span><span 
class=\"p\">,<\/span> <span class=\"n\">next_token<\/span><span class=\"p\">),<\/span> <span class=\"n\">dim<\/span><span class=\"o\">=<\/span><span class=\"mi\">1<\/span><span class=\"p\">)<\/span>\n        <span class=\"k\">if<\/span> <span class=\"n\">next_token<\/span><span class=\"p\">.<\/span><span class=\"nf\">item<\/span><span class=\"p\">()<\/span> <span class=\"o\">==<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">.<\/span><span class=\"n\">eos_token_id<\/span><span class=\"p\">:<\/span>\n            <span class=\"k\">break<\/span>\n\n    <span class=\"n\">generated_text<\/span> <span class=\"o\">=<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">.<\/span><span class=\"nf\">decode<\/span><span class=\"p\">(<\/span><span class=\"n\">generated<\/span><span class=\"p\">[<\/span><span class=\"mi\">0<\/span><span class=\"p\">],<\/span> <span class=\"n\">skip_special_tokens<\/span><span class=\"o\">=<\/span><span class=\"bp\">True<\/span><span class=\"p\">)<\/span>\n    <span class=\"k\">return<\/span> <span class=\"n\">generated_text<\/span>\n\n<span class=\"n\">prompt<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">Once upon a time<\/span><span class=\"sh\">\"<\/span>\n<span class=\"n\">generated_text<\/span> <span class=\"o\">=<\/span> <span class=\"nf\">generate_text<\/span><span class=\"p\">(<\/span><span class=\"n\">model<\/span><span class=\"p\">,<\/span> <span class=\"n\">tokenizer<\/span><span class=\"p\">,<\/span> <span class=\"n\">prompt<\/span><span class=\"p\">,<\/span> <span class=\"n\">device<\/span><span class=\"o\">=<\/span><span class=\"n\">device<\/span><span class=\"p\">)<\/span>\n<span class=\"nf\">print<\/span><span class=\"p\">(<\/span><span class=\"n\">generated_text<\/span><span class=\"p\">)<\/span>\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg 
xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<h2><span class=\"ez-toc-section\" id=\"%D9%86%D8%AA%DB%8C%D8%AC%D9%87\"><\/span>\n<p>  \u0646\u062a\u06cc\u062c\u0647<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u062f\u0631 \u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u060c \u062a\u0648\u0636\u06cc\u062d\u06cc \u062c\u0627\u0645\u0639 \u0648 \u06af\u0627\u0645 \u0628\u0647 \u06af\u0627\u0645 \u062f\u0631 \u0645\u0648\u0631\u062f \u0646\u062d\u0648\u0647 \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc \u06cc\u06a9 \u0645\u062f\u0644 \u0633\u0627\u062f\u0647 GPT (\u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0627\u0632 \u0642\u0628\u0644 \u0622\u0645\u0648\u0632\u0634\u200c\u062f\u06cc\u062f\u0647) \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 PyTorch \u0627\u0631\u0627\u0626\u0647 \u06a9\u0631\u062f\u06cc\u0645.  
\u0645\u0627 \u0631\u0648\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06cc\u06a9 \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0633\u0641\u0627\u0631\u0634\u06cc\u060c \u0633\u0627\u062e\u062a \u0645\u062f\u0644 GPT\u060c \u0622\u0645\u0648\u0632\u0634 \u0622\u0646 \u0648 \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0631\u0627 \u0637\u06cc \u06a9\u0631\u062f\u06cc\u0645.  \u0627\u06cc\u0646 \u067e\u06cc\u0627\u062f\u0647 \u0633\u0627\u0632\u06cc \u0639\u0645\u0644\u06cc \u0645\u0641\u0627\u0647\u06cc\u0645 \u0627\u0633\u0627\u0633\u06cc \u067e\u0634\u062a \u0645\u0639\u0645\u0627\u0631\u06cc GPT \u0631\u0627 \u0646\u0634\u0627\u0646 \u0645\u06cc \u062f\u0647\u062f \u0648 \u0628\u0647 \u0639\u0646\u0648\u0627\u0646 \u067e\u0627\u06cc\u0647 \u0627\u06cc \u0628\u0631\u0627\u06cc \u0628\u0631\u0646\u0627\u0645\u0647 \u0647\u0627\u06cc \u067e\u06cc\u0686\u06cc\u062f\u0647 \u062a\u0631 \u0639\u0645\u0644 \u0645\u06cc \u06a9\u0646\u062f.  \u0628\u0627 \u062f\u0646\u0628\u0627\u0644 \u06a9\u0631\u062f\u0646 \u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u060c \u0627\u06a9\u0646\u0648\u0646 \u062f\u0631\u06a9 \u0627\u0648\u0644\u06cc\u0647 \u0627\u06cc \u0627\u0632 \u0646\u062d\u0648\u0647 \u0627\u06cc\u062c\u0627\u062f\u060c \u0622\u0645\u0648\u0632\u0634 \u0648 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u06cc\u06a9 \u0645\u062f\u0644 \u0633\u0627\u062f\u0647 GPT \u062f\u0627\u0631\u06cc\u062f.  
\u0627\u06cc\u0646 \u062f\u0627\u0646\u0634 \u0634\u0645\u0627 \u0631\u0627 \u0628\u0647 \u0622\u0632\u0645\u0627\u06cc\u0634 \u0628\u0627 \u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc\u200c\u0647\u0627\u06cc \u0645\u062e\u062a\u0644\u0641\u060c \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647\u200c\u0647\u0627\u06cc \u0628\u0632\u0631\u06af\u062a\u0631 \u0648 \u062a\u06a9\u0646\u06cc\u06a9\u200c\u0647\u0627\u06cc \u0627\u0636\u0627\u0641\u06cc \u0628\u0631\u0627\u06cc \u0627\u0641\u0632\u0627\u06cc\u0634 \u0639\u0645\u0644\u06a9\u0631\u062f \u0648 \u0642\u0627\u0628\u0644\u06cc\u062a\u200c\u0647\u0627\u06cc \u0645\u062f\u0644 \u0645\u062c\u0647\u0632 \u0645\u06cc\u200c\u06a9\u0646\u062f.  \u0627\u0635\u0648\u0644 \u0648 \u062a\u06a9\u0646\u06cc\u06a9 \u0647\u0627\u06cc \u067e\u0648\u0634\u0634 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u062f\u0631 \u0627\u06cc\u0646\u062c\u0627 \u0628\u0647 \u0634\u0645\u0627 \u06a9\u0645\u06a9 \u0645\u06cc \u06a9\u0646\u062f \u062a\u0627 \u0645\u062f\u0644 \u0647\u0627\u06cc \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0631\u0627 \u062f\u0631 \u0648\u0638\u0627\u06cc\u0641 \u0645\u062e\u062a\u0644\u0641 NLP \u0627\u0639\u0645\u0627\u0644 \u06a9\u0646\u06cc\u062f \u0648 \u067e\u062a\u0627\u0646\u0633\u06cc\u0644 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0631\u0627 \u062f\u0631 \u062f\u0631\u06a9 \u0648 \u062a\u0648\u0644\u06cc\u062f \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u0628\u0627\u0632 \u06a9\u0646\u06cc\u062f.  
\u0631\u0648\u0634\u200c\u0647\u0627\u06cc \u0627\u0631\u0627\u0626\u0647\u200c\u0634\u062f\u0647 \u0628\u0627 \u067e\u06cc\u0634\u0631\u0641\u062a\u200c\u0647\u0627\u06cc \u0645\u062f\u0644\u200c\u0647\u0627\u06cc \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u06a9\u0647 \u062a\u0648\u0633\u0637 \u0648\u0627\u0633\u0648\u0627\u0646\u06cc \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646 (2017) \u0645\u0639\u0631\u0641\u06cc \u0634\u062f\u0647\u200c\u0627\u0646\u062f\u060c \u0647\u0645\u0633\u0648 \u0647\u0633\u062a\u0646\u062f\u060c \u0628\u0627 \u062a\u0627\u06a9\u06cc\u062f \u0628\u0631 \u0642\u062f\u0631\u062a \u0645\u06a9\u0627\u0646\u06cc\u0633\u0645 \u0647\u0627\u06cc \u062e\u0648\u062f\u062a\u0648\u062c\u0647\u06cc \u062f\u0631 \u067e\u0631\u062f\u0627\u0632\u0634 \u062a\u0648\u0627\u0644\u06cc \u062f\u0627\u062f\u0647 \u0647\u0627 \u0628\u0647 \u0637\u0648\u0631 \u0645\u0648\u062b\u0631\u062a\u0631 \u0627\u0632 \u0631\u0648\u06cc\u06a9\u0631\u062f\u0647\u0627\u06cc \u0633\u0646\u062a\u06cc (\u0648\u0627\u0633\u0648\u0627\u0646\u06cc \u0648 \u0647\u0645\u06a9\u0627\u0631\u0627\u0646\u060c 2017).  
\u0627\u06cc\u0646 \u062f\u0631\u06a9 \u0645\u0633\u06cc\u0631\u0647\u0627\u06cc\u06cc \u0631\u0627 \u0628\u0631\u0627\u06cc \u06a9\u0634\u0641 \u0648 \u0646\u0648\u0622\u0648\u0631\u06cc \u062f\u0631 \u0632\u0645\u06cc\u0646\u0647 \u067e\u0631\u062f\u0627\u0632\u0634 \u0632\u0628\u0627\u0646 \u0637\u0628\u06cc\u0639\u06cc \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 \u062a\u06a9\u0646\u06cc\u06a9 \u0647\u0627\u06cc \u067e\u06cc\u0634\u0631\u0641\u062a\u0647 \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642 \u0628\u0627\u0632 \u0645\u06cc \u06a9\u0646\u062f (Kingma &#038; Ba, 2015).<\/p>\n<h4><span class=\"ez-toc-section\" id=\"%D9%85%D9%86%D8%A7%D8%A8%D8%B9\"><\/span>\n<p>  \u0645\u0646\u0627\u0628\u0639:<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>Devlin\u060c J.\u060c Chang\u060c MW\u060c Lee\u060c K.\u060c &#038; Toutanova\u060c K. (2019).  BERT: \u067e\u06cc\u0634 \u0622\u0645\u0648\u0632\u0634 \u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631\u0647\u0627\u06cc \u062f\u0648 \u062c\u0647\u062a\u0647 \u0639\u0645\u06cc\u0642 \u0628\u0631\u0627\u06cc \u062f\u0631\u06a9 \u0632\u0628\u0627\u0646.  \u067e\u06cc\u0634 \u0686\u0627\u067e arXiv arXiv:1810.04805.<\/li>\n<li>\u0628\u0631\u0627\u0648\u0646\u060c \u0633\u0644\u060c \u0645\u0627\u0646\u060c \u0628\u06cc.\u060c \u0631\u0627\u06cc\u062f\u0631\u060c \u0627\u0646.\u060c \u0633\u0627\u0628\u06cc\u0647\u060c \u0627\u0645.\u060c \u06a9\u0627\u067e\u0644\u0627\u0646\u060c \u062c\u06cc.\u060c \u062f\u0647\u06cc\u0648\u0627\u0644\u060c \u067e\u06cc\u060c &#8230; \u0648 \u0622\u0645\u0648\u062f\u06cc\u060c \u062f\u06cc. (2020).  \u0645\u062f\u0644 \u0647\u0627\u06cc \u0632\u0628\u0627\u0646 \u06cc\u0627\u062f\u06af\u06cc\u0631\u0646\u062f\u06af\u0627\u0646 \u06a9\u0645\u06cc \u0647\u0633\u062a\u0646\u062f.  
\u067e\u06cc\u0634 \u0686\u0627\u067e arXiv arXiv:2005.14165.<\/li>\n<li>Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, AN, &#8230; &#038; Polosukhin, I. (2017).  \u062a\u0648\u062c\u0647 \u062a\u0646\u0647\u0627 \u0686\u06cc\u0632\u06cc \u0627\u0633\u062a \u06a9\u0647 \u0646\u06cc\u0627\u0632 \u062f\u0627\u0631\u06cc\u062f.  \u067e\u06cc\u0634 \u0686\u0627\u067e arXiv arXiv:1706.03762. <\/li>\n<li>\u0647\u0646\u062f\u0631\u06cc\u06a9\u0633\u060c \u062f\u06cc\u060c \u0648 \u06af\u06cc\u0645\u067e\u0644\u060c \u06a9. (2016).  \u0648\u0627\u062d\u062f\u0647\u0627\u06cc \u062e\u0637\u06cc \u062e\u0637\u0627\u06cc \u06af\u0627\u0648\u0633\u06cc (GELUs).  \u067e\u06cc\u0634 \u0686\u0627\u067e arXiv arXiv:1606.08415. <\/li>\n<li>Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., &#038; Salakhutdinov, R. (2014).  Dropout: \u06cc\u06a9 \u0631\u0627\u0647 \u0633\u0627\u062f\u0647 \u0628\u0631\u0627\u06cc \u062c\u0644\u0648\u06af\u06cc\u0631\u06cc \u0627\u0632 \u0628\u0631\u0627\u0632\u0634 \u0634\u0628\u06a9\u0647 \u0647\u0627\u06cc \u0639\u0635\u0628\u06cc.  \u0645\u062c\u0644\u0647 \u062a\u062d\u0642\u06cc\u0642\u0627\u062a \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646\u060c 15 (1)\u060c 1929-1958. <\/li>\n<li>\u0631\u0627\u062f\u0641\u0648\u0631\u062f\u060c \u0627.\u060c \u0646\u0631\u0627\u0633\u06cc\u0645\u0647\u0627\u0646\u060c \u06a9.\u060c \u0633\u0627\u0644\u06cc\u0645\u0627\u0646\u0632\u060c \u062a\u06cc\u060c \u0648 \u0633\u0648\u062a\u0633\u06a9\u0648\u0631\u060c \u0622\u06cc. (2018).  \u0628\u0647\u0628\u0648\u062f \u062f\u0631\u06a9 \u0632\u0628\u0627\u0646 \u0628\u0627 \u067e\u06cc\u0634 \u0622\u0645\u0648\u0632\u0634 \u0645\u0648\u0644\u062f.  \u067e\u06cc\u0634 \u0686\u0627\u067e OpenAI.<\/li>\n<li>Goodfellow\u060c I.\u060c Bengio\u060c Y.\u060c \u0648 Courville\u060c A. (2016).  \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0639\u0645\u06cc\u0642.  \u0645\u0637\u0628\u0648\u0639\u0627\u062a MIT. 
<\/li>\n<li>Kingma, DP, &#038; Ba, J. (2015).  \u0622\u062f\u0627\u0645: \u0631\u0648\u0634\u06cc \u0628\u0631\u0627\u06cc \u0628\u0647\u06cc\u0646\u0647 \u0633\u0627\u0632\u06cc \u062a\u0635\u0627\u062f\u0641\u06cc.  \u067e\u06cc\u0634 \u0686\u0627\u067e arXiv arXiv:1412.6980.<\/li>\n<\/ul><\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u062f\u0631 \u0627\u0628\u062a\u062f\u0627 \u062f\u0631 14\/5\/2024 \u062f\u0631 emangini.com \u0627\u0631\u0633\u0627\u0644 \u0634\u062f\u0647 \u0627\u0633\u062a \u0627\u06cc\u0646 \u0631\u0627\u0647\u0646\u0645\u0627\u06cc \u062c\u0627\u0645\u0639 \u062a\u0648\u0636\u06cc\u062d \u0645\u0641\u0635\u0644\u06cc \u062f\u0631\u0628\u0627\u0631\u0647 \u0646\u062d\u0648\u0647 \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc \u06cc\u06a9 \u0645\u062f\u0644 \u0633\u0627\u062f\u0647 GPT (\u062a\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062a\u0648\u0631 \u0627\u0632 \u067e\u06cc\u0634 \u0622\u0645\u0648\u0632\u0634\u200c\u062f\u06cc\u062f\u0647) \u0628\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0627\u0632 PyTorch \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc\u200c\u062f\u0647\u062f. \u0645\u0627 \u0627\u062c\u0632\u0627\u06cc \u0644\u0627\u0632\u0645\u060c \u0646\u062d\u0648\u0647 \u0622\u0645\u0648\u0632\u0634 \u0645\u062f\u0644 \u0648 \u0646\u062d\u0648\u0647 \u062a\u0648\u0644\u06cc\u062f \u0645\u062a\u0646 \u0631\u0627 \u067e\u0648\u0634\u0634 \u062e\u0648\u0627\u0647\u06cc\u0645 \u062f\u0627\u062f. 
\u0628\u0631\u0627\u06cc \u06a9\u0633\u0627\u0646\u06cc \u0627\u0632 \u0634\u0645\u0627 \u06a9\u0647 \u0645\u06cc\u200c\u062e\u0648\u0627\u0647\u06cc\u062f \u062f\u0646\u0628\u0627\u0644 \u06a9\u0646\u06cc\u062f\u060c \u06cc\u06a9 \u067e\u06cc\u0627\u062f\u0647\u200c\u0633\u0627\u0632\u06cc &hellip;<\/p>\n","protected":false},"author":2,"featured_media":64603,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"fifu_image_url":"https:\/\/media.dev.to\/cdn-cgi\/image\/width=1000,height=500,fit=cover,gravity=auto,format=auto\/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F6lqrknnvpe09clctdqkd.png","fifu_image_alt":"","footnotes":""},"categories":[339],"tags":[],"class_list":["post-64602","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-dev"],"_links":{"self":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts\/64602","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/comments?post=64602"}],"version-history":[{"count":0,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts\/64602\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/media\/64603"}],"wp:attachment":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/media?parent=64602"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/categories?post=64602"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/tags?post=64602"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{
rel}","templated":true}]}}