{"id":93947,"date":"2025-01-21T12:58:44","date_gmt":"2025-01-21T09:28:44","guid":{"rendered":"https:\/\/nabfollower.com\/blog\/pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af\/"},"modified":"2025-01-21T12:58:44","modified_gmt":"2025-01-21T09:28:44","slug":"pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af","status":"publish","type":"post","link":"https:\/\/nabfollower.com\/blog\/pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af\/","title":{"rendered":"PySpark Local Python Windows Notebook \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u06cc\u062f"},"content":{"rendered":"<div data-article-id=\"2230178\" id=\"article-body\">\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_84 counter-hierarchy ez-toc-counter-rtl ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0641\u0647\u0631\u0633\u062a \u0645\u0637\u0627\u0644\u0628<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/nabfollower.com\/blog\/pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af\/#%D9%85%D9%82%D8%AF%D9%85%D9%87\" >\u0645\u0642\u062f\u0645\u0647<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/nabfollower.com\/blog\/pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af\/#%D9%86%D8%B5%D8%A8_%D9%88_%D8%B1%D8%A7%D9%87_%D8%A7%D9%86%D8%AF%D8%A7%D8%B2%DB%8C\" >\u0646\u0635\u0628 \u0648 \u0631\u0627\u0647 \u0627\u0646\u062f\u0627\u0632\u06cc<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/nabfollower.com\/blog\/pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af\/#%D9%BE%DB%8C%DA%A9%D8%B1%D8%A8%D9%86%D8%AF%DB%8C_%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86\" >\u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/nabfollower.com\/blog\/pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af\/#%D9%85%D8%AB%D8%A7%D9%84_%D8%B1%D8%A7_%D8%A8%D8%A7_%D9%86%D9%88%D8%AA_%D8%A8%D9%88%DA%A9_Pyspark_%D8%A7%D9%85%D8%AA%D8%AD%D8%A7%D9%86_%DA%A9%D9%86%DB%8C%D8%AF\" >\u0645\u062b\u0627\u0644 \u0631\u0627 \u0628\u0627 \u0646\u0648\u062a \u0628\u0648\u06a9 Pyspark \u0627\u0645\u062a\u062d\u0627\u0646 \u06a9\u0646\u06cc\u062f.<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/nabfollower.com\/blog\/pyspark-local-python-windows-notebook-%d8%b1%d8%a7-%d8%a7%d8%ac%d8%b1%d8%a7-%da%a9%d9%86%db%8c%d8%af\/#%D9%85%D8%B1%D8%AC%D8%B9\" >\u0645\u0631\u062c\u0639<\/a><\/li><\/ul><\/nav><\/div>\n<h2><span class=\"ez-toc-section\" id=\"%D9%85%D9%82%D8%AF%D9%85%D9%87\"><\/span>\n<p>  \u0645\u0642\u062f\u0645\u0647<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>PySpark API \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc Apache Spark \u0627\u0633\u062a\u060c \u06cc\u06a9 \u0633\u06cc\u0633\u062a\u0645 \u0645\u062d\u0627\u0633\u0628\u0627\u062a\u06cc \u062a\u0648\u0632\u06cc\u0639 \u0634\u062f\u0647 \u0645\u0646\u0628\u0639 \u0628\u0627\u0632 \u06a9\u0647 \u067e\u0631\u062f\u0627\u0632\u0634 \u0633\u0631\u06cc\u0639 \u0648 \u0645\u0642\u06cc\u0627\u0633 \u067e\u0630\u06cc\u0631 \u062f\u0627\u062f\u0647 \u0631\u0627 \u0627\u0645\u06a9\u0627\u0646 \u067e\u0630\u06cc\u0631 \u0645\u06cc \u06a9\u0646\u062f. PySpark \u0628\u0647 \u062a\u0648\u0633\u0639\u0647 \u062f\u0647\u0646\u062f\u06af\u0627\u0646 Python \u0627\u062c\u0627\u0632\u0647 \u0645\u06cc \u062f\u0647\u062f \u062a\u0627 \u0627\u0632 \u0642\u0627\u0628\u0644\u06cc\u062a \u0647\u0627\u06cc \u0642\u062f\u0631\u062a\u0645\u0646\u062f Spark \u0628\u0631\u0627\u06cc \u062a\u062c\u0632\u06cc\u0647 \u0648 \u062a\u062d\u0644\u06cc\u0644 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af\u060c \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646 \u0648 \u06a9\u0627\u0631\u0647\u0627\u06cc \u0645\u0647\u0646\u062f\u0633\u06cc \u062f\u0627\u062f\u0647 \u0628\u062f\u0648\u0646 \u0646\u06cc\u0627\u0632 \u0628\u0647 \u06a9\u0627\u0648\u0634 \u062f\u0631 \u067e\u06cc\u0686\u06cc\u062f\u06af\u06cc \u0647\u0627\u06cc \u062c\u0627\u0648\u0627 \u06cc\u0627 \u0627\u0633\u06a9\u0627\u0644\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u0646\u062f.<\/p>\n<p>\u0628\u0627 PySpark\u060c \u06a9\u0627\u0631\u0628\u0631\u0627\u0646 \u0645\u06cc \u062a\u0648\u0627\u0646\u0646\u062f \u0645\u062c\u0645\u0648\u0639\u0647 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af\u06cc \u0631\u0627 \u062f\u0631 \u0645\u06cc\u0627\u0646 \u062e\u0648\u0634\u0647 \u0647\u0627 \u067e\u0631\u062f\u0627\u0632\u0634 \u06a9\u0646\u0646\u062f\u060c \u062a\u0628\u062f\u06cc\u0644 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u062a\u0648\u0632\u06cc\u0639 \u0634\u062f\u0647 \u0631\u0627 \u0627\u0646\u062c\u0627\u0645 \u062f\u0647\u0646\u062f \u0648 \u0627\u0644\u06af\u0648\u0631\u06cc\u062a\u0645 \u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646 \u0631\u0627 \u0627\u062c\u0631\u0627 \u06a9\u0646\u0646\u062f. \u0627\u06cc\u0646 \u06cc\u06a9\u067e\u0627\u0631\u0686\u0647 \u0628\u0627 \u0686\u0627\u0631\u0686\u0648\u0628\u200c\u0647\u0627\u06cc \u067e\u0631\u062f\u0627\u0632\u0634 \u062f\u0627\u062f\u0647 \u0645\u062d\u0628\u0648\u0628 \u0645\u0627\u0646\u0646\u062f Hadoop \u0627\u062f\u063a\u0627\u0645 \u0645\u06cc\u200c\u0634\u0648\u062f \u0648 \u0627\u0632 \u0641\u0631\u0645\u062a\u200c\u0647\u0627\u06cc \u062f\u0627\u062f\u0647\u200c\u0647\u0627\u06cc \u0645\u062a\u0639\u062f\u062f \u067e\u0634\u062a\u06cc\u0628\u0627\u0646\u06cc \u0645\u06cc\u200c\u06a9\u0646\u062f \u0648 \u0622\u0646 \u0631\u0627 \u0628\u0647 \u06cc\u06a9 \u0627\u0628\u0632\u0627\u0631 \u0647\u0645\u0647 \u06a9\u0627\u0631\u0647 \u062f\u0631 \u0639\u0644\u0645 \u062f\u0627\u062f\u0647 \u0648 \u062a\u062c\u0632\u06cc\u0647 \u0648 \u062a\u062d\u0644\u06cc\u0644 \u062a\u0628\u062f\u06cc\u0644 \u0645\u06cc\u200c\u06a9\u0646\u062f.<\/p>\n<p>\u0627\u06cc\u0646 \u0645\u0642\u062f\u0645\u0647 \u06cc\u06a9 \u0646\u0645\u0627\u06cc \u06a9\u0644\u06cc \u0627\u0632 \u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc PySpark \u0631\u0627 \u0627\u0631\u0627\u0626\u0647 \u0645\u06cc\u200c\u06a9\u0646\u062f\u060c \u0628\u0647 \u0634\u0645\u0627 \u06a9\u0645\u06a9 \u0645\u06cc\u200c\u06a9\u0646\u062f \u062a\u0627 \u0628\u0647 \u0631\u0627\u062d\u062a\u06cc \u062f\u0631 \u0631\u0627\u06cc\u0627\u0646\u0647\u200c\u0647\u0627\u06cc \u0645\u062d\u06cc\u0637\u06cc \u0645\u062d\u0644\u06cc \u0631\u0627\u0647\u200c\u0627\u0646\u062f\u0627\u0632\u06cc \u0648 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f. <\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D9%86%D8%B5%D8%A8_%D9%88_%D8%B1%D8%A7%D9%87_%D8%A7%D9%86%D8%AF%D8%A7%D8%B2%DB%8C\"><\/span>\n<p>  \u0646\u0635\u0628 \u0648 \u0631\u0627\u0647 \u0627\u0646\u062f\u0627\u0632\u06cc<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<ol>\n<li>\u067e\u0627\u06cc\u062a\u0648\u0646 \u0631\u0627 \u062f\u0631 \u0622\u062f\u0631\u0633 \u0632\u06cc\u0631 \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f: https:\/\/www.python.org\/downloads\/\n<\/li>\n<li>\u062c\u0627\u0648\u0627 \u0631\u0627 \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f \u0627\u0628\u062a\u062f\u0627 \u0628\u0627\u06cc\u062f \u0622\u062e\u0631\u06cc\u0646 \u0646\u0633\u062e\u0647 \u062c\u0627\u0648\u0627 \u0631\u0627 \u0627\u0632 \u0622\u062f\u0631\u0633: https:\/\/jdk.java.net \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u0646\u06cc\u062f. \u0645\u0646 \u0627\u0632 \u062c\u0627\u0648\u0627 \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc\u06a9\u0646\u0645 <code>23<\/code> \u0628\u0631\u0627\u06cc \u0627\u06cc\u0646 \u067e\u0633\u062a<\/li>\n<li>PySpark \u0631\u0627 \u0646\u0635\u0628 \u06a9\u0646\u06cc\u062f<\/li>\n<\/ol>\n<p>\u0627\u0628\u062a\u062f\u0627 \u0628\u0627\u06cc\u062f Apache Spark \u0631\u0627 \u0646\u06cc\u0632 \u0627\u0632 \u0627\u06cc\u0646 \u0632\u06cc\u0631 \u062f\u0627\u0646\u0644\u0648\u062f \u06a9\u0646\u06cc\u062f: <\/p>\n<p>\u0645\u0646 \u0627\u0632 https:\/\/www.apache.org\/dyn\/closer.lua\/spark\/spark-3.5.4\/spark-3.5.4-bin-hadoop3.tgz \u0628\u0631\u0627\u06cc \u0633\u0627\u062e\u062a\u0646 \u06cc\u06a9 \u0622\u0645\u0648\u0632\u0634 \u0628\u0631\u0627\u06cc \u0627\u06cc\u0646 \u067e\u0633\u062a \u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u0645\u06cc \u06a9\u0646\u0645.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D9%BE%DB%8C%DA%A9%D8%B1%D8%A8%D9%86%D8%AF%DB%8C_%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86\"><\/span>\n<p>  \u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc \u067e\u0627\u06cc\u062a\u0648\u0646<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<ol>\n<li>\u062c\u0627\u0648\u0627\n<\/li>\n<\/ol>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"kn\">import<\/span> <span class=\"n\">os<\/span>\n<span class=\"n\">os<\/span><span class=\"p\">.<\/span><span class=\"n\">environ<\/span><span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">JAVA_HOME<\/span><span class=\"sh\">\"<\/span><span class=\"p\">]<\/span> <span class=\"o\">=<\/span> <span class=\"sa\">fr<\/span><span class=\"sh\">\"<\/span><span class=\"s\">D:\\Soft\\JAVA\\jdk-23.0.1<\/span><span class=\"sh\">\"<\/span>\n<span class=\"n\">os<\/span><span class=\"p\">.<\/span><span class=\"n\">environ<\/span><span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">PATH<\/span><span class=\"sh\">\"<\/span><span class=\"p\">]<\/span> <span class=\"o\">=<\/span> <span class=\"n\">os<\/span><span class=\"p\">.<\/span><span class=\"n\">environ<\/span><span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">JAVA_HOME<\/span><span class=\"sh\">\"<\/span><span class=\"p\">]<\/span> <span class=\"o\">+<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">\/bin;<\/span><span class=\"sh\">\"<\/span> <span class=\"o\">+<\/span> <span class=\"n\">os<\/span><span class=\"p\">.<\/span><span class=\"n\">environ<\/span><span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">PATH<\/span><span class=\"sh\">\"<\/span><span class=\"p\">]<\/span>\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<ol>\n<li>PySpark\n<\/li>\n<\/ol>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight plaintext\"><code>import os\nos.environ[\"SPARK_HOME\"] = fr\"D:\\Soft\\pyspark\\spark-3.5.4-bin-hadoop3\"\nos.environ[\"PATH\"] = os.environ[\"SPARK_HOME\"] + \"\/bin;\" + os.environ[\"PATH\"]\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u067e\u0633 \u0627\u0632 \u0627\u062a\u0645\u0627\u0645\u060c \u0645\u06cc \u062a\u0648\u0627\u0646\u06cc\u062f Pyspark \u0631\u0627 \u062f\u0631 \u062e\u0637 \u0641\u0631\u0645\u0627\u0646 \u0628\u0631\u0631\u0633\u06cc \u06a9\u0646\u06cc\u062f: <\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D9%85%D8%AB%D8%A7%D9%84_%D8%B1%D8%A7_%D8%A8%D8%A7_%D9%86%D9%88%D8%AA_%D8%A8%D9%88%DA%A9_Pyspark_%D8%A7%D9%85%D8%AA%D8%AD%D8%A7%D9%86_%DA%A9%D9%86%DB%8C%D8%AF\"><\/span>\n<p>  \u0645\u062b\u0627\u0644 \u0631\u0627 \u0628\u0627 \u0646\u0648\u062a \u0628\u0648\u06a9 Pyspark \u0627\u0645\u062a\u062d\u0627\u0646 \u06a9\u0646\u06cc\u062f.<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"kn\">import<\/span> <span class=\"n\">numpy<\/span> <span class=\"k\">as<\/span> <span class=\"n\">np<\/span>\n<span class=\"kn\">import<\/span> <span class=\"n\">pandas<\/span> <span class=\"k\">as<\/span> <span class=\"n\">pd<\/span>\n<span class=\"n\">spark<\/span> <span class=\"o\">=<\/span> <span class=\"n\">SparkSession<\/span><span class=\"p\">.<\/span><span class=\"n\">builder<\/span> \\\n    <span class=\"p\">.<\/span><span class=\"nf\">appName<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">Debugging Example<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\n    <span class=\"p\">.<\/span><span class=\"nf\">master<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">local[*]<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\n    <span class=\"p\">.<\/span><span class=\"nf\">config<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">spark.eventLog.enabled<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">true<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\n    <span class=\"p\">.<\/span><span class=\"nf\">config<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">spark.sql.shuffle.partitions<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">1<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\n    <span class=\"p\">.<\/span><span class=\"nf\">getOrCreate<\/span><span class=\"p\">()<\/span>\n\n<span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">sparkContext<\/span><span class=\"p\">.<\/span><span class=\"nf\">setLogLevel<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">DEBUG<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\n<span class=\"c1\"># Enable Arrow-based columnar data transfers\n<\/span><span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">conf<\/span><span class=\"p\">.<\/span><span class=\"nf\">set<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">spark.sql.execution.arrow.enabled<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">true<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\n\n<span class=\"c1\"># Generate a pandas DataFrame\n<\/span><span class=\"n\">pdf<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pd<\/span><span class=\"p\">.<\/span><span class=\"nc\">DataFrame<\/span><span class=\"p\">(<\/span><span class=\"n\">np<\/span><span class=\"p\">.<\/span><span class=\"n\">random<\/span><span class=\"p\">.<\/span><span class=\"nf\">rand<\/span><span class=\"p\">(<\/span><span class=\"mi\">100<\/span><span class=\"p\">,<\/span> <span class=\"mi\">3<\/span><span class=\"p\">))<\/span>\n\n<span class=\"c1\"># Create a Spark DataFrame from a pandas DataFrame using Arrow\n<\/span><span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"nf\">createDataFrame<\/span><span class=\"p\">(<\/span><span class=\"n\">pdf<\/span><span class=\"p\">)<\/span>\n<span class=\"c1\"># rename columns\n<\/span><span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">df<\/span><span class=\"p\">.<\/span><span class=\"nf\">toDF<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">a<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">b<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">c<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\n<span class=\"n\">df<\/span>\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u0627\u0633\u062a\u0641\u0627\u062f\u0647 \u06a9\u0646\u06cc\u062f <code>df.show(5)<\/code> \u0628\u0631\u0627\u06cc \u062f\u06cc\u062f\u0646 \u062e\u0631\u0648\u062c\u06cc \u062a\u0633\u062a \u0628\u0627 pyspark.<br \/><img decoding=\"async\" src=\"https:\/\/media2.dev.to\/dynamic\/image\/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto\/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F5eeo6eqa8yekni2whvzw.png\" alt=\"\u062a\u0648\u0636\u06cc\u062d\u0627\u062a \u062a\u0635\u0648\u06cc\u0631\" loading=\"lazy\" width=\"800\" height=\"421\" title=\"\"><\/p>\n<p>\u0628\u06cc\u0627\u06cc\u06cc\u062f \u0646\u0645\u0648\u0646\u0647\u200c\u0627\u06cc \u0627\u0632 \u062f\u0627\u062f\u0647\u200c\u0647\u0627\u06cc \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646 \u0631\u0627 \u0627\u0645\u062a\u062d\u0627\u0646 \u06a9\u0646\u06cc\u0645:<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight python\"><code><span class=\"kn\">import<\/span> <span class=\"n\">requests<\/span>\n\n<span class=\"c1\"># URL for the dataset\n<\/span><span class=\"n\">url<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/iris\/iris.data<\/span><span class=\"sh\">\"<\/span>\n\n<span class=\"c1\"># Download the dataset and save it locally\n<\/span><span class=\"n\">response<\/span> <span class=\"o\">=<\/span> <span class=\"n\">requests<\/span><span class=\"p\">.<\/span><span class=\"nf\">get<\/span><span class=\"p\">(<\/span><span class=\"n\">url<\/span><span class=\"p\">)<\/span>\n<span class=\"k\">with<\/span> <span class=\"nf\">open<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">iris.data<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">wb<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> <span class=\"k\">as<\/span> <span class=\"nb\">file<\/span><span class=\"p\">:<\/span>\n    <span class=\"nb\">file<\/span><span class=\"p\">.<\/span><span class=\"nf\">write<\/span><span class=\"p\">(<\/span><span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">content<\/span><span class=\"p\">)<\/span>\n<span class=\"kn\">from<\/span> <span class=\"n\">pyspark.sql<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">SparkSession<\/span>\n\n<span class=\"c1\"># Create a SparkSession\n<\/span><span class=\"n\">spark<\/span> <span class=\"o\">=<\/span> <span class=\"n\">SparkSession<\/span><span class=\"p\">.<\/span><span class=\"n\">builder<\/span> \\\n    <span class=\"p\">.<\/span><span class=\"nf\">appName<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">Iris Data Analysis<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\n    <span class=\"p\">.<\/span><span class=\"nf\">master<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">local[*]<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\\\n    <span class=\"p\">.<\/span><span class=\"nf\">getOrCreate<\/span><span class=\"p\">()<\/span>\n\n<span class=\"c1\"># Path to the locally downloaded Iris dataset\n<\/span><span class=\"n\">iris_data_path<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">iris.data<\/span><span class=\"sh\">\"<\/span>\n\n<span class=\"c1\"># Define the schema for the data\n<\/span><span class=\"n\">columns<\/span> <span class=\"o\">=<\/span> <span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">sepal_length<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">sepal_width<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">petal_length<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">petal_width<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">species<\/span><span class=\"sh\">\"<\/span><span class=\"p\">]<\/span>\n\n<span class=\"c1\"># Load the data into a DataFrame\n<\/span><span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">read<\/span><span class=\"p\">.<\/span><span class=\"nf\">csv<\/span><span class=\"p\">(<\/span><span class=\"n\">iris_data_path<\/span><span class=\"p\">,<\/span> <span class=\"n\">header<\/span><span class=\"o\">=<\/span><span class=\"bp\">False<\/span><span class=\"p\">,<\/span> <span class=\"n\">inferSchema<\/span><span class=\"o\">=<\/span><span class=\"bp\">True<\/span><span class=\"p\">)<\/span>\n\n<span class=\"c1\"># Set column names\n<\/span><span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">df<\/span><span class=\"p\">.<\/span><span class=\"nf\">toDF<\/span><span class=\"p\">(<\/span><span class=\"o\">*<\/span><span class=\"n\">columns<\/span><span class=\"p\">)<\/span>\n\n<span class=\"c1\"># Show the first few rows of the DataFrame\n<\/span><span class=\"n\">df<\/span><span class=\"p\">.<\/span><span class=\"nf\">show<\/span><span class=\"p\">()<\/span>\n\n<span class=\"c1\"># Stop the SparkSession when done\n<\/span><span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"nf\">stop<\/span><span class=\"p\">()<\/span>\n\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p><img decoding=\"async\" src=\"https:\/\/media2.dev.to\/dynamic\/image\/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto\/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fi7xfwb5sm8928q3x8ne3.png\" alt=\"\u062a\u0648\u0636\u06cc\u062d\u0627\u062a \u062a\u0635\u0648\u06cc\u0631\" loading=\"lazy\" width=\"716\" height=\"697\" title=\"\"><\/p>\n<p>\u06a9\u0627\u0631 \u0645\u06cc \u06a9\u0646\u062f! \u0628\u0647 \u0633\u0644\u0627\u0645\u062a\u06cc <\/p>\n<h2><span class=\"ez-toc-section\" id=\"%D9%85%D8%B1%D8%AC%D8%B9\"><\/span>\n<p>  \u0645\u0631\u062c\u0639<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<\/p><\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u0645\u0642\u062f\u0645\u0647 PySpark API \u067e\u0627\u06cc\u062a\u0648\u0646 \u0628\u0631\u0627\u06cc Apache Spark \u0627\u0633\u062a\u060c \u06cc\u06a9 \u0633\u06cc\u0633\u062a\u0645 \u0645\u062d\u0627\u0633\u0628\u0627\u062a\u06cc \u062a\u0648\u0632\u06cc\u0639 \u0634\u062f\u0647 \u0645\u0646\u0628\u0639 \u0628\u0627\u0632 \u06a9\u0647 \u067e\u0631\u062f\u0627\u0632\u0634 \u0633\u0631\u06cc\u0639 \u0648 \u0645\u0642\u06cc\u0627\u0633 \u067e\u0630\u06cc\u0631 \u062f\u0627\u062f\u0647 \u0631\u0627 \u0627\u0645\u06a9\u0627\u0646 \u067e\u0630\u06cc\u0631 \u0645\u06cc \u06a9\u0646\u062f. PySpark \u0628\u0647 \u062a\u0648\u0633\u0639\u0647 \u062f\u0647\u0646\u062f\u06af\u0627\u0646 Python \u0627\u062c\u0627\u0632\u0647 \u0645\u06cc \u062f\u0647\u062f \u062a\u0627 \u0627\u0632 \u0642\u0627\u0628\u0644\u06cc\u062a \u0647\u0627\u06cc \u0642\u062f\u0631\u062a\u0645\u0646\u062f Spark \u0628\u0631\u0627\u06cc \u062a\u062c\u0632\u06cc\u0647 \u0648 \u062a\u062d\u0644\u06cc\u0644 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc \u0628\u0632\u0631\u06af\u060c \u06cc\u0627\u062f\u06af\u06cc\u0631\u06cc \u0645\u0627\u0634\u06cc\u0646 \u0648 \u06a9\u0627\u0631\u0647\u0627\u06cc \u0645\u0647\u0646\u062f\u0633\u06cc \u062f\u0627\u062f\u0647 \u0628\u062f\u0648\u0646 &hellip;<\/p>\n","protected":false},"author":2,"featured_media":93948,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"fifu_image_url":"https:\/\/media2.dev.to\/dynamic\/image\/width=1000,height=500,fit=cover,gravity=auto,format=auto\/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fstz20pmg6i0sl8iianqv.png","fifu_image_alt":"","footnotes":""},"categories":[339],"tags":[],"class_list":["post-93947","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-dev"],"_links":{"self":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts\/93947","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/comments?post=93947"}],"version-history":[{"count":0,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts\/93947\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/media\/93948"}],"wp:attachment":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/media?parent=93947"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/categories?post=93947"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/tags?post=93947"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}