{"id":88180,"date":"2024-12-14T06:54:42","date_gmt":"2024-12-14T03:24:42","guid":{"rendered":"https:\/\/nabfollower.com\/blog\/azure-synapse-pyspark-toolbox-002-dataframe-transformation-500f\/"},"modified":"2024-12-14T06:54:42","modified_gmt":"2024-12-14T03:24:42","slug":"azure-synapse-pyspark-toolbox-002-dataframe-transformation-500f","status":"publish","type":"post","link":"https:\/\/nabfollower.com\/blog\/azure-synapse-pyspark-toolbox-002-dataframe-transformation-500f\/","title":{"rendered":"Azure Synapse PySpark Toolbox 002: DataFrame Transformation"},"content":{"rendered":"<p>Summarize this content to 400 words in Persian Lang<br \/>\n              \u0645\u062d\u062a\u0648\u06cc\u0627\u062a \u062c\u0639\u0628\u0647 \u0627\u0628\u0632\u0627\u0631<\/p>\n<p>\u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame \u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0627\u0632 DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f<\/p>\n<p>\u0645\u0634\u062e\u0635\u0627\u062a \u0645\u062d\u06cc\u0637\u06cc<\/p>\n<p>Azure Synapse Runtime \u0628\u0631\u0627\u06cc Apache Spark 3.4<br \/>\nAzure Data Lake Storage<br \/>\n\u062e\u0631\u06a9 \u06a9\u0644\u06cc\u062f \u0644\u0627\u062c\u0648\u0631\u062f\u06cc<\/p>\n<p>\u0648\u0627\u0628\u0633\u062a\u06af\u06cc \u0647\u0627\u06cc \u0648\u0627\u0631\u062f\u0627\u062a\u06cc<\/p>\n<p>import requests<br \/>\nfrom pyspark.sql import types as T, functions as F<br \/>\nimport json<br \/>\nimport datetime<br \/>\nimport logging<\/p>\n<p>    \u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/p>\n<p>    \u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/p>\n<p>  \u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame <\/p>\n<p>\u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame (\u0627\u0641\u0632\u0648\u062f\u0646 \u0633\u062a\u0648\u0646\u200c\u0647\u0627\u06cc \u062c\u062f\u06cc\u062f\u060c \u062a\u063a\u06cc\u06cc\u0631 \u0646\u0627\u0645\u060c \u067e\u0631 \u06a9\u0631\u062f\u0646 \u0645\u0642\u0627\u062f\u06cc\u0631 na\u060c \u0641\u06cc\u0644\u062a\u0631 \u0631\u062f\u06cc\u0641\u200c\u0647\u0627\u060c \u0627\u0646\u062a\u062e\u0627\u0628 \u0633\u062a\u0648\u0646\u200c\u0647\u0627\u060c \u0631\u0647\u0627 \u06a9\u0631\u062f\u0646 \u0645\u0648\u0627\u0631\u062f \u062a\u06a9\u0631\u0627\u0631\u06cc\u060c \u0628\u0631\u06af\u0631\u062f\u0627\u0646\u062f\u0646 \u06cc\u06a9 \u0634\u06cc PySpark Dataframe)<\/p>\n<p>def df_transform_basic(<br \/>\n    df_input, #DataFrame<br \/>\n    step1_new_cols, #List of Dictionaries [{&#8216;new_col_name&#8217;: &#8216;xxx&#8217;, &#8216;new_col_expr&#8217;: &#8216;expression string for F.expr() function&#8217;}]\n    step2_col_name_mapping, #Dictionary of old-new names mapping<br \/>\n    step3_na_values,<br \/>\n    step4_filter_expr, #String parameter for df.filter() function<br \/>\n    step5_result_col_list, #List of column names<br \/>\n    step6_drop_dup, #True\/False<br \/>\n    timestamp_name<br \/>\n):<br \/>\n    df_output = df_input<\/p>\n<p>    if step1_new_cols:<br \/>\n        for val in step1_new_cols:<br \/>\n            df_output = df_output.withColumn(val[&#8216;new_col_name&#8217;], F.expr(val[&#8216;new_col_expr&#8217;]))<\/p>\n<p>    if step2_col_name_mapping:<br \/>\n        df_output = df_output.withColumnsRenamed(step2_col_name_mapping)<\/p>\n<p>    if step3_na_values:<br \/>\n        df_output = df_output.na.fill(step3_na_values)<\/p>\n<p>    if step4_filter_expr:<br \/>\n        df_output = df_output.filter(step4_filter_expr)<\/p>\n<p>    if step5_result_col_list:<br \/>\n        df_output = df_output.select(*step5_result_col_list)<\/p>\n<p>    if step6_drop_dup:<br \/>\n        df_output = df_output.dropDuplicates()<\/p>\n<p>    if timestamp_name:<br \/>\n        df_output = df_output.withColumn(timestamp_name, F.lit(F.current_timestamp()))<\/p>\n<p>    return df_output<\/p>\n<p>    \u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/p>\n<p>    \u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/p>\n<p>\u0628\u0627\u0632\u06af\u0634\u062a \u0628\u0647 \u0628\u0627\u0644\u0627<\/p>\n<p>  \u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0627\u0632 DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f <\/p>\n<p>\u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0628\u0631 \u0627\u0633\u0627\u0633 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f \u0648 DataFrame \u0628\u0647 \u0631\u0648\u0632 \u0634\u062f\u0647 \u0648\u0627\u0644\u062f\u06cc\u0646 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f. \u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc \u0647\u0631 \u062a\u062c\u0645\u0639 \u062f\u0631 \u0634\u06cc\u0621 \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc summary_config \u062a\u0639\u0631\u06cc\u0641 \u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<p>def df_summary(<br \/>\n    parent_df, #DataFrame<br \/>\n    child_df, #DataFrame<br \/>\n    summary_config<br \/>\n):<\/p>\n<p># Example of summary_config<br \/>\n# summary_config=[{&#8216;agg&#8217; : {&#8216;RevisionNum&#8217;: &#8216;count&#8217;}, &#8216;agg_result_col_name&#8217; : &#8216;CountOfRevisions&#8217;, &#8216;na_fill_value&#8217; : 0, &#8216;join_on&#8217; = [&#8216;Contract&#8217;, &#8216;RevisionNum&#8217;]},<br \/>\n#        {&#8216;agg&#8217; : {&#8216;RevisionNum&#8217;: &#8216;max&#8217;}, &#8216;agg_result_col_name&#8217; : &#8216;RecentRevNum&#8217;, &#8216;na_fill_value&#8217; : 0, &#8216;join_on&#8217; = [&#8216;Contract&#8217;, &#8216;RevisionNum&#8217;]}]\n<p>    if summary_config:<br \/>\n        for summary_item in summary_config:<br \/>\n            agg_col = list(summary_item[&#8216;agg&#8217;])[0]\n            agg_fun = summary_item[&#8216;agg&#8217;][agg_col]\n            child_df_grouped = child_df.groupBy(*join_on).agg(summary_item[&#8216;agg&#8217;]).withColumnRenamed(f&#8221;{agg_fun}({agg_col})&#8221;, summary_item[&#8216;agg_result_col_name&#8217;])<br \/>\n            parent_df = parent_df.join(child_df_grouped, summary_item[&#8216;agg&#8217;][&#8216;join_on&#8217;], &#8216;left&#8217;)<br \/>\n            na_value = {}<br \/>\n            na_value[summary_item[&#8216;agg_result_col_name&#8217;]] = summary_item[&#8216;na_fill_value&#8217;]\n            parent_df = parent_df.na.fill(na_value)<\/p>\n<p>    return parent_df<\/p>\n<p>    \u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/p>\n<p>    \u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/p>\n<p>\u0628\u0627\u0632\u06af\u0634\u062a \u0628\u0647 \u0628\u0627\u0644\u0627<\/p>\n<div data-article-id=\"2155953\" id=\"article-body\">\n<p>\u0645\u062d\u062a\u0648\u06cc\u0627\u062a \u062c\u0639\u0628\u0647 \u0627\u0628\u0632\u0627\u0631<\/p>\n<hr\/>\n<p>\u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame <br \/>\u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0627\u0632 DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f<\/p>\n<hr\/>\n<p>\u0645\u0634\u062e\u0635\u0627\u062a \u0645\u062d\u06cc\u0637\u06cc<\/p>\n<ul>\n<li>Azure Synapse Runtime \u0628\u0631\u0627\u06cc Apache Spark 3.4<\/li>\n<li>Azure Data Lake Storage<\/li>\n<li>\u062e\u0631\u06a9 \u06a9\u0644\u06cc\u062f \u0644\u0627\u062c\u0648\u0631\u062f\u06cc<\/li>\n<\/ul>\n<hr\/>\n<p>\u0648\u0627\u0628\u0633\u062a\u06af\u06cc \u0647\u0627\u06cc \u0648\u0627\u0631\u062f\u0627\u062a\u06cc<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight plaintext\"><code>import requests\nfrom pyspark.sql import types as T, functions as F\nimport json\nimport datetime\nimport logging\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<hr\/>\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_81 counter-hierarchy ez-toc-counter-rtl ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">\u0641\u0647\u0631\u0633\u062a \u0645\u0637\u0627\u0644\u0628<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/nabfollower.com\/blog\/azure-synapse-pyspark-toolbox-002-dataframe-transformation-500f\/#%D8%AA%D8%A8%D8%AF%DB%8C%D9%84_%D9%BE%D8%A7%DB%8C%D9%87_DataFrame\" >\u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/nabfollower.com\/blog\/azure-synapse-pyspark-toolbox-002-dataframe-transformation-500f\/#%D8%B3%D8%AA%D9%88%D9%86_%D9%87%D8%A7%DB%8C_%D8%AA%D8%AC%D9%85%D8%B9%DB%8C_%D8%B1%D8%A7_%D8%A7%D8%B2_DataFrame_%D9%81%D8%B1%D8%B2%D9%86%D8%AF_%D8%A7%DB%8C%D8%AC%D8%A7%D8%AF_%DA%A9%D9%86%DB%8C%D8%AF\" >\u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0627\u0632 DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f<\/a><\/li><\/ul><\/nav><\/div>\n<h4><span class=\"ez-toc-section\" id=\"%D8%AA%D8%A8%D8%AF%DB%8C%D9%84_%D9%BE%D8%A7%DB%8C%D9%87_DataFrame\"><\/span>\n<p>  \u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<p>\u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame (\u0627\u0641\u0632\u0648\u062f\u0646 \u0633\u062a\u0648\u0646\u200c\u0647\u0627\u06cc \u062c\u062f\u06cc\u062f\u060c \u062a\u063a\u06cc\u06cc\u0631 \u0646\u0627\u0645\u060c \u067e\u0631 \u06a9\u0631\u062f\u0646 \u0645\u0642\u0627\u062f\u06cc\u0631 na\u060c \u0641\u06cc\u0644\u062a\u0631 \u0631\u062f\u06cc\u0641\u200c\u0647\u0627\u060c \u0627\u0646\u062a\u062e\u0627\u0628 \u0633\u062a\u0648\u0646\u200c\u0647\u0627\u060c \u0631\u0647\u0627 \u06a9\u0631\u062f\u0646 \u0645\u0648\u0627\u0631\u062f \u062a\u06a9\u0631\u0627\u0631\u06cc\u060c \u0628\u0631\u06af\u0631\u062f\u0627\u0646\u062f\u0646 \u06cc\u06a9 \u0634\u06cc PySpark Dataframe)<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight plaintext\"><code>def df_transform_basic(\n    df_input, #DataFrame\n    step1_new_cols, #List of Dictionaries [{'new_col_name': 'xxx', 'new_col_expr': 'expression string for F.expr() function'}]\n    step2_col_name_mapping, #Dictionary of old-new names mapping\n    step3_na_values,\n    step4_filter_expr, #String parameter for df.filter() function\n    step5_result_col_list, #List of column names\n    step6_drop_dup, #True\/False\n    timestamp_name\n):\n    df_output = df_input\n\n    if step1_new_cols:\n        for val in step1_new_cols:\n            df_output = df_output.withColumn(val['new_col_name'], F.expr(val['new_col_expr']))\n\n    if step2_col_name_mapping:\n        df_output = df_output.withColumnsRenamed(step2_col_name_mapping)\n\n    if step3_na_values:\n        df_output = df_output.na.fill(step3_na_values)\n\n    if step4_filter_expr:\n        df_output = df_output.filter(step4_filter_expr)\n\n    if step5_result_col_list:\n        df_output = df_output.select(*step5_result_col_list)\n\n    if step6_drop_dup:\n        df_output = df_output.dropDuplicates()\n\n    if timestamp_name:\n        df_output = df_output.withColumn(timestamp_name, F.lit(F.current_timestamp()))\n\n    return df_output\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u0628\u0627\u0632\u06af\u0634\u062a \u0628\u0647 \u0628\u0627\u0644\u0627<\/p>\n<hr\/>\n<h4><span class=\"ez-toc-section\" id=\"%D8%B3%D8%AA%D9%88%D9%86_%D9%87%D8%A7%DB%8C_%D8%AA%D8%AC%D9%85%D8%B9%DB%8C_%D8%B1%D8%A7_%D8%A7%D8%B2_DataFrame_%D9%81%D8%B1%D8%B2%D9%86%D8%AF_%D8%A7%DB%8C%D8%AC%D8%A7%D8%AF_%DA%A9%D9%86%DB%8C%D8%AF\"><\/span>\n<p>  \u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0627\u0632 DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f<br \/>\n<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<p>\u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0628\u0631 \u0627\u0633\u0627\u0633 \u062f\u0627\u062f\u0647 \u0647\u0627\u06cc DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f \u0648 DataFrame \u0628\u0647 \u0631\u0648\u0632 \u0634\u062f\u0647 \u0648\u0627\u0644\u062f\u06cc\u0646 \u0631\u0627 \u0628\u0631\u0645\u06cc \u06af\u0631\u062f\u0627\u0646\u062f. \u067e\u06cc\u06a9\u0631\u0628\u0646\u062f\u06cc \u0647\u0631 \u062a\u062c\u0645\u0639 \u062f\u0631 \u0634\u06cc\u0621 \u062f\u06cc\u06a9\u0634\u0646\u0631\u06cc summary_config \u062a\u0639\u0631\u06cc\u0641 \u0634\u062f\u0647 \u0627\u0633\u062a.<\/p>\n<div class=\"highlight js-code-highlight\">\n<pre class=\"highlight plaintext\"><code>def df_summary(\n    parent_df, #DataFrame\n    child_df, #DataFrame\n    summary_config\n):\n\n# Example of summary_config\n# summary_config=[{'agg' : {'RevisionNum': 'count'}, 'agg_result_col_name' : 'CountOfRevisions', 'na_fill_value' : 0, 'join_on' = ['Contract', 'RevisionNum']},\n#        {'agg' : {'RevisionNum': 'max'}, 'agg_result_col_name' : 'RecentRevNum', 'na_fill_value' : 0, 'join_on' = ['Contract', 'RevisionNum']}]\n\n    if summary_config:\n        for summary_item in summary_config:\n            agg_col = list(summary_item['agg'])[0]\n            agg_fun = summary_item['agg'][agg_col]\n            child_df_grouped = child_df.groupBy(*join_on).agg(summary_item['agg']).withColumnRenamed(f\"{agg_fun}({agg_col})\", summary_item['agg_result_col_name'])\n            parent_df = parent_df.join(child_df_grouped, summary_item['agg']['join_on'], 'left')\n            na_value = {}\n            na_value[summary_item['agg_result_col_name']] = summary_item['na_fill_value']\n            parent_df = parent_df.na.fill(na_value)\n\n    return parent_df\n<\/code><\/pre>\n<div class=\"highlight__panel js-actions-panel\">\n<div class=\"highlight__panel-action js-fullscreen-code-action\">\n    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-on\"><title>\u0648\u0627\u0631\u062f \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M16 3h6v6h-2V5h-4V3zM2 3h6v2H4v4H2V3zm18 16v-4h2v6h-6v-2h4zM4 19h4v2H2v-6h2v4z\"\/>\n<\/svg><\/p>\n<p>    <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" class=\"highlight-action crayons-icon highlight-action--fullscreen-off\"><title>\u0627\u0632 \u062d\u0627\u0644\u062a \u062a\u0645\u0627\u0645 \u0635\u0641\u062d\u0647 \u062e\u0627\u0631\u062c \u0634\u0648\u06cc\u062f<\/title>\n    <path d=\"M18 7h4v2h-6V3h2v4zM8 9H2V7h4V3h2v6zm10 8v4h-2v-6h6v2h-4zM8 15v6H6v-4H2v-2h6z\"\/>\n<\/svg><\/p>\n<\/div>\n<\/div>\n<\/div>\n<p>\u0628\u0627\u0632\u06af\u0634\u062a \u0628\u0647 \u0628\u0627\u0644\u0627<\/p>\n<\/p><\/div>\n","protected":false},"excerpt":{"rendered":"<p>Summarize this content to 400 words in Persian Lang \u0645\u062d\u062a\u0648\u06cc\u0627\u062a \u062c\u0639\u0628\u0647 \u0627\u0628\u0632\u0627\u0631 \u062a\u0628\u062f\u06cc\u0644 \u067e\u0627\u06cc\u0647 DataFrame \u0633\u062a\u0648\u0646 \u0647\u0627\u06cc \u062a\u062c\u0645\u0639\u06cc \u0631\u0627 \u0627\u0632 DataFrame \u0641\u0631\u0632\u0646\u062f \u0627\u06cc\u062c\u0627\u062f \u06a9\u0646\u06cc\u062f \u0645\u0634\u062e\u0635\u0627\u062a \u0645\u062d\u06cc\u0637\u06cc Azure Synapse Runtime \u0628\u0631\u0627\u06cc Apache Spark 3.4 Azure Data Lake Storage \u062e\u0631\u06a9 \u06a9\u0644\u06cc\u062f \u0644\u0627\u062c\u0648\u0631\u062f\u06cc \u0648\u0627\u0628\u0633\u062a\u06af\u06cc \u0647\u0627\u06cc \u0648\u0627\u0631\u062f\u0627\u062a\u06cc import requests from pyspark.sql import types as T, functions as F import &hellip;<\/p>\n","protected":false},"author":2,"featured_media":88181,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"fifu_image_url":"","fifu_image_alt":"","footnotes":""},"categories":[339],"tags":[],"class_list":["post-88180","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-dev"],"_links":{"self":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts\/88180","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/comments?post=88180"}],"version-history":[{"count":0,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/posts\/88180\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/media\/88181"}],"wp:attachment":[{"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/media?parent=88180"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/categories?post=88180"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/nabfollower.com\/blog\/wp-json\/wp\/v2\/tags?post=88180"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}