import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job

# Resolve the job arguments from sys.argv; JOB_NAME is the only option this
# script asks for.
required_options = ["JOB_NAME"]
args = getResolvedOptions(sys.argv, required_options)

# Create the Spark context, wrap it in a Glue context, and keep a handle on
# the Spark session exposed by the Glue context (used for SQL further down).
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Create the Glue job object and initialise it with the job name and the
# resolved arguments.
job = Job(glueContext)
job_name = args["JOB_NAME"]
job.init(job_name, args)

# Script generated for node Amazon S3
# CSV reader options: double-quote as the quote character, comma as the
# separator, header row enabled, performance-optimised reader disabled.
csv_format_options = {
    "quoteChar": '"',
    "withHeader": True,
    "separator": ",",
    "optimizePerformance": False,
}

# Replace this S3 address with the path to the CSV file in your own bucket.
csv_source_options = {"paths": ["s3://bucket/folder/SAMPLE_DATA.csv"]}

# Read the CSV from S3 as a Glue DynamicFrame ...
csv_dynamic_frame = glueContext.create_dynamic_frame.from_options(
    connection_type="s3",
    format="csv",
    format_options=csv_format_options,
    connection_options=csv_source_options,
    transformation_ctx="AmazonS3_node1679503722761",
)

# ... and convert it to a Spark DataFrame for use with Spark SQL.
dataFrameFromCSV = csv_dynamic_frame.toDF()

# Names are defined once so the temp view registration and the SQL statement
# below cannot drift apart.
temp_view_name = "tmp_csv_df"
iceberg_table_name = "glue_catalog.sampledb.sample_table1"

# Expose the DataFrame to Spark SQL as a temporary view (replaces any existing
# view with the same name).
dataFrameFromCSV.createOrReplaceTempView(temp_view_name)

# Create an Iceberg table in glue_catalog populated with every row of the view
# (CREATE TABLE ... AS SELECT).
# NOTE(review): plain CREATE TABLE is expected to fail if the table already
# exists, so re-running this job against the same table will likely error out.
# Confirm whether CREATE OR REPLACE TABLE is wanted for re-runs.
query = f"""
CREATE TABLE {iceberg_table_name}
USING iceberg
AS SELECT * FROM {temp_view_name}
"""

spark.sql(query)

# Mark the Glue job run as complete.
job.commit()
