import pandas as pd
from sqlalchemy import create_engine, text

class StagingTableCreator:
    def __init__(self, csv_file, connection_string, schema='staging'):
        """
        Initialize with the CSV file path, database connection string, and target schema.
        """
        self.csv_file = csv_file
        self.connection_string = connection_string
        self.schema = schema
        self.df = None
        self.table_name = None
        self.engine = create_engine(self.connection_string)

    def load_csv(self):
        """
        Load the CSV into a pandas DataFrame and print the columns.
        """
        self.df = pd.read_csv(self.csv_file)
        print("CSV loaded. Columns:", list(self.df.columns))
        return self.df

    def infer_sql_types(self):
        """
        Infer SQL data types from the pandas DataFrame types.
        """
        type_mapping = {
            'object': 'VARCHAR(3000)',
            'int64': 'BIGINT',
            'float64': 'FLOAT',
            'bool': 'BOOLEAN',
            'datetime64[ns]': 'DATETIME'
        }
        columns_info = {}
        for col in self.df.columns:
            dtype = str(self.df[col].dtype)
            sql_type = type_mapping.get(dtype, 'VARCHAR(3000)')
            columns_info[col] = sql_type
        print("Inferred SQL types:", columns_info)
        return columns_info

    def generate_create_table_script(self, table_name):
        """
        Generate a CREATE TABLE SQL statement based on the CSV schema.
        """
        self.table_name = table_name
        columns_info = self.infer_sql_types()
        cols_def = ",\n".join([f"[{col}] {sql_type}" for col, sql_type in columns_info.items()])
        create_table_sql = f"CREATE TABLE {self.schema}.[{table_name}] (\n{cols_def}\n);"
        print("Create table script generated:\n", create_table_sql)
        return create_table_sql

    def create_staging_table(self, table_name):
        """
        Connect to the database and execute the create table statement.
        """
        create_table_sql = self.generate_create_table_script(table_name)
        with self.engine.connect() as connection:
            connection.execute(text(create_table_sql))
        print(f"Table {self.schema}.[{table_name}] created.")
        return True

    def load_data(self):
        """
        Bulk load the CSV data into the newly created table.
        """
        if self.table_name is None or self.df is None:
            print("Table not created or CSV not loaded.")
            return False
        # Use pandas to_sql for bulk insert
        self.df.to_sql(self.table_name, con=self.engine, schema=self.schema,
                       if_exists='append', index=False)
        print(f"Data loaded into table {self.schema}.[{self.table_name}].")
        return True
    def run(self, table_name):
        """
        Run the entire process.
        """
        self.load_csv()
        self.generate_create_table_script(table_name)
        self.create_staging_table(table_name)
        self.load_data()
        


