from pathlib import Path
import sys
import importlib
import polars as pl 

# Relax Polars' table-printing limits so wide/long frames are not truncated.
# Each setter is applied in turn through a single chained expression.
(
    pl.Config
    .set_tbl_rows(1000)          # row limit for printed tables
    .set_tbl_cols(100)           # column limit for printed tables
    .set_tbl_width_chars(200)    # table width setting, in characters
)


# Find the nearest directory named "shared-notebooks", starting at the
# working directory itself and then moving up through each ancestor.
current = Path.cwd()
for current in (current, *current.parents):
    if current.name == "shared-notebooks":
        break
else:
    # Every candidate up to the filesystem root was checked without a match.
    raise RuntimeError("Could not locate shared-notebooks directory")

# Make <shared-notebooks>/common_utils/python importable, adding it to the end
# of sys.path only when it is not already listed.
utils_path = current.joinpath("common_utils", "python")
if str(utils_path) not in sys.path:
    sys.path.append(str(utils_path))

# Project-local module; expected to be importable from the common_utils/python
# directory that was appended to sys.path earlier in this file.
import load_flight_data
# Re-execute the module right after importing it — presumably so that edits to
# load_flight_data are picked up when this code is re-run in a live session.
importlib.reload(load_flight_data)

# Load the data, then pass the result through the module's
# feature-engineering step; `df` is rebound to the engineered result.
df = load_flight_data.load_flight_data()
df = load_flight_data.engineer_features(df)

# Narrow the engineered frame to the columns listed below, in this order.
# NOTE(review): the group headings are inferred from column names only —
# confirm against the schema produced by load_flight_data.
df_work = df.select(
    [
        # calendar / flight identity
        "year", "quarter", "month", "dayof_month", "day_of_week",
        "flight_date", "reporting_airline", "tail_number",
        "flight_number_reporting_airline", "origin", "dest", "distance",
        # departure, taxi and arrival
        "crs_dep_time", "dep_time", "dep_delay", "dep_delay_minutes",
        "dep_del15", "taxi_out", "wheels_off", "wheels_on", "taxi_in",
        "crs_arr_time", "arr_time", "arr_delay", "arr_delay_minutes",
        "arr_del15",
        # status and durations
        "cancelled", "diverted", "crs_elapsed_time", "actual_elapsed_time",
        "air_time",
        # dep_* weather-looking fields
        "dep_tmpf", "dep_dwpf", "dep_relh", "dep_drct", "dep_sknt",
        "dep_p01i", "dep_alti", "dep_mslp", "dep_vsby", "dep_gust",
        "dep_skyc1", "dep_skyl1", "dep_wxcodes", "dep_peak_wind_gust",
        "dep_peak_wind_drct", "dep_weather_severity",
        # schedule, rotation and flag columns
        "sched_dep_hour", "sched_arr_hour", "hour_of_day", "is_weekend",
        "route", "is_delayed", "delay_state", "sched_dep_min",
        "sched_arr_min", "dep_min", "arr_min", "schedule_buffer",
        "prev_origin", "prev_dest", "prev_dep_delay", "prev_arr_delay",
        "prev_arr_min", "prev_dep_min", "prev_flight_date",
        "rotation_leg_number", "flights_per_aircraft_day",
        "cum_dep_delay_day", "cum_arr_delay_day", "curr_sched_dep_abs_min",
        "prev_arr_abs_min", "turnaround_minutes", "inherited_delay",
        "bad_visibility", "high_wind", "precipitation", "severe_weather",
    ]
)

# Keep the first 10 rows and write them to sample_data.csv (a relative path,
# so the file lands in the current working directory).
sample = df_work.head(10)
sample.write_csv("sample_data.csv")