from pathlib import Path
import pandas as pd

# Locate the dataset relative to the current working directory.
# NOTE(review): this resolves correctly only when the script/notebook is
# started from a directory two levels below the one containing "data/" —
# confirm against how it is launched.
BASE_DIR = Path.cwd()

file_path = BASE_DIR / ".." / ".." / "data" / "iad_dca_states_smoketest.parquet"

df = pd.read_parquet(file_path)

# Parse timestamps as timezone-aware UTC values.
df["snapshot_time"] = pd.to_datetime(df["snapshot_time"], utc=True)

# Order rows by aircraft, then chronologically within each aircraft.
df = df.sort_values(["icao24", "snapshot_time"])

# Remove duplicates while snapshot_time is still a column.
# .copy() gives an independent frame so the column assignment below does not
# trigger pandas' chained-assignment warning.
df = df.drop_duplicates(subset=["icao24", "snapshot_time"]).copy()

# Forward-fill gaps *within each aircraft only*. A frame-wide ffill() would
# carry the last known values of one aircraft into the first rows of the next
# one (rows are sorted by icao24), fabricating altitude/vertical-rate readings.
# This is done before the timestamp becomes the (non-unique) index so the
# assignment aligns on unique row labels.
value_columns = df.columns.difference(["icao24", "snapshot_time"])
df[value_columns] = df.groupby("icao24")[value_columns].ffill()

# NOW set index (required by the time-based resampling further down).
df = df.set_index("snapshot_time")

# Keep only rows with non-negative velocity and altitude. Comparisons with NaN
# are False, so rows that still have no value after the per-aircraft fill
# (e.g. leading gaps) are dropped here as well.
df = df[df["velocity"] >= 0]
df = df[df["geo_altitude"] >= 0]
# 10-second digital twin dataset: per-aircraft mean of every kinematic column
# in 10-second bins. Bins that contain no samples come out as NaN rows.
# NOTE(review): true_track looks like a heading in degrees; an arithmetic mean
# is misleading across the 0/360 wrap — confirm whether a circular mean is
# needed.
twin_aggregations = dict.fromkeys(
    [
        "latitude",
        "longitude",
        "velocity",
        "geo_altitude",
        "vertical_rate",
        "true_track",
    ],
    "mean",
)
ten_second_bins = df.groupby("icao24").resample("10s")
df_resampled = ten_second_bins.agg(twin_aggregations).reset_index()

# 1-minute ML dataset: same idea, coarser bins and fewer columns.
minute_aggregations = dict.fromkeys(
    ["velocity", "geo_altitude", "vertical_rate"],
    "mean",
)
one_minute_bins = df.groupby("icao24").resample("1min")
df_minute = one_minute_bins.agg(minute_aggregations).reset_index()

# Quick visual sanity check of both outputs.
print(df_resampled.head())
print(df_minute.head())
   icao24             snapshot_time  latitude  longitude  velocity  \
0  a01097 2026-02-16 21:27:10+00:00   39.0827   -77.4145    158.41   
1  a0b845 2026-02-16 21:27:10+00:00   38.8418   -77.0379      0.00   
2  a0b845 2026-02-16 21:27:20+00:00       NaN        NaN       NaN   
3  a0b845 2026-02-16 21:27:30+00:00       NaN        NaN       NaN   
4  a0b845 2026-02-16 21:27:40+00:00       NaN        NaN       NaN   

   geo_altitude  vertical_rate  true_track  
0       3970.02           6.83      337.27  
1       3970.02           6.83      101.25  
2           NaN            NaN         NaN  
3           NaN            NaN         NaN  
4           NaN            NaN         NaN  
   icao24             snapshot_time  velocity  geo_altitude  vertical_rate
0  a01097 2026-02-16 21:27:00+00:00    158.41       3970.02           6.83
1  a0b845 2026-02-16 21:27:00+00:00      0.00       3970.02           6.83
2  a0b845 2026-02-16 21:28:00+00:00     23.66       3970.02           6.83
3  a0b845 2026-02-16 21:29:00+00:00     89.11        541.02          16.58
4  a12322 2026-02-16 21:27:00+00:00      0.00        541.02          16.58
import matplotlib.pyplot as plt

# Altitude profile of a single aircraft over time.
aircraft = df[df["icao24"] == "a0b845"]

plt.figure()
plt.plot(aircraft.index, aircraft["geo_altitude"])
plt.title("Altitude over time")
plt.show()

# Each remaining plot gets its own figure and its own show(); otherwise the
# histogram and the scatter are drawn on the same axes and, when run as a
# script, are never displayed.
plt.figure()
df["velocity"].hist(bins=50)
plt.show()

plt.figure()
plt.scatter(df["longitude"], df["latitude"], s=1)
plt.show()

# Feature Engineering
# All features are computed per aircraft so values never cross from one
# aircraft's track into another's.
velocity_by_aircraft = df.groupby("icao24")["velocity"]
altitude_by_aircraft = df.groupby("icao24")["geo_altitude"]

# shift/diff/transform all return results indexed exactly like the input rows,
# in the input order. That matters here because the index (snapshot_time) is
# shared between aircraft: stripping the group level from a grouped rolling
# result and assigning it back only lines up if the rows happen to be sorted
# by aircraft, and fails on duplicate labels otherwise.
#
# assign() builds a new frame instead of writing into a filtered one, which
# avoids pandas' chained-assignment warning.
df = df.assign(
    # Lag feature: previous velocity sample of the same aircraft.
    velocity_lag1=velocity_by_aircraft.shift(1),
    # Mean over the current and four preceding samples; NaN until an aircraft
    # has five samples.
    velocity_rolling_mean=velocity_by_aircraft.transform(
        lambda series: series.rolling(window=5).mean()
    ),
    # Sample-to-sample differences.
    # NOTE(review): these are per-sample deltas, not per-second rates — they
    # are not divided by the time between snapshots. Confirm that is intended.
    acceleration=velocity_by_aircraft.diff(),
    altitude_rate=altitude_by_aircraft.diff(),
)
# Per-aircraft summary statistics. The result has two-level column labels of
# the form (source column, statistic).
summary_spec = {
    "velocity": ["mean", "max", "std"],
    "geo_altitude": ["mean", "max"],
    "vertical_rate": ["mean", "std"],
}
per_aircraft = df.groupby("icao24")
flight_features = per_aircraft.agg(summary_spec)

print(flight_features)
          velocity                    geo_altitude           vertical_rate  \
              mean     max        std         mean       max          mean   
icao24                                                                       
a01097  158.410000  158.41        NaN      3970.02   3970.02      6.830000   
a0b845   37.590000   89.11  46.159308      2827.02   3970.02     10.080000   
a12322    0.000000    0.00   0.000000       541.02    541.02     16.580000   
a12aa5    2.076667    6.17   3.545058       541.02    541.02     16.580000   
a12ac9   91.695000  100.54  12.508719       567.69    571.50      0.165000   
a12e57    0.000000    0.00   0.000000       571.50    571.50      0.000000   
a1afde    2.830000    6.43   3.283428       571.50    571.50      0.000000   
a203a9  105.103333  128.64  25.364149       777.24   1455.42     14.630000   
a240dc   80.306667   88.74   8.800508       579.12    708.66     -2.816667   
a29aff    4.890000    4.89        NaN       419.10    419.10     -4.550000   
a2b10b    1.153333    3.34   1.893709       419.10    419.10     -4.550000   
a2bc9e    0.060000    0.06   0.000000       419.10    419.10     -4.550000   
a2e2cd   45.110000   46.53   1.347145       378.46    563.88      3.686667   
a33d97  232.665000  233.61   1.336432      9147.81   9151.62      0.000000   
a369af  118.993333  133.40  18.864955      1328.42   1950.72     12.896667   
a3afe1    7.030000    7.72   0.788860      1950.72   1950.72      5.200000   
a406e3    7.290000    7.72   0.535630      1950.72   1950.72      5.200000   
a4095f   80.433333   85.42   4.326400       449.58    693.42     -4.120000   
a43ce3   46.106667   51.44   5.747150       294.64    335.28      1.406667   
a4491d   78.893333   87.48   7.454430       312.42    548.64     -4.226667   
a4798f    4.120000    4.12   0.000000        83.82     83.82     -3.250000   
a4f7cb    2.830000    2.83   0.000000        83.82     83.82     -3.250000   
a59b2b  206.743333  212.80   5.830046     10350.50  10972.80     -8.560000   
a5cf3f    1.290000    1.29        NaN      9753.60   9753.60     -3.900000   
a63388    1.716667    2.06   0.391706      9753.60   9753.60     -3.900000   
a65b64  129.716667  136.51   8.819123      2367.28   3192.78     13.330000   
a66174    5.400000    5.40        NaN      3192.78   3192.78     13.980000   
a668eb   60.076667   65.39   6.678266       342.90    381.00     -1.410000   
a6c71c  170.490000  188.94  18.550809      3274.06   3741.42      9.973333   
a6eeb9    0.770000    0.77   0.000000      3741.42   3741.42      8.130000   
a73d54   40.240000   49.46   8.750583       203.20    312.42      1.300000   
a7b133    1.540000    1.54   0.000000       312.42    312.42      1.950000   
a845f4    0.000000    0.00   0.000000       312.42    312.42      1.950000   
a8b5fc   19.376667   29.32   8.852990       312.42    312.42      1.950000   
a8dffc    0.000000    0.00   0.000000       312.42    312.42      1.950000   
a8fc72  123.820000  123.82        NaN      1043.94   1043.94     13.000000   
a9739c   71.416667   74.38   2.578960       200.66    419.10     -3.903333   
a9ea96   89.760000   89.76        NaN       716.28    716.28      0.000000   
aa7dad    0.000000    0.00   0.000000       716.28    716.28      0.000000   
aa7e56  228.980000  228.98        NaN     10927.08  10927.08      0.000000   
aa7ebc  144.460000  144.46        NaN      2049.78   2049.78      6.180000   
ab9968   29.280000   29.28   0.000000       106.68    106.68     -2.930000   
ab9e5a   56.763333   62.25   6.634345       121.92    251.46     -2.060000   
abcb63   74.196667   77.07   2.880023       632.46    670.56      0.216667   
ac4ace    2.060000    2.06        NaN       670.56    670.56     -0.330000   
ac6c2c  100.725000  104.65   5.550788       967.74   1059.18     -3.090000   
ade8fa    5.573333    6.43   0.784496       876.30    876.30     -1.950000   
ae481f   59.926667   62.90   3.492869       411.48    419.10     -0.650000   
c05d08    2.100000    2.83   1.264397       419.10    419.10      0.000000   
c07572    0.000000    0.00   0.000000       419.10    419.10      0.000000   

                  
             std  
icao24            
a01097       NaN  
a0b845  5.629165  
a12322  0.000000  
a12aa5  0.000000  
a12ac9  0.233345  
a12e57  0.000000  
a1afde  0.000000  
a203a9  1.492615  
a240dc  2.460860  
a29aff       NaN  
a2b10b  0.000000  
a2bc9e  0.000000  
a2e2cd  1.538712  
a33d97  0.000000  
a369af  6.824605  
a3afe1  0.000000  
a406e3  0.000000  
a4095f  0.190526  
a43ce3  2.727202  
a4491d  0.861762  
a4798f  0.000000  
a4f7cb  0.000000  
a59b2b  4.038849  
a5cf3f       NaN  
a63388  0.000000  
a65b64  2.343608  
a66174       NaN  
a668eb  2.464386  
a6c71c  3.192747  
a6eeb9  0.000000  
a73d54  3.303317  
a7b133  0.000000  
a845f4  0.000000  
a8b5fc  0.000000  
a8dffc  0.000000  
a8fc72       NaN  
a9739c  0.325013  
a9ea96       NaN  
aa7dad  0.000000  
aa7e56       NaN  
aa7ebc       NaN  
ab9968  0.000000  
ab9e5a  1.313887  
abcb63  1.234396  
ac4ace       NaN  
ac6c2c  1.612203  
ade8fa  0.000000  
ae481f  1.125833  
c05d08  0.000000  
c07572  0.000000  
# Collapse the two-level (column, statistic) labels into flat names such as
# "velocity_mean".
flight_features.columns = flight_features.columns.map("_".join)

# Drop every aircraft that has at least one missing statistic (for example a
# NaN standard deviation).
flight_features = flight_features.dropna(axis=0, how="any")