# MLMacro1D
#
# ml_macro_1d.py
"""
ML Macro 1D Strategy — XGBoost + Macro Factors on Daily timeframe
- 50+ features: trend, momentum, volatility, volume, macro regime, sentiment
- XGBoost classifier ensemble (5 models)
- Walk-forward retraining every 60 days
- Min 500 candles for initial training
"""
from pandas import DataFrame, Series
import talib.abstract as ta
import numpy as np
from freqtrade.strategy import IStrategy
from datetime import datetime, timezone
import pickle


class MLMacro1D(IStrategy):
    """Daily-timeframe strategy whose signals come from an XGBoost ensemble.

    Features are produced by ``_build_features``, the ensemble is fitted by
    ``_train_model`` and the averaged class probabilities written by
    ``_predict`` drive the entry/exit columns. The attributes below are
    class-level settings consumed by the ``IStrategy`` base class.
    """

    # Strategy interface version and candle timeframe; shorting is enabled.
    INTERFACE_VERSION = 3
    timeframe = '1d'
    can_short = True

    # Risk management
    # NOTE(review): presumably fractions of the trade value (-0.05 == -5 %);
    # confirm against the freqtrade stoploss documentation.
    stoploss = -0.05
    trailing_stop = True
    trailing_stop_positive = 0.01
    trailing_stop_positive_offset = 0.04
    trailing_only_offset_is_reached = True

    # ROI exits
    # NOTE(review): keys look like minutes since entry (1440 = 1 day,
    # 4320 = 3 days, 8640 = 6 days) mapped to a minimum profit ratio — confirm.
    minimal_roi = {"0": 0.15, "1440": 0.10, "4320": 0.05, "8640": 0}

    max_open_trades = 4
    # Matches the 500-row minimum enforced in _train_model.
    startup_candle_count = 500
    process_only_new_candles = True
    use_exit_signal = True

    def __init__(self, config: dict) -> None:
        """Create the strategy with no trained model.

        Args:
            config: Configuration dictionary, passed unchanged to the parent
                constructor.
        """
        super().__init__(config)
        # Every piece of model state starts out unset; _train_model fills
        # these in once a fit succeeds.
        for attribute in ('model', 'scaler_mean', 'scaler_std',
                          'feature_cols', 'last_train_time'):
            setattr(self, attribute, None)

    # ================================================================
    # FEATURE ENGINEERING — 50+ macro + technical factors
    # ================================================================
    def _build_features(self, dataframe: DataFrame) -> DataFrame:
        """Return a copy of ``dataframe`` extended with the model's features.

        The input needs ``open``, ``high``, ``low``, ``close`` and ``volume``
        columns and is not modified. Rolling indicators leave NaN in their
        warm-up rows, and ratio features can be infinite where a denominator
        is zero, so callers must filter non-finite rows before training.

        Args:
            dataframe: OHLCV candles.

        Returns:
            A new DataFrame containing the original columns plus all feature
            (and intermediate indicator) columns.
        """
        df = dataframe.copy()

        # --- 1. Price Returns (multi-horizon, in percent) ---
        for p in [1, 3, 5, 10, 20, 50]:
            df[f'ret_{p}d'] = df['close'].pct_change(p) * 100

        # --- 2. Distance from Moving Averages (percent of the average) ---
        for p in [5, 10, 20, 50, 100, 200]:
            df[f'ma_{p}'] = ta.SMA(df, timeperiod=p)
            df[f'ma_{p}_dist'] = (df['close'] - df[f'ma_{p}']) / df[f'ma_{p}'] * 100

        # --- 3. EMA Cross Signals (fast EMA minus slow EMA) ---
        ema = {p: ta.EMA(df, timeperiod=p) for p in (5, 20, 50, 200)}
        df['ema_5_20'] = ema[5] - ema[20]
        df['ema_20_50'] = ema[20] - ema[50]
        df['ema_50_200'] = ema[50] - ema[200]

        # --- 4. MACD ---
        macd = ta.MACD(df, fastperiod=12, slowperiod=26, signalperiod=9)
        df['macd'] = macd['macd']
        df['macd_signal'] = macd['macdsignal']
        df['macd_hist'] = df['macd'] - df['macd_signal']
        df['macd_hist_z'] = df['macd_hist'] / df['macd_hist'].rolling(100).std()

        # --- 5. RSI & Stochastic ---
        df['rsi_14'] = ta.RSI(df, timeperiod=14)
        df['rsi_7'] = ta.RSI(df, timeperiod=7)
        df['rsi_28'] = ta.RSI(df, timeperiod=28)
        # Keyword arguments are required here: STOCH's positional order is
        # (fastk_period, slowk_period, slowk_matype, slowd_period,
        # slowd_matype), so a positional "14, 3, 3" would set the %K
        # moving-average TYPE to 3 instead of the %D period.
        stoch = ta.STOCH(df, fastk_period=14, slowk_period=3, slowd_period=3)
        df['stoch_k'] = stoch['slowk']
        df['stoch_d'] = stoch['slowd']

        # --- 6. Trend Strength (ADX / DMI) ---
        df['adx'] = ta.ADX(df, timeperiod=14)
        df['di_plus'] = ta.PLUS_DI(df, timeperiod=14)
        df['di_minus'] = ta.MINUS_DI(df, timeperiod=14)
        df['di_ratio'] = df['di_plus'] / (df['di_minus'] + 1e-10)
        df['trend_strength'] = (df['adx'] - 20) / 20  # centred on ADX == 20

        # --- 7. Bollinger Bands ---
        bb = ta.BBANDS(df, timeperiod=20, nbdevup=2.0, nbdevdn=2.0)
        df['bb_upper'] = bb['upperband']
        df['bb_lower'] = bb['lowerband']
        df['bb_mid'] = bb['middleband']
        df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['bb_mid']
        df['bb_position'] = (df['close'] - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'] + 1e-10)

        # --- 8. Volatility Regime ---
        df['atr_14'] = ta.ATR(df, timeperiod=14)
        df['atr_pct'] = df['atr_14'] / df['close'] * 100
        df['atr_ratio'] = df['atr_14'] / ta.SMA(df['atr_14'], timeperiod=50)
        df['vol_20d'] = df['ret_1d'].rolling(20).std()  # 20-day historical vol

        # --- 9. Drawdown from ATH ---
        df['ath'] = df['close'].expanding().max()
        df['dd_from_ath'] = (df['close'] / df['ath'] - 1) * 100
        # Running count of below-ATH rows, minus the count at the most recent
        # ATH row, i.e. the number of rows spent in the current drawdown.
        df['dd_duration'] = (df['close'] < df['ath']).astype(int).cumsum()
        df['dd_duration'] = df['dd_duration'] - df['dd_duration'].where(df['close'] >= df['ath']).ffill().fillna(0)

        # --- 10. Volume & Money Flow ---
        df['volume_sma_20'] = ta.SMA(df['volume'], timeperiod=20)
        df['volume_sma_50'] = ta.SMA(df['volume'], timeperiod=50)
        df['volume_ratio'] = df['volume'] / df['volume_sma_20']
        df['volume_trend'] = df['volume_sma_20'] / df['volume_sma_50']

        # OBV (On-Balance Volume): signed volume accumulated over time
        df['obv'] = (np.sign(df['close'].diff()) * df['volume']).cumsum()
        df['obv_ma_20'] = ta.SMA(df['obv'], timeperiod=20)
        df['obv_ratio'] = df['obv'] / df['obv_ma_20']

        # MFI (Money Flow Index), computed by hand over a 14-row window
        tp = (df['high'] + df['low'] + df['close']) / 3
        mf = tp * df['volume']
        pos_mf = mf.where(tp > tp.shift(1), 0).rolling(14).sum()
        neg_mf = mf.where(tp < tp.shift(1), 0).rolling(14).sum()
        df['mfi'] = 100 - 100 / (1 + pos_mf / (neg_mf + 1e-10))

        # --- 11. Price Structure ---
        df['hh_20'] = df['high'].rolling(20).max()
        df['ll_20'] = df['low'].rolling(20).min()
        df['channel_position'] = (df['close'] - df['ll_20']) / (df['hh_20'] - df['ll_20'] + 1e-10)
        df['is_20d_high'] = (df['close'] >= df['hh_20']).astype(int)
        df['is_20d_low'] = (df['close'] <= df['ll_20']).astype(int)

        # Reversal signals: a close near one end of a candle whose range is
        # more than three times its body.
        candle_range = df['high'] - df['low']
        body = (df['open'] - df['close']).abs()
        wide_range = candle_range > 3 * body
        hammer = ((df['close'] - df['low']) > 2 * (df['high'] - df['close'])) & wide_range
        shooting_star = ((df['high'] - df['close']) > 2 * (df['close'] - df['low'])) & wide_range
        df['hammer'] = hammer.astype(int)
        df['shooting_star'] = shooting_star.astype(int)

        # --- 12. Return Asymmetry / Skew ---
        df['ret_skew_20'] = df['ret_1d'].rolling(20).skew()
        df['ret_kurt_20'] = df['ret_1d'].rolling(20).kurt()

        # --- 13. Return normalised against its own 20-row history ---
        df['ret_zscore_20'] = (df['ret_1d'] - df['ret_1d'].rolling(20).mean()) / (df['ret_1d'].rolling(20).std() + 1e-10)

        # Serial correlation (trending vs mean-reverting)
        df['autocorr_5'] = df['ret_1d'].rolling(5).apply(lambda x: x.autocorr() if len(x) > 2 else 0, raw=False)

        # --- 14. Volume-Price divergence ---
        df['vol_price_div'] = df['volume_ratio'] - df['ret_1d'].rolling(5).mean()

        return df

    # ================================================================
    # TRAINING DATA PREPARATION
    # ================================================================
    def _get_training_data(self, dataframe: DataFrame) -> tuple:
        df = self._build_features(dataframe)

        # Target: classification for next 5 days
        # 1 = long (>2% up in 5d), -1 = short (>2% down in 5d), 0 = neutral
        future_close = df['close'].shift(-5)
        future_ret = (future_close / df['close'] - 1) * 100
        y = np.where(future_ret > 3.0, 1,
                     np.where(future_ret < -3.0, -1, 0))

        # Feature columns
        exclude = ['date', 'open', 'high', 'low', 'close', 'volume',
                   'enter_long', 'enter_short', 'exit_long', 'exit_short',
                   'enter_tag', 'exit_tag']
        feature_cols = [c for c in df.columns
                        if c not in exclude
                        and not c.startswith('&')
                        and not c.startswith('%')
                        and df[c].dtype in [np.float64, np.float32, np.int64, np.int32, float, int]]

        # Drop NaN rows
        valid = ~np.isnan(y)
        for col in feature_cols:
            valid &= df[col].notna() & (~np.isinf(df[col].replace([np.inf, -np.inf], np.nan).fillna(0)))

        X = df[feature_cols].loc[valid].values.astype(np.float64)
        y = y[valid].astype(np.int64)

        return X, y, feature_cols

    # ================================================================
    # MODEL TRAINING
    # ================================================================
    def _train_model(self, dataframe: DataFrame) -> bool:
        try:
            import xgboost as xgb
        except ImportError:
            return False

        X, y, feature_cols = self._get_training_data(dataframe)

        if len(X) < 500:
            return False

        # Feature scaling (z-score)
        self.scaler_mean = np.nanmean(X, axis=0)
        self.scaler_std = np.nanstd(X, axis=0) + 1e-10
        X_scaled = np.clip((X - self.scaler_mean) / self.scaler_std, -5, 5)

        # Class weights for imbalance
        unique, counts = np.unique(y, return_counts=True)
        total = len(y)
        weights = {c: total / (len(unique) * cnt + 1e-10) for c, cnt in zip(unique, counts)}

        # 5-model ensemble with diverse hyperparameters
        configs = [
            {'n': 200, 'd': 5, 'lr': 0.05, 'sub': 0.8, 'col': 0.7, 'seed': 42},
            {'n': 250, 'd': 4, 'lr': 0.04, 'sub': 0.7, 'col': 0.8, 'seed': 73},
            {'n': 150, 'd': 6, 'lr': 0.03, 'sub': 0.9, 'col': 0.6, 'seed': 99},
            {'n': 300, 'd': 3, 'lr': 0.06, 'sub': 0.75, 'col': 0.75, 'seed': 17},
            {'n': 200, 'd': 5, 'lr': 0.04, 'sub': 0.85, 'col': 0.65, 'seed': 55},
        ]

        models = []
        sample_weights = np.array([weights[label] for label in y])

        for cfg in configs:
            model = xgb.XGBClassifier(
                n_estimators=cfg['n'],
                max_depth=cfg['d'],
                learning_rate=cfg['lr'],
                subsample=cfg['sub'],
                colsample_bytree=cfg['col'],
                min_child_weight=5,
                gamma=0.2,
                reg_alpha=0.5,
                reg_lambda=1.0,
                random_state=cfg['seed'],
                eval_metric='mlogloss',
                use_label_encoder=False,
                verbosity=0
            )
            model.fit(X_scaled, y, sample_weight=sample_weights, verbose=False)
            models.append(model)

        self.model = models
        self.feature_cols = feature_cols
        self.last_train_time = datetime.now(timezone.utc)
        return True

    # ================================================================
    # PREDICTION
    # ================================================================
    def _predict(self, dataframe: DataFrame) -> DataFrame:
        if self.model is None or self.feature_cols is None:
            return dataframe

        df = self._build_features(dataframe)

        valid_cols = [c for c in self.feature_cols if c in df.columns]
        X = df[valid_cols].values.astype(np.float64)

        if len(X) == 0:
            return dataframe

        # Scale using saved params
        n_cols = min(len(valid_cols), len(self.scaler_mean))
        X_scaled = np.clip(
            (X[:, :n_cols] - self.scaler_mean[:n_cols]) / self.scaler_std[:n_cols],
            -5, 5
        )

        # Ensemble prediction (average probabilities)
        all_probs = []
        for m in self.model:
            probs = m.predict_proba(X_scaled)
            all_probs.append(probs)
        avg_probs = np.mean(all_probs, axis=0)

        classes = self.model[0].classes_
        prob_dict = {c: avg_probs[:, i] for i, c in enumerate(classes)}

        n = len(df)
        df['ml_long_prob'] = prob_dict.get(1, np.zeros(n))
        df['ml_short_prob'] = prob_dict.get(-1, np.zeros(n))
        df['ml_neutral_prob'] = prob_dict.get(0, np.zeros(n))
        df['ml_confidence'] = np.maximum(df['ml_long_prob'], df['ml_short_prob'])

        return df

    # ================================================================
    # RETRAINING LOGIC
    # ================================================================
    def _should_retrain(self) -> bool:
        if self.last_train_time is None:
            return True
        hours = (datetime.now(timezone.utc) - self.last_train_time).total_seconds() / 3600
        return hours > 1440  # Retrain every 60 days

    # ================================================================
    # FREQTRADE HOOKS
    # ================================================================
    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        dataframe = self._build_features(dataframe)

        if self._should_retrain():
            ok = self._train_model(dataframe)
            if ok:
                self.dp.send_msg(
                    f"ML-Macro-1D retrained: {len(self.model)} models, "
                    f"{len(self.feature_cols)} features"
                )

        if self.model is not None:
            dataframe = self._predict(dataframe)

        return dataframe

    def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """Flag long and short entries from the model probabilities.

        A row becomes an entry when ``ml_confidence`` exceeds 0.55; it is a
        long entry when the long probability is the larger one and a short
        entry when the short probability is the larger one. Frames without
        an ``ml_confidence`` column are returned untouched.

        Args:
            dataframe: Frame produced by ``populate_indicators``.
            metadata: Unused.

        Returns:
            The same frame, with ``enter_long`` / ``enter_short`` and
            ``enter_tag`` set on the qualifying rows.
        """
        if 'ml_confidence' not in dataframe.columns:
            return dataframe

        confident = dataframe['ml_confidence'] > 0.55
        long_prob = dataframe['ml_long_prob']
        short_prob = dataframe['ml_short_prob']

        # (direction condition, signal column, tag) — long first, then short.
        rules = (
            (long_prob > short_prob, 'enter_long', 'ml_long_1d'),
            (short_prob > long_prob, 'enter_short', 'ml_short_1d'),
        )
        for direction_ok, signal_col, tag in rules:
            dataframe.loc[confident & direction_ok, [signal_col, 'enter_tag']] = (1, tag)

        return dataframe

    def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """Flag exits for both directions where model confidence is low.

        Rows whose ``ml_confidence`` is below 0.40 get ``exit_long`` and
        ``exit_short`` set to 1 and ``exit_tag`` set to ``'ml_exit_1d'``.
        Frames without an ``ml_confidence`` column are returned untouched.

        Args:
            dataframe: Frame produced by ``populate_indicators``.
            metadata: Unused.

        Returns:
            The same frame with the exit columns set on qualifying rows.
        """
        if 'ml_confidence' in dataframe.columns:
            low_confidence = dataframe['ml_confidence'] < 0.40
            exit_columns = ['exit_long', 'exit_short', 'exit_tag']
            dataframe.loc[low_confidence, exit_columns] = (1, 1, 'ml_exit_1d')

        return dataframe
# Strategy Details
#
# Source Code
#
# Related Strategies
#
# Bandtastic
#
# FSampleStrategy
#
# FReinforcedStrategy
#
# Strategy003
#
# CustomStoplossWithPSAR