# Timeframe: 1d
# Direction: Long & Short
# Stoploss: -5.0%
# Trailing Stop: Yes
# ROI: 0m: 15.0%, 1440m: 10.0%, 4320m: 5.0%, 8640m: 0.0%
# Interface Version: 3
# Startup Candles: 500
# Indicators: 10
# freqtrade/freqtrade-strategies
# Strategy 003 author@: Gerald Lonlas github@: https://github.com/freqtrade/freqtrade-strategies
"""
ML Macro 1D Strategy — XGBoost + Macro Factors on Daily timeframe
- 50+ features: trend, momentum, volatility, volume, macro regime, sentiment
- XGBoost classifier ensemble (5 models)
- Walk-forward retraining every 60 days
- Min 500 candles for initial training
"""
import logging
import pickle
from datetime import datetime, timezone

import numpy as np
import talib.abstract as ta
from freqtrade.strategy import IStrategy
from pandas import DataFrame, Series
class MLMacro1D(IStrategy):
    """Daily long/short strategy driven by an XGBoost classifier ensemble.

    Pipeline, as implemented below:

    1. ``_build_features`` derives technical features from the OHLCV frame.
    2. ``_get_training_data`` labels each candle by its forward return
       (+1 / -1 / 0) and drops rows that cannot be used for fitting.
    3. ``_train_model`` fits five ``XGBClassifier`` models on z-scored
       features; ``_predict`` averages their class probabilities.
    4. The ``populate_*`` hooks turn those probabilities into entry and
       exit signals.

    NOTE(review): the fitted ensemble and the scaler are stored on the
    instance, not per pair. If ``populate_indicators`` is invoked once per
    pair (TODO confirm against the freqtrade version in use), the first pair
    that trains supplies the model for every other pair until the retrain
    interval elapses, although several features (raw moving averages, OBV,
    ATH, ATR) depend on the price/volume scale of the pair.

    NOTE(review): ``populate_indicators`` trains on the same frame it then
    predicts on, so predictions for rows that were in the training set are
    in-sample.
    """

    INTERFACE_VERSION = 3
    timeframe = '1d'
    can_short = True

    # Risk management
    stoploss = -0.05
    trailing_stop = True
    trailing_stop_positive = 0.01
    trailing_stop_positive_offset = 0.04
    trailing_only_offset_is_reached = True

    # ROI exits
    minimal_roi = {"0": 0.15, "1440": 0.10, "4320": 0.05, "8640": 0}

    max_open_trades = 4
    startup_candle_count = 500
    process_only_new_candles = True
    use_exit_signal = True

    # Labelling / training / signal parameters (previously inline literals)
    ml_label_horizon = 5             # candles to look ahead for the label
    ml_label_threshold_pct = 3.0     # |forward return| in % for a +1 / -1 label
    ml_min_train_samples = 500       # minimum usable rows before fitting
    ml_retrain_interval_hours = 1440  # 60 days
    ml_entry_confidence = 0.55
    ml_exit_confidence = 0.40

    _log = logging.getLogger(__name__)

    def __init__(self, config: dict) -> None:
        """Initialise the strategy with no fitted model."""
        super().__init__(config)
        self.model = None            # list of fitted classifiers, or None
        self.scaler_mean = None      # per-feature mean used for z-scoring
        self.scaler_std = None       # per-feature std (+ epsilon)
        self.feature_cols = None     # ordered feature names used at fit time
        self.class_labels = None     # original labels, indexed by encoded class
        self.last_train_time = None  # UTC wall-clock time of the last fit

    # ================================================================
    # FEATURE ENGINEERING — 50+ macro + technical factors
    # ================================================================
    def _build_features(self, dataframe: DataFrame) -> DataFrame:
        """Return a copy of ``dataframe`` with all feature columns added.

        The input frame is not modified. It must provide the ``open``,
        ``high``, ``low``, ``close`` and ``volume`` columns read below.
        """
        df = dataframe.copy()

        # --- 1. Price Returns (multi-horizon) ---
        for p in [1, 3, 5, 10, 20, 50]:
            df[f'ret_{p}d'] = df['close'].pct_change(p) * 100

        # --- 2. Distance from Moving Averages ---
        for p in [5, 10, 20, 50, 100, 200]:
            df[f'ma_{p}'] = ta.SMA(df, timeperiod=p)
            df[f'ma_{p}_dist'] = (df['close'] - df[f'ma_{p}']) / df[f'ma_{p}'] * 100

        # --- 3. EMA Cross Signals ---
        ema_5 = ta.EMA(df, timeperiod=5)
        ema_20 = ta.EMA(df, timeperiod=20)
        ema_50 = ta.EMA(df, timeperiod=50)
        ema_200 = ta.EMA(df, timeperiod=200)
        df['ema_5_20'] = ema_5 - ema_20
        df['ema_20_50'] = ema_20 - ema_50
        df['ema_50_200'] = ema_50 - ema_200

        # --- 4. MACD ---
        macd = ta.MACD(df, fastperiod=12, slowperiod=26, signalperiod=9)
        df['macd'] = macd['macd']
        df['macd_signal'] = macd['macdsignal']
        df['macd_hist'] = df['macd'] - df['macd_signal']
        df['macd_hist_z'] = df['macd_hist'] / df['macd_hist'].rolling(100).std()

        # --- 5. RSI & Stochastic ---
        df['rsi_14'] = ta.RSI(df, timeperiod=14)
        df['rsi_7'] = ta.RSI(df, timeperiod=7)
        df['rsi_28'] = ta.RSI(df, timeperiod=28)
        # Keyword arguments on purpose: STOCH's third optional parameter is
        # slowk_matype, so the positional form (14, 3, 3) selected a
        # different moving-average type instead of setting slowd_period.
        stoch = ta.STOCH(df, fastk_period=14, slowk_period=3, slowd_period=3)
        df['stoch_k'] = stoch['slowk']
        df['stoch_d'] = stoch['slowd']

        # --- 6. Trend Strength (ADX / DMI) ---
        df['adx'] = ta.ADX(df, timeperiod=14)
        df['di_plus'] = ta.PLUS_DI(df, timeperiod=14)
        df['di_minus'] = ta.MINUS_DI(df, timeperiod=14)
        df['di_ratio'] = df['di_plus'] / (df['di_minus'] + 1e-10)
        df['trend_strength'] = (df['adx'] - 20) / 20  # 0 at ADX 20, 1 at ADX 40

        # --- 7. Bollinger Bands ---
        bb = ta.BBANDS(df, timeperiod=20, nbdevup=2.0, nbdevdn=2.0)
        df['bb_upper'] = bb['upperband']
        df['bb_lower'] = bb['lowerband']
        df['bb_mid'] = bb['middleband']
        band_range = df['bb_upper'] - df['bb_lower']
        df['bb_width'] = band_range / df['bb_mid']
        df['bb_position'] = (df['close'] - df['bb_lower']) / (band_range + 1e-10)

        # --- 8. Volatility Regime ---
        df['atr_14'] = ta.ATR(df, timeperiod=14)
        df['atr_pct'] = df['atr_14'] / df['close'] * 100
        df['atr_ratio'] = df['atr_14'] / ta.SMA(df['atr_14'], timeperiod=50)
        df['vol_20d'] = df['ret_1d'].rolling(20).std()  # 20-day historical vol

        # --- 9. Drawdown from ATH ---
        df['ath'] = df['close'].expanding().max()
        df['dd_from_ath'] = (df['close'] / df['ath'] - 1) * 100
        # Running count of underwater candles, minus the count recorded at the
        # most recent candle that closed at the running maximum: the result is
        # the number of candles since that last high.
        underwater_count = (df['close'] < df['ath']).astype(int).cumsum()
        count_at_last_high = (
            underwater_count.where(df['close'] >= df['ath']).ffill().fillna(0)
        )
        df['dd_duration'] = underwater_count - count_at_last_high

        # --- 10. Volume & Money Flow ---
        df['volume_sma_20'] = ta.SMA(df['volume'], timeperiod=20)
        df['volume_sma_50'] = ta.SMA(df['volume'], timeperiod=50)
        df['volume_ratio'] = df['volume'] / df['volume_sma_20']
        df['volume_trend'] = df['volume_sma_20'] / df['volume_sma_50']
        # OBV (On-Balance Volume)
        df['obv'] = (np.sign(df['close'].diff()) * df['volume']).cumsum()
        df['obv_ma_20'] = ta.SMA(df['obv'], timeperiod=20)
        df['obv_ratio'] = df['obv'] / df['obv_ma_20']
        # MFI (Money Flow Index)
        tp = (df['high'] + df['low'] + df['close']) / 3
        mf = tp * df['volume']
        pos_mf = mf.where(tp > tp.shift(1), 0).rolling(14).sum()
        neg_mf = mf.where(tp < tp.shift(1), 0).rolling(14).sum()
        df['mfi'] = 100 - 100 / (1 + pos_mf / (neg_mf + 1e-10))

        # --- 11. Price Structure ---
        df['hh_20'] = df['high'].rolling(20).max()
        df['ll_20'] = df['low'].rolling(20).min()
        df['channel_position'] = (
            (df['close'] - df['ll_20']) / (df['hh_20'] - df['ll_20'] + 1e-10)
        )
        df['is_20d_high'] = (df['close'] >= df['hh_20']).astype(int)
        df['is_20d_low'] = (df['close'] <= df['ll_20']).astype(int)
        # Reversal signals
        candle_range = df['high'] - df['low']
        body = (df['open'] - df['close']).abs()
        upper_part = df['high'] - df['close']
        lower_part = df['close'] - df['low']
        df['hammer'] = (
            (lower_part > 2 * upper_part) & (candle_range > 3 * body)
        ).astype(int)
        df['shooting_star'] = (
            (upper_part > 2 * lower_part) & (candle_range > 3 * body)
        ).astype(int)

        # --- 12. Return Asymmetry / Skew ---
        ret_roll_20 = df['ret_1d'].rolling(20)
        df['ret_skew_20'] = ret_roll_20.skew()
        df['ret_kurt_20'] = ret_roll_20.kurt()

        # --- 13. Cross-sectional factors ---
        # Computed per frame: returns normalised against their own history.
        df['ret_zscore_20'] = (
            (df['ret_1d'] - ret_roll_20.mean()) / (ret_roll_20.std() + 1e-10)
        )
        # Serial correlation (trending vs mean-reverting)
        df['autocorr_5'] = df['ret_1d'].rolling(5).apply(
            lambda x: x.autocorr() if len(x) > 2 else 0, raw=False
        )

        # --- 14. Volume-Price divergence ---
        df['vol_price_div'] = df['volume_ratio'] - df['ret_1d'].rolling(5).mean()

        return df

    # ================================================================
    # TRAINING DATA PREPARATION
    # ================================================================
    def _get_training_data(self, dataframe: DataFrame) -> tuple:
        """Build the feature matrix and labels used for fitting.

        Labels come from the forward return over ``ml_label_horizon``
        candles: 1 when it exceeds ``+ml_label_threshold_pct`` percent,
        -1 when it is below the negative threshold, 0 otherwise.

        Returns:
            ``(X, y, feature_cols)`` where ``X`` is a float64 array holding
            only rows with a known forward return and finite features,
            ``y`` is the matching int64 label array and ``feature_cols`` is
            the ordered list of column names behind ``X``.
        """
        df = self._build_features(dataframe)

        future_close = df['close'].shift(-self.ml_label_horizon)
        future_ret = (future_close / df['close'] - 1) * 100
        threshold = self.ml_label_threshold_pct
        y = np.where(future_ret > threshold, 1,
                     np.where(future_ret < -threshold, -1, 0))

        # Raw OHLCV, signal columns and earlier model outputs are not features.
        excluded = {'date', 'open', 'high', 'low', 'close', 'volume',
                    'enter_long', 'enter_short', 'exit_long', 'exit_short',
                    'enter_tag', 'exit_tag'}
        feature_cols = [
            c for c in df.select_dtypes(include='number').columns
            if c not in excluded and not c.startswith(('&', '%', 'ml_'))
        ]

        features = df[feature_cols].to_numpy(dtype=np.float64)
        # The trailing rows have no forward return yet; np.where would label
        # them 0, so they are masked out through future_ret instead of y.
        # isfinite rejects NaN and +/-inf (ratio features can divide by zero).
        valid = future_ret.notna().to_numpy() & np.isfinite(features).all(axis=1)

        X = features[valid]
        y = y[valid].astype(np.int64)
        return X, y, feature_cols

    # ================================================================
    # MODEL TRAINING
    # ================================================================
    def _train_model(self, dataframe: DataFrame) -> bool:
        """Fit the five-model ensemble and store it on the instance.

        Returns:
            True when a new ensemble was fitted and stored; False when
            xgboost is not importable, fewer than ``ml_min_train_samples``
            usable rows exist, or fewer than two classes are present.
            On False the previously stored model (if any) is left untouched.
        """
        try:
            import xgboost as xgb
        except ImportError:
            self._log.warning("xgboost is not installed; ML model not trained")
            return False

        X, y, feature_cols = self._get_training_data(dataframe)
        if len(X) < self.ml_min_train_samples:
            self._log.info(
                "Not enough usable rows to train: %d < %d",
                len(X), self.ml_min_train_samples,
            )
            return False

        # XGBClassifier expects labels 0..K-1, so {-1, 0, 1} is encoded and
        # the original labels are kept for decoding predict_proba columns.
        class_labels, y_encoded, counts = np.unique(
            y, return_inverse=True, return_counts=True
        )
        if len(class_labels) < 2:
            self._log.info("Only one label class present; ML model not trained")
            return False

        # Feature scaling (z-score), clipped to limit the effect of outliers
        scaler_mean = np.nanmean(X, axis=0)
        scaler_std = np.nanstd(X, axis=0) + 1e-10
        X_scaled = np.clip((X - scaler_mean) / scaler_std, -5, 5)

        # Class weights for imbalance: inverse class frequency
        class_weight = len(y) / (len(class_labels) * counts)
        sample_weights = class_weight[y_encoded]

        # 5-model ensemble with diverse hyperparameters
        configs = [
            {'n': 200, 'd': 5, 'lr': 0.05, 'sub': 0.8, 'col': 0.7, 'seed': 42},
            {'n': 250, 'd': 4, 'lr': 0.04, 'sub': 0.7, 'col': 0.8, 'seed': 73},
            {'n': 150, 'd': 6, 'lr': 0.03, 'sub': 0.9, 'col': 0.6, 'seed': 99},
            {'n': 300, 'd': 3, 'lr': 0.06, 'sub': 0.75, 'col': 0.75, 'seed': 17},
            {'n': 200, 'd': 5, 'lr': 0.04, 'sub': 0.85, 'col': 0.65, 'seed': 55},
        ]
        models = []
        for cfg in configs:
            model = xgb.XGBClassifier(
                n_estimators=cfg['n'],
                max_depth=cfg['d'],
                learning_rate=cfg['lr'],
                subsample=cfg['sub'],
                colsample_bytree=cfg['col'],
                min_child_weight=5,
                gamma=0.2,
                reg_alpha=0.5,
                reg_lambda=1.0,
                random_state=cfg['seed'],
                eval_metric='mlogloss',
                verbosity=0,
            )
            model.fit(X_scaled, y_encoded, sample_weight=sample_weights, verbose=False)
            models.append(model)

        # State is only replaced once every model has fitted successfully.
        self.model = models
        self.scaler_mean = scaler_mean
        self.scaler_std = scaler_std
        self.feature_cols = feature_cols
        self.class_labels = class_labels
        self.last_train_time = datetime.now(timezone.utc)
        return True

    # ================================================================
    # PREDICTION
    # ================================================================
    def _predict(self, dataframe: DataFrame) -> DataFrame:
        """Add ensemble probabilities to a feature-enriched copy of the frame.

        Adds ``ml_long_prob``, ``ml_short_prob``, ``ml_neutral_prob`` and
        ``ml_confidence`` (the larger of the long and short probabilities).
        Returns ``dataframe`` unchanged when no model is fitted or the frame
        has no rows.
        """
        if self.model is None or self.feature_cols is None:
            return dataframe

        df = self._build_features(dataframe)
        if len(df) == 0:
            return dataframe

        # reindex keeps the exact training column order; a column missing
        # from the frame becomes NaN instead of shifting its neighbours.
        X = df.reindex(columns=self.feature_cols).to_numpy(dtype=np.float64)

        # Scale using saved params. NaN values stay NaN; XGBoost treats NaN
        # as a missing value by default.
        X_scaled = np.clip((X - self.scaler_mean) / self.scaler_std, -5, 5)

        # Ensemble prediction (average probabilities)
        avg_probs = np.mean([m.predict_proba(X_scaled) for m in self.model], axis=0)

        # Column i of predict_proba belongs to encoded class i; map it back
        # to the original label. Fall back to the model's own classes_ when
        # no label table was stored.
        labels = self.class_labels
        if labels is None:
            labels = self.model[0].classes_
        prob_by_label = {int(label): avg_probs[:, i] for i, label in enumerate(labels)}

        n = len(df)
        df['ml_long_prob'] = prob_by_label.get(1, np.zeros(n))
        df['ml_short_prob'] = prob_by_label.get(-1, np.zeros(n))
        df['ml_neutral_prob'] = prob_by_label.get(0, np.zeros(n))
        df['ml_confidence'] = np.maximum(df['ml_long_prob'], df['ml_short_prob'])
        return df

    # ================================================================
    # RETRAINING LOGIC
    # ================================================================
    def _should_retrain(self) -> bool:
        """Return True when no model was fitted yet or the last fit is older
        than ``ml_retrain_interval_hours`` of wall-clock time."""
        if self.last_train_time is None:
            return True
        elapsed = datetime.now(timezone.utc) - self.last_train_time
        return elapsed.total_seconds() / 3600 > self.ml_retrain_interval_hours

    # ================================================================
    # FREQTRADE HOOKS
    # ================================================================
    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """Add features, retrain when due, and add predictions if a model exists."""
        dataframe = self._build_features(dataframe)

        if self._should_retrain():
            ok = self._train_model(dataframe)
            dp = getattr(self, 'dp', None)  # may be unset outside a running bot
            if ok and dp is not None:
                dp.send_msg(
                    f"ML-Macro-1D retrained: {len(self.model)} models, "
                    f"{len(self.feature_cols)} features"
                )

        if self.model is not None:
            dataframe = self._predict(dataframe)
        return dataframe

    def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """Set long/short entry signals from the model probabilities.

        Does nothing when the prediction columns are absent.
        """
        if 'ml_confidence' not in dataframe.columns:
            return dataframe

        confident = dataframe['ml_confidence'] > self.ml_entry_confidence

        # Long: confident and long_prob > short_prob
        dataframe.loc[
            confident & (dataframe['ml_long_prob'] > dataframe['ml_short_prob']),
            ['enter_long', 'enter_tag']
        ] = (1, 'ml_long_1d')

        # Short: confident and short_prob > long_prob
        dataframe.loc[
            confident & (dataframe['ml_short_prob'] > dataframe['ml_long_prob']),
            ['enter_short', 'enter_tag']
        ] = (1, 'ml_short_1d')
        return dataframe

    def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """Set exit signals for both sides when model confidence is low.

        Does nothing when the prediction columns are absent.
        """
        if 'ml_confidence' not in dataframe.columns:
            return dataframe

        # Exit when confidence drops
        dataframe.loc[
            (dataframe['ml_confidence'] < self.ml_exit_confidence),
            ['exit_long', 'exit_short', 'exit_tag']
        ] = (1, 1, 'ml_exit_1d')
        return dataframe