import numpy as np import pandas as pd from gplearn import fitness
from gplearn.genetic import SymbolicRegressor
# Load the in-sample and out-of-sample frames; the first CSV column is parsed
# as dates and used as the index.
train_data = pd.read_csv(
    '../data/IC_train.csv',
    index_col=0,
    parse_dates=[0],
)
test_data = pd.read_csv(
    '../data/IC_test.csv',
    index_col=0,
    parse_dates=[0],
)

# Capture the column names BEFORE the target column is appended, so 'y' is
# never listed as a feature.
feature_names = train_data.columns.tolist()

# Target: log of (Close four rows ahead / current Close).
train_data.loc[:, 'y'] = np.log(
    train_data['Close'].shift(-4).div(train_data['Close'])
)

# The forward shift leaves the last four targets undefined; drop every row
# that contains a NaN (in any column), modifying the frame in place.
train_data.dropna(inplace=True)
from examples.backtest import BackTester
class SymbolicTestor(BackTester):
    """Back-tester that trades on the sign of a supplied factor series.

    A positive factor value produces a long signal (1), a negative value a
    short signal (-1), and anything else (zero or NaN) produces no signal
    (NaN).
    """

    def init(self):
        """Declare the strategy parameters: a single ``factor`` entry."""
        # NOTE(review): this is ``init``, not ``__init__`` -- presumably a
        # hook invoked by the BackTester base class; confirm against
        # examples.backtest.
        self.params = {'factor': pd.Series}

    @BackTester.process_strategy
    def run_(self, *args, **kwargs) -> dict[str, int]:
        """Turn the current ``factor`` parameter into signals and positions.

        Reads ``self.params['factor']``, writes the resulting signal column
        into ``self.backtest_env['signal']`` and then asks the base class to
        build positions from it.

        The body itself returns nothing; the annotated dict presumably comes
        from the ``process_strategy`` decorator -- TODO confirm. Callers in
        this file read the keys ``'annualized_mean'`` and ``'max_drawdown'``
        from the result (values may well be floats rather than ints).
        """
        factor = np.array(self.params['factor'])
        long_cond = factor > 0
        short_cond = factor < 0

        # 1 for long, -1 for short, NaN where the factor is zero or NaN.
        self.backtest_env['signal'] = np.where(
            long_cond, 1, np.where(short_cond, -1, np.nan)
        )
        self.construct_position_(
            keep_raw=True,
            max_holding_period=1200,
            take_profit=None,
            stop_loss=None,
        )
# Two commission rates, each written as a fraction of 10000; both are zero
# here, i.e. the training back-test runs without fees.
comm = [0 / 10000] * 2

# Back-tester over the training frame; reused by the fitness function for
# every candidate program.
bt = SymbolicTestor(
    train_data,
    transact_base='PreClose',
    commissions=tuple(comm),
)
def score_func_basic(y, y_pred, sample_weight):
    """Fitness function: annualized mean return divided by max drawdown.

    The candidate program's output ``y_pred`` is fed to the module-level
    back-tester ``bt`` as the trading factor, and the score is computed from
    the statistics that run returns.

    Args:
        y: Ground-truth targets. Unused; kept because the fitness signature
            is ``(y, y_pred, sample_weight)``.
        y_pred: Output of the candidate program, used as the factor.
        sample_weight: Unused; kept for the same signature reason.

    Returns:
        ``annualized_mean / max_drawdown`` from the back-test, or ``0`` when
        the drawdown is exactly zero or the back-test fails for any reason.
    """
    try:
        stats = bt.run_(factor=y_pred)
        drawdown = stats['max_drawdown']
        if drawdown == 0:
            # Avoid dividing by zero; a flat equity curve scores nothing.
            return 0
        return stats['annualized_mean'] / drawdown
    except Exception:
        # Best effort: a program whose back-test blows up gets a neutral
        # score instead of aborting the whole evolution. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate so
        # a long run can still be stopped.
        return 0
def my_gplearn(function_set, my_fitness, pop_num=100, gen_num=3, tour_num=10, random_state = 42, feature_names=None):
    """Build an unfitted ``SymbolicRegressor`` driven by a custom fitness.

    Args:
        function_set: Function names/objects the programs may use.
        my_fitness: Callable ``(y, y_pred, sample_weight) -> number``; it is
            wrapped with ``fitness.make_fitness`` as a greater-is-better
            metric.
        pop_num: Passed as ``population_size``.
        gen_num: Passed as ``generations``.
        tour_num: Passed as ``tournament_size``.
        random_state: Seed forwarded to the regressor.
        feature_names: Optional feature names forwarded to the regressor.

    Returns:
        The configured (not yet fitted) ``SymbolicRegressor``.
    """
    custom_metric = fitness.make_fitness(
        function=my_fitness,
        greater_is_better=True,
        wrap=False,
    )

    # Settings that depend on the caller's arguments.
    settings = {
        'population_size': pop_num,
        'generations': gen_num,
        'metric': custom_metric,
        'tournament_size': tour_num,
        'function_set': function_set,
        'feature_names': feature_names,
        'random_state': random_state,
    }
    # Fixed settings for this script.
    settings.update(
        const_range=(-1.0, 1.0),
        parsimony_coefficient='auto',
        stopping_criteria=100.0,
        init_depth=(2, 3),
        init_method='half and half',
        p_crossover=0.8,
        p_subtree_mutation=0.05,
        p_hoist_mutation=0.05,
        p_point_mutation=0.05,
        p_point_replace=0.05,
        max_samples=1.0,
        warm_start=False,
        low_memory=False,
        n_jobs=1,
        verbose=1,
    )
    return SymbolicRegressor(**settings)
# Building blocks the evolved programs may combine.
function_set = [
    'add', 'sub', 'mul', 'div',
    'sqrt', 'log', 'abs', 'neg', 'inv',
    'sin', 'cos', 'tan',
    'max', 'min',
]

# Configure the regressor with the back-test based fitness and a fixed seed.
my_cmodel_gp = my_gplearn(
    function_set,
    score_func_basic,
    random_state=0,
    feature_names=feature_names,
)

# Features are every column up to and including 'rank_num'; the target is
# the 'y' column of the training frame.
my_cmodel_gp.fit(
    train_data.loc[:, :'rank_num'].values,
    train_data['y'].values,
)
print(my_cmodel_gp)
# Out-of-sample check: evaluate the fitted program on the test frame and
# back-test the resulting factor with the same settings as in training.
factor = my_cmodel_gp.predict(test_data.values)
bt_test = SymbolicTestor(
    test_data,
    transact_base='PreClose',
    commissions=tuple(comm),
)
bt_test.run_(factor=factor)

md = bt_test.summary()
print(md.out_stats)
# NOTE(review): this prints the TRAINING back-tester's fees_factor (bt), not
# bt_test's -- confirm whether bt_test.fees_factor was intended.
print(bt.fees_factor)
md.plot_(comm=comm, show_bool=True)
|