# Day 16 (100天计划): 多因子选股模型（打分法）
# Scoring-based multi-factor model：百分位打分 + 加权合成

# ==================== Switchable parameter (edit here) ====================
VERSION = 'v1'  # 'v1': 4 factors, equal weight | 'v2': IC-weighted | 'v3': industry-neutral | 'v4': 6 factors, equal weight
# ================================================================

import pandas as pd
import numpy as np
from jqdata import *

"""
回测参数：2020-01-01 至 2026-02-01，100万，沪深300，月度调仓，持仓20只

打分法核心：因子值 → 百分位排名(0-100) → 加权求和 → 选Top20
vs Day 13：因子值 → 去极值 → z-score标准化 → 等权相加
关键差异：打分法对极端值鲁棒，无分布假设

4个核心因子（基于Day1-10实证结论）：
  PCF：现金流价值因子，Day1最强，38.06%（行业中性化后103.74%）
  20日动量：Day5最强，40.53%
  低换手率：Day7最强，55.18%
  资产周转率：Day2最优质量因子，32.49%

V1: 等权打分（4×25%）
V2: IC加权打分  PCF(30%) + 换手率(40%) + 动量(20%) + 资产周转(10%)
V3: 行业中性化打分（在行业内做百分位排名）
V4: 6因子等权（追加60日历史波动率 + PSY情绪）
"""


def initialize(context):
    """
    One-time strategy setup.

    Stores the universe index code and the number of holdings on the global
    ``g`` object, switches on real-price mode, limits 'order' logging to the
    'error' level and registers ``trade`` with the monthly scheduler.
    """
    # Universe and portfolio size used by trade()
    g.index = '000300.XSHG'
    g.stock_num = 20

    # Platform options
    set_option('use_real_price', True)
    log.set_level('order', 'error')

    # The second argument (1) is the day-of-month slot handed to run_monthly
    # -- presumably the first trading day of each month; confirm in the docs.
    run_monthly(trade, 1)


# ==================== 打分函数 ====================

def to_score(series, higher_is_better=True):
    """
    Convert raw factor values into percentile scores in (0, 100].

    NaN values are dropped first, so the result can be shorter than the
    input. When there is nothing to rank against (no values, a single value,
    or every value identical) each remaining entry gets the neutral score 50.

    Args:
        series: pd.Series of raw factor values.
        higher_is_better: True when a larger raw value should score higher
            (e.g. momentum); False when a smaller raw value should score
            higher (e.g. PCF, turnover).

    Returns:
        pd.Series of float scores indexed by the non-NaN labels of ``series``.
    """
    values = series.dropna()
    # nunique() <= 1 covers empty, single-value and constant inputs. A
    # std() == 0 test misses the single-value case (std is NaN there, so the
    # lone value would be ranked 100) and is fragile for constant floats
    # whose computed std may not be exactly zero.
    if values.nunique() <= 1:
        return pd.Series(50.0, index=values.index)
    # ascending=True ranks the largest value highest; ascending=False flips it.
    return values.rank(ascending=higher_is_better, pct=True) * 100


def to_score_in_industry(series, industries, higher_is_better=True):
    """
    Percentile-score factor values separately inside each industry.

    Ranking within an industry removes industry-level bias from the score.
    Stocks that are missing from ``industries`` or whose factor value is NaN
    are left out of the result.

    Args:
        series: pd.Series of raw factor values indexed by stock code.
        industries: pd.Series mapping stock code -> industry label.
        higher_is_better: True when a larger raw value should score higher;
            False when a smaller raw value should score higher.

    Returns:
        pd.Series of float scores in (0, 100]. Industries are emitted in
        order of first appearance in ``industries``. An industry whose usable
        members cannot be ranked (a single member, or all members sharing one
        value) receives the neutral score 50 for every member.
    """
    pieces = []
    # dropna(): a NaN label never matches the equality test below, so
    # iterating over it would only produce an empty group.
    for label in industries.dropna().unique():
        members = industries[industries == label].index
        group = series.reindex(members).dropna()
        if group.empty:
            continue
        if group.nunique() <= 1:
            # Nothing to rank against -> neutral score, the same rule the
            # cross-sectional to_score applies to constant input.
            pieces.append(pd.Series(50.0, index=group.index))
        else:
            pieces.append(group.rank(ascending=higher_is_better, pct=True) * 100)

    if not pieces:
        return pd.Series(dtype=float)
    # Concatenate once instead of re-copying the accumulated result on every
    # loop iteration.
    return pd.concat(pieces)


def get_industry_series(stocks, date):
    """
    Map each stock to the name of its Shenwan level-1 ('sw_l1') industry.

    Args:
        stocks: iterable of stock codes to classify.
        date: as-of date forwarded to get_industries / get_industry_stocks.

    Returns:
        pd.Series (object dtype) indexed by stock code with the industry name
        as value. Stocks not found in any industry are absent. If a stock is
        listed under several industries, the last one iterated wins.
    """
    industries = get_industries('sw_l1', date=date)
    stock_ind = {}
    for ind_code in industries.index:
        try:
            # A set makes the membership test below O(1) per stock.
            members = set(get_industry_stocks(ind_code, date=date))
            ind_name = industries.loc[ind_code, 'name']
        except Exception as e:
            # Best effort: skip an industry that cannot be loaded, but leave a
            # trace instead of swallowing the failure silently.
            log.error(f"行业成分股获取失败 {ind_code}: {e}")
            continue
        for s in stocks:
            if s in members:
                stock_ind[s] = ind_name
    # Explicit dtype keeps an empty result well-defined.
    return pd.Series(stock_ind, dtype=object)


# ==================== 因子计算 ====================

def get_pcf_scores(stocks, date, industries=None):
    """
    Score the price-to-cash-flow (PCF) factor: a lower PCF scores higher.

    Only rows with a strictly positive ``pcf_ratio`` are queried.

    Args:
        stocks: list of stock codes to query.
        date: as-of date passed to get_fundamentals.
        industries: optional pd.Series (stock code -> industry). When given,
            scoring is done inside each industry and stocks without an
            industry are dropped.

    Returns:
        pd.Series of scores indexed by stock code; an empty float Series if
        the query returns nothing or any exception is raised (logged).
    """
    try:
        pcf_query = query(valuation.code, valuation.pcf_ratio).filter(
            valuation.code.in_(stocks),
            valuation.pcf_ratio > 0,
        )
        fundamentals = get_fundamentals(pcf_query, date=date)
        if fundamentals.empty:
            return pd.Series(dtype=float)

        pcf = fundamentals.set_index('code')['pcf_ratio']

        if industries is None:
            return to_score(pcf, higher_is_better=False)

        # Keep only the stocks that have an industry label
        known = industries.reindex(pcf.index).dropna()
        pcf = pcf.reindex(known.index)
        return to_score_in_industry(pcf, known, higher_is_better=False)

    except Exception as e:
        log.error(f"PCF打分失败: {e}")
        return pd.Series(dtype=float)


def get_momentum_scores(stocks, date, industries=None):
    """
    Score price momentum: a larger gain scores higher.

    Momentum is last close / first close - 1 over the non-NaN closes among
    the 21 bars requested up to ``date`` (pre-adjusted prices). A stock needs
    at least two valid closes to be scored.

    Args:
        stocks: list of stock codes.
        date: end date passed to get_price.
        industries: optional pd.Series (stock code -> industry) enabling
            within-industry scoring; stocks without an industry are dropped.

    Returns:
        pd.Series of scores indexed by stock code; empty when no data is
        available or an exception is raised (logged).
    """
    try:
        prices = get_price(stocks, end_date=date, count=21,
                           fields=['close'], panel=False, fq='pre')
        if prices is None or prices.empty:
            return pd.Series(dtype=float)

        closes = prices.pivot(index='time', columns='code', values='close')
        gains = {}
        for code in closes.columns:
            valid = closes[code].dropna()
            if len(valid) < 2:
                continue
            gains[code] = valid.iloc[-1] / valid.iloc[0] - 1

        momentum = pd.Series(gains)
        if momentum.empty:
            return momentum

        if industries is None:
            return to_score(momentum, higher_is_better=True)

        # Keep only the stocks that have an industry label
        known = industries.reindex(momentum.index).dropna()
        momentum = momentum.reindex(known.index)
        return to_score_in_industry(momentum, known, higher_is_better=True)

    except Exception as e:
        log.error(f"动量打分失败: {e}")
        return pd.Series(dtype=float)


def get_turnover_scores(stocks, date, industries=None):
    """
    Score average turnover: a lower mean turnover ratio scores higher.

    The mean is taken per stock over the 20 bars requested up to ``date``;
    stocks whose mean is NaN or not strictly positive are excluded.

    Args:
        stocks: list of stock codes.
        date: end date passed to get_price.
        industries: optional pd.Series (stock code -> industry) enabling
            within-industry scoring; stocks without an industry are dropped.

    Returns:
        pd.Series of scores indexed by stock code; empty when no data is
        available or an exception is raised (logged).
    """
    try:
        # NOTE(review): confirm that get_price accepts 'turnover_ratio' as a
        # field. If it does not, this call raises, the except branch below
        # returns an empty Series and the factor silently contributes nothing.
        bars = get_price(stocks, end_date=date, count=20,
                         fields=['turnover_ratio'], panel=False, fq='pre')
        if bars is None or bars.empty:
            return pd.Series(dtype=float)

        by_code = bars.pivot(index='time', columns='code', values='turnover_ratio')
        mean_turnover = by_code.mean(axis=0).dropna()
        mean_turnover = mean_turnover[mean_turnover > 0]

        if mean_turnover.empty:
            return mean_turnover

        if industries is None:
            return to_score(mean_turnover, higher_is_better=False)

        # Keep only the stocks that have an industry label
        known = industries.reindex(mean_turnover.index).dropna()
        mean_turnover = mean_turnover.reindex(known.index)
        return to_score_in_industry(mean_turnover, known, higher_is_better=False)

    except Exception as e:
        log.error(f"换手率打分失败: {e}")
        return pd.Series(dtype=float)


def get_asset_turnover_scores(stocks, date, industries=None):
    """
    Score asset turnover (operating revenue / total assets): higher is better.

    Only rows with strictly positive revenue and total assets are queried,
    and only strictly positive, non-NaN ratios are scored.

    Args:
        stocks: list of stock codes.
        date: as-of date passed to get_fundamentals.
        industries: optional pd.Series (stock code -> industry) enabling
            within-industry scoring; stocks without an industry are dropped.

    Returns:
        pd.Series of scores indexed by stock code; empty when no data is
        available or an exception is raised (logged).
    """
    try:
        at_query = query(
            valuation.code,
            income.total_operating_revenue,
            balance.total_assets,
        ).filter(
            valuation.code.in_(stocks),
            income.total_operating_revenue > 0,
            balance.total_assets > 0,
        )
        raw = get_fundamentals(at_query, date=date)
        if raw.empty:
            return pd.Series(dtype=float)

        indexed = raw.set_index('code')
        ratio = indexed['total_operating_revenue'] / indexed['total_assets']
        ratio = ratio.dropna()
        ratio = ratio[ratio > 0]

        if ratio.empty:
            return ratio

        if industries is None:
            return to_score(ratio, higher_is_better=True)

        # Keep only the stocks that have an industry label
        known = industries.reindex(ratio.index).dropna()
        ratio = ratio.reindex(known.index)
        return to_score_in_industry(ratio, known, higher_is_better=True)

    except Exception as e:
        log.error(f"资产周转率打分失败: {e}")
        return pd.Series(dtype=float)


def get_volatility_scores(stocks, date, industries=None):
    """
    Score historical volatility: lower volatility scores higher.

    Volatility is the standard deviation of daily close-to-close returns over
    the 61 bars requested up to ``date`` (pre-adjusted prices), scaled by
    sqrt(252). A stock needs at least 20 valid closes to be scored.

    Args:
        stocks: list of stock codes.
        date: end date passed to get_price.
        industries: optional pd.Series (stock code -> industry) enabling
            within-industry scoring; stocks without an industry are dropped.

    Returns:
        pd.Series of scores indexed by stock code; empty when no data is
        available or an exception is raised (logged).
    """
    try:
        prices = get_price(stocks, end_date=date, count=61,
                           fields=['close'], panel=False, fq='pre')
        if prices is None or prices.empty:
            return pd.Series(dtype=float)

        closes = prices.pivot(index='time', columns='code', values='close')
        annualizer = np.sqrt(252)
        vol_by_code = {}
        for code in closes.columns:
            valid = closes[code].dropna()
            if len(valid) < 20:
                continue
            daily_returns = valid.pct_change().dropna()
            vol_by_code[code] = daily_returns.std() * annualizer

        volatility = pd.Series(vol_by_code).dropna()
        if volatility.empty:
            return volatility

        if industries is None:
            return to_score(volatility, higher_is_better=False)

        # Keep only the stocks that have an industry label
        known = industries.reindex(volatility.index).dropna()
        volatility = volatility.reindex(known.index)
        return to_score_in_industry(volatility, known, higher_is_better=False)

    except Exception as e:
        log.error(f"波动率打分失败: {e}")
        return pd.Series(dtype=float)


def get_psy_scores(stocks, date, industries=None):
    """
    Score the PSY sentiment factor: a higher PSY scores higher.

    PSY = (number of up days among the last 12 daily changes) / 12 * 100,
    computed from the 13 closes requested up to ``date`` (pre-adjusted
    prices). A stock needs all 13 closes to be scored.

    Args:
        stocks: list of stock codes.
        date: end date passed to get_price.
        industries: optional pd.Series (stock code -> industry) enabling
            within-industry scoring; stocks without an industry are dropped.

    Returns:
        pd.Series of scores indexed by stock code; empty when no data is
        available or an exception is raised (logged).
    """
    try:
        prices = get_price(stocks, end_date=date, count=13,
                           fields=['close'], panel=False, fq='pre')
        if prices is None or prices.empty:
            return pd.Series(dtype=float)

        closes = prices.pivot(index='time', columns='code', values='close')
        psy_by_code = {}
        for code in closes.columns:
            valid = closes[code].dropna()
            if len(valid) < 13:
                continue
            # Count the days whose close rose versus the previous close
            rises = (valid.diff(1).dropna() > 0).sum()
            psy_by_code[code] = rises / 12 * 100

        psy_values = pd.Series(psy_by_code).dropna()
        if psy_values.empty:
            return psy_values

        if industries is None:
            return to_score(psy_values, higher_is_better=True)

        # Keep only the stocks that have an industry label
        known = industries.reindex(psy_values.index).dropna()
        psy_values = psy_values.reindex(known.index)
        return to_score_in_industry(psy_values, known, higher_is_better=True)

    except Exception as e:
        log.error(f"PSY打分失败: {e}")
        return pd.Series(dtype=float)


# ==================== 加权合成 ====================

def combine_scores(score_dict, weights):
    """
    Blend per-factor scores into one weighted composite score per stock.

    Factors whose score Series is empty are ignored. Only stocks present in
    every remaining factor are scored. Weights are normalized by the sum of
    the weights of the factors actually used; a factor missing from
    ``weights`` counts with weight 0.

    Args:
        score_dict: {factor name: pd.Series of 0-100 scores by stock code}.
        weights: {factor name: weight}.

    Returns:
        pd.Series of composite scores indexed by stock code in sorted order.
        Empty when no factor has data, when the factors share no stock, or
        when the usable weights do not sum to a positive number.
    """
    valid = {name: s for name, s in score_dict.items() if not s.empty}
    if not valid:
        return pd.Series(dtype=float)

    shared = set.intersection(*(set(s.index) for s in valid.values()))
    if not shared:
        return pd.Series(dtype=float)

    # Set iteration order is arbitrary and can vary between runs. Sorting
    # fixes the index order so tie-breaking in any later top-N selection is
    # reproducible.
    common = sorted(shared)
    result = pd.Series(0.0, index=common)
    w_total = 0.0

    for name, s in valid.items():
        w = weights.get(name, 0.0)
        # fillna(50) treats any NaN score as neutral
        result += s.reindex(common).fillna(50.0) * w
        w_total += w

    if w_total <= 0:
        # Without positive weight every stock would tie at 0 and the ranking
        # would be meaningless; report it instead of returning zeros.
        log.error(f"[{VERSION}] 因子权重之和非正，无法合成评分: {list(valid.keys())}")
        return pd.Series(dtype=float)

    result /= w_total

    log.info(f"[{VERSION}] 参与打分因子: {list(valid.keys())}，股票数: {len(common)}")
    return result


# ==================== 交易 ====================

def trade(context):
    """
    Monthly rebalance: score the index constituents, hold the top g.stock_num.

    Steps:
      1. Load the constituents of g.index.
      2. Compute factor scores as of the previous trading day.
      3. Combine them with the weights selected by VERSION.
      4. Sell holdings outside the target list, then set every target to an
         equal share of total portfolio value.

    Does nothing (apart from logging) when VERSION is unknown or no composite
    score can be computed.
    """
    if VERSION not in ('v1', 'v2', 'v3', 'v4'):
        # Fail loudly before fetching any data; a typo in VERSION would
        # otherwise produce a backtest that silently never trades.
        log.error(f"未知的 VERSION: {VERSION}")
        return

    stocks = get_index_stocks(g.index)
    # Score with data as of the previous trading day. Passing the current
    # bar's datetime as the end date lets daily closes and valuation data of
    # the rebalance day itself leak into a decision taken during that
    # session (look-ahead bias).
    dt = context.previous_date

    # Industry classification is only needed for the industry-neutral v3
    industries = None
    if VERSION == 'v3':
        try:
            industries = get_industry_series(stocks, dt)
        except Exception as e:
            # Fall back to cross-sectional scoring, but make the downgrade
            # visible instead of hiding it.
            log.error(f"[{VERSION}] 行业分类获取失败，退化为全市场打分: {e}")

    # ---- Four core factor scores (within-industry when industries is set) ----
    score_dict = {
        'pcf': get_pcf_scores(stocks, dt, industries),
        'mom': get_momentum_scores(stocks, dt, industries),
        'turn': get_turnover_scores(stocks, dt, industries),
        'at': get_asset_turnover_scores(stocks, dt, industries),
    }

    # ---- Weights by version ----
    if VERSION == 'v2':
        # Weights tilted by historical factor performance: turnover (55.18%)
        # strongest, then momentum (40.53%), PCF (38.06%) and asset turnover
        # (32.49%). PCF is nevertheless given more weight than momentum.
        weights = {'pcf': 0.30, 'mom': 0.20, 'turn': 0.40, 'at': 0.10}
    elif VERSION == 'v4':
        # Six factors, equal weight: add low volatility and PSY sentiment
        score_dict['vol'] = get_volatility_scores(stocks, dt)
        score_dict['psy'] = get_psy_scores(stocks, dt)
        weights = {name: 1 / 6 for name in score_dict}
    else:
        # v1 and v3: four factors at 25% each (v3 differs only in scoring
        # within industries)
        weights = {name: 0.25 for name in score_dict}

    # ---- Composite score and selection ----
    total_score = combine_scores(score_dict, weights)
    if total_score.empty:
        return

    target = total_score.sort_values(ascending=False).head(g.stock_num).index.tolist()

    # Sell first so that cash is available for the purchases below
    for stock in list(context.portfolio.positions.keys()):
        if stock not in target:
            order_target(stock, 0)

    if target:
        w = 1.0 / len(target)
        for stock in target:
            order_target_value(stock, context.portfolio.total_value * w)