diff --git a/examples/cycle_end_vintage.sql b/examples/cycle_end_vintage.sql
new file mode 100644
index 0000000..0e0256c
--- /dev/null
+++ b/examples/cycle_end_vintage.sql
@@ -0,0 +1,68 @@
+# CYCLE END VINTAGE: overdue balance by loan month and repayment period, observed 30 days after each due date
+WITH loan AS (
+    # loans included in the calculation
+    SELECT id, user_id, flow_channel, bank_channel, apply_time, loan_time, periods, amount
+    FROM database_name.loan l
+    WHERE l.loan_state = 'SUCCEED'
+),
+replan as (
+    # one row per loan and repayment period
+    SELECT l.flow_channel 流量渠道
+         , l.bank_channel 放款资方
+         , l.id 放款编号
+         , l.user_id 用户编号
+         , l.apply_time 申请时间
+         , l.loan_time 放款时间
+         , date_format(l.loan_time, '%Y-%m') 放款月份
+         , l.periods 放款期数
+         , l.amount 放款金额
+         , p.period 还款期数
+         , p.plan_repay_date 应还日期
+         , date_add(p.plan_repay_date, INTERVAL 30 DAY) 观察日期
+         , date(p.act_repay_time) 实还日期
+         , p.principal_amt 应还本金
+         , p.act_principal_amt 实还本金
+    FROM loan l INNER JOIN database_name.repay_plan p ON l.id = p.loan_id
+    ORDER BY 放款编号, 还款期数
+),
+amount as (
+    # totals per loan month; 放款人数 counts distinct users
+    SELECT date_format(loan_time, '%Y-%m') 放款月份, count(id) 放款件数, count(DISTINCT user_id) 放款人数, sum(amount) 放款金额
+    FROM loan
+    GROUP BY date_format(loan_time, '%Y-%m')
+)
+SELECT vintage.放款月份
+     , SUM(IF(还款期数 = 1, 放款金额, 0)) 放款金额
+     # TERMn = metric of repayment period n; the constant right after CASE selects which metric is shown
+     , SUM(IF(还款期数 = 1, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM1
+     , SUM(IF(还款期数 = 2, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM2
+     , SUM(IF(还款期数 = 3, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM3
+     , SUM(IF(还款期数 = 4, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM4
+     , SUM(IF(还款期数 = 5, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM5
+     , SUM(IF(还款期数 = 6, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM6
+     , SUM(IF(还款期数 = 7, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM7
+     , SUM(IF(还款期数 = 8, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM8
+     , SUM(IF(还款期数 = 9, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM9
+     , SUM(IF(还款期数 = 10, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM10
+     , SUM(IF(还款期数 = 11, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM11
+     , SUM(IF(还款期数 = 12, case '递延逾期率' when '逾期率' then 逾期余额 / 放款金额 when '递延逾期率' then 逾期余额 / 出账金额 when '出账订单' then 出账订单 when '出账金额' then 出账金额 when '逾期余额' then 逾期余额 end, NULL)) TERM12
+FROM (
+    SELECT 放款月份, 还款期数
+         , SUM(IF(观察点逾期 = 1, 观察点余额, 0)) 逾期余额
+         , SUM(IF(观察日期 <= current_date(), 放款金额, 0)) 出账金额
+         , COUNT(distinct IF(观察日期 <= current_date(), 放款编号, null)) 出账订单
+         -- , SUM(IF(观察点逾期 = 1, 观察点余额, 0)) / SUM(放款金额) 逾期率
+         -- , SUM(IF(观察点逾期 = 1, 观察点余额, 0)) / SUM(IF(观察日期 <= current_date(), 放款金额, 0)) 递延逾期率
+    FROM (
+        SELECT p1.放款编号, p1.放款月份, p1.放款金额, p1.还款期数, p1.观察日期
+             , p1.放款金额 - sum(if(p2.实还日期 <= p1.观察日期, p2.实还本金, 0)) 观察点余额
+             , if(p1.观察日期 < current_date() AND (p1.实还日期 IS NULL OR p1.实还日期 > p1.观察日期), 1, 0) 观察点逾期
+        FROM replan p1
+        LEFT JOIN replan p2 ON p1.放款编号 = p2.放款编号
+        GROUP BY p1.放款编号, p1.放款月份, p1.放款金额, p1.还款期数, p1.观察日期, p1.实还日期
+        ORDER BY 放款编号, 还款期数
+    ) balance
+    GROUP BY 放款月份, 还款期数
+) vintage
+LEFT JOIN amount a ON vintage.放款月份 = a.放款月份
+GROUP BY vintage.放款月份
\ No newline at end of file
diff --git a/examples/month_end_vintage.sql b/examples/month_end_vintage.sql
new file mode 100644
index 0000000..9013242
--- /dev/null
+++ b/examples/month_end_vintage.sql
@@ -0,0 +1,98 @@
+# MONTH END VINTAGE DPD
+WITH RECURSIVE months_end(观察时点) AS (
+    # observation points: first day of every month from 2023-12-01 up to the month after the current one
+    SELECT date_add(last_day(current_date()), INTERVAL 1 DAY)
+    UNION
+    SELECT date_sub(观察时点, INTERVAL 1 MONTH)
+    FROM months_end
+    WHERE 观察时点 > date '2023-12-01' # start of the statistics window
+)
+, loan AS (
+    # loans included in the calculation
+    SELECT id, user_id, flow_channel, bank_channel, apply_time, loan_time, periods, amount
+    FROM database_name.loan l
+    WHERE l.loan_state = 'SUCCEED'
+)
+, replan as (
+    SELECT l.flow_channel 流量渠道
+         , l.bank_channel 放款资方
+         , l.id 放款编号
+         , l.user_id 用户编号
+         , l.apply_time 申请时间
+         , l.loan_time 放款时间
+         , date_format(l.loan_time, '%Y-%m') 放款月份
+         , l.periods 放款期数
+         , p.period 还款期数
+         , l.amount 放款金额
+         , p.plan_repay_date 应还日期
+         , date(p.act_repay_time) 实还日期
+         , p.principal_amt 应还本金
+         , p.act_principal_amt 实还本金
+    FROM loan l INNER JOIN database_name.repay_plan p ON l.id = p.loan_id
+    ORDER BY 放款编号, 还款期数
+)
+, amount AS (
+    SELECT date_format(loan_time, '%Y-%m') 放款月份, count(id) 放款件数, count(DISTINCT user_id) 放款人数, sum(amount) 放款金额
+    FROM loan
+    GROUP BY date_format(loan_time, '%Y-%m')
+)
+, balance AS (
+    # per observation point and loan: remaining principal, months on book (MOB) and days overdue
+    SELECT 观察时点, 放款月份, 用户编号, 放款编号
+         , 放款金额 - sum(IF(实还日期 IS NOT NULL
+                            AND 实还日期 < 观察时点, 实还本金, 0)) 余额
+         , concat('MOB', TIMESTAMPDIFF(month, concat(substr(放款时间, 1, 7), '-01'), concat(substr(date_sub(观察时点, INTERVAL 1 DAY), 1, 7), '-01'))) MOB
+         , ifnull(
+             max(
+                 CASE
+                     WHEN 应还日期 >= IF(观察时点 > current_date(), current_date(), 观察时点) THEN 0 # not due yet; a bill due on the observation day is not counted
+                     WHEN 应还日期 <= IF(观察时点 > current_date(), current_date(), 观察时点) AND 实还日期 <= 应还日期 THEN 0 # repaid on time
+                     -- CURR definition: days overdue of bills due before the observation point and still unpaid at it
+                     WHEN 应还日期 < IF(观察时点 > current_date(), current_date(), 观察时点) AND 实还日期 < IF(观察时点 > current_date(), current_date(), 观察时点) THEN 0 # settled before the observation point: overdue but already repaid is not counted
+                     WHEN 应还日期 < IF(观察时点 > current_date(), current_date(), 观察时点) AND (实还日期 IS NULL
+                         OR 实还日期 >= IF(观察时点 > current_date(), current_date(), 观察时点)) THEN DATEDIFF(IF(观察时点 > current_date(), current_date(), 观察时点), 应还日期) # still unpaid at the observation point
+                     ELSE 0
+                 END
+             ), 0
+         ) 逾期天数
+    FROM replan
+    LEFT JOIN months_end ON replan.放款时间 < months_end.观察时点
+    GROUP BY 观察时点, 放款月份, 用户编号, 放款编号, 放款时间, 放款金额
+    ORDER BY 逾期天数 DESC
+)
+SELECT 放款月份, 放款金额
+     , sum(CASE WHEN MOB = 'MOB1' THEN 逾期率 END) MOB1
+     , sum(CASE WHEN MOB = 'MOB2' THEN 逾期率 END) MOB2
+     , sum(CASE WHEN MOB = 'MOB3' THEN 逾期率 END) MOB3
+     , sum(CASE WHEN MOB = 'MOB4' THEN 逾期率 END) MOB4
+     , sum(CASE WHEN MOB = 'MOB5' THEN 逾期率 END) MOB5
+     , sum(CASE WHEN MOB = 'MOB6' THEN 逾期率 END) MOB6
+     , sum(CASE WHEN MOB = 'MOB7' THEN 逾期率 END) MOB7
+     , sum(CASE WHEN MOB = 'MOB8' THEN 逾期率 END) MOB8
+     , sum(CASE WHEN MOB = 'MOB9' THEN 逾期率 END) MOB9
+     , sum(CASE WHEN MOB = 'MOB10' THEN 逾期率 END) MOB10
+     , sum(CASE WHEN MOB = 'MOB11' THEN 逾期率 END) MOB11
+     , sum(CASE WHEN MOB = 'MOB12' THEN 逾期率 END) MOB12
+     , sum(CASE WHEN MOB = 'MOB13' THEN 逾期率 END) MOB13
+     , sum(CASE WHEN MOB = 'MOB14' THEN 逾期率 END) MOB14
+     , sum(CASE WHEN MOB = 'MOB15' THEN 逾期率 END) MOB15
+--      , sum(CASE WHEN MOB = 'MOB16' THEN 逾期率 END) MOB16
+--      , sum(CASE WHEN MOB = 'MOB17' THEN 逾期率 END) MOB17
+--      , sum(CASE WHEN MOB = 'MOB18' THEN 逾期率 END) MOB18
+--      , sum(CASE WHEN MOB = 'MOB19' THEN 逾期率 END) MOB19
+--      , sum(CASE WHEN MOB = 'MOB20' THEN 逾期率 END) MOB20
+--      , sum(CASE WHEN MOB = 'MOB21' THEN 逾期率 END) MOB21
+--      , sum(CASE WHEN MOB = 'MOB22' THEN 逾期率 END) MOB22
+--      , sum(CASE WHEN MOB = 'MOB23' THEN 逾期率 END) MOB23
+--      , sum(CASE WHEN MOB = 'MOB24' THEN 逾期率 END) MOB24
+--      , sum(CASE WHEN MOB = 'MOB25' THEN 逾期率 END) MOB25
+FROM (
+    SELECT a.放款月份, a.放款金额, t.MOB, t.逾期余额, ifnull(t.逾期余额,0) / a.放款金额 逾期率
+    FROM (
+        SELECT 放款月份, MOB, sum(CASE WHEN 逾期天数 > 30 THEN 余额 END) 逾期余额 # change the 30 here to use another days-overdue threshold
+        FROM balance
+        GROUP BY 放款月份, MOB
+    ) t LEFT JOIN amount a ON t.放款月份 = a.放款月份
+) v
+GROUP BY 放款月份, 放款金额
+ORDER BY 放款月份
\ No newline at end of file
diff --git a/requirements.txt
b/requirements.txt index 900f245..39edd04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ seaborn>=0.10.0 scipy>=1.6.0 statsmodels<0.14,>=0.13.2 scikit-learn -toad +toad<=0.1.2 scorecardpy ortools>=9.5.0,<9.8.0 ropwr>=0.4.0 @@ -20,3 +20,4 @@ joblib>=0.12 six>=1.15.0 openpyxl==3.0.7 sweetviz +numexpr diff --git a/scorecardpipeline/__init__.py b/scorecardpipeline/__init__.py index 837dfff..a017449 100644 --- a/scorecardpipeline/__init__.py +++ b/scorecardpipeline/__init__.py @@ -11,21 +11,19 @@ from .logger import init_logger from .utils import * -from .processing import FeatureSelection, FeatureImportanceSelector, StepwiseSelection, Combiner, WOETransformer +from .processing import FeatureSelection, FeatureImportanceSelector, StepwiseSelection, Combiner, WOETransformer, feature_bin_stats from .model import ITLubberLogisticRegression, ScoreCard from .excel_writer import ExcelWriter, dataframe2excel from .auto_eda import auto_eda_sweetviz from .auto_report import auto_data_testing_report - - -feature_bin_stats = Combiner.feature_bin_stats +from .rule import Rule __version__ = "0.1.30" __all__ = ( "__version__" , "FeatureSelection", "FeatureImportanceSelector", "StepwiseSelection", "Combiner", "WOETransformer" - , "ITLubberLogisticRegression", "ScoreCard" + , "ITLubberLogisticRegression", "ScoreCard", "Rule" , "Pipeline", "KS", "AUC", "PSI", "F1", "FeatureUnion", "make_pipeline", "make_union" , "init_logger", "init_setting", "load_pickle", "save_pickle", "germancredit" , "ColorScaleRule", "get_column_letter", "column_index_from_string", "seed_everything" diff --git a/scorecardpipeline/processing.py b/scorecardpipeline/processing.py index 933fd44..55fa71f 100644 --- a/scorecardpipeline/processing.py +++ b/scorecardpipeline/processing.py @@ -803,6 +803,9 @@ def __iter__(self): return iter(self.combiner._rules) +feature_bin_stats = Combiner.feature_bin_stats + + class WOETransformer(TransformerMixin, BaseEstimator): def __init__(self, target="target", 
exclude=None):
diff --git a/scorecardpipeline/rule.py b/scorecardpipeline/rule.py
new file mode 100644
index 0000000..7c7a5a7
--- /dev/null
+++ b/scorecardpipeline/rule.py
@@ -0,0 +1,313 @@
+# -*- coding: utf-8 -*-
+"""
+@Time : 2024/2/26 12:00
+@Author : itlubber
+@Site : itlubber.art
+"""
+import numpy as np
+import numexpr as ne
+from enum import Enum
+
+import pandas as pd
+from pandas import DataFrame
+from sklearn.utils import check_array
+from sklearn.metrics import f1_score, recall_score, accuracy_score, precision_score
+
+from .processing import feature_bin_stats
+
+
+def _get_context(X, feature_names):
+    """Map each feature name to the matching column of the 2-D array ``X``."""
+    return {name: X[:, i] for i, name in enumerate(feature_names)}
+
+
+def _apply_expr_on_array(expr, X, feature_names):
+    """Evaluate ``expr`` with numexpr, binding each feature name to its column of ``X``."""
+    ctx = _get_context(X, feature_names)
+    return ne.evaluate(expr, local_dict=ctx)
+
+
+class RuleState(str, Enum):
+    """State of a rule: just created, or evaluated through ``Rule.predict``."""
+    INITIALIZED = "initialized"
+    APPLIED = "applied"
+
+
+class RuleStateError(RuntimeError):
+    """Raised when rules in different states are compared or combined."""
+    pass
+
+
+class RuleUnAppliedError(RuleStateError):
+    """Raised when the result of a rule is requested before ``predict`` has run."""
+    pass
+
+
+# Operator names allowed in dict rules and the symbol each one becomes in the expression.
+op_dict = {"GT": ">", "LT": "<", "EQ": "==", "ADD": "+", "GE": ">=", "LE": "<=", "SUBTRACT": "-", "MULTIPLY": "*", "DIVIDE": "/", "OR": "|", "AND": "&"}
+# Converters for literal values: int and float both become float (str is noted as unsupported for now).
+value_type_dict = {"int": float, "float": float, "string": str, "bool": bool}
+# Parts of a dict rule that predict() can evaluate: if_part, then_part, else_part.
+part_dict = ["if", "then", "else"]
+
+
+def json2expr(data, max_index, feature_list):
+    """Translate one node of a dict (JSON) rule into an expression string or a literal value.
+
+    :param data: rule node, either an operator node (``operator`` + ``params``) or a leaf (``value`` + ``value_type``)
+    :param max_index: number of columns in the data, exclusive upper bound of a ``FEATURE_INDEX`` index
+    :param feature_list: column names of the data
+    :return: a feature name or parenthesised expression (str) for operator nodes, the converted value for
+        leaves, ``None`` when the node has neither ``operator`` nor ``value``
+    :raises ValueError: index out of range, feature not in ``feature_list``, or unknown ``value_type``
+    :raises TypeError: operator is neither ``FEATURE_INDEX`` nor a key of ``op_dict``
+    """
+    if "operator" in data:
+        op = data.get("operator")
+        params = data.get("params")
+        if op == "FEATURE_INDEX":  # a single feature: validate its index and its name
+            feature = params[0].get("feature")
+            if params[0].get("index") >= max_index:  # index in the rule is beyond the last column
+                raise ValueError("index error")
+            if feature not in feature_list:  # feature is not a column of the data
+                raise ValueError("{} do not belong to the data ".format(feature))
+            return feature
+        elif op in op_dict:  # binary operator: recurse into both operands
+            value_list = [json2expr(params[0], max_index, feature_list), json2expr(params[1], max_index, feature_list)]
+            return "(" + str(value_list[0]) + op_dict[op] + str(value_list[1]) + ")"
+        else:  # operator is not supported
+            raise TypeError("The operator: {} is invalid".format(op))
+
+    if "value" in data:
+        value_type = data.get("value_type")
+        value = data["value"]
+        # convert the literal; value types missing from value_type_dict are rejected
+        if not value_type_dict.get(value_type):
+            raise ValueError("Data type error!")
+        return value_type_dict.get(value_type)(value)
+
+
+class Rule:
+    """A rule given as a numexpr expression string or as a dict with ``if`` / ``then`` / ``else`` parts."""
+
+    def __init__(self, expr):
+        """
+        :param expr: the rule, either an expression string or a dict
+        """
+        self._state = RuleState.INITIALIZED
+        self.expr = expr
+
+    def __str__(self):
+        return f"Rule({repr(self.expr)})"
+
+    def __repr__(self):
+        return f"Rule({repr(self.expr)})"
+
+    def predict(self, X: DataFrame, part=""):
+        """Evaluate the rule on ``X``, keep the outcome in ``result_`` and mark the rule as applied.
+
+        :param X: data to evaluate; its column names are the variables available to the rule
+        :param part: ``"if"``, ``"then"`` or ``"else"`` for a dict rule, must stay ``""`` for a string rule
+        :return: numexpr output for a string rule, a list with one item per row for a dict rule, an empty
+            list when the requested part of a dict rule is missing or empty
+        :raises ValueError: ``X`` is not a DataFrame
+        :raises TypeError: ``part`` does not fit the rule, or ``expr`` is neither a dict nor a str
+        """
+        if not isinstance(X, DataFrame):
+            raise ValueError("Rule can only predict on DataFrame.")
+        feature_names = X.columns.values.tolist()
+        X = check_array(X, dtype=None, ensure_2d=True, force_all_finite="allow-nan")
+
+        if isinstance(self.expr, dict):
+            if part not in part_dict:
+                raise TypeError("Part : {} not in ['if','then','else']".format(part))
+            if not self.expr.get(part):  # missing or empty part: nothing to evaluate
+                return list()
+            dict2expr = json2expr(self.expr[part], X.shape[1], feature_names)
+            if not isinstance(dict2expr, str):  # literal value, already converted: repeat it for every row
+                result = [dict2expr] * len(X)
+            else:  # expression: evaluate it on the data
+                result = _apply_expr_on_array(dict2expr, X, feature_names)
+                result = result.tolist()
+                if not isinstance(result, list):  # a single value came back: repeat it for every row
+                    result = [result] * len(X)
+        elif isinstance(self.expr, str):
+            if part != "":
+                raise TypeError('The part of the expression must be ""')
+            result = _apply_expr_on_array(self.expr, X, feature_names)
+        else:
+            raise TypeError("Rule currently only supports dict and expression")
+        self.result_ = result
+        self._state = RuleState.APPLIED  # result() only succeeds once the rule is marked applied
+
+        return result
+
+    def report(self, datasets, target="target", overdue="overdue", dpd=-1, del_grey=False, valid=None, desc="", return_cols=None, prior_rules=None):
+        """Build the statistics table of the rule ("命中" = hit, "未命中" = not hit) with classification metrics.
+
+        :param datasets: DataFrame with the variables of the rule and the ``target`` or ``overdue`` column
+        :param target: label column; created as ``overdue > dpd`` when absent, ``overdue`` present and ``dpd >= 0``
+        :param overdue: overdue column used to create the label and to drop grey samples
+        :param dpd: threshold applied to ``overdue``
+        :param del_grey: when ``True``, drop the samples with ``0 < overdue <= dpd``
+        :param valid: only passed on to ``prior_rules.report``
+        :param desc: passed to ``feature_bin_stats``; when empty the "指标含义" column is left out
+        :param return_cols: columns passed to ``feature_bin_stats``, a default list when ``None``
+        :param prior_rules: rule applied first; this rule is then evaluated on the samples it does not hit
+        :return: table of ``feature_bin_stats`` joined with the metrics, after the rows of ``prior_rules`` if given
+        """
+        if return_cols is None:
+            return_cols = ['指标名称', "指标含义", '分箱', '样本总数', '样本占比', '好样本数', '好样本占比', '坏样本数', '坏样本占比', '坏样本率', 'LIFT值', '分档KS值']
+        else:
+            return_cols = list(return_cols)  # work on a copy, the caller's list must not be modified
+        if (desc is None or desc == "") and "指标含义" in return_cols:
+            return_cols.remove("指标含义")
+
+        datasets = datasets.copy()
+        if target not in datasets.columns and overdue in datasets.columns and dpd >= 0:
+            datasets[target] = (datasets[overdue] > dpd).astype(int)
+
+        if isinstance(del_grey, bool) and del_grey:
+            datasets = datasets[(datasets[overdue] == 0) | (datasets[overdue] > dpd)].reset_index(drop=True)
+
+        rule_expr = self.expr
+
+        if prior_rules:
+            prior_tables = prior_rules.report(datasets, target=target, overdue=overdue, dpd=dpd, del_grey=del_grey, valid=valid, desc=desc, return_cols=return_cols, prior_rules=None)
+            samples = datasets[~prior_rules.predict(datasets)]  # samples the prior rule does not hit
+        else:
+            samples = datasets
+        rule_result = pd.DataFrame({rule_expr: np.where(self.predict(samples), "命中", "未命中"), "target": samples[target].tolist()})
+
+        table = feature_bin_stats(rule_result, rule_expr, rules=[["命中"], ["未命中"]], desc=desc, return_cols=return_cols)
+
+        # accuracy, precision, recall and F1: first row treats "命中" as the positive prediction, second row "未命中"
+        y_true = rule_result["target"]
+        hit = rule_result[rule_expr].map({"命中": 1, "未命中": 0})
+        miss = rule_result[rule_expr].map({"命中": 0, "未命中": 1})
+        metrics = pd.DataFrame({
+            "分箱": ["命中", "未命中"],
+            "准确率": [accuracy_score(y_true, hit), accuracy_score(y_true, miss)],
+            "精确率": [precision_score(y_true, hit), precision_score(y_true, miss)],
+            "召回率": [recall_score(y_true, hit), recall_score(y_true, miss)],
+            "F1分数": [f1_score(y_true, hit), f1_score(y_true, miss)],
+        })
+        table = table.merge(metrics, on="分箱", how="left")
+
+        if prior_rules:
+            prior_tables.insert(loc=0, column="规则分类", value=["先验规则"] * len(prior_tables))
+            table.insert(loc=0, column="规则分类", value=["验证规则"] * len(table))
+            table = pd.concat([prior_tables, table])
+
+        # Gain evaluation after the rule goes live (not implemented yet):
+        #   change of the approval rate: how many customers get rejected
+        #   change of the bad rate: share of bad customers rejected and the bad rate left afterwards
+
+        return table
+
+    def result(self):
+        """Return the outcome stored by the last ``predict`` call.
+
+        :raises RuleUnAppliedError: the rule has not been applied yet
+        """
+        if self._state != RuleState.APPLIED:
+            raise RuleUnAppliedError("Invoke `predict` to make a rule applied.")
+        return self.result_
+
+    def _check_same_state(self, other):
+        """Raise unless ``other`` is a Rule in the same state as this rule."""
+        if not isinstance(other, Rule):
+            raise TypeError(f"Input should be of type Rule, got {type(other)} instead.")
+        if self._state != other._state:
+            raise RuleStateError("Input rule should be of the same state.")
+
+    def __eq__(self, other):
+        """Rules are equal when their expressions match and, once applied, their results match too."""
+        self._check_same_state(other)
+        res = self.expr == other.expr
+        if self._state == RuleState.INITIALIZED:
+            return res
+        return res and np.all(self.result() == other.result())
+
+    def _merge_part(self, other, part, operator):
+        """Join the ``part`` node of two dict rules under ``operator`` ("OR" or "AND").
+
+        :return: ``{}`` when neither rule defines the part, the defined node when only one rule does,
+            otherwise a bool operator node holding both
+        :raises TypeError: both nodes exist but their ``value_type`` differs or is not ``"bool"``
+        """
+        mine, theirs = self.expr.get(part), other.expr.get(part)
+        if not mine and not theirs:
+            return {}
+        if not mine:
+            return theirs
+        if not theirs:
+            return mine
+        # the checks read the part being merged, so an "else" merge never looks at the "then" node
+        if mine.get("value_type") != theirs.get("value_type"):
+            raise TypeError("两个规则{}_part类型要一致".format(part))
+        if mine.get("value_type") != "bool":
+            raise TypeError("两个规则之间{}运算, 类型需要设置为bool类型".format(operator.lower()))
+        return {"value_type": "bool", "operator": operator, "params": [mine, theirs]}
+
+    def _combine(self, other, symbol, operator, joiner, logical):
+        """Shared implementation of ``|`` and ``&``.
+
+        :param other: rule to combine with, must be in the same state
+        :param symbol: symbol placed between the two expression strings
+        :param operator: operator name used in the nodes of dict rules
+        :param joiner: text placed between the two descriptions of dict rules
+        :param logical: numpy function combining the results of two applied string rules
+        :return: new ``Rule``; applied only when both inputs are applied string rules
+        """
+        self._check_same_state(other)
+        if isinstance(self.expr, str):
+            r = Rule(f"({self.expr}) {symbol} ({other.expr})")
+            if self._state == RuleState.INITIALIZED:
+                return r
+            r.result_ = logical(self.result(), other.result())
+            r._state = RuleState.APPLIED
+            return r
+        if isinstance(self.expr, dict):
+            # build a new dict rule; neither operand is modified
+            return Rule({
+                "name": str(self.expr.get("name")) + str(other.expr.get("name")),
+                "description": str(self.expr.get("description")) + joiner + str(other.expr.get("description")),
+                "output": self.expr.get("output"),
+                "if": {"value_type": "bool", "operator": operator, "params": [self.expr.get("if"), other.expr.get("if")]},
+                "then": self._merge_part(other, "then", operator),
+                "else": self._merge_part(other, "else", operator),
+            })
+        raise TypeError("Rule currently only supports dict and expression")
+
+    # rule combinations
+    def __or__(self, other):
+        """Combine two rules with OR (``self | other``)."""
+        return self._combine(other, "|", "OR", " || ", np.logical_or)
+
+    def __and__(self, other):
+        """Combine two rules with AND (``self & other``)."""
+        return self._combine(other, "&", "AND", " && ", np.logical_and)
+
+    def __xor__(self, other):
+        """Combine two rules with XOR (``self ^ other``); the new rule is built as an expression string."""
+        self._check_same_state(other)
+        r = Rule(f"({self.expr}) ^ ({other.expr})")
+        if self._state == RuleState.INITIALIZED:
+            return r
+        r.result_ = np.logical_xor(self.result(), other.result())
+        r._state = RuleState.APPLIED
+        return r
+
+    def __mul__(self, other):
+        """``self * other`` is an alias of ``self | other``."""
+        return self.__or__(other)
+
+    def __invert__(self):
+        """Negate the rule (``~self``); the new rule is built as an expression string."""
+        r = Rule(f"~({self.expr})")
+        if self._state == RuleState.INITIALIZED:
+            return r
+        r.result_ = np.logical_not(self.result())
+        r._state = RuleState.APPLIED
+        return r