python做逐步回归的报错

def forward_selected(data, response):
    """
    Linear model designed by forward selection.
    Parameters:
    -----------
    data : pandas DataFrame with all possible predictors and response
    response: string, name of response column in data
    Returns:
    --------
    model: an "optimal" fitted statsmodels linear model
           with an intercept
           selected by forward selection
           evaluated by adjusted R-squared
    """
    remaining = set(data.columns)
    remaining.remove(response)
    selected = []
    current_score, best_new_score = 0.0, 0.0
    while remaining and current_score == best_new_score:
        scores_with_candidates = []
        for candidate in remaining:
            formula = "{} ~ {} + 1".format(response,
                                           '+'.join(selected + [candidate]))
            score = smf.ols(formula, data).fit().rsquared_adj
            scores_with_candidates.append((score, candidate))
        scores_with_candidates.sort()
        best_new_score, best_candidate = scores_with_candidates.pop()
        if current_score < best_new_score:
            remaining.remove(best_candidate)
            selected.append(best_candidate)
            current_score = best_new_score
    formula = "{} ~ {} + 1".format(response,
                                   "+".join(selected))
    model = smf.ols(formula, data).fit()
 
    return model

model = forward_selected(data = predictors, response = "price")

Traceback (most recent call last):

  File "D:\new_program\envs\r\lib\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)

  File "<ipython-input-50-4438f53e897c>", line 1, in <module>
    model = forward_selected(data = predictors, response = "price")

  File "<ipython-input-49-f4a8e8b1eb28>", line 24, in forward_selected
    score = smf.ols(formula, data).fit().rsquared_adj

  File "D:\new_program\envs\r\lib\site-packages\statsmodels\base\model.py", line 169, in from_formula
    tmp = handle_formula_data(data, None, formula, depth=eval_env,

  File "D:\new_program\envs\r\lib\site-packages\statsmodels\formula\formulatools.py", line 63, in handle_formula_data
    result = dmatrices(formula, Y, depth, return_type='dataframe',

  File "D:\new_program\envs\r\lib\site-packages\patsy\highlevel.py", line 309, in dmatrices
    (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,

  File "D:\new_program\envs\r\lib\site-packages\patsy\highlevel.py", line 164, in _do_highlevel_design
    design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,

  File "D:\new_program\envs\r\lib\site-packages\patsy\highlevel.py", line 66, in _try_incr_builders
    return design_matrix_builders([formula_like.lhs_termlist,

  File "D:\new_program\envs\r\lib\site-packages\patsy\build.py", line 689, in design_matrix_builders
    factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)

  File "D:\new_program\envs\r\lib\site-packages\patsy\build.py", line 354, in _factors_memorize
    which_pass = factor.memorize_passes_needed(state, eval_env)

  File "D:\new_program\envs\r\lib\site-packages\patsy\eval.py", line 474, in memorize_passes_needed
    subset_names = [name for name in ast_names(self.code)

  File "D:\new_program\envs\r\lib\site-packages\patsy\eval.py", line 474, in <listcomp>
    subset_names = [name for name in ast_names(self.code)

  File "D:\new_program\envs\r\lib\site-packages\patsy\eval.py", line 105, in ast_names
    for node in ast.walk(ast.parse(code)):

  File "D:\new_program\envs\r\lib\ast.py", line 47, in parse
    return compile(source, filename, mode, flags,

  File "<unknown>", line 1
    room_type_Private room
                      ^
SyntaxError: invalid syntax

做逐步回归的时候,在运行模型的时候,出现报错,说房屋类型有问题,该如何解决呢?

错误是 forward_selected 函数内部传递给 smf.ols 函数的公式字符串出现了语法错误。具体来说,似乎 predictors 数据框中的某个预测变量名称含有空格或其他特殊字符,这些字符不能在公式字符串中使用。

要修复错误,您可以尝试更改 predictors 数据框中的列名,以删除任何特殊字符或空格。或者,您可以在公式字符串中用反引号(`)将有问题的变量名括起来,以确保它们被正确解释。例如,如果有问题的变量名为“Private room”,您可以修改公式字符串如下:

formula = "{} ~ {} + 1".format(response, '+'.join(selected + ['Private room']))