def forward_selected(data, response):
"""
Linear model designed by forward selection.
Parameters:
-----------
data : pandas DataFrame with all possible predictors and response
response: string, name of response column in data
Returns:
--------
model: an "optimal" fitted statsmodels linear model
with an intercept
selected by forward selection
evaluated by adjusted R-squared
"""
remaining = set(data.columns)
remaining.remove(response)
selected = []
current_score, best_new_score = 0.0, 0.0
while remaining and current_score == best_new_score:
scores_with_candidates = []
for candidate in remaining:
formula = "{} ~ {} + 1".format(response,
'+'.join(selected + [candidate]))
score = smf.ols(formula, data).fit().rsquared_adj
scores_with_candidates.append((score, candidate))
scores_with_candidates.sort()
best_new_score, best_candidate = scores_with_candidates.pop()
if current_score < best_new_score:
remaining.remove(best_candidate)
selected.append(best_candidate)
current_score = best_new_score
formula = "{} ~ {} + 1".format(response,
"+".join(selected))
model = smf.ols(formula, data).fit()
return model
model = forward_selected(data = predictors, response = "price")
Traceback (most recent call last):
File "D:\new_program\envs\r\lib\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-50-4438f53e897c>", line 1, in <module>
model = forward_selected(data = predictors, response = "price")
File "<ipython-input-49-f4a8e8b1eb28>", line 24, in forward_selected
score = smf.ols(formula, data).fit().rsquared_adj
File "D:\new_program\envs\r\lib\site-packages\statsmodels\base\model.py", line 169, in from_formula
tmp = handle_formula_data(data, None, formula, depth=eval_env,
File "D:\new_program\envs\r\lib\site-packages\statsmodels\formula\formulatools.py", line 63, in handle_formula_data
result = dmatrices(formula, Y, depth, return_type='dataframe',
File "D:\new_program\envs\r\lib\site-packages\patsy\highlevel.py", line 309, in dmatrices
(lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
File "D:\new_program\envs\r\lib\site-packages\patsy\highlevel.py", line 164, in _do_highlevel_design
design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
File "D:\new_program\envs\r\lib\site-packages\patsy\highlevel.py", line 66, in _try_incr_builders
return design_matrix_builders([formula_like.lhs_termlist,
File "D:\new_program\envs\r\lib\site-packages\patsy\build.py", line 689, in design_matrix_builders
factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
File "D:\new_program\envs\r\lib\site-packages\patsy\build.py", line 354, in _factors_memorize
which_pass = factor.memorize_passes_needed(state, eval_env)
File "D:\new_program\envs\r\lib\site-packages\patsy\eval.py", line 474, in memorize_passes_needed
subset_names = [name for name in ast_names(self.code)
File "D:\new_program\envs\r\lib\site-packages\patsy\eval.py", line 474, in <listcomp>
subset_names = [name for name in ast_names(self.code)
File "D:\new_program\envs\r\lib\site-packages\patsy\eval.py", line 105, in ast_names
for node in ast.walk(ast.parse(code)):
File "D:\new_program\envs\r\lib\ast.py", line 47, in parse
return compile(source, filename, mode, flags,
File "<unknown>", line 1
room_type_Private room
^
SyntaxError: invalid syntax
做逐步回归的时候,在运行模型的时候,出现报错,说房屋类型有问题,该如何解决呢?
错误是 forward_selected 函数内部传递给 smf.ols 函数的公式字符串出现了语法错误。具体来说,似乎 predictors 数据框中的某个预测变量名称含有空格或其他特殊字符,这些字符不能在公式字符串中使用。
要修复错误,您可以尝试更改 predictors 数据框中的列名,以删除任何特殊字符或空格。或者,您可以在公式字符串中用反引号(`)将有问题的变量名括起来,以确保它们被正确解释。例如,如果有问题的变量名为“Private room”,您可以修改公式字符串如下:
formula = "{} ~ {} + 1".format(response, '+'.join(selected + ['
Private room
']))