UnicodeDecodeError: 'gbk' codec can't decode byte 0xa1 in position 351: illegal multibyte sequence
KeyError: 'std'
The above exception was the direct cause of the following exception:
from sqlalchemy import create_engine
import pandas as pd
# Load the three source tables: order details from MySQL, order info from a
# CSV file, and customer info from an Excel workbook.
# NOTE(review): the connection string embeds credentials in source; consider
# moving them to configuration.
engine = create_engine('mysql+pymysql://root:1234@127.0.0.1:3306/word?charset=utf8')
detail = pd.read_sql_table('meal_order_detail1', con=engine)

# Hard-coding encoding='gbk' fails with UnicodeDecodeError when the CSV was
# saved as UTF-8. Strict UTF-8 decoding rejects GBK bytes, so try UTF-8 first
# ('utf-8-sig' also swallows an optional BOM) and fall back to GBK only when
# that fails.
order_path = 'e:/python file/meal_order_info.csv'
try:
    order = pd.read_csv(order_path, sep=',', encoding='utf-8-sig')
except UnicodeDecodeError:
    order = pd.read_csv(order_path, sep=',', encoding='gbk')
user = pd.read_excel('e:/python file/users.xlsx')

# Number of dimensions of each table.
print('订单详情表的维度为:', detail.ndim)
print('订单信息表的维度为:', order.ndim)
# Fixed: this line previously printed order.ndim under the customer label.
print('客户信息表的维度为:', user.ndim)

# Shape (rows, columns) of each table.
print('订单详情表的维度为:', detail.shape)
print('订单信息表的维度为:', order.shape)
print('客户信息表的维度为:', user.shape)

# Total number of cells in each table.
print('订单详情表的元素个数为:', detail.size)
print('订单信息表的元素个数为:', order.size)
print('客户信息表的元素个数为:', user.size)

# Numeric summary of the two quantity/amount columns.
print('订单详情表counts和amounts两列的描述性统计为:\n',
      detail.loc[:, ['counts', 'amounts']].describe())

# Treat the identifier and dish-name columns as categorical so describe()
# reports count/unique/top/freq for them instead of numeric statistics.
detail['order_id'] = detail['order_id'].astype('category')
detail['dishes_name'] = detail['dishes_name'].astype('category')
# Fixed label: the described frame is the order-detail table, not order info.
print('''订单详情表order_id(订单编号)与dishes_name(菜品名称)的描述性统计结果为:''', '\n',
      detail[['order_id', 'dishes_name']].describe())
def dropNullStd(data):
    """Drop uninformative columns from *data* in place and report the result.

    Two kinds of columns are removed:

    1. columns that contain no non-null value at all (any dtype);
    2. numeric columns whose standard deviation is exactly 0, i.e. every
       non-null value is the same.

    Non-numeric columns are never tested for zero deviation, so a frame
    without numeric columns no longer raises ``KeyError: 'std'``. Columns are
    selected by label rather than by position, so integer column labels are
    handled correctly.

    Args:
        data: pandas DataFrame to clean. It is modified in place.

    Returns:
        None. The number of removed columns and the resulting shape are
        printed.
    """
    beforelen = data.shape[1]

    # A column whose non-null count is zero carries no information.
    non_null = data.count()
    data.drop(columns=non_null[non_null == 0].index, inplace=True)

    # Standard deviation is only defined for numeric columns; restricting the
    # computation avoids depending on which rows describe() happens to emit.
    # NaN deviations (e.g. a single-row frame) compare unequal to 0 and are
    # therefore kept.
    std = data.select_dtypes(include='number').std()
    data.drop(columns=std[std == 0].index, inplace=True)

    afterlen = data.shape[1]
    print('去除的列的数目为:', beforelen - afterlen)
    print('去除后数据的形状为:', data.shape)
# Clean the three loaded tables in place, in the same order as before:
# order details, order info, then customer info.
for _frame in (detail, order, user):
    dropNullStd(_frame)
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_17708/3380695327.py in <module>
3 engine = create_engine('mysql+pymysql://root:1234@127.0.0.1:3306/word?charset=utf8')
4 detail = pd.read_sql_table('meal_order_detail1',con=engine)
----> 5 order = pd.read_table('e:/python file/meal_order_info.csv',sep=',',encoding='gbk')
6 user = pd.read_excel('e:/python file/users.xlsx')
7 print('订单详情表的维度为:',detail.ndim)
D:\anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
D:\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in read_table(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, encoding_errors, delim_whitespace, low_memory, memory_map, float_precision)
681 kwds.update(kwds_defaults)
682
--> 683 return _read(filepath_or_buffer, kwds)
684
685
D:\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _read(filepath_or_buffer, kwds)
480
481 # Create the parser.
--> 482 parser = TextFileReader(filepath_or_buffer, **kwds)
483
484 if chunksize or iterator:
D:\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in __init__(self, f, engine, **kwds)
809 self.options["has_index_names"] = kwds["has_index_names"]
810
--> 811 self._engine = self._make_engine(self.engine)
812
813 def close(self):
D:\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _make_engine(self, engine)
1038 )
1039 # error: Too many arguments for "ParserBase"
-> 1040 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
1041
1042 def _failover_to_python(self):
D:\anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py in __init__(self, src, **kwds)
67 kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
68 try:
---> 69 self._reader = parsers.TextReader(self.handles.handle, **kwds)
70 except Exception:
71 self.handles.close()
D:\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
D:\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._get_header()
D:\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()
D:\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa1 in position 351: illegal multibyte sequence
可以先去掉 encoding='gbk' 这个参数(什么编码也不指定,使用 pandas 默认的 utf-8)再读取试试。