Using .loc inside custom transformer produces copy with slice error

Asked
Active3 hr before
Viewed126 times

6 Answers

errorusingtransformercustom
90%

Example_snippet/controller/utility/_error.js/ import numpy as np import pand. . .
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', None)
import matplotlib
matplotlib.rcParams['figure.figsize'] = (14, 7)
import matplotlib.pyplot as plt %
   matplotlib inline
import seaborn as sns
sns.set_style('darkgrid')
load more v
88%

This code in Pandas 20,3 throws SettingWithCopyWarning and suggests to,To my knowledge, there is no way to use the same code as above and force changes to propagate back to the original dataframe

Example_snippet/controller/utility/_error.js/ # My code df.loc[0, 'column_na. . .
# My code
df.loc[0, 'column_name'] = 'foo bar'
load more v
72%

Example_snippet/controller/utility/_error.js/ pd.read_csv(file_name, index_. . .
pd.read_csv(file_name, index_col = 0)
load more v
65%

Creating a Custom Transformer from scratch, to include in the Pipeline,,The code above creates data which follows the equation y = X1 + 2 * sqrt(X2)

Example_snippet/controller/utility/_error.js/ y = X1 + 2 * sqrt(X2). . .
y = X1 + 2 * sqrt(X2)
load more v
75%

本文地址:IT屋 » 在自定义转换器中使用 ,loc 会生成带有切片错误的副本,I am working on the home credit dataset on Kaggle and specifically on instalment_payment

Example_snippet/controller/utility/_error.js/ class Xfrmer_replace1(BaseEsti. . .
class Xfrmer_replace1(BaseEstimator, TransformerMixin): ""
这个转换器在数据帧内进行全局替换将本案例研究的 365243 scific 替换为 0 用零替换 + /-inf , nan""# 构造函数def __init__(self):#我们不会使用这个self._features = 无#返回自己def fit(self, X,y=None ):回归自我def 变换(self,X,y=None):#用零替换高值对于 X.columns 中的 col:X=X.replace([365243,365243.0],0)打印('替换值')#X=X.replace([np.inf,-np.inf],np.nan)#X=X.replace(np.nan,0)返回 XXfrmer_signchng1(BaseEstimator, TransformerMixin) 类:""这个变压器做正极到负极的变化""# 构造函数def __init__(self):#我们不会使用这个self.signchng_columns = 无#返回自己def fit(self,X,y=None):回归自我def 变换(self,X,y=None):#改变列的符号对于 X.columns 中的 col:print('符号改变')X[col]= [0 if val >= 0 else (val *-1) for val in X[col] ]返回 XXfrmer_dif_calc1(BaseEstimator, TransformerMixin) 类:""这个变压器做两列之间的区别i/p
是元组列表元组中的第二项与第一项相除元组中的第三项是这个新列的名称 ""
# 构造函数def __init__(self): #我们不会使用这个self.dif_columns = 无 #返回自己def fit(self, X, y = None): 回归自我def 变换(self, X, y = None): 打印('差异计算器') 打印('X列',
   X.columns) 打印(X[X.columns[0]]) 打印(X[X.columns[1]]) #print(X['AMT_PAYMENT']) #print(X['AMT_INSTALMENT']) #print(X[X.columns[0]] - X[X.columns[1]]) #it​​ er1.X.loc[: , 'AMT_PMT_DIF'] = X[X.columns[0]] - X[X.columns[1]] X['AMT_PMT_DIF'] = X[X.columns[0]] - X[X.columns[1]] 打印(X['AMT_PMT_DIF']) 返回 XXfrmer_rto_calc1(BaseEstimator, TransformerMixin) 类: ""
这个变压器计算两列之间的比率i / p 是元组列表元组中的第一项与第二项分开元组中的第三项是这个新列的名称 ""
# 构造函数def __init__(self): #我们不会使用这个self.rto_columns = 无 #返回自己def fit(self, X, y = None): 回归自我def 变换(self, X, y = None): 打印('比率计算器') #it​​ er1.X.loc[: , 'AMT_PMT_RTO'] = (X[X.columns[0]] / X[X.columns[1]]).clip(lower = 0) X['AMT_PMT_RTO'] = (X[X.columns[0]] / X[X.columns[1]]).clip(lower = 0) 返回 X
Step 2 continued with lst_all_cols = dtprcs.X_train.. . .
lst_all_cols = dtprcs.X_train.columns.values.tolist() lst_signchng_cols = ["DAYS_INSTALMENT", "DAYS_ENTRY_PAYMENT"] lst_imptr_cols = ['DAYS_ENTRY_PAYMENT', 'AMT_PAYMENT'] lst_diff_cols = ['AMT_PAYMENT', 'AMT_INSTALMENT'] lst_rto_cols = ['AMT_PAYMENT', 'AMT_INSTALMENT'] print('开始管道处理') # ""
"instpmt_preprcs_pipln = ColumnTransformer( 变压器 = [('instpmt_repl_pipln', Xfrmer_replace1(),lst_all_cols),('instpmt_sgnchng_pipln', Xfrmer_signchng1(),lst_signchng_cols),('instpmt_imptr_piplin',SimpleImputer(strategy = 'median'),lst_imptr_cols),('instpmt_dif_pipln',Xfrmer_dif_calc1(), lst_diff_cols),('instpmt_rto_pipln',Xfrmer_rto_calc1(),lst_rto_cols)],余数='直通')print('管道拟合开始...')instpmt_preprcs_pipln.fit(dtprcs.X_train, dtprcs.y_train)print('管道拟合在...')#print(dtprcs.X_train.shape,dtprcs.x_test.shape)#print(dtprcs.X_train.columns,dtprcs.x_test.columns)#可以像任何其他管道一样使用它进行预测print('管道转换x_test...')y_pred = instpmt_partial_piplin.transform(dtprcs.x_test)print('管道转换 x_test over...')打印(类型(dtprcs.X_train),类型(dtprcs.x_test),类型(dtprcs.y_train))打印(dtprcs.X_train.columns,dtprcs.x_test.columns)print('管道预处理pver.设置其他类...')
Step 3 continued with 读完apln训练/测试文件...分期付款_payments.. . .
读完apln训练/测试文件...分期付款_payments.csv主要名称 train installments_payments_train.csv主要名称 test installments_payments_test.csv训练测试文件准备好...完成编写训练/测试文件.退出函数(0).(16915, 8)(4574, 8)正在处理分期付款_payments.csv...开始流水线处理管道安装启动...替换值替换值替换值替换值替换值替换值替换值替换值标志变化标志变化差异计算器X 列索引(['AMT_PAYMENT', 'AMT_INSTALMENT'], dtype='object')0 6948.3602 6948.3603 1716.5254 1716.5255 3375.000...42390 12303.00042391 10299.96042392 10869.43542402 124.15542409 4198.950名称:AMT_PAYMENT,长度:16915,数据类型:float640 6948.3602 6948.3603 1716.5254 1716.5255 3375.000...42390 12303.00042391 10299.96042392 14958.13542402 124.15542409 4198.950名称:AMT_INSTALMENT,长度:16915,数据类型:float640 0.02 0.03 0.04 0.05 0.0...42390 0.042391 0.042392 -4088.742402 0.042409 0.0名称:AMT_PMT_DIF,长度:16915,数据类型:float64比率计算器管道安装在...管道转换 x_test...替换值替换值替换值替换值替换值替换值替换值替换值标志变化标志变化差异计算器比率计算器**管道将 x_test 转换为...**<class 'pandas.core.frame.DataFrame'>
   <class 'pandas.core.frame.DataFrame'>
      <class 'pandas.core.series.Series'>索引(['SK_ID_PREV', 'SK_ID_CURR', 'NUM_INSTALMENT_VERSION',"NUM_INSTALMENT_NUMBER"、"DAYS_INSTALMENT"、"DAYS_ENTRY_PAYMENT"、'AMT_INSTALMENT', 'AMT_PAYMENT'],dtype='object') 索引(['SK_ID_PREV', 'SK_ID_CURR', 'NUM_INSTALMENT_VERSION',"NUM_INSTALMENT_NUMBER"、"DAYS_INSTALMENT"、"DAYS_ENTRY_PAYMENT"、'AMT_INSTALMENT', 'AMT_PAYMENT'],dtype='对象')管道预处理pver.设置其他类...退出主函数...E:\anaconda\envs\appliedaicourse\lib\site-packages\ipykernel_launcher.py:187: SettingWithCopyWarning:试图在来自 DataFrame 的切片副本上设置值.尝试使用 .loc[row_indexer,col_indexer] = value 代替请参阅文档中的注意事项:http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copyE:\anaconda\envs\appliedaicourse\lib\site-packages\pandas\core\indexing.py:362: SettingWithCopyWarning:试图在来自 DataFrame 的切片副本上设置值.尝试使用 .loc[row_indexer,col_indexer] = value 代替请参阅文档中的注意事项:http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copyself.obj[key] = _infer_fill_value(value)E:\anaconda\envs\appliedaicourse\lib\site-packages\pandas\core\indexing.py:562: SettingWithCopyWarning:试图在来自 DataFrame 的切片副本上设置值.尝试使用 .loc[row_indexer,col_indexer] = value 代替请参阅文档中的注意事项:http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copyself.obj[item_labels[indexer[info_axis]]] = 值
Step 4 continued with from sklearn.base import Trans. . .
from sklearn.base
import TransformerMixin类 FeatureExtractor(TransformerMixin): def __init__(self, cols): self.cols = cols打印(self.cols) def fit(self, X, y = None): # 无状态转换器回归自我def变换(自我, X): # 假设 X 是 Pandas 数据框X_cols = X.loc[: , self.cols] 返回 X_cols
Step 5 continued with class SynopsisNumWords(Transfo. . .
class SynopsisNumWords(TransformerMixin): def __init__(self): 返回无 # self.text_array = text_arraydef fit(self, X, y = None, ** fit_params): 回归自我def 变换(self, X, y = None, ** fit_params): X = X.copy() # # 将系列重命名为与输入的列名不同返回 X.loc[: , 'Synopsis'].apply(lambda x: len(str(x).split())).rename('Synopsis_num_words').to_frame()
Step 6 continued with class DFFeatureUnion(Transform. . .
class DFFeatureUnion(TransformerMixin): # FeatureUnion 但对于 Pandas DataFramesdef __init__(self, transformer_list): self.transformer_list = transformer_listdef fit(self, X, y = None): for (name, t) 在 self.transformer_list 中: t.fit(X) 回归自我def变换(自我, X): # X 必须是数据帧Xts = [t.transform(X) for _, t in self.transformer_list] Xunion = reduce(lambda X1, X2: pd.merge(X1, X2, left_index = True, right_index = True), Xts) 回春联
Step 7 continued with from sklearn.pipeline import P. . .
from sklearn.pipeline
import Pipeline概要_feat_gen_pipeline = Pipeline(steps = [('engineer_data', DFFeatureUnion([('extract_all_columns', 流水线(步骤 = [('extract_all_features', FeatureExtractor(['概要', '标题', '作者', '版本', "评论""评级""类型""图书类别""价格"
]))], 详细 = 真)), ('generate_num_words_column', 流水线(步骤 = [('extract_Synopsis_feature', FeatureExtractor(['Synopsis'])), ('generate_num_words', SynopsisNumWords())], 详细 = 真)), ]))], 详细 = 真)
load more v
40%

Code quality: a concern for businesses, bottom lines, and empathetic programmers , Stack Overflow Public questions & answers ,Thanks for contributing an answer to Stack Overflow!,I am working on the home credit dataset on Kaggle and specifically on instalment_payment,csv

Example_snippet/controller/utility/_using.js/ from sklearn.base import Trans. . .
from sklearn.base
import TransformerMixin

class FeatureExtractor(TransformerMixin):
   def __init__(self, cols):
   self.cols = cols
print(self.cols)

def fit(self, X, y = None):
   # stateless transformer
return self

def transform(self, X):
   # assumes X is Pandas Dataframe
X_cols = X.loc[: , self.cols]
return X_cols
load more v