MLOps Toolkit In Pipeline


License
Apache-2.0
Install
pip install easymlops==0.1.3

Documentation

EasyMLOps

Introduction

The EasyMLOps package builds modeling tasks as pipelines. A pipeline covers model training, prediction (offline and online), testing (offline/online prediction consistency and prediction performance), feature storage, monitoring, and analysis, and can be deployed to production directly by wrapping it in a thin Flask or FastAPI layer. For more details, see Usage Doc 1 >> and Usage Doc 2 >>

Installation

pip install easymlops

Usage

import pandas as pd
data=pd.read_csv("./data/demo.csv")
print(data.head(5).to_markdown())
|    |   PassengerId |   Survived |   Pclass | Name                                                | Sex    |   Age |   SibSp |   Parch | Ticket           |    Fare | Cabin   | Embarked   |
|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|
|  0 |             1 |          0 |        3 | Braund, Mr. Owen Harris                             | male   |    22 |       1 |       0 | A/5 21171        |  7.25   | nan     | S          |
|  1 |             2 |          1 |        1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female |    38 |       1 |       0 | PC 17599         | 71.2833 | C85     | C          |
|  2 |             3 |          1 |        3 | Heikkinen, Miss. Laina                              | female |    26 |       0 |       0 | STON/O2. 3101282 |  7.925  | nan     | S          |
|  3 |             4 |          1 |        1 | Futrelle, Mrs. Jacques Heath (Lily May Peel)        | female |    35 |       1 |       0 | 113803           | 53.1    | C123    | S          |
|  4 |             5 |          0 |        3 | Allen, Mr. William Henry                            | male   |    35 |       0 |       0 | 373450           |  8.05   | nan     | S          |
# split into training and test sets
x_train=data[:500]
x_test=data[500:]
y_train=x_train["Survived"]
y_test=x_test["Survived"]
del x_train["Survived"]
del x_test["Survived"]

Tabular tasks

from easymlops import TablePipeLine
from easymlops.table.preprocessing import *
from easymlops.table.encoding import *
from easymlops.table.decomposition import *
from easymlops.table.classification import *
from easymlops.table.ensemble import *
table = TablePipeLine()
table.pipe(FixInput()) \
  .pipe(FillNa()) \
  .pipe(Parallel([OneHotEncoding(cols=["Pclass", "Sex"]), LabelEncoding(cols=["Sex", "Pclass"]),
                    TargetEncoding(cols=["Name", "Ticket", "Embarked", "Cabin", "Sex"], y=y_train)])) \
  .pipe(Parallel([PCADecomposition(n_components=2, prefix="pca"), NMFDecomposition(n_components=2, prefix="nmf")]))\
  .pipe(Parallel([LGBMClassification(y=y_train, prefix="lgbm"), LogisticRegressionClassification(y=y_train, prefix="lr")]))

table.fit(x_train)
<easymlops.table.core.pipeline_object.TablePipeLine at 0x213de1f9988>
print(table.transform(x_test).head(5).to_markdown())
|     |   lgbm_0 |    lgbm_1 |     lr_0 |     lr_1 |
|----:|---------:|----------:|---------:|---------:|
| 500 | 0.965218 | 0.0347825 | 0.651417 | 0.348583 |
| 501 | 0.98153  | 0.0184698 | 0.65506  | 0.34494  |
| 502 | 0.979139 | 0.0208607 | 0.647266 | 0.352734 |
| 503 | 0.808796 | 0.191204  | 0.656613 | 0.343387 |
| 504 | 0.184484 | 0.815516  | 0.449149 | 0.550851 |
table.transform_single(x_test.to_dict("records")[0])
{'lgbm_0': 0.9652175316373408,
 'lgbm_1': 0.03478246836265923,
 'lr_0': 0.6514166695229407,
 'lr_1': 0.3485833304770593}
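
The introduction lists offline/online prediction-consistency testing among the pipeline's features. EasyMLOps ships its own check for this (not shown here); the snippet below is only a minimal hand-rolled sketch using the transform and transform_single calls demonstrated above, with an illustrative tolerance of 1e-6:

# Hedged sketch: verify that batch (offline) and single-record (online) predictions agree.
import numpy as np

batch_pred = table.transform(x_test)             # offline: whole DataFrame at once
records = x_test.to_dict("records")              # online: one dict per simulated request
for (_, offline_row), record in zip(batch_pred.iterrows(), records):
    online_row = table.transform_single(record)  # dict keyed by the same output columns
    for col, offline_value in offline_row.items():
        assert np.isclose(offline_value, online_row[col], atol=1e-6), f"mismatch in {col}"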

Text feature extraction

from easymlops import NLPPipeline
from easymlops.nlp.preprocessing import *
from easymlops.nlp.representation import *
nlp = NLPPipeline()
nlp.pipe(FixInput()) \
  .pipe(FillNa()) \
  .pipe(SelectCols(cols=["Name"]))\
  .pipe(ReplaceDigits())\
  .pipe(RemovePunctuation())\
  .pipe(Parallel([Word2VecModel(embedding_size=4), FastTextModel(embedding_size=4)]))

nlp.fit(x_train)
<easymlops.nlp.core.pipeline_object.NLPPipeline at 0x213e69dda88>
print(nlp.transform(x_test).head(5).to_markdown())
|     |   w2v_Name_0 |   w2v_Name_1 |   w2v_Name_2 |   w2v_Name_3 |   fasttext_Name_0 |   fasttext_Name_1 |   fasttext_Name_2 |   fasttext_Name_3 |
|----:|-------------:|-------------:|-------------:|-------------:|------------------:|------------------:|------------------:|------------------:|
| 500 |   -0.0160304 |   0.00341222 |     0.131879 |    0.227311  |        0.0470462  |       -0.00434371 |        0.0662056  |        0.0268283  |
| 501 |   -0.168437  |  -0.0802843  |     0.188844 |    0.0576391 |        0.00569692 |       -0.00443027 |        0.00559943 |        0.0177013  |
| 502 |   -0.168437  |  -0.0802843  |     0.188844 |    0.0576391 |       -0.00195689 |       -0.0279951  |        0.00245144 |        0.0229158  |
| 503 |   -0.233626  |  -0.181061   |     0.165193 |    0.226518  |        0.00192385 |       -0.0182485  |       -0.005737   |        0.032779   |
| 504 |   -0.233626  |  -0.181061   |     0.165193 |    0.226518  |        0.010348   |       -0.0144759  |        0.028474   |        0.00600368 |
nlp.transform_single(x_test.to_dict("records")[0])
{'w2v_Name_0': -0.01603037677705288,
 'w2v_Name_1': 0.003412220859900117,
 'w2v_Name_2': 0.13187919557094574,
 'w2v_Name_3': 0.227310910820961,
 'fasttext_Name_0': 0.04704619571566582,
 'fasttext_Name_1': -0.004343707114458084,
 'fasttext_Name_2': 0.06620561331510544,
 'fasttext_Name_3': 0.026828348636627197}
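
As the introduction notes, a fitted pipeline can be deployed to production by wrapping it in a thin Flask or FastAPI layer. Below is a minimal FastAPI sketch around the tabular pipeline fitted above; the /predict route, the serve.py module name, and the float() conversion are illustrative assumptions, not part of the EasyMLOps API:

# Hedged sketch (serve.py): expose table.transform_single over HTTP.
from typing import Any, Dict

from fastapi import FastAPI

app = FastAPI()
# in a real service, the fitted pipeline would be loaded at startup rather than trained inline

@app.post("/predict")
def predict(record: Dict[str, Any]):
    # record is one raw input row, e.g. x_test.to_dict("records")[0]
    scores = table.transform_single(record)
    # cast to plain floats so the response is JSON-serializable
    return {k: float(v) for k, v in scores.items()}

# run with: uvicorn serve:app --host 0.0.0.0 --port 8000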