cube-studio/job-template/job/ner
2023-12-11 09:58:45 +08:00
..
bert_pretrain add ner train and service 2022-08-13 17:00:41 +08:00
models add ner train and service 2022-08-13 17:00:41 +08:00
tokenizer add ner train and service 2022-08-13 17:00:41 +08:00
utils add ner train and service 2022-08-13 17:00:41 +08:00
bert_bilstm_crf_opration.py add ner train and service 2022-08-13 17:00:41 +08:00
bilstm_crf_opration.py add ner train and service 2022-08-13 17:00:41 +08:00
bilstm_opration.py add ner train and service 2022-08-13 17:00:41 +08:00
build.sh fix ner job template 2022-08-22 13:26:19 +08:00
config.py add ner train and service 2022-08-13 17:00:41 +08:00
data.py add ner train and service 2022-08-13 17:00:41 +08:00
Dockerfile 丰富原有部分模板的可接收参数,去除不使用的模板 2023-12-11 09:58:45 +08:00
evaluating.py add ner train and service 2022-08-13 17:00:41 +08:00
main4argo.py fix ner job template 2022-08-22 13:26:19 +08:00
main.py add ner train and service 2022-08-13 17:00:41 +08:00
README.md fix ner job template 2022-08-22 13:26:19 +08:00
the_peoples_daily_tool.py add ner train and service 2022-08-13 17:00:41 +08:00
train_evaluate.py add ner train and service 2022-08-13 17:00:41 +08:00

ner 模板

模板简介

命名实体识别(NER)是一种自然语言处理技术,可以自动扫描整篇文章,提取文本中的一些基本实体,并将它们分类到预定义的类别中。举一个直观一点的例子:比如你对手机的语音助手说“提醒我明天下午7点开会”或者“明天北京海淀区的天气怎么样”,然后它就会根据你的指令做出相应的执行。主要应用于:搜索和推荐引擎、自动聊天机器人、内容分析、消费者洞察。

准备数据集

数据集 可以通过链接下载

把数据拷贝到 notebook 中的 /mnt/admin/NER 目录(可以直接拖拽进去),其中应包含 NER/zdata/resume_BIO.txt 和 NER/zdata/people_daily_BIO.txt

注册模板

{
   "参数分组1": {
       "--model": {
           "type": "str",
           "item_type": "str",
           "label": "训练的基础模型名称,这里固定为: BiLSTM_CRF",
           "require": 1,
           "choice": [],
           "range": "",
           "default": "BiLSTM_CRF",
           "placeholder": "",
           "describe": "训练的基础模型名称,这里固定为: BiLSTM_CRF",
           "editable": 1,
           "condition": "",
           "sub_args": {}
       },
       "--path": {
           "type": "str",
           "item_type": "str",
           "label": "训练数据存放目录",
           "require": 1,
           "choice": [],
           "range": "",
           "default": "/mnt/admin/NER/zdata/",
           "placeholder": "",
           "describe": "训练数据存放目录",
           "editable": 1,
           "condition": "",
           "sub_args": {}
       },
       "--filename": {
           "type": "str",
           "item_type": "str",
           "label": "数据集的名字",
           "require": 1,
           "choice": [
               "resume_BIO.txt",
               "people_daily_BIO.txt"
           ],
           "range": "",
           "default": "resume_BIO.txt",
           "placeholder": "",
           "describe": " 数据集的名字",
           "editable": 1,
           "condition": "",
           "sub_args": {}
       },
       "--epochs": {
           "type": "str",
           "item_type": "str",
           "label": "训练的次数,次数越大效果越好,建议 5 以上",
           "require": 1,
           "choice": [],
           "range": "",
           "default": "5",
           "placeholder": "",
           "describe": "训练的次数,次数越大效果越好,建议 5 以上",
           "editable": 1,
           "condition": "",
           "sub_args": {}
       },
       
       "-pp": {
           "type": "str",
           "item_type": "str",
           "label": "模型保存地址",
           "require": 1,
           "choice": [],
           "range": "",
           "default": "/mnt/admin/model.pkl",
           "placeholder": "",
           "describe": "模型保存地址",
           "editable": 1,
           "condition": "",
           "sub_args": {}
       }
   }
}