import watson_nlp
from watson_nlp.toolkit.classification_utils.train_util import prepare_data_from_json
from watson_nlp.blocks.classification.svm import SVM

# Path to the JSON training data file.
training_data_file = "train_data.json"

# Load a Syntax model (English Izumo stock model) used to pre-process the text.
syntax_model = watson_nlp.load('syntax_izumo_en_stock')

# Create a datastream from the training data; prepare_data_from_json returns a
# pair of streams: syntax analyses and their corresponding labels.
train_stream = prepare_data_from_json(training_data_file, syntax_model)
syntax_stream, labels_stream = train_stream[0], train_stream[1]
Train the classification model using USE embeddings. For the list of available pretrained blocks, see "Pretrained USE embeddings".
# Download the Universal Sentence Encoder (USE) embedding model.
use_embedding_model = watson_nlp.load('embedding_use_en_stock')

# Embed each syntax analysis in the stream.
# NOTE: doc_embed_style can be changed to 'avg_sent' as well. For more
# information check the documentation for Embeddings, or the USE run
# function API docs.
use_train_stream = use_embedding_model.stream(syntax_stream, doc_embed_style='raw_text')

# Pair each embedding with its label to form the SVM training stream.
use_svm_train_stream = watson_nlp.data_model.DataStream.zip(use_train_stream, labels_stream)

# Train SVM using the Universal Sentence Encoder (USE) training stream.
classification_model = SVM.train(use_svm_train_stream)
import watson_nlp
from watson_nlp.blocks.classification.transformer import Transformer
from watson_nlp.toolkit.classification_utils.train_util import prepare_stream_of_train_records
from watson_nlp import data_model as dm

# Path to the JSON training data file.
training_data_file = "train_data.json"

# Create a datastream from the training data and convert it into train records.
train_data_stream = dm.DataStream.from_file(training_data_file)
train_record_data_stream = prepare_stream_of_train_records(train_data_stream)

# Load the pre-trained Slate model (replace the placeholder with a real model name).
pretrained_model_resource = watson_nlp.load('<pretrained Slate model>')

# Train the model - note that Transformer uses Slate by default.
classification_model = Transformer.train(train_data_stream=train_record_data_stream,
                                         pretrained_model_resource=pretrained_model_resource)
import watson_nlp
from watson_nlp.blocks.classification.transformer import Transformer
from watson_nlp.toolkit.classification_utils.train_util import prepare_stream_of_train_records
from watson_nlp import data_model as dm

# Path to the JSON training data file.
training_data_file = "train_data.json"

# Create a datastream from the training data and convert it into train records.
train_data_stream = dm.DataStream.from_file(training_data_file)
train_record_data_stream = prepare_stream_of_train_records(train_data_stream)

# Specify the name of the Hugging Face model.
# Fixed typo: the model id is 'xlm-roberta-base', not 'xml-roberta-base'.
huggingface_model_name = 'xlm-roberta-base'

# Train the model.
classification_model = Transformer.train(train_data_stream=train_record_data_stream,
                                         pretrained_model_resource=huggingface_model_name)
import watson_nlp
from watson_nlp.workflows.classification.generic_ensemble import GenericEnsemble
from watson_nlp.workflows.classification.base_classifier import GloveCNN
from watson_nlp.workflows.classification.base_classifier import TFidfSvm

# Path to the JSON training data file.
training_data_file = "train_data.json"

# Syntax model used to pre-process the training text.
syntax_model = watson_nlp.load('syntax_izumo_en_stock')

# GloVe embedding model, required by the CNN base classifier.
glove_embedding_model = watson_nlp.load('embedding_glove_en_stock')

# Train the classifier using manually defined base classifiers and weights.
classification_model = GenericEnsemble.train(training_data_file,
                            syntax_model,
                            base_classifiers_params=[
                                TFidfSvm.TrainParams(syntax_model=syntax_model),
                                GloveCNN.TrainParams(syntax_model=syntax_model,
                                                     glove_embedding_model=glove_embedding_model,
                                                     )],
                            weights=[2, 1])

# Run the Ensemble model on new text.
# Fixed: the trained model was assigned to `classification_model` above;
# the original referenced an undefined `ensemble_classification_model`.
ensemble_prediction = classification_model.run("new input text")
For the SVM and CNN models (for example, for CNN):
# Run the Syntax model first to produce the syntax analysis the classifier consumes.
syntax_result = syntax_model.run("new input text")
# Run the CNN model on top of the syntax result.
# NOTE(review): `cnn_classification_model` is defined elsewhere in the
# surrounding documentation — presumably a trained CNN classification block.
cnn_prediction = cnn_classification_model.run(syntax_result)