文本分类.ipynb
59.2 KB
In [1]:
import numpy as np import pandas as pd import os import csv import time import datetime import random import json import re import gensim import warnings from collections import Counter from math import sqrt import jieba from jieba.analyse import extract_tags import tensorflow as tf
In [2]:
_dataSource = "待抽取关键词文本数据.txt" _stopWordSource = "cn_stopwords.txt" _sequenceLength = 804 _embeddingSize = 300 _hiddenSize = 256 _dropoutKeepProb = 0.5 _l2RegLambda = 0.0 _rate = 0.8 _epoches = 4 _evaluateEvery = 100 _checkpointEvery = 100 _learningRate = 0.001 _batchSize = 128 _numClasses = 10
In [3]:
def _readData(filePath): rowline = [line for line in open(filePath, 'r', encoding='utf8').readlines()] labels = [line[:2] for line in rowline] news = [line[3:-1] for line in rowline] news = [re.sub("[A-Za-z0-9\s+\.\-\!\[\]\……\<\>\/_,$%^*()+\"\']+|[+——!,:~“”‘’;。《》〈?、~@#¥%......&*()]+", "", line) for line in news] allNews = [_cutWord(line) for line in news] return allNews, labels def _readStopWord(filePath): stopWordList = [line[:-1] for line in open(filePath, 'r', encoding='utf8').readlines()] stopWordDict = dict(zip(stopWordList, range(len(stopWordList)))) return stopWordDict def _cutWord(line): cut = jieba.cut(line, cut_all = False) cut_list = [i for i in cut] return cut_list def _getWordEmbedding(words): wordvec = gensim.models.KeyedVectors.load_word2vec_format("word2vec/sgns.zhihu.bigram", binary = False) vocab = [] wordEmbedding = [] vocab.append("PAD") vocab.append("UNK") wordEmbedding.append(np.zeros(_embeddingSize)) wordEmbedding.append(np.random.randn(_embeddingSize)) for word in words: try: vector = wordvec.wv[word] vocab.append(word) wordEmbedding.append(vector) except: pass return vocab, np.array(wordEmbedding) def _genVocabulary(allNews, labels, stopWordDict): allWords = [word for news in allNews for word in news] subWords = [word for word in allWords if word not in stopWordDict] wordCount = Counter(subWords) sortWordCount = sorted(wordCount.items(), key = lambda x: x[1], reverse = True) words = [item[0] for item in sortWordCount if item[1] >= 5] vocab, wordEmbedding = _getWordEmbedding(words) word2idx = dict(zip(vocab, list(range(len(vocab))))) uniqueLabel = list(set(labels)) label2idx = dict(zip(uniqueLabel, list(range(len(uniqueLabel))))) labelList = list(range(len(uniqueLabel))) with open("word2vec/wordJson/word2idx.json", "w", encoding = "utf-8") as f: json.dump(word2idx, f) with open("word2vec/wordJson/label2idx.json", "w", encoding = "utf-8") as f: json.dump(label2idx, f) return word2idx, label2idx, wordEmbedding, labelList def _labelToIndex(labels, label2idx): labelIds = [label2idx[label] for label in labels] return labelIds def _wordToIndex(allNews, word2idx): newsIds = [[word2idx.get(item, word2idx["UNK"]) for item in line] for line in news] return newsIds
In [4]:
news, labels = _readData(_dataSource)
Out [4]:
Building prefix dict from the default dictionary ... Loading model from cache C:\Users\jerry\AppData\Local\Temp\jieba.cache Loading model cost 0.637 seconds. Prefix dict has been built successfully.
In [5]:
stopWordDict = _readStopWord(_stopWordSource)
In [6]:
word2idx, label2idx, wordEmbedding, labelList = _genVocabulary(news, labels, stopWordDict)
Out [6]:
C:\Users\jerry\anaconda3\envs\tf\lib\site-packages\ipykernel_launcher.py:34: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).
In [8]:
len(word2idx), list(word2idx.items())[:10]
Out [8]:
(36090, [('PAD', 0), ('UNK', 1), ('基金', 2), ('中', 3), ('一个', 4), ('月', 5), ('年', 6), ('中国', 7), ('会', 8), ('市场', 9)])
In [9]:
len(label2idx), list(label2idx.items())[:10]
Out [9]:
(10, [('房产', 0), ('时尚', 1), ('体育', 2), ('娱乐', 3), ('游戏', 4), ('财经', 5), ('教育', 6), ('时政', 7), ('家居', 8), ('科技', 9)])
In [10]:
labelIds = _labelToIndex(labels, label2idx)
In [11]:
newsIds = _wordToIndex(news, word2idx)
In [12]:
def _genTrainEvalData(x, y, word2idx, rate): allNews = [] for news in x: if len(news) >= _sequenceLength: allNews.append(news[:_sequenceLength]) else: allNews.append(news + [word2idx["PAD"]] * (_sequenceLength - len(news))) allNews = np.array(allNews) y = np.array(y) idx = np.arange(len(x)) np.random.shuffle(idx) allNews = allNews[idx,:] y = y[idx] trainIndex = int(len(x) * rate) trainNews = np.array(allNews[:trainIndex], dtype="int64") trainLabels = np.array(y[:trainIndex], dtype="float32") evalNews = np.array(allNews[trainIndex:], dtype="int64") evalLabels = np.array(y[trainIndex:], dtype="float32") return trainNews, trainLabels, evalNews, evalLabels, idx
In [13]:
trainNews, trainLabels, evalNews, evalLabels, idx = _genTrainEvalData(newsIds, labelIds, word2idx, _rate)
In [14]:
def nextBatch(x, y, batchSize): perm = np.arange(len(x)) np.random.shuffle(perm) x = x[perm] y = y[perm] numBatches = len(x) // batchSize for i in range(numBatches): start = i * batchSize end = start + batchSize batchX = np.array(x[start: end], dtype="int64") batchY = np.array(y[start: end], dtype="float32") yield batchX, batchY
In [15]:
# 构建模型 class BiLSTMAttention(object): def __init__(self, wordEmbedding): self.inputX = tf.placeholder(tf.int32, [None, _sequenceLength], name="inputX") self.inputY = tf.placeholder(tf.int32, [None], name="inputY") self.dropoutKeepProb = tf.placeholder(tf.float32, name="dropoutKeepProb") # define l2 loss l2Loss = tf.constant(0.0) # word embedding with tf.name_scope("embedding"): self.W = tf.Variable(tf.cast(wordEmbedding, dtype=tf.float32, name="word2vec") ,name="W") self.embeddedWords = tf.nn.embedding_lookup(self.W, self.inputX) # 定义双向LSTM的模型结构 with tf.name_scope("Bi-LSTM"): lstmFwCell = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(num_units=_hiddenSize, state_is_tuple=True), output_keep_prob=_dropoutKeepProb) lstmBwCell = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(num_units=_hiddenSize, state_is_tuple=True), output_keep_prob=_dropoutKeepProb) outputs_, self.current_state = tf.nn.bidirectional_dynamic_rnn(lstmFwCell, lstmBwCell, self.embeddedWords, dtype=tf.float32, scope="bi-lstm") self.embeddedWords = tf.concat(outputs_, 2) print("lstm outputs shape", self.embeddedWords.shape) # 将最后一层Bi-LSTM输出的结果分割成前向和后向的输出 outputs = tf.split(self.embeddedWords, 2, -1) print('outputs shape', outputs[0].shape) # 在Bi-LSTM+Attention的论文中,将前向和后向的输出相加 with tf.name_scope("Attention"): H = outputs[0] + outputs[1] print('attetion output H shape', H.shape) output = self.attention(H) print("attetion output shape", output.shape) outputSize = _hiddenSize # fc layer output with tf.name_scope("output"): # [256, 1] outputW = tf.get_variable("outputW", shape=[outputSize, _numClasses], initializer=tf.contrib.layers.xavier_initializer()) # [1, ] outputB= tf.Variable(tf.constant(0.1, shape=[_numClasses]), name="outputB") l2Loss += tf.nn.l2_loss(outputW) l2Loss += tf.nn.l2_loss(outputB) self.logits = tf.nn.xw_plus_b(output, outputW, outputB, name="logits") self.predictions = tf.cast(tf.math.argmax(self.logits), tf.float32, name="predictions") # softmax with tf.name_scope("loss"): losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.cast(tf.reshape(self.inputY, [-1, 1]), dtype=tf.float32)) self.loss = tf.reduce_mean(losses) + _l2RegLambda * l2Loss def attention(self, H): hiddenSize = _hiddenSize W = tf.Variable(tf.random_normal([hiddenSize], stddev=0.1)) print("attention W shape", W.shape) M = tf.tanh(H) # e = W*tanh(H) newM = tf.matmul(tf.reshape(M, [-1, hiddenSize]), tf.reshape(W, [-1, 1])) # 对newM做维度转换成[batch_size, time_step] restoreM = tf.reshape(newM, [-1, _sequenceLength]) # 用softmax做归一化处理[batch_size, time_step] # # 𝛼𝑡=exp(𝑒𝑡') / ∑exp(𝑒𝑡′𝑘) self.alpha = tf.nn.softmax(restoreM) # 利用求得的alpha的值对H进行加权求和,用矩阵运算直接操作 r = tf.matmul(tf.transpose(H, [0, 2, 1]), tf.reshape(self.alpha, [-1, _sequenceLength, 1])) print('attetion output r shape', r.shape) # 将三维压缩成二维sequeezeR=[batch_size, hidden_size] sequeezeR = tf.reshape(r, [-1, hiddenSize]) sentenceRepren = tf.tanh(sequeezeR) # 对Attention的输出可以做dropout处理 output = tf.nn.dropout(sentenceRepren, self.dropoutKeepProb) return output
In [16]:
""" 定义各类性能指标 """ def mean(item: list) -> float: """ 计算列表中元素的平均值 :param item: 列表对象 :return: """ res = sum(item) / len(item) if len(item) > 0 else 0 return res def accuracy(pred_y, true_y): """ 计算二类和多类的准确率 :param pred_y: 预测结果 :param true_y: 真实结果 :return: """ if isinstance(pred_y[0], list): pred_y = [item[0] for item in pred_y] corr = 0 for i in range(len(pred_y)): if pred_y[i] == true_y[i]: corr += 1 acc = corr / len(pred_y) if len(pred_y) > 0 else 0 return acc def binary_precision(pred_y, true_y, positive=1): """ 二类的精确率计算 :param pred_y: 预测结果 :param true_y: 真实结果 :param positive: 正例的索引表示 :return: """ corr = 0 pred_corr = 0 for i in range(len(pred_y)): if pred_y[i] == positive: pred_corr += 1 if pred_y[i] == true_y[i]: corr += 1 prec = corr / pred_corr if pred_corr > 0 else 0 return prec def binary_recall(pred_y, true_y, positive=1): """ 二类的召回率 :param pred_y: 预测结果 :param true_y: 真实结果 :param positive: 正例的索引表示 :return: """ corr = 0 true_corr = 0 for i in range(len(pred_y)): if true_y[i] == positive: true_corr += 1 if pred_y[i] == true_y[i]: corr += 1 rec = corr / true_corr if true_corr > 0 else 0 return rec def binary_f_beta(pred_y, true_y, beta=1.0, positive=1): """ 二类的f beta值 :param pred_y: 预测结果 :param true_y: 真实结果 :param beta: beta值 :param positive: 正例的索引表示 :return: """ precision = binary_precision(pred_y, true_y, positive) recall = binary_recall(pred_y, true_y, positive) try: f_b = (1 + beta * beta) * precision * recall / (beta * beta * precision + recall) except: f_b = 0 return f_b def multi_precision(pred_y, true_y, labels): """ 多类的精确率 :param pred_y: 预测结果 :param true_y: 真实结果 :param labels: 标签列表 :return: """ if isinstance(pred_y[0], list): pred_y = [item[0] for item in pred_y] precisions = [binary_precision(pred_y, true_y, label) for label in labels] prec = mean(precisions) return prec def multi_recall(pred_y, true_y, labels): """ 多类的召回率 :param pred_y: 预测结果 :param true_y: 真实结果 :param labels: 标签列表 :return: """ if isinstance(pred_y[0], list): pred_y = [item[0] for item in pred_y] recalls = [binary_recall(pred_y, true_y, label) for label in labels] rec = mean(recalls) return rec def multi_f_beta(pred_y, true_y, labels, beta=1.0): """ 多类的f beta值 :param pred_y: 预测结果 :param true_y: 真实结果 :param labels: 标签列表 :param beta: beta值 :return: """ if isinstance(pred_y[0], list): pred_y = [item[0] for item in pred_y] f_betas = [binary_f_beta(pred_y, true_y, beta, label) for label in labels] f_beta = mean(f_betas) return f_beta def get_binary_metrics(pred_y, true_y, f_beta=1.0): """ 得到二分类的性能指标 :param pred_y: :param true_y: :param f_beta: :return: """ acc = accuracy(pred_y, true_y) recall = binary_recall(pred_y, true_y) precision = binary_precision(pred_y, true_y) f_beta = binary_f_beta(pred_y, true_y, f_beta) return acc, recall, precision, f_beta def get_multi_metrics(pred_y, true_y, labels, f_beta=1.0): """ 得到多分类的性能指标 :param pred_y: :param true_y: :param labels: :param f_beta: :return: """ acc = accuracy(pred_y, true_y) recall = multi_recall(pred_y, true_y, labels) precision = multi_precision(pred_y, true_y, labels) f_beta = multi_f_beta(pred_y, true_y, labels, f_beta) return acc, recall, precision, f_beta
In [17]:
# train the model # 定义计算图 with tf.Graph().as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) session_conf.gpu_options.allow_growth=True session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9 # 配置gpu占用率 sess = tf.Session(config=session_conf) # 定义会话 with sess.as_default(): lstm = BiLSTMAttention(wordEmbedding) globalStep = tf.Variable(0, name="globalStep", trainable=False) optimizer = tf.train.AdamOptimizer(_learningRate) gradsAndVars = optimizer.compute_gradients(lstm.loss) trainOp = optimizer.apply_gradients(gradsAndVars, global_step=globalStep) # 用summary绘制tensorBoard gradSummaries = [] for g, v in gradsAndVars: if g is not None: tf.summary.histogram("{}/grad/hist".format(v.name), g) tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) outDir = os.path.abspath(os.path.join(os.path.curdir, "summarys")) print("Writing to {}\n".format(outDir)) lossSummary = tf.summary.scalar("loss", lstm.loss) summaryOp = tf.summary.merge_all() trainSummaryDir = os.path.join(outDir, "train") trainSummaryWriter = tf.summary.FileWriter(trainSummaryDir, sess.graph) evalSummaryDir = os.path.join(outDir, "eval") evalSummaryWriter = tf.summary.FileWriter(evalSummaryDir, sess.graph) # 保存模型的句柄 saver = tf.train.Saver(tf.global_variables(), max_to_keep=5) # 保存模型的一种方式,保存为pb文件 savedModelPath = "model/Bi-LSTM-atten/savedModel" if os.path.exists(savedModelPath): os.rmdir(savedModelPath) checkpointPath = "model/Bi-LSTM-atten/model/my-model" if os.path.exists(checkpointPath): os.rmdir(checkpointPath) builder = tf.saved_model.builder.SavedModelBuilder(savedModelPath) sess.run(tf.global_variables_initializer()) def trainStep(batchX, batchY): feed_dict = {lstm.inputX: batchX, lstm.inputY: batchY, lstm.dropoutKeepProb: _dropoutKeepProb} _, summary, step, loss, predictions = sess.run([trainOp, summaryOp, globalStep, lstm.loss, lstm.predictions], feed_dict) timeStr = datetime.datetime.now().isoformat() acc, recall, prec, f_beta = get_multi_metrics(pred_y=predictions, true_y=batchY, labels = labelList) trainSummaryWriter.add_summary(summary, step) return loss, acc, prec, recall, f_beta def devStep(batchX, batchY): feed_dict = {lstm.inputX: batchX, lstm.inputY: batchY, lstm.dropoutKeepProb: 1.0} summary, step, loss, predictions = sess.run([summaryOp, globalStep, lstm.loss, lstm.predictions], feed_dict) acc, precision, recall, f_beta = get_multi_metrics(pred_y=predictions, true_y=batchY, labels = labelList) evalSummaryWriter.add_summary(summary, step) return loss, acc, precision, recall, f_beta for i in range(_epoches): # 训练模型 print("start training model") for batchTrain in nextBatch(trainNews, trainLabels, _batchSize): loss, acc, prec, recall, f_beta = trainStep(batchTrain[0], batchTrain[1]) currentStep = tf.train.global_step(sess, globalStep) print("train: step: {}, loss: {}, acc: {}, recall: {}, precision: {}, f_beta: {}".format(currentStep, loss, acc, recall, prec, f_beta)) # 每隔100个batch,验证一次 if currentStep % _evaluateEvery == 0: print("\nEvaluation:") losses = [] accs = [] f_betas = [] precisions = [] recalls = [] for batchEval in nextBatch(evalNews, evalLabels, _batchSize): loss, acc, precision, recall, f_beta = devStep(batchEval[0], batchEval[1]) losses.append(loss) accs.append(acc) f_betas.append(f_beta) precisions.append(precision) recalls.append(recall) time_str = datetime.datetime.now().isoformat() print("{}, step: {}, loss: {}, acc: {},precision: {}, recall: {}, f_beta: {}".format(time_str, currentStep, mean(losses), mean(accs), mean(precisions), mean(recalls), mean(f_betas))) # 每隔100个batch,保存一次模型 if currentStep % _checkpointEvery == 0: # 保存模型的另一种方法,保存checkpoint文件 path = saver.save(sess, checkpointPath, global_step=currentStep) print("Saved model checkpoint to {}\n".format(path)) inputs = {"inputX": tf.saved_model.utils.build_tensor_info(lstm.inputX), "keepProb": tf.saved_model.utils.build_tensor_info(lstm.dropoutKeepProb)} outputs = {"predictions": tf.saved_model.utils.build_tensor_info(lstm.predictions)} prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(inputs=inputs, outputs=outputs, method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) legacy_init_op = tf.group(tf.tables_initializer(), name="legacy_init_op") builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING], signature_def_map={"predict": prediction_signature}, legacy_init_op=legacy_init_op) builder.save()
Out [17]:
WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:18: LSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version. Instructions for updating: This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0. WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:20: bidirectional_dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version. Instructions for updating: Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API WARNING:tensorflow:From C:\Users\jerry\anaconda3\envs\tf\lib\site-packages\tensorflow_core\python\ops\rnn.py:464: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version. Instructions for updating: Please use `keras.layers.RNN(cell)`, which is equivalent to this API WARNING:tensorflow:From C:\Users\jerry\anaconda3\envs\tf\lib\site-packages\tensorflow_core\python\ops\rnn_cell_impl.py:958: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.add_weight` method instead. WARNING:tensorflow:From C:\Users\jerry\anaconda3\envs\tf\lib\site-packages\tensorflow_core\python\ops\rnn_cell_impl.py:962: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version. Instructions for updating: Call initializer instance with the dtype argument instead of passing it to the constructor lstm outputs shape (?, 804, 512) outputs shape (?, 804, 256) attetion output H shape (?, 804, 256) attention W shape (256,) attetion output r shape (?, 256, 1) WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:84: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. attetion output shape (?, 256) WARNING:tensorflow: The TensorFlow contrib module will not be included in TensorFlow 2.0. For more information, please see: * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md * https://github.com/tensorflow/addons * https://github.com/tensorflow/io (for I/O related ops) If you depend on functionality not listed there, please file an issue. WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:53: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version. Instructions for updating: Future major versions of TensorFlow will allow gradients to flow into the labels input on backprop by default. See `tf.nn.softmax_cross_entropy_with_logits_v2`. INFO:tensorflow:Summary name embedding/W:0/grad/hist is illegal; using embedding/W_0/grad/hist instead. INFO:tensorflow:Summary name embedding/W:0/grad/sparsity is illegal; using embedding/W_0/grad/sparsity instead. INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/kernel:0/grad/hist is illegal; using bi-lstm/fw/lstm_cell/kernel_0/grad/hist instead. INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/kernel:0/grad/sparsity is illegal; using bi-lstm/fw/lstm_cell/kernel_0/grad/sparsity instead. INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/bias:0/grad/hist is illegal; using bi-lstm/fw/lstm_cell/bias_0/grad/hist instead. INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/bias:0/grad/sparsity is illegal; using bi-lstm/fw/lstm_cell/bias_0/grad/sparsity instead. INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/kernel:0/grad/hist is illegal; using bi-lstm/bw/lstm_cell/kernel_0/grad/hist instead. INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/kernel:0/grad/sparsity is illegal; using bi-lstm/bw/lstm_cell/kernel_0/grad/sparsity instead. INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/bias:0/grad/hist is illegal; using bi-lstm/bw/lstm_cell/bias_0/grad/hist instead. INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/bias:0/grad/sparsity is illegal; using bi-lstm/bw/lstm_cell/bias_0/grad/sparsity instead. INFO:tensorflow:Summary name Attention/Variable:0/grad/hist is illegal; using Attention/Variable_0/grad/hist instead. INFO:tensorflow:Summary name Attention/Variable:0/grad/sparsity is illegal; using Attention/Variable_0/grad/sparsity instead. INFO:tensorflow:Summary name outputW:0/grad/hist is illegal; using outputW_0/grad/hist instead. INFO:tensorflow:Summary name outputW:0/grad/sparsity is illegal; using outputW_0/grad/sparsity instead. INFO:tensorflow:Summary name output/outputB:0/grad/hist is illegal; using output/outputB_0/grad/hist instead. INFO:tensorflow:Summary name output/outputB:0/grad/sparsity is illegal; using output/outputB_0/grad/sparsity instead. Writing to C:\Users\jerry\Desktop\Project\greedy-ai-recommend-system\projects\project1\summarys start training model train: step: 1, loss: 102.622802734375, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 2, loss: 100.93660736083984, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 3, loss: 104.3801040649414, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 4, loss: 111.20909118652344, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 5, loss: 114.00332641601562, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 6, loss: 120.47428894042969, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 7, loss: 136.00888061523438, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 8, loss: 147.71319580078125, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 9, loss: 140.00100708007812, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 10, loss: 143.022216796875, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 11, loss: 140.45101928710938, acc: 0.1, recall: 0.1, precision: 0.1, f_beta: 0.1 train: step: 12, loss: 155.61532592773438, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 13, loss: 149.02059936523438, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 14, loss: 136.99212646484375, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 15, loss: 154.5734405517578, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 16, loss: 151.01498413085938, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 17, loss: 157.79434204101562, acc: 0.1, recall: 0.03333333333333333, precision: 0.1, f_beta: 0.05 train: step: 18, loss: 158.71762084960938, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 19, loss: 148.79803466796875, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 20, loss: 137.8258056640625, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 21, loss: 164.97897338867188, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 22, loss: 149.67916870117188, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 23, loss: 175.56698608398438, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 24, loss: 153.5785369873047, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 25, loss: 147.6337890625, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 26, loss: 158.0564422607422, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 27, loss: 149.98202514648438, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 28, loss: 173.33285522460938, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 29, loss: 151.09945678710938, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 30, loss: 177.244140625, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 31, loss: 154.50172424316406, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 32, loss: 157.1009521484375, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 33, loss: 148.7662353515625, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 34, loss: 165.9762420654297, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0 train: step: 35, loss: 159.74624633789062, acc: 0.0, recall: 0.0, precision: 0.0, f_beta: 0.0