" rowline = [line for line in open(filePath, 'r', encoding='utf8').readlines()]\n",
" labels = [line[:2] for line in rowline]\n",
" news = [line[3:-1] for line in rowline]\n",
" news = [re.sub(\"[A-Za-z0-9\\s+\\.\\-\\!\\[\\]\\……\\<\\>\\/_,$%^*()+\\\"\\']+|[+——!,:~“”‘’;。《》〈?、~@#¥%......&*()]+\", \"\", line) for line in news]\n",
" allNews = [_cutWord(line) for line in news]\n",
" \n",
" return allNews, labels\n",
"\n",
"def _readStopWord(filePath):\n",
" stopWordList = [line[:-1] for line in open(filePath, 'r', encoding='utf8').readlines()]\n",
" labelIds = [label2idx[label] for label in labels]\n",
" return labelIds\n",
"\n",
"def _wordToIndex(allNews, word2idx):\n",
" newsIds = [[word2idx.get(item, word2idx[\"UNK\"]) for item in line] for line in news]\n",
" return newsIds"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache C:\\Users\\jerry\\AppData\\Local\\Temp\\jieba.cache\n",
"Loading model cost 0.637 seconds.\n",
"Prefix dict has been built successfully.\n"
]
}
],
"source": [
"news, labels = _readData(_dataSource)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"stopWordDict = _readStopWord(_stopWordSource)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\jerry\\anaconda3\\envs\\tf\\lib\\site-packages\\ipykernel_launcher.py:34: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n"
"WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:18: LSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n",
"WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:20: bidirectional_dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API\n",
"WARNING:tensorflow:From C:\\Users\\jerry\\anaconda3\\envs\\tf\\lib\\site-packages\\tensorflow_core\\python\\ops\\rnn.py:464: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `keras.layers.RNN(cell)`, which is equivalent to this API\n",
"WARNING:tensorflow:From C:\\Users\\jerry\\anaconda3\\envs\\tf\\lib\\site-packages\\tensorflow_core\\python\\ops\\rnn_cell_impl.py:958: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `layer.add_weight` method instead.\n",
"WARNING:tensorflow:From C:\\Users\\jerry\\anaconda3\\envs\\tf\\lib\\site-packages\\tensorflow_core\\python\\ops\\rnn_cell_impl.py:962: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Call initializer instance with the dtype argument instead of passing it to the constructor\n",
"lstm outputs shape (?, 804, 512)\n",
"outputs shape (?, 804, 256)\n",
"attetion output H shape (?, 804, 256)\n",
"attention W shape (256,)\n",
"attetion output r shape (?, 256, 1)\n",
"WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:84: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n",
"attetion output shape (?, 256)\n",
"WARNING:tensorflow:\n",
"The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
" * https://github.com/tensorflow/io (for I/O related ops)\n",
"If you depend on functionality not listed there, please file an issue.\n",
"\n",
"WARNING:tensorflow:From <ipython-input-15-9bc34f41179b>:53: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"\n",
"Future major versions of TensorFlow will allow gradients to flow\n",
"into the labels input on backprop by default.\n",
"\n",
"See `tf.nn.softmax_cross_entropy_with_logits_v2`.\n",
"\n",
"INFO:tensorflow:Summary name embedding/W:0/grad/hist is illegal; using embedding/W_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name embedding/W:0/grad/sparsity is illegal; using embedding/W_0/grad/sparsity instead.\n",
"INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/kernel:0/grad/hist is illegal; using bi-lstm/fw/lstm_cell/kernel_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/kernel:0/grad/sparsity is illegal; using bi-lstm/fw/lstm_cell/kernel_0/grad/sparsity instead.\n",
"INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/bias:0/grad/hist is illegal; using bi-lstm/fw/lstm_cell/bias_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name bi-lstm/fw/lstm_cell/bias:0/grad/sparsity is illegal; using bi-lstm/fw/lstm_cell/bias_0/grad/sparsity instead.\n",
"INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/kernel:0/grad/hist is illegal; using bi-lstm/bw/lstm_cell/kernel_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/kernel:0/grad/sparsity is illegal; using bi-lstm/bw/lstm_cell/kernel_0/grad/sparsity instead.\n",
"INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/bias:0/grad/hist is illegal; using bi-lstm/bw/lstm_cell/bias_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name bi-lstm/bw/lstm_cell/bias:0/grad/sparsity is illegal; using bi-lstm/bw/lstm_cell/bias_0/grad/sparsity instead.\n",
"INFO:tensorflow:Summary name Attention/Variable:0/grad/hist is illegal; using Attention/Variable_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name Attention/Variable:0/grad/sparsity is illegal; using Attention/Variable_0/grad/sparsity instead.\n",
"INFO:tensorflow:Summary name outputW:0/grad/hist is illegal; using outputW_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name outputW:0/grad/sparsity is illegal; using outputW_0/grad/sparsity instead.\n",
"INFO:tensorflow:Summary name output/outputB:0/grad/hist is illegal; using output/outputB_0/grad/hist instead.\n",
"INFO:tensorflow:Summary name output/outputB:0/grad/sparsity is illegal; using output/outputB_0/grad/sparsity instead.\n",
"Writing to C:\\Users\\jerry\\Desktop\\Project\\greedy-ai-recommend-system\\projects\\project1\\summarys\n",