49 lines
1.3 KiB
Plaintext
49 lines
1.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"metadata": {},
|
|
"cell_type": "code",
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"source": [
"import importlib\n",
"\n",
"import import_ipynb  # imported for its side effect: enables importing .ipynb files\n",
"\n",
"# A plain `from notebooks.03_semantic_methods.ipynb import *` is a SyntaxError: a dotted\n",
"# name in an import statement cannot start with a digit, and the `.ipynb` suffix is not\n",
"# part of the module name. Load the notebook by name and re-export its public names.\n",
"# NOTE(review): assumes the kernel's working directory contains `notebooks/` - confirm.\n",
"_semantic_methods = importlib.import_module(\"notebooks.03_semantic_methods\")\n",
"_exported = getattr(_semantic_methods, \"__all__\", None)\n",
"if _exported is None:\n",
"    # Same default as a star import: every name without a leading underscore.\n",
"    _exported = [name for name in vars(_semantic_methods) if not name.startswith(\"_\")]\n",
"globals().update({name: getattr(_semantic_methods, name) for name in _exported})\n",
|
|
"\n",
"def extract_all_features(sentence_pairs):\n",
"    \"\"\"Build one similarity feature vector per sentence pair.\n",
"\n",
"    Args:\n",
"        sentence_pairs: iterable of (sent1, sent2) pairs.\n",
"\n",
"    Returns:\n",
"        A list with one entry per pair. Each entry is a list holding, in order, the\n",
"        results of jaccard_similarity, sentence_similarity_avg,\n",
"        sentence_similarity_sif and syntactic_similarity for that pair.\n",
"        An empty input yields an empty list.\n",
"    \"\"\"\n",
"    features = []\n",
"    for sent1, sent2 in sentence_pairs:\n",
"        feature_vector = [\n",
"            jaccard_similarity(sent1, sent2),\n",
"            sentence_similarity_avg(sent1, sent2),\n",
"            sentence_similarity_sif(sent1, sent2),\n",
"            syntactic_similarity(sent1, sent2),\n",
"        ]\n",
"        # Keep the vector; without this the function produced nothing and returned None.\n",
"        features.append(feature_vector)\n",
"    return features"
|
|
],
|
|
"id": "1c45d83192facfc6"
|
|
},
|
|
{
|
|
"metadata": {},
|
|
"cell_type": "code",
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"source": [
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
"# Sentence pairs to train on, as (sentence_1, sentence_2) tuples. TODO: fill in real data.\n",
"labled_pairs = []\n",
"\n",
"X = extract_all_features(labled_pairs)\n",
"# Labels for the pairs: one 0/1 entry per pair, in the same order (e.g. [0, 1, 0, 1, ...]).\n",
"# The previous `[0,1,0,1...]` placeholder was not valid Python. TODO: fill in real labels.\n",
"y = []\n",
"\n",
"# Fail early with a clear message instead of an opaque error from inside scikit-learn.\n",
"if not labled_pairs:\n",
"    raise ValueError(\"labled_pairs is empty: add labelled sentence pairs before training\")\n",
"if len(X) != len(y):\n",
"    raise ValueError(f\"got {len(X)} feature vectors but {len(y)} labels\")\n",
"\n",
"# Fixed seed so the train/test split is reproducible across kernel restarts.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n"
|
|
],
|
|
"id": "9665682bd5a7951e"
|
|
}
|
|
],
|
|
"metadata": {},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|