From 188a8e58522a9ad0416de5fcb938b3933dee59b7 Mon Sep 17 00:00:00 2001 From: Henry Dowd Date: Wed, 8 Oct 2025 14:55:08 +0100 Subject: [PATCH] simple sentance parser and dataset tester program --- .gitignore | 2 + __init__.py | 0 __pycache__/parser.cpython-313.pyc | Bin 0 -> 790 bytes main.py | 14 +++++++ testing/__init__.py | 0 testing/__pycache__/datasets.cpython-313.pyc | Bin 0 -> 1729 bytes testing/dataset_testing.py | 41 +++++++++++++++++++ tools/__init__.py | 0 tools/__pycache__/__init__.cpython-313.pyc | Bin 0 -> 151 bytes tools/__pycache__/parser.cpython-313.pyc | Bin 0 -> 1500 bytes tools/parser.py | 35 ++++++++++++++++ 11 files changed, 92 insertions(+) create mode 100644 .gitignore create mode 100644 __init__.py create mode 100644 __pycache__/parser.cpython-313.pyc create mode 100644 main.py create mode 100644 testing/__init__.py create mode 100644 testing/__pycache__/datasets.cpython-313.pyc create mode 100644 testing/dataset_testing.py create mode 100644 tools/__init__.py create mode 100644 tools/__pycache__/__init__.cpython-313.pyc create mode 100644 tools/__pycache__/parser.cpython-313.pyc create mode 100644 tools/parser.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e75ccaa --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +venv +.vscode diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/__pycache__/parser.cpython-313.pyc b/__pycache__/parser.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1693ab5e06143362738843b6b635821b7ffc2619 GIT binary patch literal 790 zcmZuvzi-n}5I+0ajvMlWKtYHBnYJKBB7`3Tp(7AK)Bn3Wby2y}SFq`|kAg`q^w6&@#B!GI-)y{V(X&VBt5`fJD*e7Lsjni=?VRgF6nBGXOUK2hKP6x`0xLOeJ|x z1;nI70H&&ln1q=8FZN%pn4d!a;Pw9QQRO^bl@E5DsskK>c-Ju~4o!!rW}U~>E-iIS zK^!Q$ty^8c<&THbEPWb`D

|z7k5xE$aSCE`=#&lIkXAk}^|M@#ZTxiV`EX {dep['head']}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/testing/__init__.py b/testing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/testing/__pycache__/datasets.cpython-313.pyc b/testing/__pycache__/datasets.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26cec17d33018e9e875b1f15978ebb340fc95155 GIT binary patch literal 1729 zcmah}TTdHD6rR0!ecN4gGl5XZLP%5-jBDH|4N0h~a*0TY5Gz-eV#!+UDRyMDU#}d|^xh0=6e zIgcTgyAJuzAVP`(n^!&8*_Owv3KtRarTpOcw&MUR0Jg7f_oBPsKz1QB`4U1w zq@d*|&3T=T3?BnBiZE0x7+N-+)ojhe_5&(oJ)JQKPCv$LX{$*2^};5me36)$Osnor zBJ#q=!FO5%@eDm|bqS#)YV+#sIWN=kat}K=t2`wCZH%F0rGD$w^ zNwP^!Wu}C26k*G^S2SW_WzDd2iiLF>>lth*nx0kgmp0KdwqoaSo+w#tVm%A1CB?GI zX2#wm*op|$-%g63snb;Igy*ppf7hI*eAdXsT{#`$D5n=pDwxhT2yL^bI{>9L!3E9E z8@iRtn?Ol?P!ZyHgy;wf1oc2X5V_N|C4ftD2mL6PGm1Et!#XL&GDa51%!0PAk-Sxm zX0eSk4sy)KmYvtvW34t@Q)Y>VO=3VTtaST?X;aG+(g)ZMx%C-{3VQ29VraYc?QCW4 zWl(M^Zv`av{wgMZ78AR3`{K>Y{7bRt9s5K;{g)2IBZq_TudDB{tM9es6@?Sz^$4ec zjLJhRO@7#yvb?(TDU?Plngwi4O-&s;SBrX}L3#bG9%X-Ez8Rw&JNJz;Gi6b2HC2hv zDCewcu)O!RHC&icsHfm4Vy8I?UIShd^9=>OF67%BO&lNAC^U$C$dZ}Q=+16 z7zJp8dnZW9ISTJQLn1q>x0U0zf_hAn*KO({n!b)Hlc(NRou~jebhBVn4!D0rBn-Kv z*8%X|E*ja*0-hz;z;r5Qi6BonhGAZz(ch5x3JpVxyyBBbUq7mMkM0X&749g|U)`#0 z?FX(_yl?m(MtD7e7@;m)K0xC~etCQ2yMf9lhjO^O-jGLX8x481vUnto?c8{Nt8w|p z-i4or8aJ2sr3aOT-+BM`Qgx%&{nV=BTD%?@KHx`=zPSyva^}y5tANx<| zVs){$*ytOt_g<-AS*QmV5BR$PTNft|_{qaixN0;)S9i#BW {dep['head']}") \ No newline at end of file diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/__pycache__/__init__.cpython-313.pyc b/tools/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..443a12ccdb2071c1b8b8ff9e25656e9721fcd358 GIT binary patch literal 151 zcmey&%ge<81P@uCWPs?$AOZ#$p^VQgK*m&tbOudEzm*I{OhDdekkl_{_Y_lK6PNg34PQHo5sJ ar8%i~MXW$$K-Lt47$2D#85xV1fh+(CTqK_W literal 0 HcmV?d00001 diff --git a/tools/__pycache__/parser.cpython-313.pyc b/tools/__pycache__/parser.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b58fcc99a527fc735bde263bfa61ebba3a2b1481 GIT binary patch literal 1500 zcmZvc%Wo4$7{F(}4{sJb4&+hS>0^1cU}$ikQX5&hP*mg=jIyzcAY!$6hj`QV?rLTu zuum3J)kr;n4^heqj^)%V$8zGj5fw25s*vE)TT=f6{bmz~nC?jX&3=#h=KE%TyZvld zLog1=uNyy$2)*LUV1OQvei^1UWFSNM9uc&N=2N?#ebW(Sq`ne%3?yZU1w=$c0x2yC zMfo4@xWGoSXV!g!%^UcdNt^ZE_;^(YB;T?BH;6TKWzSbbgS?(_Wl$5;GO+5XCXAzV z7ZLJO`~0a|YF>&n4+%reAwvS6GGvYwLk6D$KK1{;#9TZ+4e{|A4zDrP9Qs0iQ{(x0 zIAz3ZJ%)-S7uxv+|0nFtTx-pQ{6)7s%WYAI&bE~Wd^4QY+hX~%&)bUrEe>{+Pupqz z9JXxG6#Zh=fH_tEx~MSGa|0#^#PLEVg|L`OHV#ZCH6WTvfltk%$W)5G5PNmZMBA@3 zIo_7?^VD^neOYSwOdG>MTQRQ*6!Fr8@GjjlGMQSYhh zZ;R;A;vngX>Z?!)lbJ`7XsZ_eHfiB%7E00w(D z;L*|vb}fF}wBZC)zwQ%#iC94c>;b|2s$(y3C*AOg&14rhn-)_!a$zgLWCXTk8fVYk zafv`+G0jwmn!$=!4+%$vDY4TeyzO`m;cYR+3V0<&iSQo+%j^~AU`lv%OxrWHC_f*- zS0|&~qXYZA8}m(sIn)h#55mUap#g{vdO1GX`Qa63^5ucC+jF<)o@FM0sL8^5rCaI2 zwAfwj<=?-j-PL*%?{edMwOj2KKD>AG?#a!O;2g7+SV8vncP&T zH`M9hDo@qp+tZxU4^So_N$(K8b5Uk84Xk>LNv?0%L@-Ndy$KylDcJ8HS<+bb@f uBRT}}{~^uaN?OSE4~+}?ILUvfPdv>4 literal 0 HcmV?d00001 diff --git a/tools/parser.py b/tools/parser.py new file mode 100644 index 0000000..63fbfe8 --- /dev/null +++ b/tools/parser.py @@ -0,0 +1,35 @@ +import spacy + +# English model +nlp = spacy.load("en_core_web_sm") + +# Parse a single sentence +def parse_sentence(sentence): + doc = nlp(sentence) + + print("Token-by-token analysis:") + for token in doc: + print(f"Text: {token.text:<12} Dep: {token.dep_:<10} Head: {token.head.text:<10} POS: {token.pos_:<8}") + + return doc + +def extract_dependency_relationships(doc): + """Extract dependency relationships for graph representation""" + dependencies = [] + + for token in doc: + # Skip punctuation + if token.is_punct: + continue + + dependency = { + 'word': token.text, + 'lemma': token.lemma_, + 'dep_type': token.dep_, + 'head': token.head.text, + 'head_lemma': token.head.lemma_, + 'pos': token.pos_ + } + dependencies.append(dependency) + + return dependencies