simple sentence parser and dataset tester program
This commit is contained in:
0
tools/__init__.py
Normal file
0
tools/__init__.py
Normal file
BIN
tools/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
tools/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
tools/__pycache__/parser.cpython-313.pyc
Normal file
BIN
tools/__pycache__/parser.cpython-313.pyc
Normal file
Binary file not shown.
35
tools/parser.py
Normal file
35
tools/parser.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import spacy
|
||||
|
||||
# English model: small spaCy pipeline, loaded once at import time and shared
# by parse_sentence(). NOTE(review): assumes `en_core_web_sm` has been
# downloaded (`python -m spacy download en_core_web_sm`) — confirm in setup.
nlp = spacy.load("en_core_web_sm")
|
||||
|
||||
# Parse a single sentence
|
||||
def parse_sentence(sentence):
    """Parse *sentence* with the module-level spaCy pipeline.

    Prints a per-token breakdown (text, dependency label, syntactic head,
    part of speech) to stdout, then returns the parsed ``Doc`` so callers
    can process it further (e.g. extract_dependency_relationships).
    """
    parsed = nlp(sentence)

    print("Token-by-token analysis:")
    for tok in parsed:
        print(f"Text: {tok.text:<12} Dep: {tok.dep_:<10} Head: {tok.head.text:<10} POS: {tok.pos_:<8}")

    return parsed
|
||||
|
||||
def extract_dependency_relationships(doc):
    """Turn a parsed doc into a list of dependency records for a graph.

    Every non-punctuation token yields one dict linking the token (word,
    lemma, POS) to its syntactic head (text, lemma) via its dependency
    label. Punctuation tokens are dropped entirely.
    """
    return [
        {
            'word': tok.text,
            'lemma': tok.lemma_,
            'dep_type': tok.dep_,
            'head': tok.head.text,
            'head_lemma': tok.head.lemma_,
            'pos': tok.pos_,
        }
        for tok in doc
        if not tok.is_punct  # punctuation adds no edges to the dependency graph
    ]
|
||||
Reference in New Issue
Block a user