This section covers the Python code to connect to HelixDB, create nodes and edges, create embeddings for the professors, search by embeddings, and filter the results.
Step 1: Setting up the environment
# Create an isolated virtual environment in ./venv
python -m venv venv
# Activate the environment for the current shell session
source venv/bin/activate
# Install the helix-py client and the sentence-transformers library used below
pip install helix-py sentence-transformers
Step 2: Imports and Setup for HelixDB
from pprint import pprint

import helix
from sentence_transformers import SentenceTransformer
# Load the sentence-transformers model used to embed research-area text.
model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
# Client used for every db.query(...) call below; configured with local=True, port 6969 and verbose output.
db = helix.Client(local=True, port=6969, verbose=True)
Step 3: Creating the nodes that we will link professors to
Here we start creating all the nodes that we will link a professor to.
You can also make a query in queries.hx
to get the ID of the research area if you have the name, but we will store it in Python for simplicity.
Sample Data
# Sample record for a single professor. The later steps read these fields to
# create the graph nodes, the edges between them, and the embeddings.
professor_data = {
    "name": "James",
    "bio": "James is an Assistant Professor focusing on basketball analytics, computer vision, and machine learning. Projects include ShotNet and DunkGPT.",
    "department": "Computer Science",
    "university": "Uni X",
    # One {"area": <name>} entry per research area, in the order listed.
    "key_research_areas": [
        {"area": area_name}
        for area_name in (
            "Computer Vision for Basketball",
            "Predictive Modelling & Simulation",
            "Sports Analytics with Large Language Models",
            "Wearable Sensor Data Mining",
            "Fairness & Ethics in Sports AI",
        )
    ],
}
Create Professor Node
# Create the Professor node from the sample record's name and bio.
james_node = db.query(
    "create_professor",
    {"name": professor_data["name"], "bio": professor_data["bio"]},
)
# Store the ID of the professor node; the linking and embedding steps need it.
# The response is indexed as a list whose first item holds the node under "professor".
james_node_id = james_node[0]["professor"]["id"]
Create Department Node
# Create the Department node named after the sample record's department.
computer_science_node = db.query(
    "create_department",
    {"name": professor_data["department"]},
)
# Store the ID of the department node so the professor can be linked to it.
computer_science_node_id = computer_science_node[0]["department"]["id"]
Create University Node
# Create the University node named after the sample record's university.
university_node = db.query(
    "create_university",
    {"name": professor_data["university"]},
)
# Store the ID of the university node so the professor can be linked to it.
university_node_id = university_node[0]["university"]["id"]
Create Research Area Nodes
# Map each research-area name to the ID of the node created for it, so the
# IDs can be looked up by name when linking the professor.
research_area_node_ids = {}
for area in professor_data["key_research_areas"]:
    research_area_node = db.query("create_research_area", {"name": area["area"]})
    # Print the raw response to show its shape (needs `from pprint import pprint`).
    pprint(research_area_node)
    research_area_node_id = research_area_node[0]["research_area"]["id"]
    research_area_node_ids[area["area"]] = research_area_node_id
Step 4: Link nodes to the professor
Link Professor to Department
# Connect the professor node to the department node using the IDs stored earlier.
db.query(
    "link_professor_to_department",
    {
        "professor_id": james_node_id,
        "department_id": computer_science_node_id,
    },
)
Link Professor to University
# Connect the professor node to the university node using the IDs stored earlier.
db.query(
    "link_professor_to_university",
    {
        "professor_id": james_node_id,
        "university_id": university_node_id,
    },
)
Link Professor to Research Area
# Link the professor to every research area, looking up each node ID by area name.
for area in professor_data["key_research_areas"]:
    db.query(
        "link_professor_to_research_area",
        {
            "professor_id": james_node_id,
            "research_area_id": research_area_node_ids[area["area"]],
        },
    )
Step 5: Create Embeddings for the professor
Create Research Area Embedding
# Embed each research-area name and store the vector together with the professor's ID.
for area in professor_data["key_research_areas"]:
    # Convert the encoded vector to a plain list of Python floats before sending it.
    embedding = model.encode(area["area"]).astype(float).tolist()
    db.query(
        "create_research_area_embedding",
        {
            "professor_id": james_node_id,
            "research_area": area["area"],
            "vector": embedding,
        },
    )
Step 6: Let's answer some of the questions we asked in the beginning
- “What professors work in X research area?”
# Exact-match lookup: pass the research area's name to the stored query.
db.query(
    "get_professor_by_research_area_name",
    {"research_area_name": "Computer Vision for Basketball"},
)
- “What professors are working in X University?”
// Returns every Professor node for which following an outgoing HasUniversity
// edge reaches something whose `name` equals `university_name`.
// NOTE(review): the other answers in this step show the Python call rather than
// the query definition; presumably this one is invoked as
// db.query("get_professors_by_university_name", {"university_name": "Uni X"}) — confirm.
QUERY get_professors_by_university_name (university_name: String) =>
professors <- N<Professor>::WHERE(EXISTS(_::Out<HasUniversity>::WHERE(_::{name}::EQ(university_name))))
RETURN professors
- “What professors are working in X Department?”
# Exact-match lookup: pass the department's name to the stored query.
db.query(
    "get_professors_by_department_name",
    {"department_name": "Computer Science"},
)
- “What professors are working in the X University and are working in X department?”
# Combined filter: pass both the university name and the department name.
db.query(
    "get_professors_by_university_and_department_name",
    {
        "university_name": "Uni X",
        "department_name": "Computer Science",
    },
)
- “Find me professors doing X research area”
# Embed the search text the same way the research areas were embedded,
# then pass the vector and k=1 to the similarity-search query.
query_vector = (
    model.encode("Computer Vision for Basketball")
    .astype(float)
    .tolist()
)
db.query(
    "search_similar_professors_by_research_area",
    {"query_vector": query_vector, "k": 1},
)
- “I want to find professors working in computer vision, in X university”
# Embed the search text, then pass the vector together with the university
# name and k=1 to the combined query.
query_vector = (
    model.encode("Computer Vision for Basketball")
    .astype(float)
    .tolist()
)
db.query(
    "get_professor_semantically_and_by_university_name",
    {
        "query_vector": query_vector,
        "university_name": "Uni X",
        "k": 1,
    },
)