Skip to main content

Step 1: HelixDB Setup

  1. Create a folder for the project
  2. Initialise HelixDB
# Create the project folder and move into it
mkdir professor_example && cd professor_example
# Create the helixdb-cfg sub-folder inside the project and move into it
mkdir helixdb-cfg && cd helixdb-cfg
# Initialise HelixDB from inside helixdb-cfg
helix init

Step 2: Building our Graph Schema

schema.hx:
// NODES //
// Node type for a professor. Stores the professor's name and bio,
// both as String properties.
N::Professor {
    name: String,
    bio: String,
}

// Node type for a research area. Its only property is the research area
// itself, stored as a String.
N::ResearchArea {
    research_area: String,
}

// Node type for a department. Its only property is the department name.
N::Department {
    name: String,
}   

// Node type for a university. Its only property is the university name.
N::University {
    name: String,
}

// EDGES //

// Directed edge from a Professor node to a University node.
// No edge properties are declared.
E::HasUniversity {
    From: Professor,
    To: University,
}

// Directed edge from a Professor node to a Department node.
// No edge properties are declared.
E::HasDepartment {
    From: Professor,
    To: Department,
}

// Directed edge from a Professor node to a ResearchArea node.
// No edge properties are declared.
E::HasResearchArea {
    From: Professor,
    To: ResearchArea,
}

// Directed edge from a Professor node to a ResearchAreaEmbedding vector
// (the V:: type declared in the VECTORS section of this schema).
// No edge properties are declared.
E::HasResearchAreaEmbedding {
    From: Professor,
    To: ResearchAreaEmbedding,
}

// VECTORS //
// Vector type for research-area embeddings. Besides the vector itself it
// carries a research_area String property (presumably the text that was
// embedded -- confirm against the ingestion code).
V::ResearchAreaEmbedding {
    research_area: String,
}
Great! We’ve made the schema for our graph. Now let’s think about the types of queries we need for ingesting our professor data.

Step 3: Building our Queries

Let’s build the queries that load our professor data into HelixDB, based on the JSON and the schema we have. query.hx:
// Create Professor Node
// Adds a Professor node whose name and bio properties are set from the
// query arguments, and returns the newly added node.
//   name: value for the Professor's name property
//   bio:  value for the Professor's bio property
QUERY create_professor (name: String, bio: String ) =>
    professor <- AddN<Professor>({ name: name, bio: bio })
    RETURN professor

// Create Research Area Node
// Adds a ResearchArea node and returns it.
//   name: stored in the node's research_area property (note that the
//         argument is called name but the schema field is research_area)
QUERY create_research_area (name: String) =>
    research_area <- AddN<ResearchArea>({ research_area: name })
    RETURN research_area

// Create Department Node
// Adds a Department node with the given name and returns it.
//   name: value for the Department's name property
QUERY create_department (name: String) =>
    department <- AddN<Department>({ name: name })
    RETURN department

// Create University Node
// Adds a University node with the given name and returns it.
//   name: value for the University's name property
QUERY create_university (name: String) =>
    university <- AddN<University>({ name: name })
    RETURN university
Now we need to link professors to other nodes:
// Link Professor to Department
// Looks up an existing Professor and an existing Department by ID, adds a
// HasDepartment edge pointing from the professor to the department, and
// returns the new edge.
//   professor_id:  ID of the Professor node (edge source)
//   department_id: ID of the Department node (edge target)
QUERY link_professor_to_department (professor_id: ID, department_id: ID) =>
    professor <- N<Professor>(professor_id)
    department <- N<Department>(department_id)
    edge <- AddE<HasDepartment>::From(professor)::To(department)
    RETURN edge


// Link Professor to University
// Looks up an existing Professor and an existing University by ID, adds a
// HasUniversity edge pointing from the professor to the university, and
// returns the new edge.
//   professor_id:  ID of the Professor node (edge source)
//   university_id: ID of the University node (edge target)
QUERY link_professor_to_university (professor_id: ID, university_id: ID) =>
    professor <- N<Professor>(professor_id)
    university <- N<University>(university_id)
    edge <- AddE<HasUniversity>::From(professor)::To(university)
    RETURN edge


// Link Professor to Research Area
// Looks up an existing Professor and an existing ResearchArea by ID, adds a
// HasResearchArea edge pointing from the professor to the research area, and
// returns the new edge.
//   professor_id:     ID of the Professor node (edge source)
//   research_area_id: ID of the ResearchArea node (edge target)
QUERY link_professor_to_research_area (professor_id: ID, research_area_id: ID) =>
    professor <- N<Professor>(professor_id)
    research_area <- N<ResearchArea>(research_area_id)
    edge <- AddE<HasResearchArea>::From(professor)::To(research_area)
    RETURN edge
Now let’s create our embedding nodes. In this guide, we will only embed the professor’s research area.
// Create Research Area Embedding Node
// Looks up a Professor by ID, adds a ResearchAreaEmbedding vector carrying
// the given research_area text as a property, and connects the professor to
// that vector with a HasResearchAreaEmbedding edge.
//   professor_id:  ID of the Professor the embedding belongs to
//   research_area: stored in the vector's research_area property
//   vector:        the embedding values to store
// Returns the literal string "success" rather than the created vector or
// edge; the edge variable exists only to perform the AddE step.
QUERY create_research_area_embedding (professor_id: ID, research_area: String, vector: [F64]) =>
    professor <- N<Professor>(professor_id)
    research_area_embedding <- AddV<ResearchAreaEmbedding>(vector, { research_area: research_area })
    edge <- AddE<HasResearchAreaEmbedding>::From(professor)::To(research_area_embedding)
    RETURN "success"
We need to be able to search for professors based on their embeddings. To do this, we can use the SearchV operation to find similar embeddings.
// Search Similar Professors based on Research Area Embedding
// Runs SearchV over ResearchAreaEmbedding vectors using query_vector, then
// returns two things: the research_area property of the matched vectors and
// the Professor nodes reached by following HasResearchAreaEmbedding edges
// backwards (In) from those vectors.
//   query_vector: the embedding to compare against
//   k:            passed to SearchV (presumably the number of nearest
//                 vectors to return -- confirm against the HelixQL docs)
QUERY search_similar_professors_by_research_area (query_vector: [F64], k: I64) =>
    vecs <- SearchV<ResearchAreaEmbedding>(query_vector, k)
    research_area <- vecs::{research_area}
    professors <- vecs::In<HasResearchAreaEmbedding>
    RETURN professors, research_area
Awesome! We’ve built our graph schema and queries. Now how do we query this in Python?
⌘I