Skip to main content
@tags: keyword, search, bm25, text-search, full-text, ranking
Search for keywords in nodes using the BM25 algorithm.

Syntax

SearchBM25<Type>(text, limit)
Notes:
  • BM25 is a ranking function used for full-text search.
  • It searches through the text properties of nodes and ranks results based on keyword relevance and frequency.
  • Make sure you enable BM25 in your helix.toml file.
  • Schema:
// Node type created by InsertDocument and searched by SearchBM25<Document>.
N::Document {
    content: String,
    created_at: Date
}
  • Query:
// Run a BM25 keyword search over Document nodes.
// `keywords` is the search text and `limit` is passed to SearchBM25 as the result limit.
QUERY SearchKeyword (keywords: String, limit: I64) =>
    documents <- SearchBM25<Document>(keywords, limit)
    RETURN documents

// Create a Document node from the given content and timestamp, and return it.
QUERY InsertDocument (content: String, created_at: Date) =>
    document <- AddN<Document>({ content: content, created_at: created_at })
    RETURN document
  • cURL:
# Seed the same four documents the Python and TypeScript examples insert.
# Each created_at is the current UTC time, spliced into the JSON body by
# closing the single-quoted string around the $(date ...) substitution.
curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Machine learning algorithms for data analysis","created_at":"'"$(date -u +"%Y-%m-%dT%H:%M:%SZ")"'"}'

curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Introduction to artificial intelligence and neural networks","created_at":"'"$(date -u +"%Y-%m-%dT%H:%M:%SZ")"'"}'

curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Database optimization techniques and performance tuning","created_at":"'"$(date -u +"%Y-%m-%dT%H:%M:%SZ")"'"}'

curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Web development with modern JavaScript frameworks","created_at":"'"$(date -u +"%Y-%m-%dT%H:%M:%SZ")"'"}'

# Run the BM25 keyword search with a limit of 5.
curl -X POST \
  http://localhost:6969/SearchKeyword \
  -H 'Content-Type: application/json' \
  -d '{"keywords":"machine learning algorithms","limit":5}'
  • Python SDK:
from datetime import datetime, timezone
from helix.client import Client

# Connect to the HelixDB instance on local port 6969.
db = Client(local=True, port=6969)

# Seed a few documents so the keyword search has something to rank.
for text in (
    "Machine learning algorithms for data analysis",
    "Introduction to artificial intelligence and neural networks",
    "Database optimization techniques and performance tuning",
    "Web development with modern JavaScript frameworks",
):
    payload = {
        "content": text,
        # Timezone-aware UTC timestamp in ISO 8601 form, taken per document.
        "created_at": datetime.now(timezone.utc).isoformat(),
    }
    db.query("InsertDocument", payload)

# Run the BM25 search with a limit of 5.
search_params = {"keywords": "machine learning algorithms", "limit": 5}
matches = db.query("SearchKeyword", search_params)

print(matches)
  • TypeScript SDK:
import HelixDB from "helix-ts";

/**
 * Inserts four sample documents one after another, then runs the
 * SearchKeyword query and logs whatever it returns.
 */
async function main() {
    const db = new HelixDB("http://localhost:6969");

    // Documents to insert before searching.
    const seedTexts = [
        "Machine learning algorithms for data analysis",
        "Introduction to artificial intelligence and neural networks",
        "Database optimization techniques and performance tuning",
        "Web development with modern JavaScript frameworks"
    ];

    // Await each insert so the requests are issued sequentially.
    for (const text of seedTexts) {
        const payload = {
            content: text,
            created_at: new Date().toISOString(),
        };
        await db.query("InsertDocument", payload);
    }

    const searchParams = {
        keywords: "machine learning algorithms",
        limit: 5
    };
    const matches = await db.query("SearchKeyword", searchParams);

    console.log("Search results:", matches);
}

// Report any rejected request instead of leaving the promise unhandled.
main().catch((error) => {
    console.error("SearchKeyword query failed:", error);
});

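Example 2: Keyword search with postfiltering

The date filter is applied to the results that SearchBM25 has already limited, so the query can return fewer than `limit` documents.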

  • Schema:
// Node type created by InsertDocument and searched by SearchRecentKeywords.
// created_at is the field the post-filter compares against cutoff_date.
N::Document {
    content: String,
    created_at: Date
}
  • Query:
// BM25 keyword search followed by a date post-filter.
// Step 1 collects the BM25 hits for `keywords` (with `limit` passed to SearchBM25);
// step 2 keeps only the hits whose created_at is on or after `cutoff_date`.
QUERY SearchRecentKeywords (keywords: String, limit: I64, cutoff_date: Date) =>
    searched_docs <- SearchBM25<Document>(keywords, limit)
    documents <- searched_docs::WHERE(_::{created_at}::GTE(cutoff_date))
    RETURN documents

// Create a Document node from the given content and timestamp, and return it.
QUERY InsertDocument (content: String, created_at: Date) =>
    document <- AddN<Document>({ content: content, created_at: created_at })
    RETURN document
  • cURL:
# Timestamps used by the requests below.
# GNU date (Linux) understands -d '<N> days ago'; BSD/macOS date does not,
# so fall back to its -v-<N>d form when the GNU invocation fails.
NOW="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
OLD_DATE="$(date -u -d '15 days ago' +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || date -u -v-15d +"%Y-%m-%dT%H:%M:%SZ")"
CUTOFF_DATE="$(date -u -d '10 days ago' +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || date -u -v-10d +"%Y-%m-%dT%H:%M:%SZ")"

# Recent documents (created now).
curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Modern machine learning techniques in 2024","created_at":"'"$NOW"'"}'

curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Latest artificial intelligence research papers","created_at":"'"$NOW"'"}'

# Old documents (created 15 days ago), matching the Python and TypeScript examples.
curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Traditional machine learning approaches from last year","created_at":"'"$OLD_DATE"'"}'

curl -X POST \
  http://localhost:6969/InsertDocument \
  -H 'Content-Type: application/json' \
  -d '{"content":"Historical AI development milestones","created_at":"'"$OLD_DATE"'"}'

# Search, keeping only documents created on or after the 10-day cutoff.
curl -X POST \
  http://localhost:6969/SearchRecentKeywords \
  -H 'Content-Type: application/json' \
  -d '{"keywords":"machine learning artificial intelligence","limit":5,"cutoff_date":"'"$CUTOFF_DATE"'"}'
  • Python SDK:
from datetime import datetime, timezone, timedelta
from helix.client import Client

# Connect to the HelixDB instance on local port 6969.
db = Client(local=True, port=6969)

# Two timestamps: "now" for fresh documents, 15 days back for stale ones.
fresh_stamp = datetime.now(timezone.utc).isoformat()
stale_stamp = (datetime.now(timezone.utc) - timedelta(days=15)).isoformat()

# Insert in order: the fresh batch first, then the stale batch.
batches = (
    (
        fresh_stamp,
        (
            "Modern machine learning techniques in 2024",
            "Latest artificial intelligence research papers",
        ),
    ),
    (
        stale_stamp,
        (
            "Traditional machine learning approaches from last year",
            "Historical AI development milestones",
        ),
    ),
)
for stamp, texts in batches:
    for text in texts:
        db.query("InsertDocument", {"content": text, "created_at": stamp})

# The query keeps only documents whose created_at is on or after this cutoff.
ten_days_ago = datetime.now(timezone.utc) - timedelta(days=10)
search_params = {
    "keywords": "machine learning artificial intelligence",
    "limit": 5,
    "cutoff_date": ten_days_ago.isoformat(),
}
hits = db.query("SearchRecentKeywords", search_params)

print(hits)
  • TypeScript SDK:
import HelixDB from "helix-ts";

/**
 * Inserts two fresh and two 15-day-old documents, then runs the
 * SearchRecentKeywords query with a 10-day cutoff and logs the result.
 */
async function main() {
    const db = new HelixDB("http://localhost:6969");

    const DAY_MS = 24 * 60 * 60 * 1000;
    const freshStamp = new Date().toISOString();
    const staleStamp = new Date(Date.now() - 15 * DAY_MS).toISOString();

    // Insert in order: the fresh batch first, then the stale batch.
    const batches = [
        {
            stamp: freshStamp,
            texts: [
                "Modern machine learning techniques in 2024",
                "Latest artificial intelligence research papers"
            ],
        },
        {
            stamp: staleStamp,
            texts: [
                "Traditional machine learning approaches from last year",
                "Historical AI development milestones"
            ],
        },
    ];

    // Await each insert so the requests are issued sequentially.
    for (const { stamp, texts } of batches) {
        for (const text of texts) {
            await db.query("InsertDocument", {
                content: text,
                created_at: stamp,
            });
        }
    }

    // The query keeps only documents whose created_at is on or after this cutoff.
    const tenDaysAgo = new Date(Date.now() - 10 * DAY_MS).toISOString();

    const searchParams = {
        keywords: "machine learning artificial intelligence",
        limit: 5,
        cutoff_date: tenDaysAgo,
    };
    const hits = await db.query("SearchRecentKeywords", searchParams);

    console.log("Filtered search results:", hits);
}

// Report any rejected request instead of leaving the promise unhandled.
main().catch((error) => {
    console.error("SearchRecentKeywords query failed:", error);
});