Semantic Search in 5 Minutes
This tutorial walks you through the full semantic search workflow:
- Push some records
- Create an embedding index on a text property
- Poll until the index is ready
- Run semantic search
- Run semantic search with a filter
Prerequisites: a running RushDB instance with RUSHDB_EMBEDDING_MODEL configured (or RushDB Cloud with AI enabled).
Step 1: Push records
- Python
- TypeScript
- shell
from rushdb import RushDB

db = RushDB("RUSHDB_API_KEY")

# Sample articles. `description` is the text property the tutorial embeds
# and searches; `tags` is used by the filtered search.
articles = [
    {
        "title": "Intro to Machine Learning",
        "description": "A beginner guide to supervised learning, neural networks, and model evaluation.",
        "tags": ["ml", "beginner"],
    },
    {
        "title": "Graph Databases Explained",
        "description": "How graph databases store relationships and why they outperform SQL for connected data.",
        "tags": ["databases", "graphs"],
    },
    {
        "title": "Climate Science Overview",
        "description": "Current research on global warming, carbon cycles, and renewable energy policy.",
        "tags": ["science", "climate"],
    },
]

# Push all three articles in a single import call under the "Article" label.
db.records.import_json({"label": "Article", "data": articles})
import RushDB from '@rushdb/javascript-sdk'

const db = new RushDB('RUSHDB_API_KEY')

// Sample articles. `description` is the text property the tutorial embeds
// and searches; `tags` is used by the filtered search.
const articles = [
  {
    title: 'Intro to Machine Learning',
    description: 'A beginner guide to supervised learning, neural networks, and model evaluation.',
    tags: ['ml', 'beginner']
  },
  {
    title: 'Graph Databases Explained',
    description: 'How graph databases store relationships and why they outperform SQL for connected data.',
    tags: ['databases', 'graphs']
  },
  {
    title: 'Climate Science Overview',
    description: 'Current research on global warming, carbon cycles, and renewable energy policy.',
    tags: ['science', 'climate']
  }
]

// Push all three articles in a single import call under the 'Article' label.
await db.records.importJson({ label: 'Article', data: articles })
POST /api/v1/records/import/json
Content-Type: application/json
token: YOUR_API_KEY
{
"label": "Article",
"data": [
{
"title": "Intro to Machine Learning",
"description": "A beginner guide to supervised learning, neural networks, and model evaluation.",
"tags": ["ml", "beginner"]
},
{
"title": "Graph Databases Explained",
"description": "How graph databases store relationships and why they outperform SQL for connected data.",
"tags": ["databases", "graphs"]
},
{
"title": "Climate Science Overview",
"description": "Current research on global warming, carbon cycles, and renewable energy policy.",
"tags": ["science", "climate"]
}
]
}
Step 2: Create an embedding index
Tell RushDB to vectorize the description field on Article records.
- Python
- TypeScript
- shell
# Ask for an embedding index on Article.description.
index_request = {"label": "Article", "propertyName": "description"}
response = db.ai.indexes.create(index_request)

# Keep the created index around: its id is needed to poll for readiness.
index = response.data
print(index["id"], index["status"])  # e.g. 'idx_abc123', 'pending'
// Ask for an embedding index on Article.description.
const indexRequest = { label: 'Article', propertyName: 'description' }

// Keep the created index around: its id is needed to poll for readiness.
const { data: index } = await db.ai.indexes.create(indexRequest)
console.log(index.id, index.status) // e.g. 'idx_abc123', 'pending'
POST /api/v1/ai/indexes
Content-Type: application/json
token: YOUR_API_KEY
{
"label": "Article",
"propertyName": "description"
}
Response:
{
"data": {
"id": "idx_abc123",
"label": "Article",
"propertyName": "description",
"status": "pending"
},
"success": true
}
Attempting to create a duplicate `(label, propertyName)` pair returns `409 Conflict`.
Step 3: Wait for the index to become ready
Backfill is asynchronous. Poll stats until indexedRecords === totalRecords.
- Python
- TypeScript
- shell
import time


def wait_for_index(index_id: str, interval: float = 2.0, timeout: float = 300.0):
    """Poll the index stats until every record has been embedded.

    Args:
        index_id: Id of the embedding index, as returned when it was created.
        interval: Seconds to sleep between two stats requests.
        timeout: Maximum number of seconds to keep polling before giving up.

    Raises:
        TimeoutError: If the index is still not fully embedded after
            ``timeout`` seconds. Without this bound the loop would spin
            forever on an index that never completes.
    """
    deadline = time.monotonic() + timeout
    while True:
        stats = db.ai.indexes.stats(index_id).data
        print(f"{stats['indexedRecords']} / {stats['totalRecords']} embedded")
        # Require totalRecords > 0 so that "0 / 0" is not treated as ready
        # (presumably the count can read 0 before backfill starts - confirm).
        if stats["indexedRecords"] >= stats["totalRecords"] > 0:
            return
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Index {index_id} not ready after {timeout} seconds"
            )
        time.sleep(interval)


wait_for_index(index["id"])
/**
 * Poll the index stats until every record has been embedded.
 *
 * @param indexId    Id of the embedding index, as returned when it was created.
 * @param intervalMs Milliseconds to wait between two stats requests.
 * @param timeoutMs  Maximum number of milliseconds to keep polling.
 * @throws Error if the index is still not fully embedded after `timeoutMs`;
 *         without this bound the loop would spin forever on an index that
 *         never completes.
 */
async function waitForIndex(indexId: string, intervalMs = 2000, timeoutMs = 300000) {
  const deadline = Date.now() + timeoutMs
  while (true) {
    const { data: stats } = await db.ai.indexes.stats(indexId)
    console.log(`${stats.indexedRecords} / ${stats.totalRecords} embedded`)
    // Require totalRecords > 0 so that "0 / 0" is not treated as ready
    // (presumably the count can read 0 before backfill starts - confirm).
    if (stats.indexedRecords >= stats.totalRecords && stats.totalRecords > 0) return
    if (Date.now() >= deadline) {
      throw new Error(`Index ${indexId} not ready after ${timeoutMs} ms`)
    }
    await new Promise(resolve => setTimeout(resolve, intervalMs))
  }
}

await waitForIndex(index.id)
GET /api/v1/ai/indexes/idx_abc123/stats
token: YOUR_API_KEY
{ "data": { "totalRecords": 3, "indexedRecords": 3 }, "success": true }
Poll until indexedRecords === totalRecords.
Step 4: Semantic search
RushDB always narrows candidates to the current project before ranking them by vector similarity.
- Python
- TypeScript
- shell
# Search Article.description for the three closest matches to the query text.
search_request = {
    "propertyName": "description",
    "query": "neural networks and deep learning",
    "labels": ["Article"],
    "limit": 3,
}
response = db.ai.search(search_request)

# Print each hit as "[score] title".
for result in response.data:
    print(f"[{result.score:.3f}] {result['title']}")
# [0.921] Intro to Machine Learning
# [0.743] Graph Databases Explained
# [0.612] Climate Science Overview
// Search Article.description for the three closest matches to the query text.
const searchRequest = {
  propertyName: 'description',
  query: 'neural networks and deep learning',
  labels: ['Article'],
  limit: 3
}
const { data: results } = await db.ai.search(searchRequest)

// Print each hit as "[score] title".
results.forEach(result => {
  console.log(`[${result.__score.toFixed(3)}] ${result.title}`)
})
// [0.921] Intro to Machine Learning
// [0.743] Graph Databases Explained
// [0.612] Climate Science Overview
POST /api/v1/ai/search
Content-Type: application/json
token: YOUR_API_KEY
{
"propertyName": "description",
"query": "neural networks and deep learning",
"labels": ["Article"],
"limit": 3
}
{
"data": [
{ "__id": "rec_1", "__label": "Article", "__score": 0.921, "title": "Intro to Machine Learning", "description": "..." },
{ "__id": "rec_2", "__label": "Article", "__score": 0.743, "title": "Graph Databases Explained", "description": "..." },
{ "__id": "rec_3", "__label": "Article", "__score": 0.612, "title": "Climate Science Overview", "description": "..." }
],
"success": true
}
Step 5: Semantic search with filter
Adding a where clause narrows the project-scoped candidate set further before cosine similarity ranking.
- Python
- TypeScript
- shell
# Same search call as before, plus a `where` clause restricting candidates
# to Articles tagged "science" or "climate".
tag_filter = {"tags": {"$in": ["science", "climate"]}}
filtered_request = {
    "propertyName": "description",
    "query": "renewable energy and climate",
    "labels": ["Article"],
    "where": tag_filter,
    "limit": 5,
}
response = db.ai.search(filtered_request)

# Print each hit as "[score] title".
for result in response.data:
    print(f"[{result.score:.3f}] {result['title']}")
// Same search call as before, plus a `where` clause on the tags property.
const tagFilter = { tags: { $in: ['science', 'climate'] } }
const filteredRequest = {
  propertyName: 'description',
  query: 'renewable energy and climate',
  labels: ['Article'],
  where: tagFilter,
  limit: 5
}
const { data: results } = await db.ai.search(filteredRequest)

// Print each hit as "[score] title".
results.forEach(result => {
  console.log(`[${result.__score.toFixed(3)}] ${result.title}`)
})
// Only Articles with tags science or climate are considered
POST /api/v1/ai/search
Content-Type: application/json
token: YOUR_API_KEY
{
"propertyName": "description",
"query": "renewable energy and climate",
"labels": ["Article"],
"where": {
"tags": { "$in": ["science", "climate"] }
},
"limit": 5
}
Next steps
- Inject schema context into an LLM: TypeScript AI docs | Python AI docs
- REST API reference: REST AI docs
- Multiple labels: Pass 2+ labels in `labels` to search across all specified labels.