GraphRAG — Graph-Enriched Retrieval Augmented Generation

Standard RAG retrieves the top-k most similar text chunks and pastes them into a prompt. GraphRAG does the same retrieval step, then traverses the knowledge graph to collect related entities — authors, topics, source documents, citations — and includes them as structured context. The LLM call is identical; the difference is what you put in the context window.

Flat RAG context          GraphRAG context
────────────────          ──────────────────────────────────
Chunk 1 text              Chunk 1 text
Chunk 2 text                └─ from: architecture.md
Chunk 3 text                └─ author: Jane Smith (Platform)
                            └─ topics: distributed systems, caching
                          Chunk 2 text
                            └─ from: api-reference.md
                            └─ cited by: 3 other documents

Graph shape

Label    What it represents
──────   ──────────────────────────────────────────────────────────
SOURCE   A document, web page, or data export (the original source)
CHUNK    A text fragment from a source, with overlap
AUTHOR   A person or team that authored the source
TOPIC    A concept tag associated with a chunk

Step 1: Ingest sources and chunks

from rushdb import RushDB
import os

db = RushDB(os.environ['RUSHDB_API_KEY'], base_url='https://api.rushdb.com/api/v1')

CHUNK_SIZE = 500
CHUNK_OVERLAP = 80

def chunk_text(text: str) -> list[str]:
    chunks, start = [], 0
    while start < len(text):
        chunks.append(text[start:start + CHUNK_SIZE].strip())
        start += CHUNK_SIZE - CHUNK_OVERLAP
    return [c for c in chunks if c]

def ingest_source(filename: str, author: str, topics: list[str], content: str):
    # Create SOURCE
    source = db.records.create('SOURCE', {
        'filename': filename,
        'ingestedAt': '2025-01-01T00:00:00Z'
    })

    # Find or create AUTHOR
    existing = db.records.find({'labels': ['AUTHOR'], 'where': {'name': author}})
    author_rec = existing.data[0] if existing.data else db.records.create('AUTHOR', {'name': author})
    db.records.attach(author_rec.id, source.id, {'type': 'AUTHORED', 'direction': 'out'})

    # Create chunks
    texts = chunk_text(content)
    db.records.import_json({
        'label': 'CHUNK',
        'data': [{'text': t, 'chunkIndex': i, 'sourceFile': filename} for i, t in enumerate(texts)]
    })

    # Link chunks to source
    chunk_records = db.records.find({
        'labels': ['CHUNK'],
        'where': {'sourceFile': filename},
        'orderBy': {'chunkIndex': 'asc'}
    })
    for chunk in chunk_records.data:
        db.records.attach(source.id, chunk.id, {'type': 'HAS_CHUNK', 'direction': 'out'})

    # Attach topics to all chunks
    for topic_name in topics:
        existing_topic = db.records.find({'labels': ['TOPIC'], 'where': {'name': topic_name}})
        topic = existing_topic.data[0] if existing_topic.data else db.records.create('TOPIC', {'name': topic_name})
        for chunk in chunk_records.data:
            db.records.attach(topic.id, chunk.id, {'type': 'COVERS', 'direction': 'out'})

    print(f'Ingested {len(texts)} chunks from {filename}')
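
A call to the ingest function might look like the following sketch. The file path, author, and topics here are illustrative placeholders, not values the API expects:

with open('docs/architecture.md') as f:
    ingest_source(
        filename='architecture.md',
        author='Jane Smith',
        topics=['distributed systems', 'caching'],
        content=f.read()
    )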

Step 2: Create an embedding index on chunks

import time

index = db.ai.indexes.create({'label': 'CHUNK', 'propertyName': 'text'})
index_id = index['id']

while True:
    stats = db.ai.indexes.stats(index_id)
    if stats['data']['indexedRecords'] >= stats['data']['totalRecords']:
        break
    time.sleep(3)
print('Index ready')
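
Before wiring the index into retrieval, it can help to confirm it returns sensible neighbors with a quick standalone search, using the same db.ai.search call as Step 3 (the query string below is just an example):

# Smoke test: semantic search over CHUNK.text, no graph traversal yet
probe = db.ai.search({
    'query': 'cache invalidation',
    'propertyName': 'text',
    'labels': ['CHUNK'],
    'limit': 3
})
for hit in probe.data:
    print(f"{hit.score:.2f}  {hit.get('text')[:80]}")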

Step 3: GraphRAG retrieval — chunks + graph context

from concurrent.futures import ThreadPoolExecutor

def graph_rag_retrieve(user_query: str, k: int = 5) -> list[dict]:
    results = db.ai.search({
        'query': user_query,
        'propertyName': 'text',
        'labels': ['CHUNK'],
        'limit': k
    })

    def enrich(chunk):
        chunk_id = chunk.id

        source_result = db.records.find({
            'labels': ['SOURCE'],
            'where': {'CHUNK': {'$relation': {'type': 'HAS_CHUNK', 'direction': 'out'}, '__id': chunk_id}}
        })
        topic_result = db.records.find({
            'labels': ['TOPIC'],
            'where': {'CHUNK': {'$relation': {'type': 'COVERS', 'direction': 'out'}, '__id': chunk_id}}
        })

        source = source_result.data[0] if source_result.data else None
        author_name = None
        if source:
            author_result = db.records.find({
                'labels': ['AUTHOR'],
                'where': {'SOURCE': {'$relation': {'type': 'AUTHORED', 'direction': 'out'}, '__id': source.id}}
            })
            author_name = author_result.data[0].get('name') if author_result.data else None

        return {
            'text': chunk.get('text'),
            'source': source.get('filename') if source else 'unknown',
            'author': author_name,
            'topics': [t.get('name') for t in topic_result.data],
            'score': chunk.score
        }

    with ThreadPoolExecutor(max_workers=5) as pool:
        return list(pool.map(enrich, results.data))

def build_graph_rag_prompt(user_query: str, chunks: list[dict]) -> str:
    blocks = []
    for i, c in enumerate(chunks, 1):
        blocks.append(
            f"[{i}] (score: {c['score']:.2f}, source: {c['source']}, author: {c['author'] or 'unknown'})\n"
            f"Topics: {', '.join(c['topics']) or 'none'}\n"
            f"{c['text']}"
        )
    context = '\n\n---\n\n'.join(blocks)
    return f"You are a helpful assistant. Answer using the provided context.\n\nContext:\n{context}\n\nQuestion: {user_query}"

question = 'How does the caching layer handle invalidation?'
chunks = graph_rag_retrieve(question)
prompt = build_graph_rag_prompt(question, chunks)
print(prompt)
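
As noted above, the LLM call itself is unchanged from flat RAG. A minimal sketch, assuming the openai Python package and an OPENAI_API_KEY in the environment (the model name is an arbitrary choice):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
response = client.chat.completions.create(
    model='gpt-4o-mini',  # any chat model works; the prompt already carries the graph context
    messages=[{'role': 'user', 'content': prompt}]
)
print(response.choices[0].message.content)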

GraphRAG vs flat RAG — what changes in the prompt

Flat RAG prompt fragment:

[1] The cache layer uses a time-based TTL of 300 seconds. Stale entries
are invalidated on next read by comparing the stored timestamp...

GraphRAG prompt fragment:

[1] (score: 0.91, source: architecture.md, author: Jane Smith)
Topics: distributed systems, caching
The cache layer uses a time-based TTL of 300 seconds. Stale entries
are invalidated on next read by comparing the stored timestamp...

The LLM now sees where each piece of knowledge came from, who wrote it, and what domain it belongs to. This enables citation-aware answers and can reduce hallucination on ambiguous questions.


Production caveat

Each retrieved chunk triggers two or three additional queries: one for the source, one for the topics, and one for the author when a source is found. For k=10 that is 20-30 extra roundtrips. Run enrichment in parallel (as above) and cache per-chunk context if the same chunk appears across multiple queries in a session, as in the sketch below.
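
One way to add that cache, as a sketch: it assumes enrich has been hoisted out of graph_rag_retrieve to module scope, and uses a plain in-process dict; swap in whatever store fits your session model.

# Assumes enrich() is hoisted out of graph_rag_retrieve to module scope
_context_cache: dict[str, dict] = {}

def enrich_cached(chunk):
    cached = _context_cache.get(chunk.id)
    if cached is None:
        cached = enrich(chunk)
        _context_cache[chunk.id] = cached
    # score is query-specific, so overwrite it on every hit
    return {**cached, 'score': chunk.score}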


Next steps

  • BYOV External Embeddings — supply your own vectors instead of relying on managed embeddings
  • Multi-Source RAG — combine PDFs, web pages, and database exports in one semantic search
  • RAG Evaluation — measure precision and recall before deploying GraphRAG to production