Opentrace uses Supabase (PostgreSQL) with the pgvector extension for vector similarity search. Here's the complete schema.
CREATE EXTENSION IF NOT EXISTS vector; -- pgvector for embeddings
CREATE EXTENSION IF NOT EXISTS pg_trgm; -- trigram similarity for full-text searchProvisioned via Clerk webhook on first sign-in.
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
clerk_id TEXT UNIQUE NOT NULL,
email TEXT NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW()
);Each user can have multiple projects. Deleting a project cascades to all child records.
CREATE TABLE projects (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL,
user_id UUID REFERENCES users(id) ON DELETE CASCADE,
created_at TIMESTAMPTZ DEFAULT NOW()
);RAG configuration per project. One-to-one with projects.
CREATE TABLE project_settings (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID UNIQUE REFERENCES projects(id) ON DELETE CASCADE,
rag_strategy TEXT DEFAULT 'basic',
agent_type TEXT DEFAULT 'simple',
embedding_model TEXT DEFAULT 'text-embedding-3-large',
num_chunks INTEGER DEFAULT 5,
final_context_size INTEGER DEFAULT 5,
similarity_threshold FLOAT DEFAULT 0.3,
use_reranking BOOLEAN DEFAULT FALSE,
multi_query_count INTEGER DEFAULT 3,
vector_weight FLOAT DEFAULT 0.7,
keyword_weight FLOAT DEFAULT 0.3
);Represents an uploaded file or processed URL.
CREATE TABLE documents (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID REFERENCES projects(id) ON DELETE CASCADE,
file_name TEXT NOT NULL,
file_type TEXT,
s3_key TEXT,
status TEXT DEFAULT 'uploading',
status_message TEXT,
created_at TIMESTAMPTZ DEFAULT NOW()
);Individual chunks with embeddings. The core of the RAG system.
CREATE TABLE document_chunks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
document_id UUID REFERENCES documents(id) ON DELETE CASCADE,
project_id UUID REFERENCES projects(id) ON DELETE CASCADE,
chunk_index INTEGER,
content TEXT NOT NULL, -- searchable text / AI summary
original_content TEXT, -- raw extracted content
element_type TEXT DEFAULT 'text', -- text, table, or image
page_number INTEGER,
embedding VECTOR(1536), -- pgvector embedding
fts TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', content)) STORED,
created_at TIMESTAMPTZ DEFAULT NOW()
);The fts column is auto-generated by PostgreSQL — no manual updates needed. It's used for full-text keyword search in the Hybrid and Multi-Query Hybrid strategies.
CREATE TABLE chats (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID REFERENCES projects(id) ON DELETE CASCADE,
title TEXT DEFAULT 'New Chat',
created_at TIMESTAMPTZ DEFAULT NOW()
);CREATE TABLE messages (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chat_id UUID REFERENCES chats(id) ON DELETE CASCADE,
role TEXT NOT NULL, -- 'user' or 'assistant'
content TEXT NOT NULL,
citations JSONB DEFAULT '[]',
feedback TEXT, -- 'thumbs_up' or 'thumbs_down'
created_at TIMESTAMPTZ DEFAULT NOW()
);-- Vector similarity search (IVFFlat index)
CREATE INDEX idx_chunks_embedding ON document_chunks
USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
-- Full-text search using GIN index
CREATE INDEX idx_chunks_fts ON document_chunks USING GIN (fts);
-- Project-level chunk lookups
CREATE INDEX idx_chunks_project ON document_chunks (project_id);
-- Document-level chunk lookups
CREATE INDEX idx_chunks_document ON document_chunks (document_id);The database includes custom PostgreSQL functions called via Supabase rpc():
match_documents_by_vector — cosine similarity search on embeddingsmatch_documents_by_fts — full-text keyword search using tsvectormatch_documents_hybrid — combined vector + keyword search with RRF