Files
wiki/convex/semanticSearch.ts
Wayne Sutton 5a8df46681 feat: Add semantic search with vector embeddings
Add vector-based semantic search to complement keyword search.
  Users can toggle between "Keyword" and "Semantic" modes in the
  search modal (Cmd+K, then Tab to switch).

  Semantic search:
  - Uses OpenAI text-embedding-ada-002 (1536 dimensions)
  - Finds content by meaning, not exact words
  - Shows similarity scores as percentages
  - ~300ms latency, ~$0.0001/query
  - Falls back gracefully (returns no results) if OPENAI_API_KEY is not set

  New files:
  - convex/embeddings.ts - Embedding generation actions
  - convex/embeddingsQueries.ts - Queries/mutations for embeddings
  - convex/semanticSearch.ts - Vector search action
  - convex/semanticSearchQueries.ts - Result hydration queries
  - content/pages/docs-search.md - Keyword search docs
  - content/pages/docs-semantic-search.md - Semantic search docs

  Changes:
  - convex/schema.ts: Add embedding field and by_embedding vectorIndex
  - SearchModal.tsx: Add mode toggle (TextAa/Brain icons)
  - sync-posts.ts: Generate embeddings after content sync
  - global.css: Search mode toggle styles

  Documentation updated:
  - changelog.md, TASK.md, files.md, about.md, home.md

  Configuration:
  npx convex env set OPENAI_API_KEY sk-your-key

  Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

  Status: Ready to commit. All semantic search files are staged. The TypeScript warnings are pre-existing (unused variables) and don't affect the build.
2026-01-05 18:30:48 -08:00

157 lines
4.4 KiB
TypeScript

"use node";
import { v } from "convex/values";
import { action } from "./_generated/server";
import { internal } from "./_generated/api";
import OpenAI from "openai";
/**
 * Validator for a single semantic search hit.
 *
 * Mirrors the result format used by the keyword search in search.ts, with an
 * extra `score` field holding the similarity score from the vector search.
 */
const searchResultValidator = v.object({
  // Document id, serialized as a plain string
  _id: v.string(),
  // Which table the hit came from
  type: v.union(v.literal("post"), v.literal("page")),
  slug: v.string(),
  title: v.string(),
  // Only populated for posts
  description: v.optional(v.string()),
  // Short plain-text preview of the content
  snippet: v.string(),
  // Similarity score from vector search
  score: v.number(),
});
/**
 * Main semantic search action.
 *
 * Embeds the query with OpenAI `text-embedding-ada-002`, runs a vector search
 * against the `by_embedding` index of the `posts` and `pages` tables
 * (published documents only), hydrates the hits through internal queries and
 * returns at most 15 results ordered by similarity score, highest first.
 *
 * Returns an empty array when the query is blank or when `OPENAI_API_KEY` is
 * not configured.
 *
 * @throws If the embeddings request fails or yields no embedding.
 */
export const semanticSearch = action({
  args: { query: v.string() },
  returns: v.array(searchResultValidator),
  handler: async (ctx, args) => {
    // Shapes returned by the hydration queries. They are spelled out
    // explicitly so the handler's types do not depend on inference through
    // the generated `internal` API object.
    type HydratedPost = {
      _id: string;
      slug: string;
      title: string;
      description: string;
      content: string;
      unlisted?: boolean;
    };
    type HydratedPage = {
      _id: string;
      slug: string;
      title: string;
      content: string;
    };
    type SearchResult = {
      _id: string;
      type: "post" | "page";
      slug: string;
      title: string;
      description?: string;
      snippet: string;
      score: number;
    };

    const perTableLimit = 10; // Max vector hits requested from each table
    const snippetLength = 120; // Max snippet length before the ellipsis
    const maxResults = 15; // Max merged results returned to the caller

    // Return empty for empty queries
    if (!args.query.trim()) {
      return [];
    }

    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
      // Gracefully return empty if not configured
      console.log("OPENAI_API_KEY not set, semantic search unavailable");
      return [];
    }

    // Generate embedding for search query
    const openai = new OpenAI({ apiKey });
    const embeddingResponse = await openai.embeddings.create({
      model: "text-embedding-ada-002",
      input: args.query,
    });
    // Guard the index access: an empty response would otherwise surface as an
    // opaque "cannot read properties of undefined" error.
    const queryEmbedding = embeddingResponse.data[0]?.embedding;
    if (!queryEmbedding || queryEmbedding.length === 0) {
      throw new Error("OpenAI returned no embedding for the search query");
    }

    // The two vector searches are independent of each other, so run them
    // concurrently instead of back to back.
    const [postHits, pageHits] = await Promise.all([
      ctx.vectorSearch("posts", "by_embedding", {
        vector: queryEmbedding,
        limit: perTableLimit,
        filter: (q) => q.eq("published", true),
      }),
      ctx.vectorSearch("pages", "by_embedding", {
        vector: queryEmbedding,
        limit: perTableLimit,
        filter: (q) => q.eq("published", true),
      }),
    ]);

    // Fetch full document details for both tables, again concurrently.
    const [posts, pages]: [HydratedPost[], HydratedPage[]] = await Promise.all([
      ctx.runQuery(internal.semanticSearchQueries.fetchPostsByIds, {
        ids: postHits.map((hit) => hit._id),
      }),
      ctx.runQuery(internal.semanticSearchQueries.fetchPagesByIds, {
        ids: pageHits.map((hit) => hit._id),
      }),
    ]);

    // Index the hydrated documents by id so each hit is matched with a single
    // lookup rather than a linear scan.
    const postsById = new Map(posts.map((post) => [post._id, post] as const));
    const pagesById = new Map(pages.map((page) => [page._id, page] as const));

    const results: SearchResult[] = [];

    // Map posts with scores. Hits whose document was not returned by the
    // hydration query are skipped.
    // NOTE(review): `unlisted` is part of the hydrated post shape but is not
    // consulted here; presumably fetchPostsByIds already excludes unlisted
    // posts - confirm.
    for (const hit of postHits) {
      const post = postsById.get(hit._id);
      if (!post) continue;
      results.push({
        _id: String(post._id),
        type: "post",
        slug: post.slug,
        title: post.title,
        description: post.description,
        snippet: createSnippet(post.content, snippetLength),
        score: hit._score,
      });
    }

    // Map pages with scores (pages carry no description).
    for (const hit of pageHits) {
      const page = pagesById.get(hit._id);
      if (!page) continue;
      results.push({
        _id: String(page._id),
        type: "page",
        slug: page.slug,
        title: page.title,
        snippet: createSnippet(page.content, snippetLength),
        score: hit._score,
      });
    }

    // Sort by score descending (higher = more similar)
    results.sort((a, b) => b.score - a.score);

    // Limit to the top results
    return results.slice(0, maxResults);
  },
});
/**
 * Reports whether semantic search is available, i.e. whether the
 * `OPENAI_API_KEY` environment variable is set to a non-empty value.
 */
export const isSemanticSearchAvailable = action({
  args: {},
  returns: v.boolean(),
  handler: async () => {
    const apiKey = process.env.OPENAI_API_KEY;
    // Unset and empty-string keys both count as "not configured"
    return Boolean(apiKey);
  },
});
/**
 * Helper to create a short plain-text snippet from markdown content.
 *
 * Strips common markdown syntax (fenced code blocks, images, links, headers,
 * bold, italic, inline code), collapses all whitespace to single spaces and
 * truncates the result.
 *
 * @param content - Raw markdown content.
 * @param maxLength - Maximum number of characters kept before truncation.
 * @returns The cleaned text; when it is longer than `maxLength` it is cut to
 *   `maxLength` characters and "..." is appended.
 */
function createSnippet(content: string, maxLength: number): string {
  // Order matters: fenced code blocks must go before inline code (which would
  // otherwise consume the fence backticks and leave the block in place), and
  // images must go before links (which would otherwise match the
  // "[alt](url)" part and leave "!alt" behind).
  const cleanContent = content
    .replace(/```[\s\S]*?```/g, "") // Code blocks
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, "") // Images
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Links
    .replace(/^ {0,3}#{1,6}\s/gm, "") // Headers (line start only, so "C# is" survives)
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Bold
    .replace(/\*([^*]+)\*/g, "$1") // Italic
    .replace(/`([^`]+)`/g, "$1") // Inline code
    .replace(/\n+/g, " ") // Newlines to spaces
    .replace(/\s+/g, " ") // Multiple spaces to single
    .trim();

  if (cleanContent.length <= maxLength) {
    return cleanContent;
  }
  return cleanContent.slice(0, maxLength) + "...";
}