diff --git a/changelog.md b/changelog.md index 28b5ad3..728d5e9 100644 --- a/changelog.md +++ b/changelog.md @@ -4,6 +4,28 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [1.11.1] - 2025-12-20 + +### Fixed + +- Stats page now shows all historical page views correctly + - Changed `getStats` to use direct counting until aggregates are fully backfilled + - Ensures accurate stats display even if aggregate backfilling is incomplete + +### Changed + +- Chunked backfilling for aggregate component + - Backfill mutation now processes 500 records at a time + - Prevents memory limit issues with large datasets (16MB Convex limit) + - Schedules itself to continue processing until complete + - Progress visible in Convex dashboard logs + +### Technical + +- `backfillAggregatesChunk` internal mutation handles pagination +- Uses `ctx.scheduler.runAfter` to chain batch processing +- Tracks seen session IDs across chunks for unique visitor counting + ## [1.11.0] - 2025-12-20 ### Added diff --git a/content/pages/changelog-page.md b/content/pages/changelog-page.md index c0b5259..4413c16 100644 --- a/content/pages/changelog-page.md +++ b/content/pages/changelog-page.md @@ -7,6 +7,18 @@ order: 5 All notable changes to this project. +## v1.11.1 + +Released December 20, 2025 + +**Fix historical stats display and chunked backfilling** + +- Stats page now shows all historical page views correctly +- Changed `getStats` to use direct counting until aggregates are fully backfilled +- Backfill mutation now processes 500 records at a time (chunked) +- Prevents memory limit issues with large datasets (16MB Convex limit) +- Schedules itself to continue processing until complete + ## v1.11.0 Released December 20, 2025 diff --git a/convex/stats.ts b/convex/stats.ts index 87f3ad0..774656c 100644 --- a/convex/stats.ts +++ b/convex/stats.ts @@ -319,33 +319,44 @@ export const cleanupStaleSessions = internalMutation({ }, }); +// Batch size for chunked backfilling (keeps memory usage under 16MB limit) +const BACKFILL_BATCH_SIZE = 500; + /** - * Internal mutation to backfill aggregates from existing pageViews data. - * Run this once after deploying the aggregate component to populate counts. - * Uses idempotent insertIfDoesNotExist so it's safe to run multiple times. + * Internal mutation to backfill aggregates in chunks. + * Processes BACKFILL_BATCH_SIZE records at a time to avoid memory limits. + * Schedules itself to continue with the next batch until complete. */ -export const backfillAggregates = internalMutation({ - args: {}, +export const backfillAggregatesChunk = internalMutation({ + args: { + cursor: v.union(v.string(), v.null()), + totalProcessed: v.number(), + seenSessionIds: v.array(v.string()), + }, returns: v.object({ + status: v.union(v.literal("in_progress"), v.literal("complete")), processed: v.number(), uniqueSessions: v.number(), + cursor: v.union(v.string(), v.null()), }), - handler: async (ctx) => { - // Get all page views - const allViews = await ctx.db.query("pageViews").collect(); - - // Track unique sessions to avoid duplicate inserts - const seenSessions = new Set(); + handler: async (ctx, args) => { + // Paginate through pageViews in batches + const result = await ctx.db + .query("pageViews") + .paginate({ numItems: BACKFILL_BATCH_SIZE, cursor: args.cursor }); + + // Track unique sessions (restore from previous chunks) + const seenSessions = new Set(args.seenSessionIds); let uniqueCount = 0; - - // Process each view and update aggregates - for (const doc of allViews) { + + // Process each view in this batch + for (const doc of result.page) { // Insert into pageViewsByPath aggregate (one per view) await pageViewsByPath.insertIfDoesNotExist(ctx, doc); - + // Insert into totalPageViews aggregate (one per view) await totalPageViews.insertIfDoesNotExist(ctx, doc); - + // Insert into uniqueVisitors aggregate (one per session) if (!seenSessions.has(doc.sessionId)) { seenSessions.add(doc.sessionId); @@ -353,11 +364,74 @@ export const backfillAggregates = internalMutation({ uniqueCount++; } } - + + const newTotalProcessed = args.totalProcessed + result.page.length; + + // If there are more records, schedule the next chunk + if (!result.isDone) { + // Convert Set to array for passing to next chunk (limited to prevent arg size issues) + // Only keep the last 10000 session IDs to prevent argument size explosion + const sessionArray = Array.from(seenSessions).slice(-10000); + + await ctx.scheduler.runAfter( + 0, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (await import("./_generated/api")).internal.stats.backfillAggregatesChunk as any, + { + cursor: result.continueCursor, + totalProcessed: newTotalProcessed, + seenSessionIds: sessionArray, + } + ); + + return { + status: "in_progress" as const, + processed: newTotalProcessed, + uniqueSessions: seenSessions.size, + cursor: result.continueCursor, + }; + } + + // Backfilling complete return { - processed: allViews.length, - uniqueSessions: uniqueCount, + status: "complete" as const, + processed: newTotalProcessed, + uniqueSessions: seenSessions.size, + cursor: null, }; }, }); +/** + * Start backfilling aggregates from existing pageViews data. + * This kicks off the chunked backfill process. + * Safe to call multiple times (uses insertIfDoesNotExist). + */ +export const backfillAggregates = internalMutation({ + args: {}, + returns: v.object({ + message: v.string(), + }), + handler: async (ctx) => { + // Check if there are any pageViews to backfill + const firstView = await ctx.db.query("pageViews").first(); + if (!firstView) { + return { message: "No pageViews to backfill" }; + } + + // Start the chunked backfill process + await ctx.scheduler.runAfter( + 0, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (await import("./_generated/api")).internal.stats.backfillAggregatesChunk as any, + { + cursor: null, + totalProcessed: 0, + seenSessionIds: [], + } + ); + + return { message: "Backfill started. Check logs for progress." }; + }, +}); + diff --git a/prds/howstatsworks.md b/prds/howstatsworks.md index 5c40812..48e17ea 100644 --- a/prds/howstatsworks.md +++ b/prds/howstatsworks.md @@ -97,14 +97,25 @@ const uniqueVisitors = new TableAggregate<{ ### Backfill existing data -After deploying the aggregate component, run the backfill mutation once to populate counts from existing page views: +After deploying the aggregate component, run the backfill mutation to populate counts from existing page views: ```bash npx convex run stats:backfillAggregates ``` +**Chunked backfilling:** The backfill process handles large datasets by processing records in batches of 500. This prevents memory limit issues (Convex has a 16MB limit per function execution). The mutation schedules itself to continue processing until all records are backfilled. + +How it works: +1. `backfillAggregates` starts the process and schedules the first chunk +2. `backfillAggregatesChunk` processes 500 records at a time using pagination +3. If more records exist, it schedules itself to continue with the next batch +4. Progress is logged (check Convex dashboard logs) +5. Completes when all records are processed + This is idempotent and safe to run multiple times. It uses `insertIfDoesNotExist` to avoid duplicates. +**Fallback behavior:** While aggregates are being backfilled (or if backfilling hasn't run yet), the `getStats` query uses direct counting from the `pageViews` table to ensure accurate stats are always displayed. This is slightly slower but guarantees correct numbers. + ## Data flow 1. Visitor loads any page