From 1257fa220f3a2aa880ac617cb7898e162f9988ff Mon Sep 17 00:00:00 2001
From: Wayne Sutton
Date: Wed, 7 Jan 2026 21:48:41 -0800
Subject: [PATCH] Canonical URL fix for search engines (GitHub Issue #6) and
 other SEO fixes

---
 .claude/skills/gitsafety.md        | 37 ++++++++++++++++++
 .cursor/rules/gitsafety.mdc        | 43 ++++++++++++++++++++
 AGENTS.md                          |  2 +-
 CLAUDE.md                          |  2 +-
 FORK_CONFIG.md                     | 61 +++++++++++++++++++++++++++++
 TASK.md                            | 12 +++++-
 changelog.md                       | 25 ++++++++++++
 content/blog/setup-guide.md        | 44 +++++++++++++++++++++
 content/pages/changelog-page.md    | 21 ++++++++++
 content/pages/home.md              |  4 +-
 files.md                           |  2 +-
 index.html                         | 17 ++++++++
 netlify.toml                       | 12 ++++++
 netlify/edge-functions/botMeta.ts  | 40 ++++++++++++++++++-
 public/llms.txt                    |  2 +-
 public/raw/about.md                |  2 +-
 public/raw/changelog.md            | 23 ++++++++++-
 public/raw/contact.md              |  2 +-
 public/raw/docs-ask-ai.md          |  2 +-
 public/raw/docs-configuration.md   |  2 +-
 public/raw/docs-content.md         |  2 +-
 public/raw/docs-dashboard.md       |  2 +-
 public/raw/docs-deployment.md      |  2 +-
 public/raw/docs-frontmatter.md     |  2 +-
 public/raw/docs-search.md          |  2 +-
 public/raw/docs-semantic-search.md |  2 +-
 public/raw/documentation.md        |  2 +-
 public/raw/footer.md               |  2 +-
 public/raw/home-intro.md           |  6 ++-
 public/raw/index.md                |  4 +-
 public/raw/newsletter.md           |  2 +-
 public/raw/projects.md             |  2 +-
 public/raw/setup-guide.md          | 44 +++++++++++++++++++++
 src/App.tsx                        | 62 +++++++++++++++++++++---------
 src/components/BlogPost.tsx        | 40 ++++++++++++++++++-
 src/components/DocsLayout.tsx      |  9 +++--
 src/components/Layout.tsx          |  4 +-
 src/context/ThemeContext.tsx       |  9 +++++
 src/pages/Home.tsx                 |  6 ++-
 src/styles/global.css              |  8 ++--
 vite.config.ts                     | 17 ++++++++
 41 files changed, 529 insertions(+), 55 deletions(-)
 create mode 100644 .claude/skills/gitsafety.md
 create mode 100644 .cursor/rules/gitsafety.mdc

diff --git a/.claude/skills/gitsafety.md b/.claude/skills/gitsafety.md
new file mode 100644
index 0000000..f92dd54
--- /dev/null
+++ b/.claude/skills/gitsafety.md
@@ -0,0 +1,37 @@
+## Critical Git Safety Protocol
+
+**🚨 NEVER USE `git checkout` TO REVERT CHANGES 🚨**
+
+**MANDATORY GIT SAFETY RULES:**
+
+- **NEVER run `git checkout -- <file>`** without first examining what you're about to destroy
+- **ALWAYS use `git diff <file>`** to see exactly what changes will be lost
+- **MANUALLY undo changes** by editing files to revert specific problematic sections
+- **Preserve valuable work** — if the user says changes are bad, ask which specific parts to revert
+- **`git checkout` destroys ALL changes** — this can eliminate hours of valuable progress
+- **When the user asks to "undo" changes**: Read the current file, identify problematic sections, and manually edit to fix them
+
+**Why this matters**: Using `git checkout` blindly can destroy sophisticated implementations, complex prompts, provider-specific logic, and other valuable work that took significant time to develop.
+
+## Git Safety Rules - CRITICAL
+
+**NEVER run these commands without explicit user approval:**
+
+- `git reset --hard` - Destroys uncommitted changes permanently
+- `git checkout -- .` - Discards all working directory changes
+- `git clean -fd` - Deletes untracked files permanently
+- `git stash drop` - Deletes stashed changes
+
+**ALWAYS before any git operation:**
+
+1. Run `git status` first to check for uncommitted changes
+2. If there are uncommitted changes, STOP and ASK the user before proceeding
+3. Suggest `git stash` to preserve changes if needed
+
+**If the user asks to "revert" something:**
+
+1. First clarify: revert committed changes or uncommitted changes?
+2. Show what will be affected before doing anything
+3. Get explicit confirmation for destructive operations
+
+This rule exists because careless git operations destroyed 2 days of work.

diff --git a/.cursor/rules/gitsafety.mdc b/.cursor/rules/gitsafety.mdc
new file mode 100644
index 0000000..0506a5b
--- /dev/null
+++ b/.cursor/rules/gitsafety.mdc
@@ -0,0 +1,43 @@
+---
+description: Critical Git Safety Protocol
+globs:
+alwaysApply: true
+---
+
+## Critical Git Safety Protocol
+
+**🚨 NEVER USE `git checkout` TO REVERT CHANGES 🚨**
+
+**MANDATORY GIT SAFETY RULES:**
+
+- **NEVER run `git checkout -- <file>`** without first examining what you're about to destroy
+- **ALWAYS use `git diff <file>`** to see exactly what changes will be lost
+- **MANUALLY undo changes** by editing files to revert specific problematic sections
+- **Preserve valuable work** — if the user says changes are bad, ask which specific parts to revert
+- **`git checkout` destroys ALL changes** — this can eliminate hours of valuable progress
+- **When the user asks to "undo" changes**: Read the current file, identify problematic sections, and manually edit to fix them
+
+**Why this matters**: Using `git checkout` blindly can destroy sophisticated implementations, complex prompts, provider-specific logic, and other valuable work that took significant time to develop.
+
+## Git Safety Rules - CRITICAL
+
+**NEVER run these commands without explicit user approval:**
+
+- `git reset --hard` - Destroys uncommitted changes permanently
+- `git checkout -- .` - Discards all working directory changes
+- `git clean -fd` - Deletes untracked files permanently
+- `git stash drop` - Deletes stashed changes
+
+**ALWAYS before any git operation:**
+
+1. Run `git status` first to check for uncommitted changes
+2. If there are uncommitted changes, STOP and ASK the user before proceeding
+3. Suggest `git stash` to preserve changes if needed
+
+**If the user asks to "revert" something:**
+
+1. First clarify: revert committed changes or uncommitted changes?
+2. Show what will be affected before doing anything
+3. Get explicit confirmation for destructive operations
+
+This rule exists because careless git operations destroyed 2 days of work.

diff --git a/AGENTS.md b/AGENTS.md
index 2c0858d..0affaad 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -22,7 +22,7 @@ Your content is instantly available to browsers, LLMs, and AI agents.. Write mar
 - **Total Posts**: 17
 - **Total Pages**: 4
 - **Latest Post**: 2025-12-29
-- **Last Updated**: 2026-01-06T21:21:00.308Z
+- **Last Updated**: 2026-01-07T06:23:37.520Z
 
 ## Tech stack

diff --git a/CLAUDE.md b/CLAUDE.md
index a266f67..90fbe97 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -5,7 +5,7 @@ Project instructions for Claude Code.
 
 ## Project context
 
-
+
 Markdown sync framework. Write markdown in `content/`, run sync commands, content appears instantly via Convex real-time database. Built for developers and AI agents.

diff --git a/FORK_CONFIG.md b/FORK_CONFIG.md
index ee7114d..c282a53 100644
--- a/FORK_CONFIG.md
+++ b/FORK_CONFIG.md
@@ -1356,3 +1356,64 @@ Replace example content in:
 | `public/images/logo.svg`       | Site logo                  |
 | `public/images/og-default.svg` | Default social share image |
 | `public/images/logos/*.svg`    | Logo gallery images        |
+
+---
+
+## SEO Bot Configuration
+
+The site serves pre-rendered HTML with correct canonical URLs and meta tags to search engines and social preview bots.
Configure bot detection in `netlify/edge-functions/botMeta.ts`. + +### How It Works + +The edge function detects different types of bots and serves appropriate responses: + +| Bot Type | Response | Examples | +| ------------------- | ------------------------------------- | ------------------------------------ | +| Social preview bots | Pre-rendered HTML with OG tags | Twitter, Facebook, LinkedIn, Discord | +| Search engine bots | Pre-rendered HTML with correct canonical | Google, Bing, DuckDuckGo | +| AI crawlers | Normal SPA (can render JavaScript) | GPTBot, ClaudeBot, PerplexityBot | +| Regular browsers | Normal SPA | Chrome, Firefox, Safari | + +### Customizing Bot Lists + +Edit the arrays at the top of `netlify/edge-functions/botMeta.ts`: + +```typescript +// Add or remove social preview bots +const SOCIAL_PREVIEW_BOTS = [ + "facebookexternalhit", + "twitterbot", + // ... add your own +]; + +// Add or remove search engine bots +const SEARCH_ENGINE_BOTS = [ + "googlebot", + "bingbot", + // ... add your own +]; + +// Add or remove AI crawlers +const AI_CRAWLERS = [ + "gptbot", + "claudebot", + // ... add your own +]; +``` + +### Testing Bot Detection + +Test with curl to simulate different bots: + +```bash +# Test Googlebot (should get pre-rendered HTML with correct canonical) +curl -H "User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1)" \ + https://yoursite.com/your-post | grep canonical + +# Test normal browser (should get SPA with homepage canonical) +curl https://yoursite.com/your-post | grep canonical +``` + +### Why This Matters + +Single-page apps (SPAs) update meta tags via JavaScript after the page loads. Search engines that check raw HTML before rendering may see incorrect canonical URLs. By serving pre-rendered HTML to search engine bots, we ensure they see the correct canonical URL for each page. diff --git a/TASK.md b/TASK.md index 8136688..382fbb6 100644 --- a/TASK.md +++ b/TASK.md @@ -4,10 +4,20 @@ ## Current Status -v2.10.2 ready. SEO fixes from GitHub Issue #4 implemented. +v2.12.0 ready. Canonical URL fix for GitHub Issue #6 implemented. ## Completed +- [x] Canonical URL mismatch fix (GitHub Issue #6) + - [x] Raw HTML was serving homepage canonical instead of page-specific canonical + - [x] Added SEARCH_ENGINE_BOTS array to botMeta.ts for search engine crawler detection + - [x] Added isSearchEngineBot() helper function + - [x] Updated condition to serve pre-rendered HTML to search engine bots + - [x] Added documentation header explaining bot detection configuration + - [x] Added SEO Bot Configuration section to FORK_CONFIG.md + - [x] Added SEO and Bot Detection section to setup-guide.md + - [x] Search engines (Google, Bing, DuckDuckGo, etc.) now receive correct canonical URLs + - [x] SEO fixes for GitHub Issue #4 (7 issues) - [x] Canonical URL: Dynamic canonical link tags for posts and pages in Post.tsx - [x] Single H1 per page: Markdown H1s demoted to H2 with `.blog-h1-demoted` class in BlogPost.tsx diff --git a/changelog.md b/changelog.md index b47240e..39a839f 100644 --- a/changelog.md +++ b/changelog.md @@ -4,6 +4,31 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
+## [2.12.0] - 2026-01-07 + +### Fixed + +- Canonical URL mismatch between raw and rendered HTML (GitHub Issue #6) + - Raw HTML was showing homepage canonical URL instead of page-specific canonical + - Added search engine bot detection to serve pre-rendered HTML with correct canonical URLs + - Search engines (Google, Bing, DuckDuckGo, etc.) now receive correct canonical tags in initial HTML + +### Added + +- SEO Bot Configuration section in FORK_CONFIG.md for developers who fork the app +- SEO and Bot Detection section in setup-guide.md with configuration examples +- `SEARCH_ENGINE_BOTS` array in `netlify/edge-functions/botMeta.ts` for customizable bot detection +- `isSearchEngineBot()` helper function for search engine crawler detection +- Documentation header in botMeta.ts explaining bot detection configuration + +### Technical + +- Updated `netlify/edge-functions/botMeta.ts`: + - Added configuration documentation header explaining three bot categories + - Added SEARCH_ENGINE_BOTS array (googlebot, bingbot, yandexbot, duckduckbot, baiduspider, sogou, yahoo! slurp, applebot) + - Added isSearchEngineBot() function + - Updated condition to serve pre-rendered HTML to both social preview and search engine bots + ## [2.11.0] - 2026-01-06 ### Added diff --git a/content/blog/setup-guide.md b/content/blog/setup-guide.md index ff491b8..4527a3f 100644 --- a/content/blog/setup-guide.md +++ b/content/blog/setup-guide.md @@ -1408,6 +1408,50 @@ Your blog includes these API endpoints for search engines and AI: | `/openapi.yaml` | OpenAPI 3.0 specification | | `/llms.txt` | AI agent discovery | +## SEO and Bot Detection + +Your site includes intelligent bot detection that serves different responses to different visitors. + +### How It Works + +The `netlify/edge-functions/botMeta.ts` edge function intercepts requests and serves pre-rendered HTML with correct meta tags to: + +- **Social preview bots** (Twitter, Facebook, LinkedIn, Discord): Get Open Graph tags for link previews +- **Search engine bots** (Google, Bing, DuckDuckGo): Get correct canonical URLs + +Regular browsers and AI crawlers receive the normal SPA and let JavaScript update the meta tags. + +### Configuration + +Edit the bot arrays at the top of `netlify/edge-functions/botMeta.ts` to customize which bots receive pre-rendered HTML: + +```typescript +// Social preview bots - for link previews +const SOCIAL_PREVIEW_BOTS = ["twitterbot", "facebookexternalhit", ...]; + +// Search engine bots - for correct canonical URLs +const SEARCH_ENGINE_BOTS = ["googlebot", "bingbot", ...]; + +// AI crawlers - get normal SPA (can render JavaScript) +const AI_CRAWLERS = ["gptbot", "claudebot", ...]; +``` + +### Testing + +Verify bot detection with curl: + +```bash +# Simulate Googlebot +curl -H "User-Agent: Googlebot" https://yoursite.com/post-slug | grep canonical +# Expected: correct page canonical + +# Normal request +curl https://yoursite.com/post-slug | grep canonical +# Expected: homepage canonical (JavaScript will update it) +``` + +See `FORK_CONFIG.md` for detailed configuration options. + ## Import External Content Use Firecrawl to import articles from external URLs as markdown posts: diff --git a/content/pages/changelog-page.md b/content/pages/changelog-page.md index df7afae..c70b672 100644 --- a/content/pages/changelog-page.md +++ b/content/pages/changelog-page.md @@ -11,6 +11,27 @@ docsSectionOrder: 4 All notable changes to this project. 
+## v2.12.0
+
+Released January 7, 2026
+
+**Canonical URL fix for search engines (GitHub Issue #6)**
+
+Fixed a mismatch where raw HTML was showing the homepage canonical URL instead of the page-specific canonical URL. Search engines that check raw HTML before rendering JavaScript now receive the correct canonical tags.
+
+**Changes:**
+
+- Added search engine bot detection (Google, Bing, DuckDuckGo, etc.) to serve pre-rendered HTML
+- Search engines now receive correct canonical URLs in the initial HTML response
+- Added SEO Bot Configuration documentation in FORK_CONFIG.md and setup-guide.md
+- Bot detection arrays are easily customizable in `netlify/edge-functions/botMeta.ts`
+
+**For forkers:**
+
+The bot detection configuration is documented with clear comments at the top of `botMeta.ts`. You can customize which bots receive pre-rendered HTML by editing the `SOCIAL_PREVIEW_BOTS`, `SEARCH_ENGINE_BOTS`, and `AI_CRAWLERS` arrays.
+
+---
+
 ## v2.11.0
 
 Released January 6, 2026

diff --git a/content/pages/home.md b/content/pages/home.md
index 27441fb..3533c5d 100644
--- a/content/pages/home.md
+++ b/content/pages/home.md
@@ -31,4 +31,6 @@ agents. -->
 
 **Sync Commands** - Sync discovery commands to update AGENTS.md, CLAUDE.md, and llms.txt
 
-**Semantic search** - Find content by meaning, not just keywords, using vector embeddings.
+**Semantic search** - Find content by meaning, not just keywords.
+
+**Ask AI** - Chat with your site content. Get answers with sources.

diff --git a/files.md b/files.md
index e7a9d8a..c70655a 100644
--- a/files.md
+++ b/files.md
@@ -283,7 +283,7 @@ Frontmatter is the YAML metadata at the top of each markdown file. Here is how i
 
 | File         | Description |
 | ------------ | -------------------------------------------------------------------------------------------------------------- |
-| `botMeta.ts` | Edge function for social media crawler detection, excludes `/raw/*` paths and AI crawlers from OG interception |
+| `botMeta.ts` | Edge function for bot detection with configurable arrays for social preview bots, search engine bots, and AI crawlers. Serves pre-rendered HTML with correct canonical URLs and OG tags to social and search bots. Excludes `/raw/*` paths and AI crawlers from interception. Configuration documented at top of file. |
 | `rss.ts`     | Proxies `/rss.xml` and `/rss-full.xml` to Convex HTTP |
 | `sitemap.ts` | Proxies `/sitemap.xml` to Convex HTTP |
 | `api.ts`     | Proxies `/api/posts`, `/api/post`, `/api/export` to Convex |

diff --git a/index.html b/index.html
index eceede3..add6a45 100644
--- a/index.html
+++ b/index.html
@@ -31,6 +31,23 @@
+
+
+

diff --git a/netlify/edge-functions/botMeta.ts b/netlify/edge-functions/botMeta.ts
--- a/netlify/edge-functions/botMeta.ts
+++ b/netlify/edge-functions/botMeta.ts
@@ -1,6 +1,22 @@
+// =============================================================================
+// BOT DETECTION CONFIGURATION
+// =============================================================================
+// This edge function serves different responses to different visitor types:
+// - Social preview bots -> Pre-rendered HTML with OG tags for link previews
+// - Search engine bots -> Pre-rendered HTML with correct canonical/meta tags
+// - AI crawlers -> Normal SPA (they can render JavaScript and want raw content)
+// - Regular browsers -> Normal SPA (React updates meta tags client-side)
+//
+// Why: SPAs update meta tags via JavaScript, so bots that read raw HTML first
+// need the correct canonical URL in the initial response (GitHub Issue #6).
+//
+// Customize the SOCIAL_PREVIEW_BOTS, SEARCH_ENGINE_BOTS, and AI_CRAWLERS
+// arrays below to control which bots get pre-rendered HTML.
+// =============================================================================
+
 // Social preview bots that need OG metadata HTML
 // These bots cannot render JavaScript and need pre-rendered OG tags
 const SOCIAL_PREVIEW_BOTS = [
@@ -21,6 +37,19 @@ const SOCIAL_PREVIEW_BOTS = [
   "showyoubot",
 ];
 
+// Search engine crawlers that need correct canonical URLs in raw HTML
+// These bots may not render JavaScript or check raw HTML first
+const SEARCH_ENGINE_BOTS = [
+  "googlebot",
+  "bingbot",
+  "yandexbot",
+  "duckduckbot",
+  "baiduspider",
+  "sogou",
+  "yahoo! slurp",
+  "applebot",
+];
+
 // AI crawlers that should get raw content, not OG previews
 const AI_CRAWLERS = [
   "gptbot",
   "claudebot",
@@ -54,6 +83,13 @@ function isAICrawler(userAgent: string | null): boolean {
   return AI_CRAWLERS.some((bot) => ua.includes(bot));
 }
 
+// Check if user agent is a search engine bot
+function isSearchEngineBot(userAgent: string | null): boolean {
+  if (!userAgent) return false;
+  const ua = userAgent.toLowerCase();
+  return SEARCH_ENGINE_BOTS.some((bot) => ua.includes(bot));
+}
+
 export default async function handler(
   request: Request,
   context: Context,
@@ -100,8 +136,8 @@
     return context.next();
   }
 
-  // Only serve OG metadata to social preview bots, not search engines or AI
-  if (!isSocialPreviewBot(userAgent)) {
+  // Serve pre-rendered HTML with correct canonical URLs to social preview and search engine bots
+  if (!isSocialPreviewBot(userAgent) && !isSearchEngineBot(userAgent)) {
     return context.next();
   }

diff --git a/public/llms.txt b/public/llms.txt
index f1e24e8..9aff604 100644
--- a/public/llms.txt
+++ b/public/llms.txt
@@ -1,6 +1,6 @@
 # llms.txt - Information for AI assistants and LLMs
 # Learn more: https://llmstxt.org/
-# Last updated: 2026-01-06T21:21:00.309Z
+# Last updated: 2026-01-07T06:23:37.522Z
 
 > Your content is instantly available to browsers, LLMs, and AI agents.

diff --git a/public/raw/about.md b/public/raw/about.md
index 6381b8a..5b6e7f3 100644
--- a/public/raw/about.md
+++ b/public/raw/about.md
@@ -2,7 +2,7 @@
 
 ---
 Type: page
-Date: 2026-01-07
+Date: 2026-01-08
 ---
 
 An open-source publishing framework built for AI agents and developers to ship websites, docs, or blogs. Write markdown, sync from the terminal. Your content is instantly available to browsers, LLMs, and AI agents. Built on Convex and Netlify.

diff --git a/public/raw/changelog.md b/public/raw/changelog.md
index 9a77950..62839a9 100644
--- a/public/raw/changelog.md
+++ b/public/raw/changelog.md
@@ -2,11 +2,32 @@
 
 ---
 Type: page
-Date: 2026-01-07
+Date: 2026-01-08
 ---
 
 All notable changes to this project.
 
+## v2.12.0
+
+Released January 7, 2026
+
+**Canonical URL fix for search engines (GitHub Issue #6)**
+
+Fixed a mismatch where raw HTML was showing the homepage canonical URL instead of the page-specific canonical URL. Search engines that check raw HTML before rendering JavaScript now receive the correct canonical tags.
+
+**Changes:**
+
+- Added search engine bot detection (Google, Bing, DuckDuckGo, etc.) to serve pre-rendered HTML
+- Search engines now receive correct canonical URLs in the initial HTML response
+- Added SEO Bot Configuration documentation in FORK_CONFIG.md and setup-guide.md
+- Bot detection arrays are easily customizable in `netlify/edge-functions/botMeta.ts`
+
+**For forkers:**
+
+The bot detection configuration is documented with clear comments at the top of `botMeta.ts`. You can customize which bots receive pre-rendered HTML by editing the `SOCIAL_PREVIEW_BOTS`, `SEARCH_ENGINE_BOTS`, and `AI_CRAWLERS` arrays.
+
+---
+
 ## v2.11.0
 
 Released January 6, 2026

diff --git a/public/raw/contact.md b/public/raw/contact.md
index 62ae28f..97e07b8 100644
--- a/public/raw/contact.md
+++ b/public/raw/contact.md
@@ -2,7 +2,7 @@
 
 ---
 Type: page
-Date: 2026-01-07
+Date: 2026-01-08
 ---
 
 You found the contact page.
Nice diff --git a/public/raw/docs-ask-ai.md b/public/raw/docs-ask-ai.md index 9d599ca..c5ad786 100644 --- a/public/raw/docs-ask-ai.md +++ b/public/raw/docs-ask-ai.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Ask AI diff --git a/public/raw/docs-configuration.md b/public/raw/docs-configuration.md index 72f7d2b..003e476 100644 --- a/public/raw/docs-configuration.md +++ b/public/raw/docs-configuration.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Configuration diff --git a/public/raw/docs-content.md b/public/raw/docs-content.md index 1f40316..1a5b335 100644 --- a/public/raw/docs-content.md +++ b/public/raw/docs-content.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Content diff --git a/public/raw/docs-dashboard.md b/public/raw/docs-dashboard.md index 418b1e5..7bb689e 100644 --- a/public/raw/docs-dashboard.md +++ b/public/raw/docs-dashboard.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Dashboard diff --git a/public/raw/docs-deployment.md b/public/raw/docs-deployment.md index 63bc0c9..8e2bc4e 100644 --- a/public/raw/docs-deployment.md +++ b/public/raw/docs-deployment.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Deployment diff --git a/public/raw/docs-frontmatter.md b/public/raw/docs-frontmatter.md index d411afe..6900a7a 100644 --- a/public/raw/docs-frontmatter.md +++ b/public/raw/docs-frontmatter.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Frontmatter diff --git a/public/raw/docs-search.md b/public/raw/docs-search.md index 88e71a4..f267bc3 100644 --- a/public/raw/docs-search.md +++ b/public/raw/docs-search.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Keyword Search diff --git a/public/raw/docs-semantic-search.md b/public/raw/docs-semantic-search.md index fa673e4..7492beb 100644 --- a/public/raw/docs-semantic-search.md +++ b/public/raw/docs-semantic-search.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Semantic Search diff --git a/public/raw/documentation.md b/public/raw/documentation.md index d5faa9e..723d5d9 100644 --- a/public/raw/documentation.md +++ b/public/raw/documentation.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- ## Getting started diff --git a/public/raw/footer.md b/public/raw/footer.md index 03eab29..50bd6bc 100644 --- a/public/raw/footer.md +++ b/public/raw/footer.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- Built with [Convex](https://convex.dev) for real-time sync and deployed on [Netlify](https://netlify.com). Read the [project on GitHub](https://github.com/waynesutton/markdown-site) to fork and deploy your own. View [real-time site stats](/stats). diff --git a/public/raw/home-intro.md b/public/raw/home-intro.md index 00ecefa..7c1b146 100644 --- a/public/raw/home-intro.md +++ b/public/raw/home-intro.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- An open-source publishing framework built for AI agents and developers to ship **[docs](/docs)**, or **[blogs](/blog)** or **[websites](/)**. @@ -29,4 +29,6 @@ agents. --> **Sync Commands** - Sync discovery commands to update AGENTS.md, CLAUDE.md, and llms.txt -**Semantic search** - Find content by meaning, not just keywords, using vector embeddings. \ No newline at end of file +**Semantic search** - Find content by meaning, not just keywords. + +**Ask AI** - Chat with your site content. 
Get answers with sources. \ No newline at end of file diff --git a/public/raw/index.md b/public/raw/index.md index b8e7b9f..78e7756 100644 --- a/public/raw/index.md +++ b/public/raw/index.md @@ -24,7 +24,9 @@ agents. --> **Sync Commands** - Sync discovery commands to update AGENTS.md, CLAUDE.md, and llms.txt -**Semantic search** - Find content by meaning, not just keywords, using vector embeddings. +**Semantic search** - Find content by meaning, not just keywords. + +**Ask AI** - Chat with your site content. Get answers with sources. --- diff --git a/public/raw/newsletter.md b/public/raw/newsletter.md index a86256c..5171d97 100644 --- a/public/raw/newsletter.md +++ b/public/raw/newsletter.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- # Newsletter Demo Page diff --git a/public/raw/projects.md b/public/raw/projects.md index 747fcf0..b8860ba 100644 --- a/public/raw/projects.md +++ b/public/raw/projects.md @@ -2,7 +2,7 @@ --- Type: page -Date: 2026-01-07 +Date: 2026-01-08 --- This markdown framework is open source and built to be extended. Here is what ships out of the box. diff --git a/public/raw/setup-guide.md b/public/raw/setup-guide.md index d66a635..8d3d22f 100644 --- a/public/raw/setup-guide.md +++ b/public/raw/setup-guide.md @@ -1397,6 +1397,50 @@ Your blog includes these API endpoints for search engines and AI: | `/openapi.yaml` | OpenAPI 3.0 specification | | `/llms.txt` | AI agent discovery | +## SEO and Bot Detection + +Your site includes intelligent bot detection that serves different responses to different visitors. + +### How It Works + +The `netlify/edge-functions/botMeta.ts` edge function intercepts requests and serves pre-rendered HTML with correct meta tags to: + +- **Social preview bots** (Twitter, Facebook, LinkedIn, Discord): Get Open Graph tags for link previews +- **Search engine bots** (Google, Bing, DuckDuckGo): Get correct canonical URLs + +Regular browsers and AI crawlers receive the normal SPA and let JavaScript update the meta tags. + +### Configuration + +Edit the bot arrays at the top of `netlify/edge-functions/botMeta.ts` to customize which bots receive pre-rendered HTML: + +```typescript +// Social preview bots - for link previews +const SOCIAL_PREVIEW_BOTS = ["twitterbot", "facebookexternalhit", ...]; + +// Search engine bots - for correct canonical URLs +const SEARCH_ENGINE_BOTS = ["googlebot", "bingbot", ...]; + +// AI crawlers - get normal SPA (can render JavaScript) +const AI_CRAWLERS = ["gptbot", "claudebot", ...]; +``` + +### Testing + +Verify bot detection with curl: + +```bash +# Simulate Googlebot +curl -H "User-Agent: Googlebot" https://yoursite.com/post-slug | grep canonical +# Expected: correct page canonical + +# Normal request +curl https://yoursite.com/post-slug | grep canonical +# Expected: homepage canonical (JavaScript will update it) +``` + +See `FORK_CONFIG.md` for detailed configuration options. 
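+
+Under the hood, the decision flow looks roughly like this. This is a simplified sketch, not the full handler: imports, the `/raw/*` exclusion, and the HTML generation step are omitted, and the user-agent lookup is shown for illustration. The function names match the real ones in `botMeta.ts`:
+
+```typescript
+// Simplified sketch of the botMeta.ts decision flow
+function isSearchEngineBot(userAgent: string | null): boolean {
+  if (!userAgent) return false;
+  const ua = userAgent.toLowerCase();
+  return SEARCH_ENGINE_BOTS.some((bot) => ua.includes(bot));
+}
+
+export default async function handler(request: Request, context: Context) {
+  // Illustrative lookup - the real handler reads the same header
+  const userAgent = request.headers.get("user-agent");
+
+  // AI crawlers can render JavaScript, so they get the normal SPA
+  if (isAICrawler(userAgent)) return context.next();
+
+  // Regular browsers (neither social preview nor search engine bots)
+  // also get the normal SPA; React updates meta tags client-side
+  if (!isSocialPreviewBot(userAgent) && !isSearchEngineBot(userAgent)) {
+    return context.next();
+  }
+
+  // Only social preview and search engine bots reach this point:
+  // serve pre-rendered HTML with the page-specific canonical URL
+}
+```
+
+The key change for Issue #6 is the second condition: search engine bots used to fall through to the SPA here, which is why they saw the homepage canonical.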
+
 ## Import External Content
 
 Use Firecrawl to import articles from external URLs as markdown posts:

diff --git a/src/App.tsx b/src/App.tsx
index 533b082..d730c6c 100644
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -1,22 +1,30 @@
 import { Routes, Route, useLocation } from "react-router-dom";
-import Home from "./pages/Home";
-import Post from "./pages/Post";
-import Stats from "./pages/Stats";
-import Blog from "./pages/Blog";
-import DocsPage from "./pages/DocsPage";
-import Write from "./pages/Write";
-import TagPage from "./pages/TagPage";
-import AuthorPage from "./pages/AuthorPage";
-import Unsubscribe from "./pages/Unsubscribe";
-import NewsletterAdmin from "./pages/NewsletterAdmin";
-import Dashboard from "./pages/Dashboard";
-import Callback from "./pages/Callback";
+import { lazy, Suspense } from "react";
 import Layout from "./components/Layout";
 import ScrollToTopOnNav from "./components/ScrollToTopOnNav";
 import { usePageTracking } from "./hooks/usePageTracking";
 import { SidebarProvider } from "./context/SidebarContext";
 import siteConfig from "./config/siteConfig";
+// Lazy load page components for better LCP and code splitting
+const Home = lazy(() => import("./pages/Home"));
+const Post = lazy(() => import("./pages/Post"));
+const Stats = lazy(() => import("./pages/Stats"));
+const Blog = lazy(() => import("./pages/Blog"));
+const DocsPage = lazy(() => import("./pages/DocsPage"));
+const Write = lazy(() => import("./pages/Write"));
+const TagPage = lazy(() => import("./pages/TagPage"));
+const AuthorPage = lazy(() => import("./pages/AuthorPage"));
+const Unsubscribe = lazy(() => import("./pages/Unsubscribe"));
+const NewsletterAdmin = lazy(() => import("./pages/NewsletterAdmin"));
+const Dashboard = lazy(() => import("./pages/Dashboard"));
+const Callback = lazy(() => import("./pages/Callback"));
+
+// Minimal loading fallback to prevent layout shift
+function PageSkeleton() {
+  return <div style={{ minHeight: "50vh" }} />;
+}
+
 function App() {
   // Track page views and active sessions
   usePageTracking();
@@ -24,22 +32,38 @@
 
   // Write page renders without Layout (no header, full-screen writing)
   if (location.pathname === "/write") {
-    return <Write />;
+    return (
+      <Suspense fallback={<PageSkeleton />}>
+        <Write />
+      </Suspense>
+    );
   }
 
   // Newsletter admin page renders without Layout (full-screen admin)
   if (location.pathname === "/newsletter-admin") {
-    return <NewsletterAdmin />;
+    return (
+      <Suspense fallback={<PageSkeleton />}>
+        <NewsletterAdmin />
+      </Suspense>
+    );
   }
 
   // Dashboard renders without Layout (full-screen admin)
   if (location.pathname === "/dashboard") {
-    return <Dashboard />;
+    return (
+      <Suspense fallback={<PageSkeleton />}>
+        <Dashboard />
+      </Suspense>
+    );
   }
 
   // Callback handles OAuth redirect from WorkOS
   if (location.pathname === "/callback") {
-    return <Callback />;
+    return (
+      <Suspense fallback={<PageSkeleton />}>
+        <Callback />
+      </Suspense>
+    );
   }
 
   // Determine if we should use a custom homepage
@@ -50,7 +74,8 @@
       <ScrollToTopOnNav />
-      <Routes>
+      <Suspense fallback={<PageSkeleton />}>
+        <Routes>
         {/* Homepage route - either default Home or custom page/post */}
         <Route path="/" element={homeElement} />
         {/* Catch-all for post/page slugs - must be last */}
         <Route path="/:slug" element={<Post />} />
-      </Routes>
+        </Routes>
+      </Suspense>
     </SidebarProvider>
   );

diff --git a/src/components/BlogPost.tsx b/src/components/BlogPost.tsx
index df8369c..a6ea70d 100644
--- a/src/components/BlogPost.tsx
+++ b/src/components/BlogPost.tsx
@@ -4,7 +4,45 @@ import remarkGfm from "remark-gfm";
 import remarkBreaks from "remark-breaks";
 import rehypeRaw from "rehype-raw";
 import rehypeSanitize, { defaultSchema } from "rehype-sanitize";
-import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
+import { PrismLight as SyntaxHighlighter } from "react-syntax-highlighter";
+// Import only needed languages for smaller bundle (INP optimization)
+import jsx from "react-syntax-highlighter/dist/esm/languages/prism/jsx";
+import tsx from "react-syntax-highlighter/dist/esm/languages/prism/tsx";
+import typescript from "react-syntax-highlighter/dist/esm/languages/prism/typescript";
+import javascript from "react-syntax-highlighter/dist/esm/languages/prism/javascript";
+import bash from "react-syntax-highlighter/dist/esm/languages/prism/bash";
+import json from "react-syntax-highlighter/dist/esm/languages/prism/json";
+import css from "react-syntax-highlighter/dist/esm/languages/prism/css";
+import markdown from "react-syntax-highlighter/dist/esm/languages/prism/markdown";
+import python from "react-syntax-highlighter/dist/esm/languages/prism/python";
+import sql from "react-syntax-highlighter/dist/esm/languages/prism/sql";
+import yaml from "react-syntax-highlighter/dist/esm/languages/prism/yaml";
+import go from "react-syntax-highlighter/dist/esm/languages/prism/go";
+import rust from "react-syntax-highlighter/dist/esm/languages/prism/rust";
+import diff from "react-syntax-highlighter/dist/esm/languages/prism/diff";
+
+// Register languages
+SyntaxHighlighter.registerLanguage("jsx", jsx);
+SyntaxHighlighter.registerLanguage("tsx", tsx);
+SyntaxHighlighter.registerLanguage("typescript", typescript);
+SyntaxHighlighter.registerLanguage("ts", typescript);
+SyntaxHighlighter.registerLanguage("javascript", javascript);
+SyntaxHighlighter.registerLanguage("js", javascript);
+SyntaxHighlighter.registerLanguage("bash", bash);
+SyntaxHighlighter.registerLanguage("shell", bash);
+SyntaxHighlighter.registerLanguage("sh", bash);
+SyntaxHighlighter.registerLanguage("json", json);
+SyntaxHighlighter.registerLanguage("css", css);
+SyntaxHighlighter.registerLanguage("markdown", markdown);
+SyntaxHighlighter.registerLanguage("md", markdown);
+SyntaxHighlighter.registerLanguage("python", python);
+SyntaxHighlighter.registerLanguage("py", python);
+SyntaxHighlighter.registerLanguage("sql", sql);
+SyntaxHighlighter.registerLanguage("yaml", yaml);
+SyntaxHighlighter.registerLanguage("yml", yaml); +SyntaxHighlighter.registerLanguage("go", go); +SyntaxHighlighter.registerLanguage("rust", rust); +SyntaxHighlighter.registerLanguage("diff", diff); import { Copy, Check, X } from "lucide-react"; import { useTheme } from "../context/ThemeContext"; import NewsletterSignup from "./NewsletterSignup"; diff --git a/src/components/DocsLayout.tsx b/src/components/DocsLayout.tsx index 2122eed..686c1b3 100644 --- a/src/components/DocsLayout.tsx +++ b/src/components/DocsLayout.tsx @@ -54,14 +54,15 @@ export default function DocsLayout({ return (
-      {/* Left sidebar - docs navigation */}
+      {/* Main content - placed first in DOM for SEO (H1 loads before sidebar H3) */}
+      {/* CSS position: fixed handles visual positioning of sidebars */}
+      <main>{children}</main>
+
+      {/* Left sidebar - docs navigation (after main content in DOM for SEO) */}
 
-      {/* Main content */}
-      <main>{children}</main>
-
       {/* Right sidebar - AI chat toggle + table of contents */}
       {hasRightSidebar && (