// scripts/import-url.ts

import fs from "fs";
import path from "path";
import FirecrawlApp from "@mendable/firecrawl-js";
import dotenv from "dotenv";

// Pull settings in from .env.local before anything reads process.env
dotenv.config({ path: ".env.local" });

const { FIRECRAWL_API_KEY } = process.env;

// A missing or empty key makes the client unusable: print setup help and stop.
if (FIRECRAWL_API_KEY === undefined || FIRECRAWL_API_KEY === "") {
  console.error("Error: FIRECRAWL_API_KEY not found in .env.local");

  const setupSteps = [
    "\nTo set up Firecrawl:",
    "1. Get an API key from https://firecrawl.dev",
    "2. Add FIRECRAWL_API_KEY=fc-xxx to your .env.local file",
  ];
  for (const step of setupSteps) {
    console.log(step);
  }

  process.exit(1);
}

// Shared Firecrawl client used by the import routine below
const firecrawl = new FirecrawlApp({ apiKey: FIRECRAWL_API_KEY });

/**
 * Generate a URL-safe slug from a title.
 *
 * The title is lowercased, accented letters are reduced to their base letter,
 * every character other than `a-z`, `0-9`, whitespace, and `-` is dropped, and
 * runs of whitespace/hyphens collapse into a single hyphen. The result is cut
 * to `maxLength` characters and never starts or ends with a hyphen.
 *
 * @param title - Human-readable title to convert.
 * @param maxLength - Maximum slug length in characters; defaults to 60.
 * @returns The slug, or an empty string when the title contains no usable
 *   characters (callers are expected to supply their own fallback).
 */
function generateSlug(title: string, maxLength = 60): string {
  return title
    .normalize("NFKD") // Split accented letters into base letter + combining mark
    .replace(/[\u0300-\u036f]/g, "") // Drop the combining marks ("é" -> "e")
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, "") // Remove special characters
    .replace(/[\s-]+/g, "-") // Collapse whitespace/hyphen runs into one hyphen
    .replace(/^-/, "") // Remove a leading hyphen
    .substring(0, maxLength) // Limit length
    .replace(/-$/, ""); // Trim AFTER truncating so the cut cannot leave a trailing hyphen
}

/**
 * Tidy scraped markdown: strip whitespace from both ends, then squeeze every
 * run of three or more newlines down to a single blank line.
 *
 * @param content - Raw markdown text.
 * @returns The tidied markdown; an empty string when the input is all whitespace.
 */
function cleanMarkdown(content: string): string {
  const excessiveNewlines = /\n{3,}/g;
  const trimmed = content.trim();
  return trimmed.replace(excessiveNewlines, "\n\n");
}

/**
 * Encode a string as a YAML double-quoted scalar.
 *
 * JSON string syntax is used because its escapes (`\"`, `\\`, `\n`, `\t`,
 * `\uXXXX`, ...) are also valid inside YAML double quotes, so quotes,
 * backslashes, and line breaks in scraped metadata cannot break the frontmatter.
 */
function toYamlString(value: string): string {
  return JSON.stringify(value);
}

/**
 * Render the draft post: frontmatter, the scraped body, and an attribution
 * footer linking back to the source URL.
 *
 * @throws TypeError when `sourceUrl` cannot be parsed by `URL`.
 */
function buildDraftMarkdown(post: {
  title: string;
  description: string;
  date: string;
  slug: string;
  content: string;
  sourceUrl: string;
}): string {
  const { hostname } = new URL(post.sourceUrl);

  return `---
title: ${toYamlString(post.title)}
description: ${toYamlString(post.description)}
date: "${post.date}"
slug: "${post.slug}"
published: false
tags: ["imported"]
---

${post.content}

---

*Originally published at [${hostname}](${post.sourceUrl})*
`;
}

/**
 * Scrape `url` with Firecrawl and save it as an unpublished markdown draft in
 * `content/blog/` under the current working directory.
 *
 * The file name is the slug derived from the page title. When a file with that
 * name already exists, a timestamp is appended to the slug so the existing
 * file is never overwritten. Progress and next steps are printed to the
 * console.
 *
 * Any failure (unsuccessful scrape, empty content, thrown error) is reported
 * on stderr and terminates the process with exit code 1.
 *
 * @param url - Address of the page to import.
 */
async function importFromUrl(url: string): Promise<void> {
  console.log(`\nScraping: ${url}`);
  console.log("This may take a moment...\n");

  try {
    const result = await firecrawl.scrapeUrl(url, {
      formats: ["markdown"],
    });

    if (!result.success) {
      console.error("Failed to scrape URL");
      console.error("Error:", result.error || "Unknown error");
      process.exit(1);
    }

    // `||` (not `??`) on purpose: an empty title/description also falls back.
    const title = result.metadata?.title || "Imported Post";
    const description = result.metadata?.description || "";
    const content = cleanMarkdown(result.markdown || "");

    if (!content) {
      console.error("No content found at URL");
      process.exit(1);
    }

    const blogDir = path.join(process.cwd(), "content", "blog");

    // Generate slug from title; fall back when the title has no usable characters
    let slug = generateSlug(title) || `imported-${Date.now()}`;
    let filePath = path.join(blogDir, `${slug}.md`);

    // Settle the final slug BEFORE rendering so the frontmatter and the file
    // name always agree, without patching the rendered text afterwards.
    if (fs.existsSync(filePath)) {
      console.warn(`Warning: File already exists at ${filePath}`);
      console.warn("Adding timestamp to filename to avoid overwrite.");
      slug = `${slug}-${Date.now()}`;
      filePath = path.join(blogDir, `${slug}.md`);
    }

    // Today's date as YYYY-MM-DD (UTC)
    const today = new Date().toISOString().slice(0, 10);

    const markdown = buildDraftMarkdown({
      title,
      description,
      date: today,
      slug,
      content,
      sourceUrl: url,
    });

    // `recursive: true` is a no-op when the directory already exists
    fs.mkdirSync(blogDir, { recursive: true });

    // "wx" makes the write fail rather than clobber a file that appeared
    // after the existence check above; the error is reported by the catch below.
    fs.writeFileSync(filePath, markdown, { flag: "wx" });

    console.log(`\nCreated: ${filePath}`);
    console.log(`Slug: ${slug}`);
    console.log(`Title: ${title}`);
    console.log(`Status: Draft (published: false)`);
    console.log("\nNext steps:");
    console.log("1. Review and edit the imported content");
    console.log("2. Set published: true when ready");
    console.log("3. Run: npm run sync");
  } catch (error) {
    console.error("Error importing URL:", error);
    process.exit(1);
  }
}

/**
 * Report whether `candidate` parses as an absolute URL whose scheme is
 * `http:` or `https:`.
 */
function isHttpUrl(candidate: string): boolean {
  try {
    const { protocol } = new URL(candidate);
    return protocol === "http:" || protocol === "https:";
  } catch {
    // `new URL` throws on input it cannot parse
    return false;
  }
}

// Parse command line arguments
const url = process.argv[2];

// No URL given: show usage and exit successfully
if (!url) {
  console.log("Firecrawl Content Importer");
  console.log("==========================\n");
  console.log("Usage: npm run import <url>\n");
  console.log("Example:");
  console.log("  npm run import https://example.com/article\n");
  console.log("This will:");
  console.log("  1. Scrape the URL and convert to markdown");
  console.log("  2. Create a draft post in content/blog/");
  console.log("  3. You can then review, edit, and sync\n");
  process.exit(0);
}

// Validate URL: parsing alone also accepts schemes such as ftp:, file: or
// mailto:, so the scheme is checked to match what the error message promises.
if (!isHttpUrl(url)) {
  console.error("Error: Invalid URL provided");
  console.log("Please provide a valid URL starting with http:// or https://");
  process.exit(1);
}

// importFromUrl reports its own failures and exits; `void` marks the promise
// as intentionally not awaited.
void importFromUrl(url);
feat: add featured section, logo gallery, Firecrawl import, and API export

Featured Section
- Frontmatter-controlled featured items with featured: true and featuredOrder
- Card view with excerpts and list/card toggle button
- View preference saved to localStorage
- New Convex queries for featured posts and pages with by_featured index

Logo Gallery
- Continuous marquee scroll with clickable logos
- CSS animation, grayscale with color on hover
- Configurable speed, position, and title
- 5 sample logos included

Firecrawl Content Importer
- npm run import <url> scrapes external URLs to markdown drafts
- Creates local files in content/blog/ with frontmatter
- Then sync to dev or prod (no separate import:prod command)

API Enhancements
- New /api/export endpoint for batch content fetching
- AI plugin discovery at /.well-known/ai-plugin.json
- OpenAPI 3.0 spec at /openapi.yaml
- Enhanced llms.txt documentation

Documentation
- AGENTS.md with codebase instructions for AI agents
- Updated all sync vs deploy tables to include import workflow
- Renamed content/pages/changelog.md to changelog-page.md

Technical
- New components: FeaturedCards.tsx, LogoMarquee.tsx
- New script: scripts/import-url.ts
- New dependency: @mendable/firecrawl-js
- Schema updates with featured, featuredOrder, excerpt fields

2025-12-18 12:28:25 -08:00

			`import fs from "fs";`
			`import path from "path";`
			`import FirecrawlApp from "@mendable/firecrawl-js";`
			`import dotenv from "dotenv";`

			`// Load environment variables`
			`dotenv.config({ path: ".env.local" });`

			`const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;`

			`if (!FIRECRAWL_API_KEY) {`
			`console.error("Error: FIRECRAWL_API_KEY not found in .env.local");`
			`console.log("\nTo set up Firecrawl:");`
			`console.log("1. Get an API key from https://firecrawl.dev");`
			`console.log("2. Add FIRECRAWL_API_KEY=fc-xxx to your .env.local file");`
			`process.exit(1);`
			`}`

			`const firecrawl = new FirecrawlApp({ apiKey: FIRECRAWL_API_KEY });`

			`// Generate a URL-safe slug from a title`
			`function generateSlug(title: string): string {`
			`return title`
			`.toLowerCase()`
			`.replace(/[^a-z0-9\s-]/g, "") // Remove special characters`
			`.replace(/\s+/g, "-") // Replace spaces with hyphens`
			`.replace(/-+/g, "-") // Remove consecutive hyphens`
			`.replace(/^-\|-$/g, "") // Remove leading/trailing hyphens`
			`.substring(0, 60); // Limit length`
			`}`

			`// Clean up markdown content`
			`function cleanMarkdown(content: string): string {`
			`return content`
			`.replace(/^\s+\|\s+$/g, "") // Trim whitespace`
			`.replace(/\n{3,}/g, "\n\n"); // Remove excessive newlines`
			`}`

			`async function importFromUrl(url: string) {`
			console.log(`\nScraping: ${url}`);
			`console.log("This may take a moment...\n");`

			`try {`
			`const result = await firecrawl.scrapeUrl(url, {`
			`formats: ["markdown"],`
			`});`

			`if (!result.success) {`
			`console.error("Failed to scrape URL");`
			`console.error("Error:", result.error \|\| "Unknown error");`
			`process.exit(1);`
			`}`

			`const title = result.metadata?.title \|\| "Imported Post";`
			`const description = result.metadata?.description \|\| "";`
			`const content = cleanMarkdown(result.markdown \|\| "");`

			`if (!content) {`
			`console.error("No content found at URL");`
			`process.exit(1);`
			`}`

			`// Generate slug from title`
			`const baseSlug = generateSlug(title);`
			const slug = baseSlug \|\| `imported-${Date.now()}`;

			`// Get today's date`
			`const today = new Date().toISOString().split("T")[0];`

			`// Create markdown file with frontmatter`
			const markdown = `---
			`title: "${title.replace(/"/g, '\\"')}"`
			`description: "${description.replace(/"/g, '\\"')}"`
			`date: "${today}"`
			`slug: "${slug}"`
			`published: false`
			`tags: ["imported"]`
			`---`

			`${content}`

			`---`

			`Originally published at [${new URL(url).hostname}](${url})`
			`;

			`// Ensure content/blog directory exists`
			`const blogDir = path.join(process.cwd(), "content", "blog");`
			`if (!fs.existsSync(blogDir)) {`
			`fs.mkdirSync(blogDir, { recursive: true });`
			`}`

			`// Write the file`
			const filePath = path.join(blogDir, `${slug}.md`);

			`// Check if file already exists`
			`if (fs.existsSync(filePath)) {`
			console.warn(`Warning: File already exists at ${filePath}`);
			`console.warn("Adding timestamp to filename to avoid overwrite.");`
			const newSlug = `${slug}-${Date.now()}`;
			const newFilePath = path.join(blogDir, `${newSlug}.md`);
			`fs.writeFileSync(`
			`newFilePath,`
			markdown.replace(`slug: "${slug}"`, `slug: "${newSlug}"`),
			`);`
			console.log(`\nCreated: ${newFilePath}`);
			console.log(`Slug: ${newSlug}`);
			`} else {`
			`fs.writeFileSync(filePath, markdown);`
			console.log(`\nCreated: ${filePath}`);
			console.log(`Slug: ${slug}`);
			`}`

			console.log(`Title: ${title}`);
			console.log(`Status: Draft (published: false)`);
			`console.log("\nNext steps:");`
			`console.log("1. Review and edit the imported content");`
			`console.log("2. Set published: true when ready");`
			`console.log("3. Run: npm run sync");`
			`} catch (error) {`
			`console.error("Error importing URL:", error);`
			`process.exit(1);`
			`}`
			`}`

			`// Parse command line arguments`
			`const url = process.argv[2];`

			`if (!url) {`
			`console.log("Firecrawl Content Importer");`
			`console.log("==========================\n");`
			`console.log("Usage: npm run import <url>\n");`
			`console.log("Example:");`
			`console.log(" npm run import https://example.com/article\n");`
			`console.log("This will:");`
			`console.log(" 1. Scrape the URL and convert to markdown");`
			`console.log(" 2. Create a draft post in content/blog/");`
			`console.log(" 3. You can then review, edit, and sync\n");`
			`process.exit(0);`
			`}`

			`// Validate URL`
			`try {`
			`new URL(url);`
			`} catch {`
			`console.error("Error: Invalid URL provided");`
			`console.log("Please provide a valid URL starting with http:// or https://");`
			`process.exit(1);`
			`}`

			`importFromUrl(url);`