
Add Webpage to Context

The Synvo API allows you to add web pages and URLs to your context collection. This enables the AI to crawl, extract, and analyze content from external websites, making it part of your searchable knowledge base.

Authentication

All endpoints require authentication via:

  • API Key: X-API-Key: <api_key>

Base URL

https://api.synvo.ai
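
As a minimal sketch (assuming Python and the requests library, consistent with the examples below), the API key can be attached once and reused for every call:

import requests

BASE_URL = "https://api.synvo.ai"

# Reuse a single session so the X-API-Key header is sent with every request.
session = requests.Session()
session.headers.update({"X-API-Key": "<API_KEY>"})

# Endpoints on this page are then called relative to the base URL, e.g.
# session.post(f"{BASE_URL}/webpage/add", data={"url": "https://example.com"})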

Add Webpage

Crawls and indexes a web page, extracting text content and adding it to your user context.

Endpoint: POST /webpage/add

Content-Type: application/x-www-form-urlencoded

Parameters

Parameter | Type   | Required | Description
----------|--------|----------|-------------------------------------------------------
url       | string | Yes      | The webpage URL to crawl and index
path      | string | No       | Directory path to organize the webpage (default: "/")

Example Request

cURL:

curl -X POST "https://api.synvo.ai/webpage/add" \
  -H "X-API-Key: ${API_KEY}" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  --data-urlencode "url=https://example.com/article" \
  --data-urlencode "path=/research"
import requests

api_key = "<API_KEY>"
url = "https://api.synvo.ai/webpage/add"
data = {
    "url": "https://example.com/article",
    "path": "/research"
}
headers = {"X-API-Key": api_key}

response = requests.post(url, data=data, headers=headers, timeout=30)
response.raise_for_status()
print(response.json())

JavaScript:

const apiKey = "<API_KEY>";
const params = new URLSearchParams({
  url: "https://example.com/article",
  path: "/research"
});

const response = await fetch("https://api.synvo.ai/webpage/add", {
  method: "POST",
  headers: {
    "X-API-Key": apiKey,
  },
  body: params,
});

if (!response.ok) {
  throw new Error(`Request failed: ${response.status}`);
}

console.log(await response.json());

Example Response

{
  "message": "Webpage added successfully",
  "file_id": "web_abc123xyz",
  "url": "https://example.com/article",
  "path": "/research",
  "timestamp": "2024-01-15T10:30:00Z",
  "status": "PENDING"
}

Response Codes

  • 200 - Webpage successfully queued for processing
  • 400 - Bad request (invalid URL or parameters)
  • 401 - Unauthorized
  • 422 - URL cannot be accessed or crawled
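
A sketch of branching on these codes with Python requests (the error-response bodies are not specified here, so the messages printed below are assumptions):

import requests

api_key = "<API_KEY>"

response = requests.post(
    "https://api.synvo.ai/webpage/add",
    data={"url": "https://example.com/article", "path": "/research"},
    headers={"X-API-Key": api_key},
    timeout=30,
)

if response.status_code == 200:
    # Queued for processing; keep the file_id for later status checks.
    file_id = response.json()["file_id"]
elif response.status_code in (400, 422):
    # Invalid parameters, or the URL could not be accessed or crawled.
    print(f"URL rejected ({response.status_code}): {response.text}")
elif response.status_code == 401:
    raise RuntimeError("Unauthorized: check the X-API-Key value")
else:
    response.raise_for_status()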

Batch Upload with Status Verification

When adding multiple webpages, upload them and verify that processing is complete before querying:

Python:

import requests
import time

api_key = "<API_KEY>"
BASE_URL = "https://api.synvo.ai"

# List of webpages to add
webpages = [
    {"url": "https://example.com/article1", "path": "/research"},
    {"url": "https://example.com/article2", "path": "/research"},
    {"url": "https://example.com/blog-post", "path": "/blog"},
]

# Step 1: Upload all webpages
print("📤 Uploading webpages...")
file_ids = {}

for webpage in webpages:
    try:
        response = requests.post(
            f"{BASE_URL}/webpage/add",
            data=webpage,
            headers={"X-API-Key": api_key},
            timeout=30
        )
        response.raise_for_status()
        result = response.json()
        
        file_id = result.get("file_id")
        file_ids[webpage["url"]] = file_id
        print(f"✓ Uploaded: {webpage['url']} (ID: {file_id})")
        
        time.sleep(0.5)  # Rate limiting
        
    except Exception as e:
        print(f"✗ Failed to upload {webpage['url']}: {e}")

# Step 2: Wait for all files to complete processing
print("\n⏳ Waiting for all files to process...")
for url, file_id in file_ids.items():
    max_retries = 60  # Maximum 5 minutes (60 * 5 seconds)
    retries = 0
    
    while retries < max_retries:
        try:
            status_response = requests.get(
                f"{BASE_URL}/file/status/{file_id}",
                headers={"X-API-Key": api_key},
                timeout=10
            )
            status_response.raise_for_status()
            status = status_response.json()["status"]
            
            if status == "COMPLETED":
                print(f"✅ {url} - Processing complete!")
                break
            elif status == "FAILED":
                print(f"❌ {url} - Processing failed!")
                break
            
            retries += 1
            time.sleep(5)
            
        except Exception as e:
            print(f"⚠️  Error checking status for {url}: {e}")
            retries += 1
            time.sleep(5)
    
    if retries >= max_retries:
        print(f"⏱️  {url} - Timeout waiting for processing")

# Step 3: Summary
print("\n🎉 All files ready for querying!")
print("\nProcessed File IDs:")
for url, file_id in file_ids.items():
    print(f"  {file_id}: {url}")
const apiKey = "<API_KEY>";
const BASE_URL = "https://api.synvo.ai";

// List of webpages to add
const webpages = [
  { url: "https://example.com/article1", path: "/research" },
  { url: "https://example.com/article2", path: "/research" },
  { url: "https://example.com/blog-post", path: "/blog" },
];

async function batchUploadWithVerification() {
  // Step 1: Upload all webpages
  console.log("📤 Uploading webpages...");
  const fileIds = {};

  for (const webpage of webpages) {
    try {
      const params = new URLSearchParams(webpage);
      const response = await fetch(`${BASE_URL}/webpage/add`, {
        method: "POST",
        headers: { "X-API-Key": apiKey },
        body: params,
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      const result = await response.json();
      const fileId = result.file_id;
      fileIds[webpage.url] = fileId;
      console.log(`✓ Uploaded: ${webpage.url} (ID: ${fileId})`);

      await new Promise(resolve => setTimeout(resolve, 500)); // Rate limiting
    } catch (error) {
      console.log(`✗ Failed to upload ${webpage.url}: ${error.message}`);
    }
  }

  // Step 2: Wait for all files to complete processing
  console.log("\n⏳ Waiting for all files to process...");
  
  for (const [url, fileId] of Object.entries(fileIds)) {
    const maxRetries = 60; // Maximum 5 minutes (60 * 5 seconds)
    let retries = 0;

    while (retries < maxRetries) {
      try {
        const statusResponse = await fetch(
          `${BASE_URL}/file/status/${fileId}`,
          { headers: { "X-API-Key": apiKey } }
        );

        if (!statusResponse.ok) {
          throw new Error(`HTTP ${statusResponse.status}`);
        }

        const data = await statusResponse.json();
        const status = data.status;

        if (status === "COMPLETED") {
          console.log(`✅ ${url} - Processing complete!`);
          break;
        } else if (status === "FAILED") {
          console.log(`❌ ${url} - Processing failed!`);
          break;
        }

        retries++;
        await new Promise(resolve => setTimeout(resolve, 5000));
      } catch (error) {
        console.log(`⚠️  Error checking status for ${url}: ${error.message}`);
        retries++;
        await new Promise(resolve => setTimeout(resolve, 5000));
      }
    }

    if (retries >= maxRetries) {
      console.log(`⏱️  ${url} - Timeout waiting for processing`);
    }
  }

  // Step 3: Summary
  console.log("\n🎉 All files ready for querying!");
  console.log("\nProcessed File IDs:");
  for (const [url, fileId] of Object.entries(fileIds)) {
    console.log(`  ${fileId}: ${url}`);
  }
}

// Execute batch operation
batchUploadWithVerification();

Best Practices

  • Status Verification: Always check file processing status before querying
  • Timeout Protection: Set maximum wait times (e.g., 5 minutes) to prevent infinite loops
  • Error Handling: Wrap API calls in try-catch blocks to handle failures gracefully
  • Rate Limiting: Add delays between requests (0.5-1 second) to avoid rate limits
  • Progress Tracking: Log upload and processing progress for monitoring
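
One way to package these practices is a small helper built on the /webpage/add and /file/status endpoints shown above; the polling interval and overall deadline below are tunable assumptions, not API requirements:

import time
import requests

BASE_URL = "https://api.synvo.ai"

def add_webpage_and_wait(api_key, url, path="/", poll_interval=5, max_wait=300):
    """Add a webpage, then poll its status until COMPLETED, FAILED, or timeout."""
    headers = {"X-API-Key": api_key}

    # Upload; raise immediately on HTTP errors (error handling)
    response = requests.post(
        f"{BASE_URL}/webpage/add",
        data={"url": url, "path": path},
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    file_id = response.json()["file_id"]

    # Poll with a hard deadline to avoid infinite loops (timeout protection)
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        status_response = requests.get(
            f"{BASE_URL}/file/status/{file_id}", headers=headers, timeout=10
        )
        status_response.raise_for_status()
        status = status_response.json()["status"]
        if status in ("COMPLETED", "FAILED"):
            return file_id, status
        time.sleep(poll_interval)  # delay between checks (rate limiting)

    return file_id, "TIMEOUT"

# Usage:
# file_id, status = add_webpage_and_wait("<API_KEY>", "https://example.com/article", "/research")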

Supported Platforms

Synvo API supports content extraction from various platforms:

Platform        | URL Pattern           | Status
----------------|-----------------------|------------
Apple Podcasts  | podcasts.apple.com    | ✅ Stable
arXiv           | arxiv.org             | ✅ Stable
Bilibili        | bilibili.com          | ✅ Stable
Twitter (X)     | twitter.com, x.com    | ✅ Stable
Xiaoyuzhou      | xiaoyuzhoufm.com      | ✅ Stable
WeChat Articles | mp.weixin.qq.com      | ✅ Stable
YouTube         | youtube.com, youtu.be | ⚠️ Unstable
Xiaohongshu     | xhslink.com           | ⚠️ Unstable

Examples

import requests

api_key = "<API_KEY>"

# Add an arXiv paper
requests.post(
    "https://api.synvo.ai/webpage/add",
    data={"url": "https://arxiv.org/abs/2301.00000", "path": "/papers"},
    headers={"X-API-Key": api_key}
)

# Add a YouTube video
requests.post(
    "https://api.synvo.ai/webpage/add",
    data={"url": "https://youtube.com/watch?v=example", "path": "/videos"},
    headers={"X-API-Key": api_key}
)

# Add a WeChat article
requests.post(
    "https://api.synvo.ai/webpage/add",
    data={"url": "https://mp.weixin.qq.com/s/example", "path": "/articles"},
    headers={"X-API-Key": api_key}
)

Note on Unstable Platforms

Platforms marked as "Unstable" may experience occasional extraction issues due to anti-scraping measures or frequent layout changes. For best results, verify content extraction after upload.

How It Works

  1. Submit URL: Provide a publicly accessible web page URL
  2. Content Extraction: The system crawls the page and extracts text, images, and metadata
  3. Indexing: Content is processed and indexed in the vector database
  4. Context Integration: The extracted content becomes searchable in your context
  5. AI Analysis: The AI can now reference and analyze this web content in conversations
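
In code, the first three steps map directly onto the two endpoints documented on this page (a sketch; the query step itself is handled by the rest of the API and not shown here):

import time
import requests

headers = {"X-API-Key": "<API_KEY>"}

# Step 1: submit the URL
added = requests.post(
    "https://api.synvo.ai/webpage/add",
    data={"url": "https://example.com/article", "path": "/research"},
    headers=headers,
    timeout=30,
).json()
print(added["status"])  # "PENDING" while extraction and indexing run

# Steps 2-3: extraction and indexing happen server-side; check back until the
# status becomes COMPLETED (see the batch example above for a full polling loop).
time.sleep(30)
status = requests.get(
    f"https://api.synvo.ai/file/status/{added['file_id']}",
    headers=headers,
    timeout=10,
).json()["status"]
print(status)  # COMPLETED once the content is searchable in your context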