Crawler Examples

This section provides examples of how to configure and use crawlers to access content through paywalls.net. These examples demonstrate best practices and common use cases.

Basic Crawler

  • Configuration: Set up variables for the paywalls.net API host, your API key, and your company ID.
  • Token Management: Use the requestAccessTokenOAuth function to refresh tokens when they expire.
  • Main Logic: The fetchPage function handles the core logic of sending requests with the appropriate authorization headers and processing responses.

Crawler Code Examples

Fetching a Page with Authorization

import fetch from "node-fetch";
import { Request } from "node-fetch";

/**
 * Fetches a page, attaching a bearer token when one is supplied, and logs the outcome.
 *
 * @param url - Target to fetch: a URL string, a `URL`, or a `Request`.
 * @param token - Sent as `Authorization: Bearer <token>`; an empty string omits the header.
 * @returns Resolves once the status (and, for a non-OK response, the body text) has been logged.
 * @throws Rethrows any error raised while fetching or reading the response, after logging it.
 */
export async function fetchPage(url: string | URL | Request, token: string): Promise<void> {
    // A Request interpolates as "[object Request]", so resolve a printable URL once for the logs.
    const target = typeof url === "string" ? url : url instanceof URL ? url.href : url.url;
    try {
        const headers = new Headers();
        headers.set("User-Agent", "node-fetch");
        if (token) {
            headers.set("Authorization", `Bearer ${token}`);
        }
        const response = await fetch(url, { headers });

        if (!response.ok) {
            console.log(`Failed to fetch ${target}: ${response.status}`);
            // Log the body of the failed response as well.
            const data = await response.text();
            console.log(data);
        } else {
            console.log(`Fetched ${target}: ${response.status}`);
        }
    } catch (error) {
        console.log(`Failed to fetch ${target}: ${error instanceof Error ? error.message : String(error)}`);
        throw error;
    }
}

Requesting a New Access Token

/** Token payload returned by the OAuth refresh endpoint. */
type AccessToken = {
    token_type: string;
    access_token: string;
    expires_in: number;
};

/** Base URL of the paywalls.net cloud API. */
const paywallsAPIHost = "https://cloud-api.paywalls.net";

/**
 * Exchanges a refresh token for a new access token by POSTing to `/api/oauth/refresh`
 * on the configured paywalls.net API host.
 *
 * @param agentPublicId - Sent as the OAuth `client_id`.
 * @param refresh_token - Refresh token to exchange.
 * @returns The parsed token response, or `null` when `response.ok` is false.
 * @throws Propagates errors from `fetch` itself and from parsing a successful response body.
 */
export async function requestAccessTokenOAuth(agentPublicId: string, refresh_token: string): Promise<AccessToken|null> {
    const url = `${paywallsAPIHost}/api/oauth/refresh`;
    const headers = {
        "Content-Type": "application/json",
        "User-Agent": "node-fetch"
    };
    const body = JSON.stringify({
        client_id: agentPublicId,
        grant_type: "refresh_token",
        refresh_token: refresh_token
    });

    const response = await fetch(url, { method: 'POST', headers, body });
    if (!response.ok) {
        console.log(`Failed to fetch ${url}: ${response.status}`);
        // An error body is not guaranteed to be JSON (it may be HTML or empty), so read it
        // as text and parse opportunistically; a parse failure must not reject the call.
        const raw = await response.text();
        let detail: unknown = raw;
        try {
            detail = JSON.parse(raw);
        } catch {
            // Not JSON: keep the raw text for the log.
        }
        console.log(detail);
        return null;
    }
    // example response format: { token_type: "Bearer", access_token, expires_in: 3600 }
    return await response.json() as AccessToken;
}