Crawler Examples
This section provides examples of how to configure and use crawlers to access content through paywalls.net. These examples demonstrate best practices and common use cases.
Basic Crawler
- Configuration: Set up variables for the paywalls.net API host, your API key, and your company ID.
- Token Management: Use the `requestAccessTokenOAuth` function to refresh tokens when they expire.
- Main Logic: The `fetchPage` function handles the core logic of sending requests with the appropriate authorization headers and processing responses.
Crawler Examples
Fetching a Page with Authorization
import fetch from "node-fetch";
import { Request } from "node-fetch";

/**
 * Fetches `url`, sending `token` as a Bearer credential when one is supplied,
 * and logs the outcome to the console.
 *
 * A non-OK HTTP status is logged together with the response body; it does not
 * throw. Failures raised by `fetch` itself are logged and then rethrown.
 *
 * @param url - Target to fetch: a string, a `URL`, or a `Request`.
 * @param token - Access token; when empty, no `Authorization` header is sent.
 * @throws Rethrows whatever `fetch` (or reading the error body) throws.
 */
export async function fetchPage(url: string | URL | Request, token: string): Promise<void> {
  // A Request object stringifies to "[object Request]", so derive a readable
  // label for log messages instead of interpolating `url` directly.
  const target =
    typeof url === "string" ? url : url instanceof URL ? url.href : url.url;

  try {
    const headers = new Headers();
    headers.set("User-Agent", "node-fetch");
    if (token) {
      headers.set("Authorization", `Bearer ${token}`);
    }

    const response = await fetch(url, { headers });
    if (!response.ok) {
      console.log(`Failed to fetch ${target}: ${response.status}`);
      // Print the body as well: it is the only detail available about the failure.
      const data = await response.text();
      console.log(data);
    } else {
      console.log(`Fetched ${target}: ${response.status}`);
    }
  } catch (error) {
    console.log(
      `Failed to fetch ${target}: ${error instanceof Error ? error.message : String(error)}`
    );
    throw error;
  }
}
Requesting a new access token
- See also API Integration
const paywallsAPIHost = 'https://cloud-api.paywalls.net';

/** Payload returned by the token refresh endpoint on success. */
type AccessToken = {
  token_type: string;
  access_token: string;
  expires_in: number;
};

/**
 * Exchanges a refresh token for a new access token by POSTing to
 * `/api/oauth/refresh` on the paywalls.net API host.
 *
 * @param agentPublicId - Sent as the `client_id` field of the request body.
 * @param refresh_token - Sent as the `refresh_token` field of the request body.
 * @returns The parsed token payload, or `null` when the server answers with a
 *   non-OK status (the status and error body are logged).
 * @throws Rethrows errors raised by `fetch` itself, and JSON parse errors on a
 *   successful (OK) response.
 */
export async function requestAccessTokenOAuth(
  agentPublicId: string,
  refresh_token: string
): Promise<AccessToken | null> {
  const url = `${paywallsAPIHost}/api/oauth/refresh`;
  const headers = {
    "Content-Type": "application/json",
    "User-Agent": "node-fetch"
  };
  const body = JSON.stringify({
    client_id: agentPublicId,
    grant_type: "refresh_token",
    refresh_token: refresh_token
  });

  const response = await fetch(url, { method: 'POST', headers, body });

  if (!response.ok) {
    console.log(`Failed to fetch ${url}: ${response.status}`);
    // Error bodies are not guaranteed to be JSON (e.g. an HTML gateway page or
    // an empty body). Read the text first so a parse failure cannot turn the
    // "return null" path into an unexpected exception.
    const raw = await response.text();
    let detail: unknown = raw;
    try {
      detail = JSON.parse(raw);
    } catch {
      // Not JSON: log the raw text as-is.
    }
    console.log(detail);
    return null;
  }

  // example response format: { token_type: "Bearer", access_token, expires_in: 3600 }
  return await response.json() as AccessToken;
}