TypeScript: Save Successful Pages
import { LLMLayerClient } from 'llmlayer';
import { mkdir, writeFile } from 'node:fs/promises';
import { createHash } from 'node:crypto';
// Create the SDK client; the API key is read from the LLMLAYER_API_KEY
// environment variable.
const client = new LLMLayerClient({
  apiKey: process.env.LLMLAYER_API_KEY,
});

// Make sure the output directory exists before any page is written.
await mkdir('crawl-output', { recursive: true });

// Consume crawl events one at a time as the stream yields them.
for await (const event of client.crawlStream({
  url: 'https://www.ycombinator.com',
  maxPages: 25,
  maxDepth: 2,
  mainContentOnly: true,
})) {
  if (event.type === 'page') {
    const page = event.page;

    // Resolve the page URL once so the skip warning and the file name agree:
    // prefer the final URL and fall back to the requested URL when it is unset.
    const pageUrl = page.final_url || page.requested_url || '';

    // Only pages that succeeded AND produced markdown are saved.
    if (!page.success || !page.markdown) {
      console.warn('Skipped:', pageUrl, page.error);
      continue;
    }

    // Name the file after the SHA-1 hex digest of the URL, which avoids
    // characters that are not safe in file names.
    const id = createHash('sha1').update(pageUrl).digest('hex');
    await writeFile(`crawl-output/${id}.md`, page.markdown);
  } else if (event.type === 'usage') {
    // Billing summary reported by the stream.
    console.log(`Billed pages: ${event.billed_count}, cost: $${event.cost}`);
  } else if (event.type === 'error') {
    console.error(event.error);
  }
}
Python: Stream Into Memory
from llmlayer import LLMLayerClient
client = LLMLayerClient(api_key="YOUR_LLMLAYER_API_KEY")

# Pages that succeeded and contain markdown are collected here.
pages = []

for event in client.crawl_stream(
    "https://www.ycombinator.com",
    max_pages=25,
    max_depth=2,
    main_content_only=True,
):
    event_type = event.get("type")
    if event_type == "page":
        page = event["page"]
        if page.get("success") and page.get("markdown"):
            pages.append(page)
        else:
            # Prefer the final URL; fall back to the requested URL when the
            # final one is unset (.get() yields None if neither key exists).
            page_url = page.get("final_url") or page.get("requested_url")
            print("Skipped:", page_url, page.get("error"))
    elif event_type == "usage":
        # Billing summary reported by the stream.
        print("Billed pages:", event.get("billed_count"))
        print("Cost:", event.get("cost"))
    elif event_type == "error":
        print("Crawl error:", event.get("error"))

# Runs once the event stream has been fully consumed.
print("Saved pages:", len(pages))
Python: Async Crawl
import asyncio
from llmlayer import LLMLayerClient
async def main():
    """Stream a small crawl asynchronously and print one line per page.

    For every ``page`` event, prints the page's final URL and whether the
    page carries markdown. ``error`` events are printed instead of being
    silently dropped; other event types are ignored.
    """
    client = LLMLayerClient(api_key="YOUR_LLMLAYER_API_KEY")
    async for event in client.crawl_stream_async(
        "https://www.ycombinator.com",
        max_pages=10,
        max_depth=1,
        main_content_only=True,
    ):
        event_type = event.get("type")
        if event_type == "page":
            page = event["page"]
            print(page.get("final_url"), bool(page.get("markdown")))
        elif event_type == "error":
            # Report crawl failures so a failed run does not look like an empty one.
            print("Crawl error:", event.get("error"))


asyncio.run(main())
Notes
- Crawl currently returns markdown page content only.
- Use Map first if you need to inspect or filter URLs before fetching content.
- Use the `usage` event as the request-level cost summary.
