Extract API - Multi-mode page extraction

# POST a sample extraction request (modes: json + summary) to the Extract API.
# Replace <token> with your LLMLayer API key before running.
curl -X POST 'https://api.llmlayer.dev/api/v2/extract' \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "url": "https://www.ycombinator.com/blog",
  "modes": [
    "json",
    "summary"
  ],
  "json_schema": "<string>",
  "query": "<string>",
  "instructions": "<string>",
  "response_language": "auto",
  "advanced_proxy": false,
  "main_content_only": true
}
'

import requests

# Extract API endpoint.
url = "https://api.llmlayer.dev/api/v2/extract"

# Request body: run the 'json' and 'summary' modes against a single page.
payload = {
    "url": "https://www.ycombinator.com/blog",
    "modes": ["json", "summary"],
    "json_schema": "<string>",
    "query": "<string>",
    "instructions": "<string>",
    "response_language": "auto",
    "advanced_proxy": False,
    "main_content_only": True
}
# Replace <token> with your LLMLayer API key.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so a stalled connection would block
# forever; 30 seconds mirrors the timeout used by the PHP sample.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (JSON text).
print(response.text)

// Request settings for the Extract API call; replace <token> with your API key.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    url: 'https://www.ycombinator.com/blog',
    modes: ['json', 'summary'],
    json_schema: '<string>',
    query: '<string>',
    instructions: '<string>',
    response_language: 'auto',
    advanced_proxy: false,
    main_content_only: true
  })
};

// Send the request, log the parsed JSON body, and log any error that occurs.
(async () => {
  try {
    const res = await fetch('https://api.llmlayer.dev/api/v2/extract', options);
    const data = await res.json();
    console.log(data);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// JSON request body: run the 'json' and 'summary' modes against a single page.
$payload = json_encode([
  'url' => 'https://www.ycombinator.com/blog',
  'modes' => ['json', 'summary'],
  'json_schema' => '<string>',
  'query' => '<string>',
  'instructions' => '<string>',
  'response_language' => 'auto',
  'advanced_proxy' => false,
  'main_content_only' => true
]);

// Request headers; replace <token> with your API key.
$requestHeaders = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json"
];

$ch = curl_init();

curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.llmlayer.dev/api/v2/extract",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

// Run the transfer and capture any transport error before closing the handle.
$result = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the response body on success, otherwise the cURL error message.
if (!$error) {
  echo $result;
} else {
  echo "cURL Error #:" . $error;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample extraction request to the Extract API and prints the
// raw response body. Each step's error is checked and reported instead of
// being discarded.
func main() {

	url := "https://api.llmlayer.dev/api/v2/extract"

	payload := strings.NewReader("{\n  \"url\": \"https://www.ycombinator.com/blog\",\n  \"modes\": [\n    \"json\",\n    \"summary\"\n  ],\n  \"json_schema\": \"<string>\",\n  \"query\": \"<string>\",\n  \"instructions\": \"<string>\",\n  \"response_language\": \"auto\",\n  \"advanced_proxy\": false,\n  \"main_content_only\": true\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	// Replace <token> with your API key.
	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On failure the response is nil, so res.Body must not be touched.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body: run the 'json' and 'summary' modes against a single page.
String requestBody = "{\n  \"url\": \"https://www.ycombinator.com/blog\",\n  \"modes\": [\n    \"json\",\n    \"summary\"\n  ],\n  \"json_schema\": \"<string>\",\n  \"query\": \"<string>\",\n  \"instructions\": \"<string>\",\n  \"response_language\": \"auto\",\n  \"advanced_proxy\": false,\n  \"main_content_only\": true\n}";

// POST the body to the Extract API (replace <token> with your API key) and
// read the response as a string.
HttpResponse<String> response = Unirest
  .post("https://api.llmlayer.dev/api/v2/extract")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Extract API endpoint.
endpoint = URI("https://api.llmlayer.dev/api/v2/extract")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request with auth and content-type headers; replace <token> with your API key.
post = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
post.body = "{\n  \"url\": \"https://www.ycombinator.com/blog\",\n  \"modes\": [\n    \"json\",\n    \"summary\"\n  ],\n  \"json_schema\": \"<string>\",\n  \"query\": \"<string>\",\n  \"instructions\": \"<string>\",\n  \"response_language\": \"auto\",\n  \"advanced_proxy\": false,\n  \"main_content_only\": true\n}"

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "url": "<string>",
  "title": "<string>",
  "metadata": {},
  "structured_data": {},
  "summary": "<string>",
  "answer": "<string>",
  "links": [
    {
      "url": "<string>",
      "text": "<string>",
      "internal": true
    }
  ],
  "brand": {},
  "cost": 0.01,
  "response_time": "3.42",
  "statusCode": 200
}

Endpoint

Extract API - Multi-mode page extraction

Extract from a single web page in one call. Combine any modes: structured JSON ('json', $0.005), a markdown summary ('summary', $0.005), an answer to a question ('qa', $0.005), all page links ('links', $0.001), and a brand profile with logos/colors/socials ('brand', $0.002). Pricing is summed per selected mode; advanced proxy adds $0.004 once when a scrape runs. If the request fails before any AI cost is incurred (page fetch failure, empty content, brand fetch failure), you are fully refunded.

POST

api

extract

# POST a sample extraction request (modes: json + summary) to the Extract API.
# Replace <token> with your LLMLayer API key before running.
curl -X POST 'https://api.llmlayer.dev/api/v2/extract' \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "url": "https://www.ycombinator.com/blog",
  "modes": [
    "json",
    "summary"
  ],
  "json_schema": "<string>",
  "query": "<string>",
  "instructions": "<string>",
  "response_language": "auto",
  "advanced_proxy": false,
  "main_content_only": true
}
'

import requests

# Extract API endpoint.
url = "https://api.llmlayer.dev/api/v2/extract"

# Request body: run the 'json' and 'summary' modes against a single page.
payload = {
    "url": "https://www.ycombinator.com/blog",
    "modes": ["json", "summary"],
    "json_schema": "<string>",
    "query": "<string>",
    "instructions": "<string>",
    "response_language": "auto",
    "advanced_proxy": False,
    "main_content_only": True
}
# Replace <token> with your LLMLayer API key.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so a stalled connection would block
# forever; 30 seconds mirrors the timeout used by the PHP sample.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (JSON text).
print(response.text)

// Request settings for the Extract API call; replace <token> with your API key.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    url: 'https://www.ycombinator.com/blog',
    modes: ['json', 'summary'],
    json_schema: '<string>',
    query: '<string>',
    instructions: '<string>',
    response_language: 'auto',
    advanced_proxy: false,
    main_content_only: true
  })
};

// Send the request, log the parsed JSON body, and log any error that occurs.
(async () => {
  try {
    const res = await fetch('https://api.llmlayer.dev/api/v2/extract', options);
    const data = await res.json();
    console.log(data);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// JSON request body: run the 'json' and 'summary' modes against a single page.
$payload = json_encode([
  'url' => 'https://www.ycombinator.com/blog',
  'modes' => ['json', 'summary'],
  'json_schema' => '<string>',
  'query' => '<string>',
  'instructions' => '<string>',
  'response_language' => 'auto',
  'advanced_proxy' => false,
  'main_content_only' => true
]);

// Request headers; replace <token> with your API key.
$requestHeaders = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json"
];

$ch = curl_init();

curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.llmlayer.dev/api/v2/extract",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

// Run the transfer and capture any transport error before closing the handle.
$result = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the response body on success, otherwise the cURL error message.
if (!$error) {
  echo $result;
} else {
  echo "cURL Error #:" . $error;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample extraction request to the Extract API and prints the
// raw response body. Each step's error is checked and reported instead of
// being discarded.
func main() {

	url := "https://api.llmlayer.dev/api/v2/extract"

	payload := strings.NewReader("{\n  \"url\": \"https://www.ycombinator.com/blog\",\n  \"modes\": [\n    \"json\",\n    \"summary\"\n  ],\n  \"json_schema\": \"<string>\",\n  \"query\": \"<string>\",\n  \"instructions\": \"<string>\",\n  \"response_language\": \"auto\",\n  \"advanced_proxy\": false,\n  \"main_content_only\": true\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	// Replace <token> with your API key.
	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On failure the response is nil, so res.Body must not be touched.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body: run the 'json' and 'summary' modes against a single page.
String requestBody = "{\n  \"url\": \"https://www.ycombinator.com/blog\",\n  \"modes\": [\n    \"json\",\n    \"summary\"\n  ],\n  \"json_schema\": \"<string>\",\n  \"query\": \"<string>\",\n  \"instructions\": \"<string>\",\n  \"response_language\": \"auto\",\n  \"advanced_proxy\": false,\n  \"main_content_only\": true\n}";

// POST the body to the Extract API (replace <token> with your API key) and
// read the response as a string.
HttpResponse<String> response = Unirest
  .post("https://api.llmlayer.dev/api/v2/extract")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Extract API endpoint.
endpoint = URI("https://api.llmlayer.dev/api/v2/extract")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request with auth and content-type headers; replace <token> with your API key.
post = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
post.body = "{\n  \"url\": \"https://www.ycombinator.com/blog\",\n  \"modes\": [\n    \"json\",\n    \"summary\"\n  ],\n  \"json_schema\": \"<string>\",\n  \"query\": \"<string>\",\n  \"instructions\": \"<string>\",\n  \"response_language\": \"auto\",\n  \"advanced_proxy\": false,\n  \"main_content_only\": true\n}"

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "url": "<string>",
  "title": "<string>",
  "metadata": {},
  "structured_data": {},
  "summary": "<string>",
  "answer": "<string>",
  "links": [
    {
      "url": "<string>",
      "text": "<string>",
      "internal": true
    }
  ],
  "brand": {},
  "cost": 0.01,
  "response_time": "3.42",
  "statusCode": 200
}

Authorizations

Authorization

string

header

required

Bearer token authentication using your LLMLayer API key. Include in Authorization header as: Bearer YOUR_LLMLAYER_API_KEY

Body

application/json

url

string

required

The page URL to extract from. Must be http(s). PDF URLs are not supported — use /get_pdf_content instead.

Example:

"https://www.ycombinator.com/blog"

modes

enum<string>[]

Extraction modes to run in one call. Any combination; duplicates are ignored. Pricing is summed per mode: json/summary/qa $0.005 each, links $0.001, brand $0.002.

Available options:

json,

summary,

qa,

links,

brand

Example:

["json", "summary"]

json_schema

Required when modes includes 'json'. Accepts a formal JSON schema, an example object (e.g. {"title": "string", "price": "number"}), or a plain-text description of the fields you want.

query

string

The question to answer from the page — required when modes includes 'qa'.

instructions

string

Optional extra guidance applied to all AI modes (json, summary, qa). E.g. 'dates in DD/MM/YYYY format'.

response_language

string

default:auto

Output language for summary/qa ('auto' matches the user/page language). E.g. 'en', 'fr', 'es'.

advanced_proxy

boolean

default:false

Use advanced proxy for sites with bot protection. Adds $0.004 once per request, only when a scrape runs (brand-only requests never scrape).

main_content_only

boolean

Omit this field to let the API pick the best default per selection: links-only requests scrape the full page (nav/footer links matter), AI modes use main content only.

Response

Extraction results. All result fields (structured_data, summary, answer, links, brand) are always present; modes you did not request are null.

All five result fields are always present; modes you did not request are null. The json mode result is returned as structured_data.

url

string

Final URL after redirects.

title

string | null

Page title.

metadata

object | null

Page metadata (description, OpenGraph fields, language, ...) as found by the scraper.

structured_data

object | null

Structured data matching your schema (when 'json' in modes).

summary

string | null

Markdown summary of the page (when 'summary' in modes).

answer

string | null

Markdown answer to your question (when 'qa' in modes).

links

object[] | null

All links found on the page, deduplicated, max 500 (when 'links' in modes).

Show child attributes

brand

object | null

Brand profile: domain, title, description, colors, logos, backdrops, socials, industries, key links and pages (when 'brand' in modes).

cost

number

Total cost in USD — sum of the selected modes (+$0.004 advanced proxy when a scrape runs).

Example:

0.01

response_time

string

Total processing time in seconds.

Example:

"3.42"

statusCode

integer

Example:

200

Scraper API - Multi-format content extraction Map API - Discover URLs on a website