// Forked from danielsig/visit-website
// src/browser.ts
import { platform } from "os";
import { configureUndiciDispatcher } from "./tls";
/**
 * Detects the host operating system so that the User-Agent we advertise
 * lines up with the TLS fingerprint of the machine actually making the request.
 *
 * @returns The display name of the OS and the quoted value to use for the
 *   `sec-ch-ua-platform` header. Anything that is not macOS or Linux —
 *   including a failed platform lookup — is reported as Windows.
 */
const getOSProfile = () => {
  let detected: string;
  try {
    detected = platform();
  } catch {
    // An empty name matches no case below and falls through to Windows.
    detected = '';
  }

  switch (detected) {
    case 'darwin':
      return { os: 'macOS', platformHeader: '"macOS"' };
    case 'linux':
      return { os: 'Linux', platformHeader: '"Linux"' };
    default:
      return { os: 'Windows', platformHeader: '"Windows"' };
  }
};
/**
 * Builds the Chrome identity (User-Agent plus client-hint fragments) that
 * corresponds to the given OS profile.
 *
 * @param osProfile - OS name (`'macOS'`, `'Linux'`, anything else is treated
 *   as Windows) and the pre-quoted `sec-ch-ua-platform` value.
 * @returns The User-Agent string, the Chrome major version, the GREASE brand
 *   entry for `sec-ch-ua`, and the platform header value passed through.
 */
const getBrowserHeaders = (osProfile: { os: string, platformHeader: string }) => {
  const fullVersion = "131.0.0.0";

  let userAgent: string;
  switch (osProfile.os) {
    case 'macOS':
      userAgent = `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${fullVersion} Safari/537.36`;
      break;
    case 'Linux':
      userAgent = `Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${fullVersion} Safari/537.36`;
      break;
    default:
      // Unknown OS names get the Windows User-Agent.
      userAgent = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${fullVersion} Safari/537.36`;
      break;
  }

  return {
    ua: userAgent,
    chromeVersion: "131",
    greaseBrand: '"Not_A Brand";v="24"',
    platformHeader: osProfile.platformHeader,
  };
};
/**
 * Builds the header set Chrome sends for a top-level document navigation.
 *
 * @param url - Absolute URL being requested.
 * @param currentProfile - Browser identity produced by `getBrowserHeaders`.
 * @param referrer - Optional absolute URL of the referring page. When given it
 *   is sent as `referer` and `sec-fetch-site` is derived from it.
 * @returns A fresh header map; keys are inserted in the order Chrome emits them.
 * @throws TypeError if `url` or `referrer` is not a valid absolute URL.
 */
function getCommonHeaders(url: string, currentProfile: ReturnType<typeof getBrowserHeaders>, referrer?: string) {
  const targetUrl = new URL(url);
  const brandVersion = currentProfile.chromeVersion;

  // 1. Defined in specific order mimicking Chrome
  const headers: Record<string, string> = {
    // `host` (unlike `hostname`) keeps a non-default port, which the Host
    // header must carry for the request to reach the right virtual host.
    'host': targetUrl.host,
    'connection': 'keep-alive',
    'sec-ch-ua': `"Google Chrome";v="${brandVersion}", "Chromium";v="${brandVersion}", ${currentProfile.greaseBrand}`,
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': currentProfile.platformHeader,
    'upgrade-insecure-requests': '1',
    'user-agent': currentProfile.ua,
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    // 'none' is the value for a request with no referring page.
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'accept-language': 'en-US,en;q=0.9',
    'priority': 'u=0, i',
  };

  if (referrer) {
    headers['referer'] = referrer;
    // With a referring page, sec-fetch-site reflects how its origin relates to
    // the target. Only exact-origin matches are recognised here; everything
    // else (including same-site subdomains) is reported as 'cross-site'.
    const sameOrigin = new URL(referrer).origin === targetUrl.origin;
    headers['sec-fetch-site'] = sameOrigin ? 'same-origin' : 'cross-site';
  }

  return headers;
}
/**
 * Builds the header set for fetching an image as a page subresource
 * (`sec-fetch-dest: image`, `sec-fetch-mode: no-cors`).
 *
 * @param url - Absolute URL of the image.
 * @param originUrl - Optional URL of the page that embeds the image. When
 *   given it is sent as `referer` and determines `sec-fetch-site`.
 * @returns The common browser headers with image-specific `accept`,
 *   `sec-fetch-*` and `priority` values.
 */
export function getImageHeaders(url: string, originUrl?: string) {
  const profile = getBrowserHeaders(getOSProfile());

  // `upgrade-insecure-requests` and `sec-fetch-user` only accompany document
  // navigations. Sending them together with `sec-fetch-dest: image` is a
  // combination a real browser does not produce, so they are dropped here.
  const {
    'upgrade-insecure-requests': _upgradeInsecureRequests,
    'sec-fetch-user': _secFetchUser,
    ...headers
  } = getCommonHeaders(url, profile, originUrl);

  return {
    ...headers,
    'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
    'sec-fetch-dest': 'image',
    'sec-fetch-mode': 'no-cors',
    'priority': 'u=1, i'
  };
}
/**
 * Downloads a page using Chrome-like navigation headers and splits the markup
 * into its `<head>` and `<body>` contents.
 *
 * @param url - Absolute URL of the page to download.
 * @param signal - Abort signal forwarded to `fetch`.
 * @param warn - Receives a human-readable message right before an HTTP
 *   failure is thrown.
 * @returns `html` is the full response text; `head` is the inner content of
 *   the first `<head>` element (empty string when there is none); `body` is
 *   the inner content of `<body>`, or the whole document when no body element
 *   is found.
 * @throws Error with an "Access denied: …" message for 403/429 responses and
 *   a "Failed to fetch website: …" message for any other non-2xx response.
 */
export const fetchHTML = async (url: string, signal: AbortSignal, warn: (msg: string) => void) => {
  // Ensure advanced TLS is set up if possible
  await configureUndiciDispatcher();
  const profile = getBrowserHeaders(getOSProfile());
  const headers = getCommonHeaders(url, profile);
  const response = await fetch(url, {
    method: "GET",
    signal,
    headers,
    redirect: 'follow'
  });

  if (!response.ok) {
    // The body is not going to be read on this path; cancel it so the
    // underlying connection is released instead of being held open.
    await response.body?.cancel().catch(() => undefined);

    if (response.status === 403 || response.status === 429) {
      // 403 Forbidden is the classic "Bot Detected" signal
      warn(`Access denied (${response.status}) for ${url}. The site may be blocking automated access or requires JavaScript.`);
      throw new Error(`Access denied: ${response.status} ${response.statusText}`);
    }

    // statusText is frequently empty, so always include the numeric status.
    const statusLine = `${response.status} ${response.statusText}`.trim();
    warn(`Failed to fetch website: ${statusLine}`);
    throw new Error(`Failed to fetch website: ${statusLine}`);
  }

  const html = await response.text();
  // `\b` keeps `<head` from matching `<header …>`.
  const head = html.match(/<head\b[^>]*>([\s\S]*?)<\/head>/i)?.[1] ?? "";
  // Greedy on purpose: capture up to the last `</body>` in the document.
  const body = html.match(/<body\b[^>]*>([\s\S]*)<\/body>/i)?.[1] ?? html;
  return { html, head, body };
};