// src/browser.ts

import { platform } from "os";
import { configureUndiciDispatcher } from "./tls";

// Detect the host OS so the advertised User-Agent / client-hint platform
// lines up with the machine actually making the TLS connection.
// macOS and Linux get their own profile; every other platform — and any
// failure while querying the platform — is reported as Windows.
const getOSProfile = () => {
	const windowsProfile = { os: 'Windows', platformHeader: '"Windows"' };

	try {
		switch (platform()) {
			case 'darwin':
				return { os: 'macOS', platformHeader: '"macOS"' };
			case 'linux':
				return { os: 'Linux', platformHeader: '"Linux"' };
			default:
				return windowsProfile;
		}
	} catch {
		return windowsProfile;
	}
};

// Build the browser identity (User-Agent string plus the pieces needed for
// the sec-ch-ua client hints) for the given OS profile.
// The Chrome version and GREASE brand are fixed; only the platform token
// inside the User-Agent varies with `osProfile.os`, and any value other than
// 'macOS' or 'Linux' yields the Windows token.
const getBrowserHeaders = (osProfile: { os: string, platformHeader: string }) => {
	const chromeVersion = "131";
	const fullVersion = "131.0.0.0";
	const greaseBrand = '"Not_A Brand";v="24"';

	// The parenthesised system-information part of the User-Agent.
	let platformToken: string;
	switch (osProfile.os) {
		case 'macOS':
			platformToken = 'Macintosh; Intel Mac OS X 10_15_7';
			break;
		case 'Linux':
			platformToken = 'X11; Linux x86_64';
			break;
		default:
			platformToken = 'Windows NT 10.0; Win64; x64';
			break;
	}

	const ua = `Mozilla/5.0 (${platformToken}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${fullVersion} Safari/537.36`;
	const { platformHeader } = osProfile;

	return { ua, chromeVersion, greaseBrand, platformHeader };
};

/**
 * Builds the navigation-style request headers shared by all requests, in a
 * fixed insertion order intended to mimic Chrome.
 *
 * Without a `referrer` the request is presented as a direct navigation
 * (`sec-fetch-site: none`). With one, a `referer` header is appended and
 * `sec-fetch-site` becomes `same-origin` when the referrer and target share
 * an origin, otherwise `cross-site` (the `same-site` case is not
 * distinguished).
 *
 * @param url - Absolute URL of the request target.
 * @param currentProfile - Browser identity used for the User-Agent and client hints.
 * @param referrer - Optional absolute URL of the referring page.
 * @returns A new, mutable header map.
 * @throws TypeError if `url` or `referrer` is not a valid absolute URL.
 */
function getCommonHeaders(url: string, currentProfile: ReturnType<typeof getBrowserHeaders>, referrer?: string) {
	const targetUrl = new URL(url);

	// 1. Defined in specific order mimicking Chrome
	const headers: Record<string, string> = {
		// `host` (not `hostname`) keeps a non-default port, which the Host header
		// must carry; default ports are already omitted by the URL parser.
		'host': targetUrl.host,
		'connection': 'keep-alive',
		'sec-ch-ua': `"Google Chrome";v="${currentProfile.chromeVersion}", "Chromium";v="${currentProfile.chromeVersion}", ${currentProfile.greaseBrand}`,
		'sec-ch-ua-mobile': '?0',
		'sec-ch-ua-platform': currentProfile.platformHeader,
		'upgrade-insecure-requests': '1',
		'user-agent': currentProfile.ua,
		'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
		'sec-fetch-site': 'none',
		'sec-fetch-mode': 'navigate',
		'sec-fetch-user': '?1',
		'sec-fetch-dest': 'document',
		'accept-language': 'en-US,en;q=0.9',
		'priority': 'u=0, i',
	};

	if (referrer) {
		headers['referer'] = referrer;
		// With a referer present the request is no longer user-initiated from
		// scratch, so sec-fetch-site reflects the relation between both origins.
		const refUrl = new URL(referrer);
		headers['sec-fetch-site'] = refUrl.origin === targetUrl.origin ? 'same-origin' : 'cross-site';
	}

	return headers;
}

/**
 * Builds request headers for fetching an image as a sub-resource.
 *
 * Starts from the common header set for the detected OS profile and then
 * overrides the fields that differ for an image request: `accept`,
 * `sec-fetch-dest`, `sec-fetch-mode` and `priority`.
 *
 * @param url - Absolute URL of the image.
 * @param originUrl - Optional URL of the page embedding the image; when given
 *   it is passed on as the referrer for the common headers.
 * @returns The header map for the image request.
 */
export function getImageHeaders(url: string, originUrl?: string) {
	const profile = getBrowserHeaders(getOSProfile());
	const headers = getCommonHeaders(url, profile, originUrl);

	// These two headers are only sent on navigation requests. Keeping them next
	// to `sec-fetch-dest: image` would produce a header set no real browser
	// emits, so they are dropped from the freshly built map.
	delete headers['upgrade-insecure-requests'];
	delete headers['sec-fetch-user'];

	return {
		...headers,
		'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
		'sec-fetch-dest': 'image',
		'sec-fetch-mode': 'no-cors',
		'priority': 'u=1, i'
	};
}

/**
 * Fetches a page with browser-like headers and splits the markup into its
 * `<head>` and `<body>` contents.
 *
 * @param url - Absolute URL of the page to fetch.
 * @param signal - Abort signal forwarded to `fetch`.
 * @param warn - Callback that receives a human-readable message before an
 *   error is thrown for a non-successful response.
 * @returns The full HTML, the inner content of the first `<head>` element
 *   (empty string when there is none), and the inner content of `<body>`
 *   (the full HTML when no `<body>` element is found).
 * @throws Error when the response status is 403 or 429 ("Access denied: …")
 *   or any other non-2xx status ("Failed to fetch website: …"). Errors raised
 *   by `fetch` itself (network failure, abort) propagate unchanged.
 */
export const fetchHTML = async (
	url: string,
	signal: AbortSignal,
	warn: (msg: string) => void
): Promise<{ html: string; head: string; body: string }> => {
	// Ensure advanced TLS is set up if possible
	await configureUndiciDispatcher();

	const profile = getBrowserHeaders(getOSProfile());
	const headers = getCommonHeaders(url, profile);

	const response = await fetch(url, {
		method: "GET",
		signal,
		headers,
		redirect: 'follow'
	});

	if (!response.ok) {
		// The body is never read on the error paths; cancel it so the underlying
		// connection is released instead of lingering until garbage collection.
		// A failure to cancel must not mask the real error reported below.
		await response.body?.cancel().catch(() => undefined);

		if (response.status === 403 || response.status === 429) {
			// 403 Forbidden is the classic "Bot Detected" signal
			warn(`Access denied (${response.status}) for ${url}. The site may be blocking automated access or requires JavaScript.`);
			throw new Error(`Access denied: ${response.status} ${response.statusText}`);
		}

		// statusText is empty over HTTP/2, so always include the numeric status.
		const message = `Failed to fetch website: ${`${response.status} ${response.statusText}`.trim()}`;
		warn(message);
		throw new Error(message);
	}

	const html = await response.text();

	// `(?:\s[^>]*)?` anchors the tag name so that e.g. `<header>` is not
	// mistaken for `<head>`. The head match is lazy (first closing tag), the
	// body match is greedy (last closing tag).
	const head = html.match(/<head(?:\s[^>]*)?>([\s\S]*?)<\/head>/i)?.[1] ?? "";
	const bodyMatch = html.match(/<body(?:\s[^>]*)?>([\s\S]*)<\/body>/i);
	const body = bodyMatch ? bodyMatch[1] : html;

	return { html, head, body };
};