# ============================================================
# Tamil Selvan — tamilselvan.in
# robots.txt | Crawl Directives for Search Engines
#
# Format notes:
#   * One directive per line; "#" starts a comment that runs to
#     the end of the line, so directives must never share a line
#     with a preceding comment.
#   * A group is one or more "User-agent:" lines followed by its
#     rules. A crawler obeys only the most specific group that
#     matches it, falling back to "User-agent: *".
# ============================================================

# ─────────────────────────────────────────────
# ALLOW ALL MAJOR SEARCH ENGINE CRAWLERS
# ─────────────────────────────────────────────

# Google — Googlebot (web) + Googlebot-Image + Googlebot-News
# Google does not support the Crawl-delay directive, so none is
# set here; Googlebot's crawl rate is managed on Google's side.
User-agent: Googlebot
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: Googlebot-News
Allow: /

# Bing / Microsoft
User-agent: Bingbot
Allow: /
Crawl-delay: 3

User-agent: msnbot
Allow: /

# Yahoo
User-agent: Slurp
Allow: /
Crawl-delay: 3

# DuckDuckGo
User-agent: DuckDuckBot
Allow: /

# Yandex
User-agent: YandexBot
Allow: /
Crawl-delay: 3

# Baidu
User-agent: Baiduspider
Allow: /

# Apple
User-agent: Applebot
Allow: /

# ─────────────────────────────────────────────
# BLOCK AGGRESSIVE SEO SCRAPERS & BAD BOTS
# ─────────────────────────────────────────────
# robots.txt is advisory: these rules only stop crawlers that
# choose to honor them. Abusive bots must be blocked at the
# server or firewall level.

# SEO / backlink-analysis crawlers
User-agent: AhrefsBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: serpstatbot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: DataForSeoBot
Disallow: /

# AI / dataset crawlers
# NOTE(review): anthropic-ai and Claude-Web may be legacy tokens;
# Anthropic's current documentation names ClaudeBot. Confirm
# whether ClaudeBot should be added to this list.
User-agent: Bytespider
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: cohere-ai
Disallow: /

# Archivers
User-agent: ia_archiver
Disallow: /

# Email harvesters, link checkers and miscellaneous bad bots
User-agent: EmailCollector
Disallow: /

User-agent: EmailSiphon
Disallow: /

User-agent: WebBandit
Disallow: /

User-agent: EmailWolf
Disallow: /

User-agent: ExtractorPro
Disallow: /

User-agent: CopyRightCheck
Disallow: /

User-agent: Linkscan
Disallow: /

User-agent: BackDoorBot
Disallow: /

User-agent: MorningPaper
Disallow: /

# Offline site copiers
User-agent: TeleportPro
Disallow: /

User-agent: WebZip
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: HTTrack
Disallow: /

# ─────────────────────────────────────────────
# GLOBAL RULES (all other bots)
# ─────────────────────────────────────────────
User-agent: *
Allow: /

# No private or system paths to block on a personal portfolio site.
# Uncomment (inside the "User-agent: *" group above) if such
# paths are ever added:
# Disallow: /cgi-bin/
# Disallow: /tmp/
# Disallow: /private/

# ─────────────────────────────────────────────
# SITEMAP LOCATIONS
# ─────────────────────────────────────────────
# Sitemap is independent of any User-agent group and must be an
# absolute URL.
Sitemap: https://tamilselvan.in/sitemap.xml

# ─────────────────────────────────────────────
# CANONICAL HOST
# ─────────────────────────────────────────────
# The non-standard "Host:" directive has been removed. It is not
# part of the Robots Exclusion Protocol (RFC 9309), and Yandex —
# the only engine that used it — announced in 2018 that it no
# longer honors it. Signal the canonical host (tamilselvan.in)
# with server-side 301 redirects and rel="canonical" instead.

# ============================================================
# END OF robots.txt — tamilselvan.in
# ============================================================