added robots.txt and sitemap
This commit is contained in:
112
public/robots.txt
Normal file
112
public/robots.txt
Normal file
@@ -0,0 +1,112 @@
|
||||
# Block all bots by default
|
||||
User-agent: *
|
||||
Disallow: /
|
||||
|
||||
# Allow legitimate search engines (for discoverability)
|
||||
User-agent: Googlebot
|
||||
Allow: /
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: Bingbot
|
||||
Allow: /
|
||||
Crawl-delay: 15
|
||||
|
||||
User-agent: DuckDuckBot
|
||||
Allow: /
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: Slurp
|
||||
Allow: /
|
||||
Crawl-delay: 20
|
||||
|
||||
User-agent: Applebot
|
||||
Allow: /
|
||||
Crawl-delay: 15
|
||||
|
||||
User-agent: Brave
|
||||
Allow: /
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: StartPageBot
|
||||
Allow: /
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: Qwantbot
|
||||
Allow: /
|
||||
Crawl-delay: 15
|
||||
|
||||
# Block Chinese/Russian search engines
|
||||
User-agent: Baiduspider
|
||||
Disallow: /
|
||||
|
||||
User-agent: YandexBot
|
||||
Disallow: /
|
||||
|
||||
# Block SEO/analysis tools, AI and dataset crawlers, and other unwanted bots (these provide no value to this site)
|
||||
User-agent: AhrefsBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SemrushBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: MJ12bot
|
||||
Disallow: /
|
||||
|
||||
User-agent: DotBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: BLEXBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SiteAuditBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: MegaIndex
|
||||
Disallow: /
|
||||
|
||||
User-agent: SiteAuditBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: PetalBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: CCBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: GPTBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: ChatGPT-User
|
||||
Disallow: /
|
||||
|
||||
User-agent: Google-Extended
|
||||
Disallow: /
|
||||
|
||||
User-agent: Claude-Web
|
||||
Disallow: /
|
||||
|
||||
# Block social media crawlers (optional: these crawlers generate link previews, so blocking them disables previews when the site is shared)
|
||||
User-agent: facebookexternalhit
|
||||
Disallow: /
|
||||
|
||||
User-agent: Twitterbot
|
||||
Disallow: /
|
||||
|
||||
User-agent: LinkedInBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: WhatsApp
|
||||
Disallow: /
|
||||
|
||||
# Block archiving services
|
||||
User-agent: ia_archiver
|
||||
Disallow: /
|
||||
|
||||
User-agent: archive.org_bot
|
||||
Disallow: /
|
||||
|
||||
User-agent: WaybackMachine
|
||||
Disallow: /
|
||||
|
||||
# Sitemap location
|
||||
Sitemap: https://juchatz.com/sitemap.xml
|
||||
Reference in New Issue
Block a user