# robots.txt for Q-Hub
# Last updated: 2025-02-06
# Notes:
# - Advanced directives for precision crawl management
# - Combines query handling, resource prioritisation, and aggressive bot control

# General Rules for All Bots
User-agent: *
# Block URLs with query parameters to prevent duplicate indexing
Disallow: /*?*

# Disallow internal/private sections
Disallow: /preview/
Disallow: /draft/
Disallow: /admin/
Disallow: /login/
Disallow: /cart/
Disallow: /checkout/
Disallow: /search/
Disallow: /404/

# Block unnecessary file types to save crawl budget
Disallow: /*.pdf$
Disallow: /*.doc$
Disallow: /*.xls$
Disallow: /*.zip$

# Allow necessary resources
Allow: /css/
Allow: /js/
Allow: /images/

# Specific Bot Rules - Block known aggressive or unnecessary bots
User-agent: AhrefsBot
Disallow: /

User-agent: SEMrushBot
Disallow: /
Crawl-delay: 5

User-agent: MJ12bot
Disallow: /
Crawl-delay: 10

User-agent: DotBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: MauiBot
Disallow: /

User-agent: MegaIndex
Disallow: /

User-agent: SeznamBot
Disallow: /

User-agent: Screaming Frog SEO Spider
Disallow: /

User-agent: SEOkicks-Robot
Disallow: /

User-agent: SEOBilityBot
Disallow: /

User-agent: SEODiver
Disallow: /

User-agent: SEOprofiler
Disallow: /

User-agent: SISTRIX Crawler
Disallow: /

User-agent: LinkpadBot
Disallow: /

User-agent: woorank
Disallow: /

User-agent: SiteAnalyzerBot
Disallow: /

User-agent: RankActiveLinkBot
Disallow: /

User-agent: RankSonicSiteAuditor
Disallow: /

User-agent: BacklinkCrawler
Disallow: /

User-agent: LinkChecker
Disallow: /

User-agent: LinkExaminer
Disallow: /

User-agent: Xenu Link Sleuth
Disallow: /

# Google-Specific Rules
User-agent: Googlebot
Allow: /
Disallow: /search/
Disallow: /*?*

# Bing-Specific Rules
User-agent: Bingbot
Allow: /
Disallow: /search/
Disallow: /*?*

# Query Parameter Optimisation
# Note: Clean-param is a Yandex-specific directive; Google and Bing ignore it
Clean-param: utm_source&utm_medium&utm_campaign&utm_content&utm_term&utm_id

# Advanced Techniques
# Keep JavaScript-rendered content crawlable
# (this second Googlebot group is merged by Google with the Googlebot rules above)
User-agent: Googlebot
Allow: /rendered-content/
# Block render-blocking files if applicable
Disallow: /render-blocking/

# Prevent discovery of sensitive files (as placed, these rules belong to the Googlebot group above)
Disallow: /wp-content/debug.log
Disallow: /error_log
Disallow: /readme.html
Disallow: /.git/
Disallow: /.env

# Bots that should crawl but avoid heavy resources
User-agent: DuckDuckBot
Disallow: /large-images/
Disallow: /assets/

# Allow crawling of static assets
# (robots.txt controls crawling, not caching; as placed, these rules extend the DuckDuckBot group above)
Allow: /*.css$
Allow: /*.js$
Allow: /*.jpg$
Allow: /*.png$

# Comments for Future Updates:
# - Regularly monitor server logs for unusual bot behaviour.
# - Update the blocked bots list based on crawler trends.

# Sitemap
Sitemap: https://www.q-hub.app/sitemap.xml
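
# Verification note: the rules above can be spot-checked with Python's standard-library
# urllib.robotparser. The snippet below is a minimal sketch; the example URLs are
# illustrative only, and the stdlib parser does not implement wildcard matching
# (e.g. /*?*), so treat the result as a rough sanity check, not a Google-accurate simulation.
#
#   from urllib.robotparser import RobotFileParser
#
#   rp = RobotFileParser()
#   rp.set_url("https://www.q-hub.app/robots.txt")
#   rp.read()
#   # A fully blocked bot should get False for any URL
#   print(rp.can_fetch("SEMrushBot", "https://www.q-hub.app/any-page/"))
#   # Googlebot is allowed on ordinary content such as images
#   print(rp.can_fetch("Googlebot", "https://www.q-hub.app/images/logo.png"))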