# robots.txt — Nissin Foods Company Limited
# Corporate and investor relations website (Umbraco CMS)
# https://www.nissingroup.com.hk
#
# NOTE: robots.txt is parsed line by line. Each directive must sit on its own
# line; a "#" starts a comment that runs to the end of that line.

# -----------------------------------------------------------------------------
# Default rules (all crawlers)
# -----------------------------------------------------------------------------
User-agent: *
Allow: /

# CMS, backoffice, and non-public application paths
Disallow: /umbraco/
Disallow: /App_Plugins/
Disallow: /App_Data/
Disallow: /install/

# Search result pages (avoid indexing duplicate or thin content)
Disallow: /search?

# -----------------------------------------------------------------------------
# Major search engines
# (Explicit groups for clarity; same public-site rules apply.)
# A crawler that matches a named group ignores the "*" group above, so the
# admin-path and search rules are repeated here on purpose.
# -----------------------------------------------------------------------------
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Video
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: Applebot
Allow: /
Disallow: /umbraco/
Disallow: /App_Plugins/
Disallow: /App_Data/
Disallow: /install/
Disallow: /search?

# -----------------------------------------------------------------------------
# AI assistants and answer engines
# Public investor-facing content may be crawled and cited.
# Admin paths remain blocked for all agents below.
# -----------------------------------------------------------------------------
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Google-Extended
User-agent: Bytespider
User-agent: Amazonbot
User-agent: meta-externalagent
User-agent: FacebookBot
User-agent: Applebot-Extended
User-agent: cohere-ai
User-agent: CCBot
Allow: /
Disallow: /umbraco/
Disallow: /App_Plugins/
Disallow: /App_Data/
Disallow: /install/
Disallow: /search?