Антибот-конфиг для nginx
В последние пару лет усилилась активность различных ботов на моих сайтах, что заставляло старенький сервер страдать. В конце концов у меня лопнуло терпение, и я решил проблему хирургическим путем.
# Reject with 403 any request whose User-Agent header contains "Googlebot",
# "bingbot" or "YandexBot"; the `~*` operator makes the match case-insensitive.
# NOTE(review): these look like the crawlers of the major search engines, so
# this rule presumably stops those engines from fetching (and indexing) the
# site - confirm that this is intended before deploying.
if ($http_user_agent ~* (Googlebot|bingbot|YandexBot)) {
return 403;
}
# Bot blocklist, first part (entries "01h4x.com" .. "MolokaiBot").
# Rejects with 403 any request whose User-Agent header contains one of the
# listed substrings; `~*` makes the match case-insensitive, and spaces inside
# the pattern are escaped as `\ ` because the pattern is not quoted.
#
# The whole pattern MUST stay on one physical line: an unquoted nginx
# parameter ends at the first unescaped whitespace (including a newline), so a
# line break inside the alternation hands the regex engine a pattern with an
# unclosed "(" and nginx refuses to load the configuration.
# NOTE(review): nginx also limits the length of a single parameter, which is
# presumably why the full list is spread over several `if` blocks - keep that
# in mind when adding entries here.
if ($http_user_agent ~* (01h4x\.com|360Spider|404checker|404enemy|80legs|Abonti|Aboundex|Aboundexbot|Acunetix|ADmantX|adscanner|AdsTxtCrawlerTP|AfD\-Verbotsverfahren|AhrefsBot|AIBOT|AiHitBot|Aipbot|Alexibot|ALittle\ Client|Alligator|AllSubmitter|AlphaBot|Anarchie|Anarchy|Anarchy99|Ankit|Anthill|anthropic\-ai|Apexoo|archive\.org_bot|arquivo\-web\-crawler|arquivo\.pt|Aspiegel|ASPSeek|Asterias|Atomseobot|Attach|autoemailspider|awario\.com|AwarioBot|AwarioRssBot|AwarioSmartBot|BackDoorBot|Backlink\-Ceck|backlink\-check|BacklinkCrawler|BackStreet|BackWeb|Badass|Bandit|Barkrowler|BatchFTP|Battleztar\ Bazinga|BBBike|BDCbot|BDFetch|BetaBot|Bigfoot|Bitacle|Black\ Hole|Blackboard|BlackWidow|BLEXBot|Blow|BlowFish|Boardreader|Bolt|BotALot|Brandprotect|Brandwatch|Buck|Buddy|BuiltBotTough|BuiltWith|Bullseye|BunnySlippers|BuzzSumo|Bytespider|cah\.io\.community|Calculon|CATExplorador|CazoodleBot|CCBot|Cegbfeieh|CensysInspect|ChatGPT\-User|check1\.exe|CheeseBot|CherryPicker|CheTeam|ChinaClaw|Chlooe|Citoid|Claritybot|clark\-crawler|Cliqzbot|Cloud\ mapping|coccocbot|Cocolyzebot|CODE87|Cogentbot|cognitiveseo|cohere\-ai|Collector|com\.plumanalytics|Copier|CopyRightCheck|Copyscape|Cosmos|Craftbot|crawl\.sogou\.com|crawler\.feedback|crawler4j|Crawling\ at\ Home\ Project|CrazyWebCrawler|Crescent|CrunchBot|CSHttp|Curious|Custo|CyotekWebCopy|DatabaseDriverMysqli|DataCha0s|dataforseo\.com|dataforseobot|DBLBot|demandbase\-bot|Demon|Deusu|Devil|Digincore|DigitalPebble|DIIbot|Dirbuster|Disco|Discobot|Discoverybot|Dispatch|DittoSpyder|DnBCrawler\-Analytics|DnyzBot|DomainAppender|DomainCrawler|Domains\ Project|DomainSigmaCrawler|domainsproject\.org|DomainStatsBot|DomCopBot|Dotbot|Download\ Wonder|Dragonfly|Drip|DSearch|DTS\ Agent|EasyDL|Ebingbong|eCatch|ECCP/1\.0|Ecxi|EirGrabber|EMail\ Siphon|EMail\ Wolf|EroCrawler|evc\-batch|Evil|Exabot|Express\ WebPictures|ExtLinksBot|Extractor|ExtractorPro|Extreme\ Picture\ Finder|EyeNetIE|Ezooms|FacebookBot|facebookscraper|FDM|FemtosearchBot|FHscan|Fimap|FlashGet|Flunky|Foobot|Freeuploader|FrontPage|Fuzz|FyberSpider|Fyrebot|G\-i\-g\-a\-b\-o\-t|GalaxyBot|Genieo|GermCrawler|Getintent|GetRight|GetWeb|Gigabot|Go\-Ahead\-Got\-It|Go!Zilla|gopher|Gotit|GoZilla|GPTBot|Grabber|GrabNet|Grafula|GrapeFX|GrapeshotCrawler|GridBot|GT::WWW|Haansoft|HaosouSpider|Harvest|Havij|HEADMasterSEO|Heritrix|heritrix|Hloader|HMView|HonoluluBot|HTMLparser|HTTP::Lite|HTTrack|Humanlinks|HybridBot|Iblog|Id\-search|IDBot|IDBTE4M|IlseBot|Image\ Fetch|Image\ Sucker|imagesift\.com|ImagesiftBot|IndeedBot|Indy\ Library|InfoNaviRobot|Information\ Security\ Team\ InfraSec\ Scanner|InfoTekies|InfraSec\ Scanner|instabid|Intelliseek|InterGET|Internet\ Ninja|InternetMeasurement|InternetSeer|internetVista\ monitor|ips\-agent|Iria|IRLbot|isitwp\.com|Iskanie|IstellaBot|iubenda\-radar|JamesBOT|Jbrofuzz|JennyBot|JetCar|Jetty|JikeSpider|JOC\ Web\ Spider|Joomla|Jorgee|JustView|Jyxobot|Kenjin\ Spider|Keybot\ Translation\-Search\-Machine|Keyword\ Density|Kinza|Kozmosbot|Lanshanbot|Larbin|Leap|LeechFTP|LeechGet|LexiBot|Lftp|LibWeb|Libwhisker|LieBaoFast|Lightspeedsystems|Likse|Linkbot|linkdexbot|LinkextractorPro|linkfluence|LinkpadBot|LinkScan|LinksManager|LinkWalker|LinqiaMetadataDownloaderBot|LinqiaRSSBot|LinqiaScrapeBot|Lipperhey|Lipperhey\ Spider|Litemage_walker|Lmspider|LNSpiderguy|Ltx71|lwp\-request|lwp\-trivial|LWP::Simple|Mag\-Net|Magnet|magpie\-crawler|Majestic\ SEO|Majestic\-SEO|Majestic12|MarkMonitor|MarkWatch|Mass\ Downloader|Masscan|Mata\ Hari|MauiBot|Mb2345Browser|MeanPath\ Bot|Meanpathbot|meanpathbot|Mediatoolkitbot|mediawords|MegaIndex\.ru|Metauri|MFC_Tear_Sample|MicroMessenger|MIDown\ tool|MIIxpc|Minefield|Mister\ PiX|MJ12bot|Mojeek|Mojolicious|MolokaiBot)) {
    return 403;
}
# Bot blocklist, second part (entries "Morfeus Fucking Scanner" .. "ZyBorg").
# Rejects with 403 any request whose User-Agent header contains one of the
# listed substrings; `~*` makes the match case-insensitive, and spaces inside
# the pattern are escaped as `\ ` because the pattern is not quoted.
#
# The whole pattern MUST stay on one physical line: an unquoted nginx
# parameter ends at the first unescaped whitespace (including a newline), so a
# line break inside the alternation hands the regex engine a pattern with an
# unclosed "(" and nginx refuses to load the configuration.
# NOTE(review): nginx also limits the length of a single parameter, which is
# presumably why the full list is spread over several `if` blocks - keep that
# in mind when adding entries here.
if ($http_user_agent ~* (Morfeus\ Fucking\ Scanner|Mozlila|Mr\.4x3|MSFrontPage|MSIECrawler|Msrabot|MTRobot|muhstik\-scan|Musobot|Name\ Intelligence|Nameprotect|Navroad|NearSite|Needle|Nessus|Net\ Vampire|NetAnts|Netcraft|netEstate\ NE\ Crawler|NetLyzer|NetMechanic|NetSpider|Nettrack|Netvibes|NetZIP|NextGenSearchBot|Nibbler|NICErsPRO|Niki\-bot|Nikto|NimbleCrawler|Nimbostratus|Ninja|Nmap|NPbot|Nuclei|Nutch|oBot|Octopus|Offline\ Explorer|Offline\ Navigator|omgili|OnCrawl|openai|openai\.com|Openfind|OpenLinkProfiler|OpenVAS|Openvas|OrangeBot|OrangeSpider|OutclicksBot|OutfoxBot|Page\ Analyzer|page\ scorer|PageAnalyzer|PageGrabber|PageScorer|PageThing\.com|Pandalytics|Panscient|Papa\ Foto|Pavuk|pcBrowser|PECL::HTTP|PeoplePal|Petalbot|PHPCrawl|Pi\-Monster|Picscout|Picsearch|PictureFinder|Piepmatz|Pimonster|Pixray|PleaseCrawl|plumanalytics|Pockey|POE\-Component\-Client\-HTTP|polaris\ version|probe\-image\-size|Probethenet|ProPowerBot|ProWebWalker|Proximic|Psbot|Pu_iN|Pump|PxBroker|PyCurl|QueryN\ Metasearch|Quick\-Crawler|Rainbot|RankActive|RankActiveLinkBot|RankFlex|RankingBot|RankingBot2|Rankivabot|RankurBot|Re\-re|RealDownload|Reaper|RebelMouse|Recorder|RedesScrapy|ReGet|RepoMonkey|Ripper|ripz|RocketCrawler|Rogerbot|RSSingBot|s1z\.ru|SalesIntelligent|satoristudio\.net|SBIder|scalaj\-http|scan\.lol|ScanAlert|Scanbot|ScoutJet|Scrapy|Screaming|ScreenerBot|ScrepyBot|Searchestate|SearchmetricsBot|Seekport|SeekportBot|SemanticJuice|Semrush|SemrushBot|SentiBot|SenutoBot|seobility|SeobilityBot|seocompany\.store|SEOkicks|SEOkicks\-Robot|SEOlyticsCrawler|Seomoz|SEOprofiler|seoscanners|SeoSiteCheckup|seostar|SEOstats|serpstatbot|sexsearcher|Shodan|Siphon|SISTRIX|Site\ Sucker|Sitebeam|sitechecker\.pro|SiteCheckerBotCrawler|SiteExplorer|Siteimprove|SiteLockSpider|siteripz|SiteSnagger|SiteSucker|Sitevigil|SlySearch|SmartDownload|SMTBot|Snake|Snapbot|Snoopy|SocialRankIOBot|Sociscraper|Sogou\ web\ spider|sogouspider|Sosospider|Sottopop|sp_auditbot|SpaceBison|Spammen|SpankBot|Spanner|Spbot|Spinn3r|SputnikBot|spyfu|Sqlmap|Sqlworm|Sqworm|Steeler|Stripper|Sucker|Sucuri|SuperBot|SuperHTTP|Surfbot|SurveyBot|Suzuran|Swiftbot|sysscan|Szukacz|T0PHackTeam|T8Abot|tAkeOut|Teleport|TeleportPro|Telesoft|Telesphoreo|Telesphorep|The\ Intraformant|TheNomad|Thumbor|TightTwatBot|TinyTestBot|Titan|Toata|Toweyabot|Tracemyfile|Trendiction|trendiction\.com|trendiction\.de|Trendictionbot|True_Robot|Turingos|Turnitin|TurnitinBot|TwengaBot|Twice|Typhoeus|ubermetrics\-technologies\.com|UnisterBot|Upflow|URLy\ Warning|URLy\.Warning|V\-BOT|Vacuum|Vagabondo|VB\ Project|VCI|VelenPublicWebCrawler|VeriCiteCrawler|VidibleScraper|Virusdie|VoidEYE|Voil|Voltron|voyagerx\.com|Wallpapers|Wallpapers/3\.0|WallpapersHD|WASALive\-Bot|WBSearchBot|Web\ Auto|Web\ Collage|Web\ Enhancer|Web\ Fetch|Web\ Fuck|Web\ Pix|Web\ Sauger|Web\ Sucker|Webalta|WebAuto|WebBandit|WebCollage|WebCopier|WEBDAV|WebEnhancer|WebFetch|WebFuck|webgains\-bot|WebGo\ IS|WebImageCollector|WebLeacher|WebmasterWorldForumBot|webmeup\-crawler|WebPix|webpros\.com|webprosbot|WebReaper|WebSauger|Webshag|Website\ Quester|WebsiteExtractor|WebsiteQuester|Webster|WebStripper|WebSucker|WebWhacker|WebZIP|WeSEE|Whack|Whacker|Whatweb|Who\.is\ Bot|Widow|WinHTTrack|WiseGuys\ Robot|WISENutbot|Wonderbot|Woobot|Wotbox|Wprecon|WPScan|WWW\-Collector\-E|WWW\-Mechanize|WWW::Mechanize|WWWOFFLE|Xaldon\ WebSpider|Xaldon_WebSpider|Xenu|xpymep1\.exe|YaK|YoudaoBot|Zade|Zauba|zauba\.io|Zermelo|Zeus|zgrab|Zitebot|ZmEu|ZoomBot|ZoominfoBot|ZumBot|ZyBorg)) {
    return 403;
}
# Additional blocklist: reject with 403 any request whose User-Agent header
# contains one of the substrings below (`~*` = case-insensitive regex match).
# The pattern is unquoted, so it has to stay on a single line with no
# unescaped whitespace.
if ($http_user_agent ~* (Baiduspider|SERankingBacklinksBot|BacklinksExtendedBot|Mediapartners\-Google|SemrushBot|Adsbot|AhrefsBot|MJ12bot|Riddler|aiHitBot|trovitBot|Detectify|BLEXBot|LinkpadBot|dotbot|FlipboardProxy|PetalBot|Facebot|Twitterbot|Mediatoolkitbot|ZoominfoBot|MegaIndex|AppEngine\-Google|ZyBorg|Zen_Bot|Amazonbot|Bytespider|ClaudeBot|ImagesiftBot|GPTBot|meta\-externalagent|facebookexternalhit|Barkrowler)) {
return 403;
}
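Эти строчки надо сохранить в файл antibot.conf и подключить в конфигурацию сайта (внутри блока server) директивой include antibot.conf; — именно с точкой с запятой в конце. Пришлось разбить список на части, потому что nginx ограничивает длину одного параметра в конфигурации, а регулярное выражение целиком — это один параметр. Каждое регулярное выражение должно быть записано одной строкой, без переносов: перенос внутри выражения разрывает параметр, и nginx не загрузит конфигурацию.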
Данные взяты из рекомендаций ФСТЭК для государственных сайтов, а также из личных наблюдений за логами. Список, скорее всего, будет пополняться.