From dfce050dd92155a40e9a0134294427449df7c87a Mon Sep 17 00:00:00 2001
From: Roi Feng <37480123+Rayzggz@users.noreply.github.com>
Date: Sun, 9 Mar 2025 22:08:06 -0400
Subject: [PATCH] feat: Verify Bot

---
 config_example/rules/VerifyBot.yml |  6 +++
 internal/check/VerifyBot.go        | 92 ++++++++++++++++++++++++++++++
 internal/config/config.go          | 37 ++++++++----
 internal/dataType/type.go          | 11 ++++
 internal/server/checker.go         |  1 +
 5 files changed, 137 insertions(+), 10 deletions(-)
 create mode 100644 config_example/rules/VerifyBot.yml
 create mode 100644 internal/check/VerifyBot.go

diff --git a/config_example/rules/VerifyBot.yml b/config_example/rules/VerifyBot.yml
new file mode 100644
index 0000000..d20da9f
--- /dev/null
+++ b/config_example/rules/VerifyBot.yml
@@ -0,0 +1,6 @@
+verify_google_bot: true
+verify_bing_bot: true
+verify_baidu_bot: true
+verify_yandex_bot: true
+verify_sogou_bot: true
+verify_apple_bot: true
\ No newline at end of file
diff --git a/internal/check/VerifyBot.go b/internal/check/VerifyBot.go
new file mode 100644
index 0000000..0d6dc7a
--- /dev/null
+++ b/internal/check/VerifyBot.go
@@ -0,0 +1,92 @@
+package check
+
+import (
+	"log"
+	"net"
+	"server_torii/internal/action"
+	"server_torii/internal/config"
+	"server_torii/internal/dataType"
+	"strings"
+)
+
+// VerifyBot checks that a request whose User-Agent claims to be a known
+// search-engine crawler really comes from that crawler. It uses
+// forward-confirmed reverse DNS: a PTR name of the remote IP must belong to
+// one of the crawler's domains, and that name must resolve back to the same IP.
+//
+// Requests whose User-Agent matches no enabled crawler rule get
+// action.Continue. A verified crawler gets action.Done; a failed verification
+// gets action.Done with the code "403".
+func VerifyBot(reqData dataType.UserRequest, ruleSet *config.RuleSet, decision *action.Decision) {
+	if ruleSet == nil || ruleSet.VerifyBotRule == nil {
+		// Nothing is configured, so there is nothing to verify.
+		decision.Set(action.Continue)
+		return
+	}
+	rule := ruleSet.VerifyBotRule
+	ua := strings.ToLower(reqData.UserAgent)
+
+	var expectedRDNS []string
+	switch {
+	case rule.VerifyGoogleBot && strings.Contains(ua, "googlebot"):
+		expectedRDNS = []string{"googlebot.com", "google.com", "googleusercontent.com"}
+	case rule.VerifyBingBot && strings.Contains(ua, "bingbot"):
+		expectedRDNS = []string{"search.msn.com"}
+	case rule.VerifyBaiduBot && strings.Contains(ua, "baiduspider"):
+		expectedRDNS = []string{"baidu.com", "baidu.jp"}
+	case rule.VerifyYandexBot && strings.Contains(ua, "yandex.com/bots"):
+		expectedRDNS = []string{"yandex.com", "yandex.ru", "yandex.net"}
+	case rule.VerifySogouBot && strings.Contains(ua, "sogou web spider"):
+		expectedRDNS = []string{"sogou.com"}
+	case rule.VerifyAppleBot && strings.Contains(ua, "applebot"):
+		expectedRDNS = []string{"apple.com"}
+	default:
+		decision.Set(action.Continue)
+		return
+	}
+
+	names, err := net.LookupAddr(reqData.RemoteIP)
+	if err != nil {
+		log.Printf("VerifyBot: LookupAddr failed for %s: %v", reqData.RemoteIP, err)
+		decision.SetCode(action.Done, []byte("403"))
+		return
+	}
+
+	// Parsed form lets textual variants of the same address compare equal.
+	remoteIP := net.ParseIP(reqData.RemoteIP)
+	for _, name := range names {
+		if !hostInDomains(name, expectedRDNS) {
+			continue
+		}
+		// Forward-confirm: the PTR name must resolve back to the client IP.
+		ips, err := net.LookupIP(name)
+		if err != nil {
+			// Another PTR name may still confirm the client, so keep going.
+			log.Printf("VerifyBot: LookupIP failed for %s: %v", name, err)
+			continue
+		}
+		for _, ip := range ips {
+			if ip.Equal(remoteIP) || ip.String() == reqData.RemoteIP {
+				decision.Set(action.Done)
+				return
+			}
+		}
+	}
+
+	log.Printf("VerifyBot: rDNS verification failed for %s (PTR names: %q)", reqData.RemoteIP, names)
+	decision.SetCode(action.Done, []byte("403"))
+}
+
+// hostInDomains reports whether host equals one of domains or is a subdomain
+// of one. The comparison ignores case and a trailing dot on host. A substring
+// test is not enough here: it would also accept names such as
+// "googlebot.com.evil.example".
+func hostInDomains(host string, domains []string) bool {
+	host = strings.ToLower(strings.TrimSuffix(host, "."))
+	for _, domain := range domains {
+		if host == domain || strings.HasSuffix(host, "."+domain) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/config/config.go b/internal/config/config.go
index 598b60f..0ddf1fe 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -50,21 +50,23 @@ func LoadMainConfig(basePath string) (*MainConfig, error) {
 
 // RuleSet stores all rules
 type RuleSet struct {
-	IPAllowTrie  *dataType.TrieNode
-	IPBlockTrie  *dataType.TrieNode
-	URLAllowList *dataType.URLRuleList
-	URLBlockList *dataType.URLRuleList
-	CAPTCHARule  *dataType.CaptchaRule
+	IPAllowTrie   *dataType.TrieNode
+	IPBlockTrie   *dataType.TrieNode
+	URLAllowList  *dataType.URLRuleList
+	URLBlockList  *dataType.URLRuleList
+	CAPTCHARule   *dataType.CaptchaRule
+	VerifyBotRule *dataType.VerifyBotRule
 }
 
 // LoadRules Load all rules from the specified path
 func LoadRules(rulePath string) (*RuleSet, error) {
 	rs := RuleSet{
-		IPAllowTrie:  &dataType.TrieNode{},
-		IPBlockTrie:  &dataType.TrieNode{},
-		URLAllowList: &dataType.URLRuleList{},
-		URLBlockList: &dataType.URLRuleList{},
-		CAPTCHARule:  &dataType.CaptchaRule{},
+		IPAllowTrie:   &dataType.TrieNode{},
+		IPBlockTrie:   &dataType.TrieNode{},
+		URLAllowList:  &dataType.URLRuleList{},
+		URLBlockList:  &dataType.URLRuleList{},
+		CAPTCHARule:   &dataType.CaptchaRule{},
+		VerifyBotRule: &dataType.VerifyBotRule{},
 	}
 
 	// Load IP Allow List
@@ -97,6 +99,12 @@ func LoadRules(rulePath string) (*RuleSet, error) {
 		return nil, err
 	}
 
+	// Load Verify Bot Rule
+	verifyBotFile := rulePath + "/VerifyBot.yml"
+	if err := loadVerifyBotRule(verifyBotFile, rs.VerifyBotRule); err != nil {
+		return nil, err
+	}
+
 	return &rs, nil
 }
 
@@ -114,6 +122,15 @@ func loadCAPTCHARule(file string, rule *dataType.CaptchaRule) error {
 
 }
 
+// loadVerifyBotRule reads the YAML file at file and decodes it into rule.
+func loadVerifyBotRule(file string, rule *dataType.VerifyBotRule) error {
+	data, err := os.ReadFile(file)
+	if err != nil {
+		return err
+	}
+	return yaml.Unmarshal(data, rule)
+}
+
 // loadIPRules read the IP rule file and insert the rules into the trie
 func loadIPRules(filePath string, trie *dataType.TrieNode) error {
 	file, err := os.Open(filePath)
diff --git a/internal/dataType/type.go b/internal/dataType/type.go
index 426764e..4f247b9 100644
--- a/internal/dataType/type.go
+++ b/internal/dataType/type.go
@@ -16,3 +16,14 @@ type CaptchaRule struct {
 	CaptchaChallengeSessionTimeout int64  `yaml:"captcha_challenge_session_timeout"`
 	HCaptchaSecret                 string `yaml:"hcaptcha_secret"`
 }
+
+// VerifyBotRule selects which crawlers VerifyBot validates; each field is
+// loaded from the YAML key named in its tag.
+type VerifyBotRule struct {
+	VerifyGoogleBot bool `yaml:"verify_google_bot"`
+	VerifyBingBot   bool `yaml:"verify_bing_bot"`
+	VerifyBaiduBot  bool `yaml:"verify_baidu_bot"`
+	VerifyYandexBot bool `yaml:"verify_yandex_bot"`
+	VerifySogouBot  bool `yaml:"verify_sogou_bot"`
+	VerifyAppleBot  bool `yaml:"verify_apple_bot"`
+}
diff --git a/internal/server/checker.go b/internal/server/checker.go
index 1356362..9946e82 100644
--- a/internal/server/checker.go
+++ b/internal/server/checker.go
@@ -22,6 +22,7 @@ func CheckMain(w http.ResponseWriter, userRequestData dataType.UserRequest, rule
 	checkFuncs = append(checkFuncs, check.IPBlockList)
 	checkFuncs = append(checkFuncs, check.URLAllowList)
 	checkFuncs = append(checkFuncs, check.URLBlockList)
+	checkFuncs = append(checkFuncs, check.VerifyBot)
 	checkFuncs = append(checkFuncs, check.Captcha)
 
 	for _, checkFunc := range checkFuncs {