前言

公共后缀列表

// ===BEGIN ICANN DOMAINS===
...
// ===END ICANN DOMAINS===
...
// ===BEGIN PRIVATE DOMAINS===
...
// ===END PRIVATE DOMAINS===
// cn : <https://en.wikipedia.org/wiki/.cn>
// Submitted by registry <[email protected]>
cn
ac.cn
com.cn
edu.cn
gov.cn
net.cn
org.cn
mil.cn
公司.cn
网络.cn
網絡.cn
// jp geographic type names
// <http://jprs.jp/doc/rule/saisoku-1.html>
*.kawasaki.jp
*.kitakyushu.jp
*.kobe.jp
*.nagoya.jp
*.sapporo.jp
*.sendai.jp
*.yokohama.jp
!city.kawasaki.jp
!city.kitakyushu.jp
!city.kobe.jp
!city.nagoya.jp
!city.sapporo.jp
!city.sendai.jp
!city.yokohama.jp

Trie树是什么

Trie的结构

blog.kali-team.cn
www.gd.gov.cn
www.zj.gov.cn
mirrors.tuna.tsinghua.edu.cn
flowchart TD
Root --> cn[<b>cn</b>]
Root --> ...
cn --> edu[<b>edu</b>]
cn --> gov[<b>gov</b>]
cn --> kali-team
kali-team --> blog
edu --> tsinghua
tsinghua --> tuna
tsinghua --> tsinghuawww[www]
tuna --> mirrors
gov --> gd
gov --> zj[zj]
gd --> www
zj --> zjwww[www]

公共后缀列表构造到Trie树

cn
ac.cn
com.cn
edu.cn
gov.cn
net.cn
org.cn
mil.cn
公司.cn
网络.cn
網絡.cn
flowchart TD
Root --> cn[<b>cn</b>]
cn --> edu[<b>edu</b>]
cn --> gov[<b>gov</b>]
cn --> com[<b>com</b>]
cn --> ac[<b>ac</b>]
cn --> net[<b>net</b>]
cn --> org[<b>org</b>]
cn --> mil[<b>mil</b>]
cn --> 公司[<b>公司</b>]
cn --> 网络[<b>网络</b>]
cn --> 網絡[<b>網絡</b>]

代码实现

/// A trie node for looking up domain suffixes one label at a time.
///
/// The type is recursive: every child stored in `node` is itself a
/// `TLDTrieTree`, so the same type serves as both the root and every
/// inner node of the tree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TLDTrieTree {
    // Child nodes, keyed by the label that leads to them.
    node: HashMap<String, TLDTrieTree>,
    // Whether the path ending at this node can be a top-level domain (suffix).
    end: bool,
}

插入数据

impl TLDTrieTree {
  /// Inserts one suffix rule into the trie, creating any missing nodes.
  ///
  /// `keys` holds the rule's labels in the order they are walked from the
  /// root: the first label becomes a child of `self`, the second a child of
  /// that node, and so on. An empty `keys` leaves the trie unchanged.
  ///
  /// Marking rules:
  /// - the node of the final label gets `end = true`, unless that label starts
  ///   with `!` (an exclusion); in that case the `!` is stripped before the
  ///   lookup and the node's `end` flag is left untouched;
  /// - when the final label is `*`, the node of the label right before it is
  ///   marked `end = true` as well.
  #[inline]
  fn insert(&mut self, keys: Vec<&str>) {
    let keys_len = keys.len();
    let mut current_node = &mut self.node;
    for (index, &raw_key) in keys.iter().enumerate() {
      // Written as an addition so a one-element `keys` can never underflow.
      let is_last = index + 1 == keys_len;
      // Only the final label may carry the `!` exclusion marker.
      let (key, is_exclude) = match raw_key.strip_prefix('!') {
        Some(stripped) if is_last => (stripped, true),
        _ => (raw_key, false),
      };
      // Fetch the child for this label, inserting an unmarked node if absent.
      let next_node = current_node.entry(key.to_string()).or_insert_with(|| TLDTrieTree {
        node: Default::default(),
        end: false,
      });
      // The final label of a regular (non-exclusion) rule can be a suffix.
      let is_rule_end = is_last && !is_exclude;
      // The label directly in front of a trailing `*` can be a suffix too.
      // `index + 2 == keys_len` guarantees `index + 1` is in bounds.
      let precedes_final_wildcard =
        key != "*" && index + 2 == keys_len && keys[index + 1] == "*";
      if is_rule_end || precedes_final_wildcard {
        next_node.end = true;
      }
      current_node = &mut next_node.node;
    }
  }
}
{
  "node": {
    "cn": {
      "node": {
        "mil": {
          "node": {},
          "end": true
        },
        "com": {
          "node": {},
          "end": true
        },
        "xn--od0alg": {
          "node": {},
          "end": true
        },
        "xn--io0a7i": {
          "node": {},
          "end": true
        },
        "gov": {
          "node": {},
          "end": true
        },
        "xn--55qx5d": {
          "node": {},
          "end": true
        },
        "net": {
          "node": {},
          "end": true
        },
        "ac": {
          "node": {},
          "end": true
        },
        "edu": {
          "node": {},
          "end": true
        },
        "org": {
          "node": {},
          "end": true
        }
      },
      "end": true
    }
  },
  "end": false
}
*.ck
!www.ck
{
  "node": {
    "ck": {
      "node": {
        "*": {
          "node": {},
          "end": true
        },
        "www": {
          "node": {},
          "end": false
        }
      },
      "end": true
    }
  },
  "end": false
}

查询数据

impl TLDTrieTree {
  /// Walks the trie along `keys` and returns every label that was matched.
  ///
  /// Starting at `self`, each key is looked up among the current node's
  /// children. An exact hit is recorded together with that child's `end`
  /// flag and the walk descends into it. When there is no exact hit, a `*`
  /// child (if present) is recorded under the looked-up key with the `*`
  /// node's `end` flag, and the walk stops either way.
  #[inline]
  fn search(&self, keys: &[String]) -> Vec<Suffix> {
    let mut matched = Vec::new();
    let mut children = &self.node;
    for label in keys {
      if let Some(child) = children.get(label) {
        matched.push(Suffix {
          suffix: label.clone(),
          end: child.end,
        });
        children = &child.node;
        continue;
      }
      // No exact child: a wildcard may still cover this label, after which
      // nothing deeper is inspected.
      if let Some(wildcard) = children.get("*") {
        matched.push(Suffix {
          suffix: label.clone(),
          end: wildcard.end,
        });
      }
      break;
    }
    matched
  }
}
[Suffix { suffix: "jp", end: true }, Suffix { suffix: "kawasaki", end: true }, Suffix { suffix: "city", end: false }]
ExtractResult {
        subdomain: Some(
            "www.asd",
        ),
        domain: Some(
            "city",
        ),
        suffix: Some(
            "kawasaki.jp",
        ),
        registered_domain: Some(
            "city.kawasaki.jp",
        ),
    },

结论

在本文中,我们介绍了如何把公共后缀列表构造成Trie树,并用它快速查找域名的公共后缀(即有效顶级域名)。准确提取出注册域名之后,就可以更方便地对链接进行归类和识别(例如识别恶意链接),从而提高我们的安全性。我们还介绍了Trie树的基本概念和实现细节。希望这篇文章对你有所帮助!

参考

Powered by Kali-Team