Generate domain sinkhole (blacklist) files for DNSCrypt & pdnsd DNS servers
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

342 lines
13 KiB

5 years ago
5 years ago
5 years ago
5 years ago
  1. #!/bin/env python3
  2. # Copyright 2019 Pekka Helenius
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy of this software
  4. # and associated documentation files (the "Software"), to deal in the Software without restriction,
  5. # including without limitation the rights to use, copy, modify, merge, publish, distribute,
  6. # sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
  7. # furnished to do so, subject to the following conditions:
  8. # The above copyright notice and this permission notice shall be included in all copies or
  9. # substantial portions of the Software.
  10. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  11. # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  12. # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
  13. # FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  14. # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  15. ########################################
  16. # Simple DNS sinkhole file generation for DNSCrypt & pdnsd servers
  17. # Block DNS query resolutions for specific network domains
  18. ########################################
  19. import os
  20. import re
  21. import readline
  22. import signal
  23. import sys
  24. import time
  25. import numpy as np
  26. import urllib.request as URL
  27. from datetime import datetime
  28. from socket import timeout
  29. ########################################
  30. url_useragent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0'
  31. url_timeout = 60
  32. filepath = '/tmp/'
  33. #timestamp_short = datetime.now().strftime('%Y-%m-%d')
  34. timestamp_long = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  35. ####################
  36. pdnsd_datafile = 'pdnsd.sinkhole'
  37. pdnsd_tempfile = pdnsd_datafile + '.tmp'
  38. pdnsd_fileheader = "// Auto-generated list, build date " + timestamp_long + "\n// No addresses of these domains must be resolved" + "\n\n"
  39. pdnsd_outmessage = ("Move it to /etc/ folder and add the following configuration setting in /etc/pdnsd.conf:\n\n" + \
  40. "//Blacklisted domains\ninclude { file = \"/etc/" + pdnsd_datafile + "\"; }\n\n--------------------\nRestart pdnsd by issuing command 'systemctl restart pdnsd'\n\nYou may need to delete your pdnsd.cache file before the list rules apply.\n")
  41. ####################
  42. dnscrypt_datafile = 'dnscrypt.cloaking.txt'
  43. dnscrypt_tempfile = dnscrypt_datafile + ".tmp"
  44. dnscrypt_fileheader = "# Auto-generated list, build date " + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "\n# No addresses of these domains must be resolved" + "\n\n"
  45. dnscrypt_outmessage = ("Move it to /etc/dnscrypt-proxy/ and add the following configuration setting in\n/etc/dnscrypt-proxy/dnscrypt-proxy.toml:\n\n" + \
  46. "cloaking_rules = '/etc/dnscrypt-proxy/" + dnscrypt_datafile + "'\n\n--------------------\nRestart dnscrypt-proxy by issuing command 'systemctl restart dnscrypt-proxy'\n")
  47. ########################################
  48. domains_blacklists = [
  49. # {
  50. # 'name': 'Cameleon blocklist',
  51. # 'url': 'https://sysctl.org/cameleon/hosts'
  52. # },
  53. # {
  54. # 'name': 'Xiaomi spyware blocklist (kevle2)',
  55. # 'url': 'https://raw.githubusercontent.com/kevle2/XiaomiSpywareBlockList/master/xiaomiblock.txt'
  56. # },
  57. {
  58. 'name': 'My custom blocklist',
  59. 'url': 'file:///home/' + os.environ['USER'] + '/dns-sinkhole.txt'
  60. },
  61. {
  62. 'name': 'Simple tracking',
  63. 'url': 'https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt'
  64. },
  65. {
  66. 'name': 'Simple ads',
  67. 'url': 'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt'
  68. },
  69. {
  70. 'name': 'Zeustracker blocklist',
  71. 'url': 'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist'
  72. },
  73. {
  74. 'name': 'Zeustracker baddomains',
  75. 'url': 'https://zeustracker.abuse.ch/blocklist.php?download=baddomains'
  76. },
  77. {
  78. 'name': 'StevenBlack blocklist',
  79. 'url': 'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts'
  80. },
  81. {
  82. 'name': 'Malwaredomains blocklist',
  83. 'url': 'https://mirror1.malwaredomains.com/files/justdomains'
  84. },
  85. {
  86. 'name': 'Ad servers',
  87. 'url': 'https://hosts-file.net/ad_servers.txt'
  88. },
  89. {
  90. 'name': 'YouTube ads (kboghdady)',
  91. 'url': 'https://raw.githubusercontent.com/kboghdady/youTube_ads_4_pi-hole/master/black.list'
  92. },
  93. {
  94. 'name': 'YouTube ads (Akamaru)',
  95. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/youtube.txt'
  96. },
  97. {
  98. 'name': 'HbbTV ads (Akamaru)',
  99. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/hbbtv.txt'
  100. },
  101. {
  102. 'name': 'Windows ads (Akamaru)',
  103. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/nomsdata.txt'
  104. },
  105. {
  106. 'name': 'Android & iOS ads (Akamaru)',
  107. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/appads.txt'
  108. },
  109. {
  110. 'name': 'Fake jailbreak websites (Akamaru)',
  111. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/jbfake.txt'
  112. },
  113. {
  114. 'name': 'Adobe updates (Akamaru)',
  115. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/adobeblock.txt'
  116. },
  117. {
  118. 'name': 'Fake emulators (Akamaru)',
  119. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/gamefake.txt'
  120. },
  121. {
  122. 'name': 'ADsecu blocklist',
  123. 'url': 'https://raw.githubusercontent.com/ADsecu/black-domains/master/domain_hosts.txt'
  124. },
  125. {
  126. 'name': 'SweetSophia blocklist',
  127. 'url': 'https://raw.githubusercontent.com/SweetSophia/mifitxiaomipiholelist/master/mifitblocklist.txt'
  128. },
  129. {
  130. 'name': 'Android ads (SweetSophia)',
  131. 'url': 'https://raw.githubusercontent.com/SweetSophia/androidappspihole/master/testrareandroappblock.txt'
  132. },
  133. {
  134. 'name': 'Blocklist (zebpalmer)',
  135. 'url': 'https://raw.githubusercontent.com/zebpalmer/dns_blocklists/master/blocklist.txt'
  136. },
  137. {
  138. 'name': 'Ads and tracking extended (lightswitch05)',
  139. 'url': 'https://raw.githubusercontent.com/lightswitch05/hosts/master/ads-and-tracking-extended.txt'
  140. },
  141. {
  142. 'name': 'Amp hosts extended (lightswitch05)',
  143. 'url': 'https://raw.githubusercontent.com/lightswitch05/hosts/master/amp-hosts-extended.txt'
  144. },
  145. {
  146. 'name': 'Tracking aggressive (lightswitch05)',
  147. 'url': 'https://raw.githubusercontent.com/lightswitch05/hosts/master/tracking-aggressive-extended.txt'
  148. },
  149. {
  150. 'name': 'dnscrypt.info blacklist',
  151. 'url': 'https://download.dnscrypt.info/blacklists/domains/mybase.txt'
  152. },
  153. {
  154. 'name': 'dnscrypt-proxy blacklist',
  155. 'url': 'https://raw.githubusercontent.com/CNMan/dnscrypt-proxy-config/master/dnscrypt-blacklist-domains.txt'
  156. },
  157. {
  158. 'name': 'dnscrypt - activation blocklist',
  159. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/activation.txt'
  160. },
  161. {
  162. 'name': 'dnscrypt - ads blocklist',
  163. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/ads.txt'
  164. },
  165. {
  166. 'name': 'dnscrypt - anticheat blocklist',
  167. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/anticheat.txt'
  168. },
  169. {
  170. 'name': 'dnscrypt - fakenews blocklist',
  171. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/fakenews.txt'
  172. },
  173. {
  174. 'name': 'dnscrypt - tracking blocklist',
  175. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/tracking.txt'
  176. },
  177. {
  178. 'name': 'dnscrypt - misc blocklist',
  179. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/misc.txt'
  180. },
  181. {
  182. 'name': 'WindowsSpyBlocker - spy (crazy-max)',
  183. 'url': 'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/dnscrypt/spy.txt'
  184. },
  185. {
  186. 'name': 'WindowsSpyBlocker - update (crazy-max)',
  187. 'url': 'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/dnscrypt/update.txt'
  188. },
  189. {
  190. 'name': 'WindowsSpyBlocker - extra (crazy-max)',
  191. 'url': 'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/dnscrypt/extra.txt'
  192. }
  193. ]
  194. ########################################
  195. # Exclude these pre-blacklisted domains from the final DNS sinkhole blacklist
  196. domains_whitelists = [
  197. {
  198. 'name': 'My custom whitelist',
  199. 'url': 'file:///home/' + os.environ['USER'] + '/dns-whitelist.txt'
  200. }
  201. ]
  202. ########################################
# Names of the lists whose download failed.  getlist() appends to this list
# and its contents are printed as a warning at the end of the run.
failedlists = []
  204. ##########
  205. def filewrite(filepath, datafile, string, operationmode, closefile):
  206. with open(os.path.join(filepath, datafile),operationmode) as f:
  207. f.write(string)
  208. if closefile is True:
  209. f.close()
  210. ##########
  211. def getlist(domainlist,timeout):
  212. if not domainlist is None:
  213. try:
  214. print("Processing list:\t\t" + domainlist['name'])
  215. request = URL.Request(domainlist['url'],headers={'User-Agent': url_useragent})
  216. return np.array(URL.urlopen(request, timeout=timeout).read().decode('utf-8').split('\n'))
  217. except KeyboardInterrupt:
  218. exit(0)
  219. except:
  220. print("Data retrieval failed:\t\t" + domainlist['url'] + "\n")
  221. failedlists.append(domainlist['name'])
  222. pass
  223. ##########
  224. def fetchdomaindata(dataset):
  225. fetched_data = set()
  226. if not dataset is None:
  227. for line in dataset:
  228. if not re.search('.*:.*', line) \
  229. and not re.search('[\[|\]]', line) \
  230. and not re.search('^.*#', line) \
  231. and not re.search('.*localhost.*', line) \
  232. and not re.search('\slocal$', line) \
  233. and not re.search('^$', line) \
  234. and re.search('[a-z]+', line):
  235. line = re.sub(r'^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+[ \t]+','',line)
  236. # Windows EOL last character substitution, corrects misformatted line variable
  237. line = re.sub('[\n]?\r$','',line)
  238. if not re.match('^$',line):
  239. fetched_data.add(line)
  240. if len(set(fetched_data)) == 0:
  241. print("\t\t\t\tNo domain entries found\n")
  242. return fetched_data
  243. ########################################
  244. # DNS sinkhole file headers
  245. filewrite(filepath, pdnsd_datafile, pdnsd_fileheader, 'w', True)
  246. filewrite(filepath, dnscrypt_datafile, dnscrypt_fileheader, 'w', True)
  247. ####################
  248. # Download and parse white/blocklists
  249. ##########
  250. for whitelist in domains_whitelists:
  251. whitelist_dataset = getlist(whitelist, url_timeout)
  252. whitelist_fetched_data = fetchdomaindata(whitelist_dataset)
  253. ##########
  254. for blacklist in domains_blacklists:
  255. blacklist_dataset = getlist(blacklist, url_timeout)
  256. if not blacklist_dataset is None:
  257. for line in (fetchdomaindata(blacklist_dataset)):
  258. if not line in whitelist_fetched_data:
  259. if re.search('^\.', line):
  260. pdnsd_line = "neg { name=*" + line + "; types = domain; }"
  261. elif re.search('\*', line):
  262. pdnsd_line = "neg { name=" + line + "; types = domain; }"
  263. else:
  264. pdnsd_line = "rr { name=" + line + "; a=0.0.0.0; }"
  265. dnscrypt_line = line + " " + "0.0.0.0"
  266. filewrite(filepath, pdnsd_tempfile, pdnsd_line + '\n', 'a', False)
  267. if not dnscrypt_line is None:
  268. filewrite(filepath, dnscrypt_tempfile, dnscrypt_line + '\n', 'a', False)
  269. ####################
  270. # Parse generated list, get only unique lines and write to final file
  271. def parseuniqlines(filepath, tempfile, outfile, outmessage):
  272. uniqdata = set()
  273. with open(os.path.join(filepath, outfile),'a') as f:
  274. for line in open(os.path.join(filepath, tempfile),'r'):
  275. if not line in uniqdata:
  276. f.write(line)
  277. uniqdata.add(line)
  278. f.close()
  279. os.remove(os.path.join(filepath, tempfile))
  280. print("----------------------------------------")
  281. print("Added " + str(len(set(uniqdata))) + " unique domains to the sinkhole file " + filepath + outfile)
  282. print("DNS sinkhole file " + filepath + outfile + " generated successfully.")
  283. print(outmessage)
  284. parseuniqlines(filepath, pdnsd_tempfile, pdnsd_datafile, pdnsd_outmessage)
  285. parseuniqlines(filepath, dnscrypt_tempfile, dnscrypt_datafile, dnscrypt_outmessage)
  286. ####################
  287. # Inform user about failed DNS blocklist downloads
  288. if len(failedlists) > 0:
  289. print("Warning: could not get data for the following blocklists:\n")
  290. for i in range(len(failedlists)):
  291. print("\t" + failedlists[i])
  292. print("")