Generate domain sinkhole (blacklist) files for DNSCrypt & pdnsd DNS servers
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

326 lines
12 KiB

5 years ago
  1. #!/bin/env python3
  2. # Simple DNS sinkhole file generation for DNSCrypt & pdnsd servers
  3. #
  4. # Block DNS query resolutions for specific network domains
  5. #
  6. # Author: Pekka Helenius (~Fincer), 2019
  7. #
  8. ########################################
  9. import os
  10. import re
  11. import readline
  12. import signal
  13. import sys
  14. import time
  15. import numpy as np
  16. import urllib.request as URL
  17. from datetime import datetime
  18. from socket import timeout
  19. ########################################
  20. url_useragent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0'
  21. url_timeout = 60
  22. filepath = '/tmp/'
  23. #timestamp_short = datetime.now().strftime('%Y-%m-%d')
  24. timestamp_long = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  25. ####################
  26. pdnsd_datafile = 'pdnsd.sinkhole'
  27. pdnsd_tempfile = pdnsd_datafile + '.tmp'
  28. pdnsd_fileheader = "// Auto-generated list, build date " + timestamp_long + "\n// No addresses of these domains must be resolved" + "\n\n"
  29. pdnsd_outmessage = ("Move it to /etc/ folder and add the following configuration setting in /etc/pdnsd.conf:\n\n" + \
  30. "//Blacklisted domains\ninclude { file = \"/etc/" + pdnsd_datafile + "\"; }\n\n--------------------\nRestart pdnsd by issuing command 'systemctl restart pdnsd'\n")
  31. ####################
  32. dnscrypt_datafile = 'dnscrypt.cloaking.txt'
  33. dnscrypt_tempfile = dnscrypt_datafile + ".tmp"
  34. dnscrypt_fileheader = "# Auto-generated list, build date " + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "\n# No addresses of these domains must be resolved" + "\n\n"
  35. dnscrypt_outmessage = ("Move it to /etc/dnscrypt-proxy/ and add the following configuration setting in\n/etc/dnscrypt-proxy/dnscrypt-proxy.toml:\n\n" + \
  36. "cloaking_rules = '/etc/dnscrypt-proxy/" + dnscrypt_datafile + "'\n\n--------------------\nRestart dnscrypt-proxy by issuing command 'systemctl restart dnscrypt-proxy'\n")
  37. ########################################
  38. domains_blacklists = [
  39. # {
  40. # 'name': 'Cameleon blocklist',
  41. # 'url': 'https://sysctl.org/cameleon/hosts'
  42. # },
  43. # {
  44. # 'name': 'Xiaomi spyware blocklist (kevle2)',
  45. # 'url': 'https://raw.githubusercontent.com/kevle2/XiaomiSpywareBlockList/master/xiaomiblock.txt'
  46. # },
  47. {
  48. 'name': 'My custom blocklist',
  49. 'url': 'file:///home/' + os.environ['USER'] + '/dns-sinkhole.txt'
  50. },
  51. {
  52. 'name': 'Simple tracking',
  53. 'url': 'https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt'
  54. },
  55. {
  56. 'name': 'Simple ads',
  57. 'url': 'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt'
  58. },
  59. {
  60. 'name': 'Zeustracker blocklist',
  61. 'url': 'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist'
  62. },
  63. {
  64. 'name': 'Zeustracker baddomains',
  65. 'url': 'https://zeustracker.abuse.ch/blocklist.php?download=baddomains'
  66. },
  67. {
  68. 'name': 'StevenBlack blocklist',
  69. 'url': 'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts'
  70. },
  71. {
  72. 'name': 'Malwaredomains blocklist',
  73. 'url': 'https://mirror1.malwaredomains.com/files/justdomains'
  74. },
  75. {
  76. 'name': 'Ad servers',
  77. 'url': 'https://hosts-file.net/ad_servers.txt'
  78. },
  79. {
  80. 'name': 'YouTube ads (kboghdady)',
  81. 'url': 'https://raw.githubusercontent.com/kboghdady/youTube_ads_4_pi-hole/master/black.list'
  82. },
  83. {
  84. 'name': 'YouTube ads (Akamaru)',
  85. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/youtube.txt'
  86. },
  87. {
  88. 'name': 'HbbTV ads (Akamaru)',
  89. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/hbbtv.txt'
  90. },
  91. {
  92. 'name': 'Windows ads (Akamaru)',
  93. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/nomsdata.txt'
  94. },
  95. {
  96. 'name': 'Android & iOS ads (Akamaru)',
  97. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/appads.txt'
  98. },
  99. {
  100. 'name': 'Fake jailbreak websites (Akamaru)',
  101. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/jbfake.txt'
  102. },
  103. {
  104. 'name': 'Adobe updates (Akamaru)',
  105. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/adobeblock.txt'
  106. },
  107. {
  108. 'name': 'Fake emulators (Akamaru)',
  109. 'url': 'https://raw.githubusercontent.com/Akamaru/Pi-Hole-Lists/master/gamefake.txt'
  110. },
  111. {
  112. 'name': 'ADsecu blocklist',
  113. 'url': 'https://raw.githubusercontent.com/ADsecu/black-domains/master/domain_hosts.txt'
  114. },
  115. {
  116. 'name': 'SweetSophia blocklist',
  117. 'url': 'https://raw.githubusercontent.com/SweetSophia/mifitxiaomipiholelist/master/mifitblocklist.txt'
  118. },
  119. {
  120. 'name': 'Android ads (SweetSophia)',
  121. 'url': 'https://raw.githubusercontent.com/SweetSophia/androidappspihole/master/testrareandroappblock.txt'
  122. },
  123. {
  124. 'name': 'Blocklist (zebpalmer)',
  125. 'url': 'https://raw.githubusercontent.com/zebpalmer/dns_blocklists/master/blocklist.txt'
  126. },
  127. {
  128. 'name': 'Ads and tracking extended (lightswitch05)',
  129. 'url': 'https://raw.githubusercontent.com/lightswitch05/hosts/master/ads-and-tracking-extended.txt'
  130. },
  131. {
  132. 'name': 'Amp hosts extended (lightswitch05)',
  133. 'url': 'https://raw.githubusercontent.com/lightswitch05/hosts/master/amp-hosts-extended.txt'
  134. },
  135. {
  136. 'name': 'Tracking aggressive (lightswitch05)',
  137. 'url': 'https://raw.githubusercontent.com/lightswitch05/hosts/master/tracking-aggressive-extended.txt'
  138. },
  139. {
  140. 'name': 'dnscrypt.info blacklist',
  141. 'url': 'https://download.dnscrypt.info/blacklists/domains/mybase.txt'
  142. },
  143. {
  144. 'name': 'dnscrypt-proxy blacklist',
  145. 'url': 'https://raw.githubusercontent.com/CNMan/dnscrypt-proxy-config/master/dnscrypt-blacklist-domains.txt'
  146. },
  147. {
  148. 'name': 'dnscrypt - activation blocklist',
  149. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/activation.txt'
  150. },
  151. {
  152. 'name': 'dnscrypt - ads blocklist',
  153. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/ads.txt'
  154. },
  155. {
  156. 'name': 'dnscrypt - anticheat blocklist',
  157. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/anticheat.txt'
  158. },
  159. {
  160. 'name': 'dnscrypt - fakenews blocklist',
  161. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/fakenews.txt'
  162. },
  163. {
  164. 'name': 'dnscrypt - tracking blocklist',
  165. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/tracking.txt'
  166. },
  167. {
  168. 'name': 'dnscrypt - misc blocklist',
  169. 'url': 'https://raw.githubusercontent.com/zeffy/dnscrypt-blocking-additions/master/hosts/blacklist/misc.txt'
  170. },
  171. {
  172. 'name': 'WindowsSpyBlocker - spy (crazy-max)',
  173. 'url': 'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/dnscrypt/spy.txt'
  174. },
  175. {
  176. 'name': 'WindowsSpyBlocker - update (crazy-max)',
  177. 'url': 'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/dnscrypt/update.txt'
  178. },
  179. {
  180. 'name': 'WindowsSpyBlocker - extra (crazy-max)',
  181. 'url': 'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/dnscrypt/extra.txt'
  182. }
  183. ]
  184. ########################################
  185. # Exclude these pre-blacklisted domains from the final DNS sinkhole blacklist
  186. domains_whitelists = [
  187. {
  188. 'name': 'My custom whitelist',
  189. 'url': 'file:///home/' + os.environ['USER'] + '/dns-whitelist.txt'
  190. }
  191. ]
  192. ########################################
# Names of the lists whose download failed; getlist() appends to it and the
# end of the script reports its contents to the user
failedlists = []
  194. ##########
  195. def filewrite(filepath, datafile, string, operationmode, closefile):
  196. with open(os.path.join(filepath, datafile),operationmode) as f:
  197. f.write(string)
  198. if closefile is True:
  199. f.close()
  200. ##########
  201. def getlist(domainlist,timeout):
  202. if not domainlist is None:
  203. try:
  204. print("Processing list:\t\t" + domainlist['name'])
  205. request = URL.Request(domainlist['url'],headers={'User-Agent': url_useragent})
  206. return np.array(URL.urlopen(request, timeout=timeout).read().decode('utf-8').split('\n'))
  207. except KeyboardInterrupt:
  208. exit(0)
  209. except:
  210. print("Data retrieval failed:\t\t" + domainlist['url'] + "\n")
  211. failedlists.append(domainlist['name'])
  212. pass
  213. ##########
  214. def fetchdomaindata(dataset):
  215. fetched_data = set()
  216. if not dataset is None:
  217. for line in dataset:
  218. if not re.search('.*:.*', line) \
  219. and not re.search('[\[|\]]', line) \
  220. and not re.search('^.*#', line) \
  221. and not re.search('.*localhost.*', line) \
  222. and not re.search('\slocal$', line) \
  223. and not re.search('^$', line) \
  224. and re.search('[a-z]+', line):
  225. line = re.sub(r'^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+[ \t]+','',line)
  226. # Windows EOL last character substitution, corrects misformatted line variable
  227. line = re.sub('[\n]?\r$','',line)
  228. if not re.match('^$',line):
  229. fetched_data.add(line)
  230. if len(set(fetched_data)) == 0:
  231. print("\t\t\t\tNo domain entries found\n")
  232. return fetched_data
  233. ########################################
  234. # DNS sinkhole file headers
  235. filewrite(filepath, pdnsd_datafile, pdnsd_fileheader, 'w', True)
  236. filewrite(filepath, dnscrypt_datafile, dnscrypt_fileheader, 'w', True)
  237. ####################
  238. # Download and parse white/blocklists
  239. ##########
  240. for whitelist in domains_whitelists:
  241. whitelist_dataset = getlist(whitelist, url_timeout)
  242. whitelist_fetched_data = fetchdomaindata(whitelist_dataset)
  243. ##########
  244. for blacklist in domains_blacklists:
  245. blacklist_dataset = getlist(blacklist, url_timeout)
  246. if not blacklist_dataset is None:
  247. for line in (fetchdomaindata(blacklist_dataset)):
  248. if not line in whitelist_fetched_data:
  249. if re.search('^\.', line):
  250. pdnsd_line = "neg { name=*" + line + "; types = domain; }"
  251. elif re.search('\*', line):
  252. pdnsd_line = "neg { name=" + line + "; types = domain; }"
  253. else:
  254. pdnsd_line = "rr { name=" + line + "; a=0.0.0.0; }"
  255. dnscrypt_line = line + " " + "0.0.0.0"
  256. filewrite(filepath, pdnsd_tempfile, pdnsd_line + '\n', 'a', False)
  257. if not dnscrypt_line is None:
  258. filewrite(filepath, dnscrypt_tempfile, dnscrypt_line + '\n', 'a', False)
  259. ####################
  260. # Parse generated list, get only unique lines and write to final file
  261. def parseuniqlines(filepath, tempfile, outfile, outmessage):
  262. uniqdata = set()
  263. with open(os.path.join(filepath, outfile),'a') as f:
  264. for line in open(os.path.join(filepath, tempfile),'r'):
  265. if not line in uniqdata:
  266. f.write(line)
  267. uniqdata.add(line)
  268. f.close()
  269. os.remove(os.path.join(filepath, tempfile))
  270. print("----------------------------------------")
  271. print("Added " + str(len(set(uniqdata))) + " unique domains to the sinkhole file " + filepath + outfile)
  272. print("DNS sinkhole file " + filepath + outfile + " generated successfully.")
  273. print(outmessage)
  274. parseuniqlines(filepath, pdnsd_tempfile, pdnsd_datafile, pdnsd_outmessage)
  275. parseuniqlines(filepath, dnscrypt_tempfile, dnscrypt_datafile, dnscrypt_outmessage)
  276. ####################
  277. # Inform user about failed DNS blocklist downloads
  278. if len(failedlists) > 0:
  279. print("Warning: could not get data for the following blocklists:\n")
  280. for i in range(len(failedlists)):
  281. print("\t" + failedlists[i])
  282. print("")