{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating statistics: https://hoxhunt.com/\n" ] }, { "data": { "application/javascript": [ "/* Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not have WebSocket support. ' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", " 'Firefox 4 and 5 are also supported but you ' +\n", " 'have to enable WebSockets in about:config.');\n", " };\n", "}\n", "\n", "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", " this.id = figure_id;\n", "\n", " this.ws = websocket;\n", "\n", " this.supports_binary = (this.ws.binaryType != undefined);\n", "\n", " if (!this.supports_binary) {\n", " var warnings = document.getElementById(\"mpl-warnings\");\n", " if (warnings) {\n", " warnings.style.display = 'block';\n", " warnings.textContent = (\n", " \"This browser does not support binary websocket messages. \" +\n", " \"Performance may be slow.\");\n", " }\n", " }\n", "\n", " this.imageObj = new Image();\n", "\n", " this.context = undefined;\n", " this.message = undefined;\n", " this.canvas = undefined;\n", " this.rubberband_canvas = undefined;\n", " this.rubberband_context = undefined;\n", " this.format_dropdown = undefined;\n", "\n", " this.image_mode = 'full';\n", "\n", " this.root = $('
');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " $(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " if (mpl.ratio != 1) {\n", " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", " }\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " fig.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var titlebar = $(\n", " '
');\n", " var titletext = $(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = $('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = $('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var backingStore = this.context.backingStorePixelRatio ||\n", "\tthis.context.webkitBackingStorePixelRatio ||\n", "\tthis.context.mozBackingStorePixelRatio ||\n", "\tthis.context.msBackingStorePixelRatio ||\n", "\tthis.context.oBackingStorePixelRatio ||\n", "\tthis.context.backingStorePixelRatio || 1;\n", "\n", " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", "\n", " var rubberband = $('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " 
rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width * mpl.ratio);\n", " canvas.attr('height', height * mpl.ratio);\n", " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = $('
');\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = $('');\n", " button.click(method_name, toolbar_event);\n", " button.mouseover(tooltip, toolbar_mouse_event);\n", " nav_element.append(button);\n", " }\n", "\n", " // Add the status bar.\n", " var status_bar = $('');\n", " nav_element.append(status_bar);\n", " this.message = status_bar[0];\n", "\n", " // Add the close button to the window.\n", " var buttongrp = $('
');\n", " var button = $('');\n", " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", " buttongrp.append(button);\n", " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", " titlebar.prepend(buttongrp);\n", "}\n", "\n", "mpl.figure.prototype._root_extra_style = function(el){\n", " var fig = this\n", " el.on(\"remove\", function(){\n", "\tfig.close_ws(fig, {});\n", " });\n", "}\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(el){\n", " // this is important to make the div 'focusable\n", " el.attr('tabindex', 0)\n", " // reach out to IPython and tell the keyboard manager to turn it's self\n", " // off when our div gets focus\n", "\n", " // location in version 3\n", " if (IPython.notebook.keyboard_manager) {\n", " IPython.notebook.keyboard_manager.register_events(el);\n", " }\n", " else {\n", " // location in version 2\n", " IPython.keyboard_manager.register_events(el);\n", " }\n", "\n", "}\n", "\n", "mpl.figure.prototype._key_event_extra = function(event, name) {\n", " var manager = IPython.notebook.keyboard_manager;\n", " if (!manager)\n", " manager = IPython.keyboard_manager;\n", "\n", " // Check for shift+enter\n", " if (event.shiftKey && event.which == 13) {\n", " this.canvas_div.blur();\n", " // select the cell after this one\n", " var index = IPython.notebook.find_cell_index(this.cell_info[0]);\n", " IPython.notebook.select(index + 1);\n", " }\n", "}\n", "\n", "mpl.figure.prototype.handle_save = function(fig, msg) {\n", " fig.ondownload(fig, null);\n", "}\n", "\n", "\n", "mpl.find_output_cell = function(html_output) {\n", " // Return the cell and output element which can be found *uniquely* in the notebook.\n", " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", " // IPython event is triggered only after the cells have been serialised, which for\n", " // our purposes (turning an active figure into a static one), is 
too late.\n", " var cells = IPython.notebook.get_cells();\n", " var ncells = cells.length;\n", " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", " data = data.data;\n", " }\n", " if (data['text/html'] == html_output) {\n", " return [cell, data, j];\n", " }\n", " }\n", " }\n", " }\n", "}\n", "\n", "// Register the function which deals with the matplotlib target/channel.\n", "// The kernel may be null if the page has been refreshed.\n", "if (IPython.notebook.kernel != null) {\n", " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", "}\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Generating statistics: https://ts.fi\n" ] }, { "data": { "application/javascript": [ "/* Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not have WebSocket support. ' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", " 'Firefox 4 and 5 are also supported but you ' +\n", " 'have to enable WebSockets in about:config.');\n", " };\n", "}\n", "\n", "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", " this.id = figure_id;\n", "\n", " this.ws = websocket;\n", "\n", " this.supports_binary = (this.ws.binaryType != undefined);\n", "\n", " if (!this.supports_binary) {\n", " var warnings = document.getElementById(\"mpl-warnings\");\n", " if (warnings) {\n", " warnings.style.display = 'block';\n", " warnings.textContent = (\n", " \"This browser does not support binary websocket messages. 
\" +\n", " \"Performance may be slow.\");\n", " }\n", " }\n", "\n", " this.imageObj = new Image();\n", "\n", " this.context = undefined;\n", " this.message = undefined;\n", " this.canvas = undefined;\n", " this.rubberband_canvas = undefined;\n", " this.rubberband_context = undefined;\n", " this.format_dropdown = undefined;\n", "\n", " this.image_mode = 'full';\n", "\n", " this.root = $('
');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " $(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " if (mpl.ratio != 1) {\n", " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", " }\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " fig.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var titlebar = $(\n", " '
');\n", " var titletext = $(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = $('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = $('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var backingStore = this.context.backingStorePixelRatio ||\n", "\tthis.context.webkitBackingStorePixelRatio ||\n", "\tthis.context.mozBackingStorePixelRatio ||\n", "\tthis.context.msBackingStorePixelRatio ||\n", "\tthis.context.oBackingStorePixelRatio ||\n", "\tthis.context.backingStorePixelRatio || 1;\n", "\n", " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", "\n", " var rubberband = $('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " 
rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width * mpl.ratio);\n", " canvas.attr('height', height * mpl.ratio);\n", " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = $('
');\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = $('');\n", " button.click(method_name, toolbar_event);\n", " button.mouseover(tooltip, toolbar_mouse_event);\n", " nav_element.append(button);\n", " }\n", "\n", " // Add the status bar.\n", " var status_bar = $('');\n", " nav_element.append(status_bar);\n", " this.message = status_bar[0];\n", "\n", " // Add the close button to the window.\n", " var buttongrp = $('
');\n", " var button = $('');\n", " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", " buttongrp.append(button);\n", " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", " titlebar.prepend(buttongrp);\n", "}\n", "\n", "mpl.figure.prototype._root_extra_style = function(el){\n", " var fig = this\n", " el.on(\"remove\", function(){\n", "\tfig.close_ws(fig, {});\n", " });\n", "}\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(el){\n", " // this is important to make the div 'focusable\n", " el.attr('tabindex', 0)\n", " // reach out to IPython and tell the keyboard manager to turn it's self\n", " // off when our div gets focus\n", "\n", " // location in version 3\n", " if (IPython.notebook.keyboard_manager) {\n", " IPython.notebook.keyboard_manager.register_events(el);\n", " }\n", " else {\n", " // location in version 2\n", " IPython.keyboard_manager.register_events(el);\n", " }\n", "\n", "}\n", "\n", "mpl.figure.prototype._key_event_extra = function(event, name) {\n", " var manager = IPython.notebook.keyboard_manager;\n", " if (!manager)\n", " manager = IPython.keyboard_manager;\n", "\n", " // Check for shift+enter\n", " if (event.shiftKey && event.which == 13) {\n", " this.canvas_div.blur();\n", " // select the cell after this one\n", " var index = IPython.notebook.find_cell_index(this.cell_info[0]);\n", " IPython.notebook.select(index + 1);\n", " }\n", "}\n", "\n", "mpl.figure.prototype.handle_save = function(fig, msg) {\n", " fig.ondownload(fig, null);\n", "}\n", "\n", "\n", "mpl.find_output_cell = function(html_output) {\n", " // Return the cell and output element which can be found *uniquely* in the notebook.\n", " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", " // IPython event is triggered only after the cells have been serialised, which for\n", " // our purposes (turning an active figure into a static one), is 
too late.\n", " var cells = IPython.notebook.get_cells();\n", " var ncells = cells.length;\n", " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", " data = data.data;\n", " }\n", " if (data['text/html'] == html_output) {\n", " return [cell, data, j];\n", " }\n", " }\n", " }\n", " }\n", "}\n", "\n", "// Register the function which deals with the matplotlib target/channel.\n", "// The kernel may be null if the page has been refreshed.\n", "if (IPython.notebook.kernel != null) {\n", " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", "}\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#!/bin/env python\n", "\n", "\"\"\"\n", "URL data extractor\n", "\n", "Pekka Helenius \n", "\n", "Requirements:\n", "\n", "Python 3\n", "Python 3 BeautifulSoup4 (python-beautifulsoup4)\n", "Python 3 whois (python-whois; PyPI)\n", "Python 3 JSON Schema (python-jsonschema)\n", "Python 3 Numpy (python-numpy)\n", "Python 3 matplotlib (python-matplotlib)\n", "\n", "TODO: URL domain part length comparison analysis\n", "TODO: URL non-TLD part length comparison analysis\n", " - in phishing webpages, URL tends to be much longer than legitimate webpages\n", " however, domains themselves tend to be much shorter (without TLD)\n", " - phishing URLs often contain more number of dots and subdomains than legitimate URLs\n", " - legitimate: robots.txt redirects bots to a legitimate domain rather than to the original phishing domain\n", "\n", "TODO: Website visual similarity analysis\n", "TODO: consistency of RDN usage in HTML data\n", "\"\"\"\n", "\n", "######################################\n", "\n", "%matplotlib notebook\n", "import matplotlib.pyplot as plt\n", "\n", "from bs4 import BeautifulSoup as bs\n", "from collections import Counter\n", "from datetime import date, datetime\n", "import 
json\n", "import os\n", "import re\n", "import requests\n", "from time import sleep\n", "import urllib\n", "from whois import whois\n", "\n", "# Target URLs\n", "urls = [\n", " \"https://hoxhunt.com/\",\n", " \"https://hs.fi\",\n", " \"https://ts.fi\",\n", " \"https://facebook.com\"\n", "]\n", "\n", "# Some web servers may block our request unless we set a widely used, well-known user agent string\n", "request_headers = {\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'\n", "}\n", "\n", "# Date format for domain timestamps\n", "dateformat = \"%Y/%m/%d\"\n", "\n", "# All webpages may not like fetching data too fast\n", "# Sleep time in seconds\n", "sleep_interval_between_requests = 0.5\n", "\n", "# Write JSON results to a file?\n", "use_file = True\n", "# Full file path + name\n", "filename = os.getcwd() + \"/\" + \"url_info.json\"\n", "\n", "# Generate plot from existing JSON data?\n", "plot_only = False\n", "\n", "# Save generated plot images?\n", "save_plot_images = True\n", "\n", "# DPI of plot images\n", "plot_images_dpi = 150\n", "\n", "# Common link attribute references in various HTML elements\n", "link_refs = {\n", " 'a': 'href',\n", " 'img': 'src',\n", " 'script': 'src'\n", "}\n", "\n", "############################################################################\n", "############################################################################\n", "\n", "class json_url_data(object):\n", "\n", "# def __init__(self):\n", "\n", "######################################\n", " \"\"\"\n", " Set a new HTTP session and get response.\n", "\n", " Returns a requests.models.Response object.\n", " \"\"\"\n", " def set_session(self, url, method='get', redirects=True):\n", " \n", " # HTTP response status codes 1XX, 2XX and 3XX are OK\n", " # Treat other codes as errors\n", " sc = re.compile(r\"^[123]{1}[0-9]{2}\")\n", " \n", " sleep(sleep_interval_between_requests)\n", " \n", " 
try:\n", " session = requests.Session()\n", " response = session.request(method, url, headers=request_headers, allow_redirects=redirects)\n", " \n", " if not sc.match(str(response.status_code)):\n", " raise Exception(\"Error: got invalid response status from the web server\")\n", " return response\n", " \n", " except:\n", " raise Exception(\"Error: HTTP session could not be established. URL: '\" + url + \"' (method: \" + method + \")\") from None\n", "\n", "######################################\n", " \"\"\"\n", " Fetch HTML data.\n", "\n", " Returns a bs4.BeautifulSoup object.\n", " \"\"\"\n", " def get_html_data(self, url):\n", " \n", " try:\n", " data = bs(self.set_session(url).content, 'html.parser')\n", " return data\n", " except:\n", " raise Exception(\"Error: HTML data could not be retrieved\")\n", "\n", "######################################\n", " \"\"\"\n", " Get URL redirects and related HTTP status codes.\n", "\n", " Returns a list object.\n", " \"\"\"\n", " def get_url_redirects(self, url):\n", " \n", " response = self.set_session(url)\n", " list_data = []\n", " \n", " if response.history:\n", " \n", " for r in response.history:\n", " list_data.append({'redirect_url': r.url, 'status': r.status_code})\n", " \n", " return list_data\n", "\n", "######################################\n", " \"\"\"\n", " Extract title HTML element contents from given HTML data.\n", "\n", " Returns a string object.\n", " \"\"\"\n", " def get_webpage_title(self, url):\n", " \n", " html_data = self.get_html_data(url)\n", " \n", " title = html_data.title.string\n", " return title\n", "\n", "######################################\n", " \"\"\"\n", " Get WHOIS domain data.\n", "\n", " Returns a dict object.\n", " \"\"\"\n", " def get_whois_data(self, url):\n", " dict_data = whois(url)\n", " return dict_data\n", "\n", "######################################\n", " \"\"\"\n", " Get domain name based on WHOIS domain data.\n", " \"\"\"\n", " def get_domain_name(self, url):\n", " 
domain_name = self.get_whois_data(url).domain_name\n", " \n", " if type(domain_name) is list:\n", " return domain_name[0].lower()\n", " else:\n", " return domain_name.lower()\n", "\n", "######################################\n", " \"\"\"\n", " Get initial and final URLs\n", " \n", " Compare whether the final (destination) URL\n", " matches with the initial URL in a request.\n", " \n", " Returns a dict object.\n", " \"\"\"\n", " def get_startfinal_urls(self, url):\n", " \n", " response = self.set_session(url)\n", " end_url = response.url\n", " \n", " start_match = False\n", " final_match = False\n", " \n", " # dr = re.compile(r\"^([a-z]+://)?([^/]+)\")\n", " # dr_group_lastindex = dr.match(url).lastindex\n", " # domain_name = dr.match(url).group(dr_group_lastindex)\n", " \n", " domain_name = self.get_domain_name(url)\n", " \n", " if re.search(domain_name, end_url):\n", " final_match = True\n", " \n", " dict_data = {\n", " 'startfinal_urls': {\n", " 'start_url': {\n", " 'url': url\n", " },\n", " 'final_url': {\n", " 'url': end_url, 'domain_match': final_match\n", " }\n", " }\n", " }\n", " \n", " return dict_data\n", "\n", "######################################\n", " \"\"\"\n", " Get domain registrar\n", " \n", " Returns a dict object.\n", " \"\"\"\n", " def get_domain_registrar(self, url):\n", " dict_data = {'domain_registrar': self.get_whois_data(url).registrar }\n", " return dict_data\n", "\n", "######################################\n", " \"\"\"\n", " Do comparison between the domain name, extracted\n", " from WHOIS domain data and contents of a title HTML\n", " element, extracted from HTML data based on a given URL.\n", " \n", " Returns a dict object.\n", " \"\"\"\n", " def get_domain_title_match(self, url):\n", " \n", " domain_name = self.get_domain_name(url)\n", " title = self.get_webpage_title(url)\n", " \n", " # If is string:\n", " if type(domain_name) is str:\n", " if re.search(domain_name, title, re.IGNORECASE):\n", " match = True\n", " else:\n", " match = 
False\n", " \n", " # If is list:\n", " elif type(domain_name) is list:\n", " for d in domain_name:\n", " if re.search(d, title, re.IGNORECASE):\n", " match = True\n", " break\n", " else:\n", " match = False\n", " else:\n", " match = False\n", " \n", " dict_data = {\n", " 'webpage_title': title,\n", " 'domain_in_webpage_title': match\n", " }\n", " \n", " return dict_data\n", "\n", "######################################\n", " \"\"\"\n", " Get a single timestamp from given data\n", " \n", " Two scenarios are considered: dates argument is either\n", " a list or a string. If it is a list, then we need\n", " to decide which date value to extract.\n", " \n", " Returns a date object.\n", " \"\"\"\n", " def get_single_date(self, dates, newest=False):\n", " \n", " dates_epoch = []\n", " \n", " if type(dates) is list:\n", " for d in dates:\n", " dates_epoch.append(d.timestamp())\n", " else:\n", " dates_epoch.append(dates.timestamp())\n", " \n", " return datetime.fromtimestamp(sorted(dates_epoch, reverse=newest)[0])\n", "\n", "######################################\n", " \"\"\"\n", " Get domain time information based on WHOIS domain data.\n", " \n", " Returns a dict object.\n", " \"\"\"\n", " def get_domain_timeinfo(self, url):\n", " \n", " whois_data = self.get_whois_data(url)\n", " domain_creation_date = self.get_single_date(whois_data.creation_date, newest = False)\n", " domain_updated_date = self.get_single_date(whois_data.updated_date, newest = False)\n", " domain_expiration_date = self.get_single_date(whois_data.expiration_date, newest = False)\n", " \n", " dict_data = {\n", " 'domain_timestamps':\n", " {\n", " 'created': domain_creation_date.strftime(dateformat),\n", " 'updated': domain_updated_date.strftime(dateformat),\n", " 'expires': domain_expiration_date.strftime(dateformat)\n", " }\n", " }\n", " \n", " return dict_data\n", "\n", "######################################\n", " \"\"\"\n", " Get domain time information based on WHOIS domain data,\n", " relative to 
the current date (UTC time).\n", " \n", " Returns a dict object.\n", " \"\"\"\n", " def get_domain_timeinfo_relative(self, url):\n", " \n", " date_now = datetime.utcnow()\n", " \n", " whois_data = self.get_whois_data(url)\n", " domain_creation_date = self.get_single_date(whois_data.creation_date, newest = False)\n", " domain_updated_date = self.get_single_date(whois_data.updated_date, newest = False)\n", " domain_expiration_date = self.get_single_date(whois_data.expiration_date, newest = False)\n", " \n", " dict_data = {\n", " 'domain_timestamps_relative':\n", " {\n", " 'current_date': (date_now.strftime(dateformat)),\n", " 'created_days_ago': (date_now - domain_creation_date).days,\n", " 'updated_days_ago': (date_now - domain_updated_date).days,\n", " 'expires_days_left': (domain_expiration_date - date_now).days\n", " }\n", " }\n", " \n", " return dict_data\n", "\n", "######################################\n", " \"\"\"\n", " Determine whether URL matches syntaxes such as\n", " '../foo/bar/'\n", " '/foo/../../bar/,\n", " 'https://foo.bar/foo/../'\n", " \n", " etc.\n", " \n", " Returns a boolean object.\n", " \"\"\"\n", " def is_multidot_url(self, url):\n", " \n", " multidot = re.compile(r\".*[.]{2}/.*\")\n", " \n", " if multidot.match(url):\n", " return True\n", " return False\n", "\n", "######################################\n", " \"\"\"\n", " Get HTML element data from HTML data contents.\n", " \n", " Two fetching methods are supported:\n", " - A) use only HTML element/tag name and extract raw contents of\n", " these tags\n", " - B) use both HTML element/tag name and more fine-grained\n", " inner attribute name to determine which HTML elements are extracted\n", " \n", " Special case - URL link references:\n", " - attributes 'href' or 'src' are considered as link referrals and \n", " they are handled in a special way\n", " - A) link referrals to directly to domain are placed in 'self_refs' list\n", " (patterns: '/', '#', '../' and '/')\n", " - B) link referrals to 
external domains are placed in 'ext_refs' list\n", " (patterns such as 'https://foo.bar.dot/fancysite' etc.)\n", " \n", " - Both A) and B) link categories have 'normal' and 'multidot' subcategories\n", " - normal links do not contain pattern '../'\n", " - multidot links contain '../' pattern\n", " \n", " Returns a dict object.\n", " \"\"\"\n", " \n", " def get_tag_data(self, url, tag, attribute=None):\n", " \n", " html_data = self.get_html_data(url)\n", " domain_name = self.get_domain_name(url)\n", " data = []\n", " \n", " if attribute != None:\n", " \n", " for d in html_data.find_all(tag):\n", " \n", " # Ignore the HTML tag if it does not contain our attribute\n", " if d.get(attribute) != None:\n", " data.append(d.get(attribute))\n", " \n", " if attribute == 'href' or attribute == 'src':\n", " \n", " self_refs = { 'normal': [], 'multidot': []}\n", " ext_refs = { 'normal': [], 'multidot': []}\n", " \n", " # Syntax: '#', '/', '../'\n", " rs = re.compile(r\"^[/#]|^[.]{2}/.*\")\n", " \n", " # Syntax: ':/'\n", " rd = re.compile(r\"^[a-z]+:[a-z]+/\")\n", " \n", " # Syntax examples:\n", " # 'http://foo.bar/', 'https://foo.bar/, 'foo.bar/', 'https://virus.foo.bar/'\n", " rl = re.compile(r\"^([a-z]+://)?([^/]*\" + domain_name + \"/)\")\n", " \n", " for s in data:\n", " \n", " # Ignore mailto links\n", " if re.match(\"^mailto:\", s): continue\n", " \n", " if rs.match(s) or rl.match(s) or rd.match(s):\n", " if self.is_multidot_url(s):\n", " self_refs['multidot'].append(s)\n", " else:\n", " self_refs['normal'].append(s)\n", " else:\n", " \n", " if self.is_multidot_url(s):\n", " try:\n", " ext_refs['multidot'].append({'url': s, 'registrar': self.get_whois_data(s).registrar })\n", " except:\n", " # Fallback if WHOIS query fails\n", " ext_refs['normal'].append({'url': s, 'registrar': None })\n", " pass\n", " else:\n", " try:\n", " ext_refs['normal'].append({'url': s, 'registrar': self.get_whois_data(s).registrar })\n", " except:\n", " ext_refs['normal'].append({'url': s, 
'registrar': None })\n", " pass\n", " \n", " data = None\n", " \n", " dict_data = {\n", " tag: {\n", " attribute + '_ext': (ext_refs),\n", " attribute + '_self': (self_refs)\n", " }\n", " }\n", " \n", " else:\n", " dict_data = {\n", " tag: {\n", " attribute: (data)\n", " }\n", " }\n", " \n", " else:\n", " for d in html_data.find_all(tag):\n", " data.append(d.prettify())\n", " \n", " dict_data = {\n", " tag: (data)\n", " }\n", " \n", " return dict_data\n", "\n", "######################################\n", " \"\"\"\n", " How many external URL links have same registrar than\n", " the webpage itself?\n", " \"\"\"\n", " def get_registrar_count(self, registrar, urls):\n", " \n", " i = 0\n", " \n", " for u in urls:\n", " for k,v in u.items():\n", " if k == 'registrar' and v == registrar:\n", " i += 1\n", " \n", " o = len(urls) - i\n", " \n", " dict_data = {\n", " 'same_registrar_count': i,\n", " 'other_registrar_count': o\n", " }\n", " \n", " return dict_data\n", "\n", "######################################\n", "\n", " \"\"\"\n", " Get values existing in a dict object,\n", " based on a known key string.\n", " \n", " Returns a list object.\n", " \n", " TODO: Major re-work for the fetch function\n", "\n", " TODO: Support for more sophisticated JSON key string filtering\n", " (possibility to use multiple keys for filtering)\n", " \"\"\"\n", " class json_fetcher(object):\n", "\n", " def __init__(self, dict_data, json_key):\n", " self.json_dict = json.loads(json.dumps(dict_data))\n", " self.json_key = json_key\n", "\n", " ##########\n", " # Ref: https://www.codespeedy.com/how-to-loop-through-json-with-subkeys-in-python/\n", " def fetch(self, jdata):\n", "\n", " if isinstance(jdata, dict):\n", "\n", " for k,v in jdata.items():\n", " if k == self.json_key:\n", " yield v\n", " elif isinstance(v, dict):\n", " for val in self.fetch(v):\n", " yield val\n", " elif isinstance(v, list):\n", " for l in v:\n", " if isinstance(l, dict):\n", " for ka,va in l.items():\n", " if ka == 
self.json_key:\n", "                                        yield va\n", "\n", "            elif isinstance(jdata, list):\n", "                for l in jdata:\n", "                    if isinstance(l, dict):\n", "                        for k,v in l.items():\n", "                            if k == self.json_key:\n", "                                yield v\n", "                    elif isinstance(l, list):\n", "                        for lb in l:\n", "                            for ka,va in lb.items():\n", "                                if ka == self.json_key:\n", "                                    yield va\n", "\n", "        ##########\n", "        def get_data(self, flatten=True):\n", "\n", "            data_extract = []\n", "            flat_data = []\n", "\n", "            for i in self.fetch(self.json_dict):\n", "                data_extract.append(i)\n", "\n", "            # Flatten possible nested lists\n", "            # (i.e. JSON data contains multiple keys in\n", "            # different nested sections)\n", "            def get_data_extract(ld):\n", "                for l in ld:\n", "                    if isinstance(l, list):\n", "                        for la in get_data_extract(l):\n", "                            yield la\n", "                    else:\n", "                        yield l\n", "\n", "            if flatten == True:\n", "                for u in get_data_extract(data_extract):\n", "                    flat_data.append(u)\n", "            \n", "                return flat_data\n", "            else:\n", "                return data_extract\n", "\n", "######################################\n", "    \"\"\"\n", "    Compile URL related data.\n", "    \"\"\"\n", "    def get_url_data(self, url):\n", "        \n", "        # Dict object for simple, non-nested data\n", "        data_simple = {}\n", "\n", "        # Pre-defined dict object for specific data sets\n", "        webpage_data = {}\n", "        \n", "        startfinal_url = self.get_startfinal_urls(url)\n", "        redirect_url = self.get_url_redirects(url)\n", "        domain_registrar = self.get_domain_registrar(url)\n", "        domaintitle_match = self.get_domain_title_match(url)\n", "        \n", "        domain_time_relative = self.get_domain_timeinfo_relative(url)\n", "        domain_time = self.get_domain_timeinfo(url)\n", "        \n", "        html_element_iframe = self.get_tag_data(url, 'iframe')\n", "        html_element_a_href = self.get_tag_data(url, 'a', link_refs['a'])\n", "        html_element_img_src = self.get_tag_data(url, 'img', link_refs['img'])\n", "        html_element_script_src = self.get_tag_data(url, 'script', link_refs['script'])\n", "\n", "        iframes_count = {\n", "            'iframes_count':\n", "            
len(self.json_fetcher(html_element_iframe, 'iframe').get_data())\n", " }\n", " \n", " multidot_urls_count = {\n", " 'multidot_url_count':\n", " len(self.json_fetcher(html_element_a_href, 'multidot').get_data()) + len(self.json_fetcher(html_element_img_src, 'multidot').get_data()) + len(self.json_fetcher(html_element_script_src, 'multidot').get_data())\n", " }\n", " \n", " ###################\n", " def get_total_registrars():\n", "\n", " same_registrar_counts = 0\n", " other_registrar_counts = 0\n", " for k,v in link_refs.items():\n", " \n", " html_element = self.get_tag_data(url, k, v)\n", " \n", " same_registrar_counts += self.get_registrar_count(\n", " domain_registrar['domain_registrar'],\n", " html_element[k][v + '_ext']['normal']\n", " )['same_registrar_count']\n", " \n", " other_registrar_counts += self.get_registrar_count(\n", " domain_registrar['domain_registrar'],\n", " html_element[k][v + '_ext']['normal']\n", " )['other_registrar_count']\n", " \n", " registrar_counts = {\n", " 'same_registrar_count': same_registrar_counts,\n", " 'other_registrar_count': other_registrar_counts\n", " }\n", " return registrar_counts\n", " \n", " # Avoid unnecessary nesting of the following data\n", " data_simple.update(domain_registrar)\n", " data_simple.update(domaintitle_match)\n", " data_simple.update(iframes_count)\n", " data_simple.update(multidot_urls_count)\n", " data_simple.update(get_total_registrars())\n", " \n", " url_data = dict({\n", " url: [\n", " data_simple,\n", " startfinal_url,\n", " {'redirects': redirect_url},\n", " \n", " domain_time_relative,\n", " domain_time,\n", " \n", " {'webpage_data': [\n", " html_element_iframe,\n", " html_element_a_href,\n", " html_element_img_src,\n", " html_element_script_src\n", " ]\n", " }\n", " ]\n", " })\n", " \n", " return url_data\n", "\n", "\n", "\n", "class write_operations(object):\n", "\n", " def __init__(self):\n", " self.filename = filename\n", "\n", "######################################\n", " \"\"\"\n", " Set 
JSON file name, append number suffix\n", " # if file exists already.\n", " \n", " Returns file name path.\n", " \"\"\"\n", " def set_filename(self):\n", " \n", " c = 0\n", " while True:\n", " if os.path.exists(self.filename):\n", " if c == 0:\n", " self.filename = self.filename + \".\" + str(c)\n", " else:\n", " self.filename = re.sub(\"[0-9]+$\", str(c), self.filename)\n", " else:\n", " break\n", " c += 1\n", " return self.filename\n", "\n", "######################################\n", " \"\"\"\n", " Append to a JSON file.\n", " \"\"\"\n", " def write_to_file(self, data):\n", " \n", " try:\n", " json_file = open(self.filename, \"a\")\n", " json_file.write(data)\n", " json_file.close()\n", " return 0\n", " except:\n", " return 1\n", "\n", "######################################\n", " \"\"\"\n", " Fetch all pre-defined URLs.\n", " \"\"\"\n", " def fetch_and_store_url_data(self, urls, use_file):\n", "\n", " data_parts = {}\n", " fetch_json_data = json_url_data()\n", "\n", " for u in urls:\n", " print(\"Fetching URL data: %s\" % u)\n", " try:\n", " data_parts.update(fetch_json_data.get_url_data(u))\n", " except:\n", " print(\"Failed: %s\" % u)\n", " pass\n", "\n", " json_data = json.dumps(data_parts)\n", "\n", " if use_file == True:\n", " self.write_to_file(json_data)\n", "\n", " return json_data\n", "\n", "######################################\n", "\"\"\"\n", "Visualize & summarize data.\n", "\"\"\"\n", "\n", "class data_visualization(object):\n", "\n", " def __init__(self, url, json_data):\n", " self.url = url\n", " self.json_data = json_data\n", "\n", " self.data = json.loads(json.dumps(self.json_data)).get(self.url)\n", " self.json_url_obj = json_url_data()\n", " self.domain_registrar = self.json_url_obj.get_domain_registrar(self.url)['domain_registrar']\n", " self.webpage_data = self.json_url_obj.json_fetcher(self.data, 'webpage_data').get_data()\n", "\n", " def get_urls_count_summary(self):\n", "\n", " unique_refs = []\n", "\n", " for k,v in 
link_refs.items():\n", " if v in unique_refs: continue\n", " unique_refs.append(v)\n", "\n", " def link_count(refs, suffix):\n", "\n", " urls_cnt = 0\n", "\n", " for u in self.webpage_data:\n", " for l in refs:\n", " urls = self.json_url_obj.json_fetcher(u, l + suffix).get_data()\n", " for n in urls:\n", " urls_cnt += len(n['normal'])\n", " urls_cnt += len(n['multidot'])\n", " return urls_cnt\n", "\n", " data = {\n", " 'local_urls': link_count(unique_refs, '_self'),\n", " 'external_urls': link_count(unique_refs, '_ext')\n", " }\n", " \n", " return data\n", "\n", " def get_registrars(self):\n", "\n", " registrars = []\n", " #registrars.append(self.domain_registrar)\n", "\n", " for w in self.webpage_data:\n", " webpage_registrars = self.json_url_obj.json_fetcher(w, 'registrar').get_data()\n", " for wa in webpage_registrars:\n", " if wa != None:\n", " registrars.append(wa)\n", " return registrars\n", "\n", " def get_registrar_count_summary(self):\n", " \n", " domain_counter = dict(Counter(self.get_registrars()))\n", " data = {'fetched_domains': domain_counter, 'url_domain_registrar': self.domain_registrar }\n", " return data\n", "\n", "######################################\n", "\"\"\"\n", "Execute the main program code.\n", "\n", "TODO: this code must figure out the correct JSON file\n", "if multiple generated files are present.\n", "\"\"\"\n", "if __name__ == '__main__':\n", "\n", " if plot_only == False:\n", " write_obj = write_operations()\n", " write_obj.set_filename()\n", " data = write_obj.fetch_and_store_url_data(urls, use_file)\n", "\n", " url_str_pattern = re.compile(r\"(^[a-z]+://)?([^/]*)\")\n", "\n", " if os.path.exists(filename):\n", " with open(filename, \"r\") as json_file:\n", " json_data = json.load(json_file)\n", " else:\n", " json_data = data\n", "\n", " # Get URLs from an available JSON data\n", " for key_url in json_data.keys():\n", " \n", " print(\"Generating statistics: %s\" % key_url)\n", "\n", " fig = plt.figure()\n", " fig_params = {\n", " 
'xtick.labelsize': 8,\n", " 'figure.figsize': [9,8]\n", " # 'figure.constrained_layout.use': True\n", " }\n", " plt.rcParams.update(fig_params)\n", " \n", " domain_string = url_str_pattern.split(key_url)[2].replace('.','')\n", " summary = data_visualization(key_url, json_data)\n", " \n", " summary_registrars = summary.get_registrar_count_summary()['fetched_domains']\n", "\n", " x_r = list(summary_registrars.keys())\n", " y_r = list(summary_registrars.values())\n", " \n", " # Show bar values\n", " for index,data in enumerate(y_r):\n", " plt.text(x=index, y=data+0.5, s=data, fontdict=dict(fontsize=8))\n", " \n", " title_r = \"Domains associated with HTML URL data (\" + key_url + \")\"\n", " xlabel_r = \"Fetched domains\"\n", " ylabel_r = \"Domain count\"\n", "\n", " plt.bar(x_r, y_r, color=\"green\", edgecolor=\"black\")\n", " plt.title(title_r)\n", " plt.xlabel(xlabel_r)\n", " plt.ylabel(ylabel_r)\n", " plt.xticks(rotation=45, horizontalalignment=\"right\")\n", "\n", " if save_plot_images == True:\n", " plt.savefig(os.getcwd() + \"/\" + \"domain_figure_\" + domain_string + \".png\", dpi=plot_images_dpi)\n", " plt.show()\n", "\n", " #fig_u = plt.figure()\n", " \n", " #summary_urls = summary.get_urls_count_summary()\n", " \n", " #x_u = list(summary_urls.keys())\n", " #y_u = list(summary_urls.values())\n", " #title_u = \"Local and external URL references (\" + key_url + \")\"\n", " #xlabel_u = \"Fetched URLs\"\n", " #ylabel_u = \"URL count\"\n", " \n", " #plt.bar(x_u, y_u, color=\"blue\", edgecolor='black')\n", " #plt.title(title_u)\n", " #plt.xlabel(xlabel_u)\n", " #plt.ylabel(ylabel_u)\n", " #plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 
3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }