diff --git a/run.py b/run.py
index a8acbbe..370b7a8 100644
--- a/run.py
+++ b/run.py
@@ -3,13 +3,46 @@ import logging
 from pysitemap import crawler
 
 if __name__ == '__main__':
-    if '--iocp' in sys.argv:
-        from asyncio import events, windows_events
-        sys.argv.remove('--iocp')
-        logging.info('using iocp')
-        el = windows_events.ProactorEventLoop()
-        events.set_event_loop(el)
+    root_url = 'https://mytestsite.com/'
+    crawler(
+        root_url,
+        out_file='sitemap.xml',
+        maxtasks=100,
+        # NOTE(review): verifyssl=False presumably disables TLS certificate
+        # verification -- confirm this is only meant for the test site.
+        verifyssl=False,
+        # Patterns containing backslashes are raw strings so that regex
+        # escapes such as \? and \. are not invalid Python string escapes.
+        exclude_urls=[
+            r'/git/.*(action|commit|stars|activity|followers|following|\?sort|issues|pulls|milestones|archive|/labels$|/wiki$|/releases$|/forks$|/watchers$)',
+            '/git/user/(sign_up|login|forgot_password)',
+            '/css',
+            '/js',
+            'favicon',
+            r'[a-zA-Z0-9]*\.[a-zA-Z0-9]*$',
+            r'\?\.php',
+        ],
+        exclude_imgs=[
+            r'logo\.(png|jpg)',
+            'avatars',
+            'avatar_default',
+            '/symbols/'
+        ],
+        image_root_urls=[
+            'https://mytestsite.com/photos/',
+            'https://mytestsite.com/git/',
+        ],
+        headers={'User-Agent': 'Crawler'},
+        # TZ offset in hours
+        timezone_offset=3,
+        changefreq={
+            "/git/": "weekly",
+            "/": "monthly"
+        },
+        priorities={
+            "/git/": 0.7,
+            "/metasub/": 0.6,
+            "/": 0.5
+        }
+    )
 
-    # root_url = sys.argv[1]
-    root_url = 'https://www.metpromstroi.ru'
-    crawler(root_url, out_file='sitemap.xml')
\ No newline at end of file