Sitemap generator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

46 lines
1.2 KiB

9 years ago
8 years ago
4 years ago
4 years ago
4 years ago
  1. import sys
  2. import logging
  3. from pysitemap import crawler
  4. if __name__ == '__main__':
  5. root_url = 'https://mytestsite.com/'
  6. crawler(
  7. root_url,
  8. out_file='sitemap.xml',
  9. maxtasks=100,
  10. verifyssl=False,
  11. findimages=True,
  12. images_this_domain=True,
  13. exclude_urls=[
  14. '/git/.*(action|commit|stars|activity|followers|following|\?sort|issues|pulls|milestones|archive|/labels$|/wiki$|/releases$|/forks$|/watchers$)',
  15. '/git/user/(sign_up|login|forgot_password)',
  16. '/css',
  17. '/js',
  18. 'favicon',
  19. '[a-zA-Z0-9]*\.[a-zA-Z0-9]*$',
  20. '\?\.php',
  21. ],
  22. exclude_imgs=[
  23. 'logo\.(png|jpg)',
  24. 'avatars',
  25. 'avatar_default',
  26. '/symbols/'
  27. ],
  28. image_root_urls=[
  29. 'https://mytestsite.com/photos/',
  30. 'https://mytestsite.com/git/',
  31. ],
  32. headers={'User-Agent': 'Crawler'},
  33. # TZ offset in hours
  34. timezone_offset=3,
  35. changefreq={
  36. "/git/": "weekly",
  37. "/": "monthly"
  38. },
  39. priorities={
  40. "/git/": 0.7,
  41. "/metasub/": 0.6,
  42. "/": 0.5
  43. }
  44. )