Sitemap generator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

44 lines
1.2 KiB

9 years ago
8 years ago
4 years ago
4 years ago
  1. import sys
  2. import logging
  3. from pysitemap import crawler
  4. if __name__ == '__main__':
  5. root_url = 'https://mytestsite.com/'
  6. crawler(
  7. root_url,
  8. out_file='sitemap.xml',
  9. maxtasks=100,
  10. verifyssl=False,
  11. exclude_urls=[
  12. '/git/.*(action|commit|stars|activity|followers|following|\?sort|issues|pulls|milestones|archive|/labels$|/wiki$|/releases$|/forks$|/watchers$)',
  13. '/git/user/(sign_up|login|forgot_password)',
  14. '/css',
  15. '/js',
  16. 'favicon',
  17. '[a-zA-Z0-9]*\.[a-zA-Z0-9]*$',
  18. '\?\.php',
  19. ],
  20. exclude_imgs=[
  21. 'logo\.(png|jpg)',
  22. 'avatars',
  23. 'avatar_default',
  24. '/symbols/'
  25. ],
  26. image_root_urls=[
  27. 'https://mytestsite.com/photos/',
  28. 'https://mytestsite.com/git/',
  29. ],
  30. headers={'User-Agent': 'Crawler'},
  31. # TZ offset in hours
  32. timezone_offset=3,
  33. changefreq={
  34. "/git/": "weekly",
  35. "/": "monthly"
  36. },
  37. priorities={
  38. "/git/": 0.7,
  39. "/metasub/": 0.6,
  40. "/": 0.5
  41. }
  42. )