Sitemap generator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
2.6 KiB

  1. import asyncio
  2. from aiofile import AIOFile, Reader, Writer
  3. import logging
  4. from datetime import datetime, timezone, timedelta
  5. class XMLWriter():
  6. def __init__(self, filename: str):
  7. self.filename = filename
  8. async def write(self, urls, timezone_offset):
  9. async with AIOFile(self.filename, 'w') as aiodf:
  10. writer = Writer(aiodf)
  11. await writer('<?xml version="1.0" encoding="utf-8"?>\n')
  12. await writer(
  13. '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
  14. ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
  15. ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'
  16. ' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">\n')
  17. await aiodf.fsync()
  18. for data in urls:
  19. timestamp = data[1][1]
  20. changefreq = data[1][2]
  21. priority = data[1][3]
  22. image_data = data[1][4]
  23. url = "<loc>{}</loc>".format(data[0])
  24. if timestamp is not None:
  25. timestamp = datetime.strptime(timestamp, "%a, %d %b %Y %H:%M:%S %Z").astimezone(tz=timezone(timedelta(hours=timezone_offset))).isoformat()
  26. url += "<lastmod>{}</lastmod>".format(str(timestamp))
  27. if changefreq is not None:
  28. url += "<changefreq>{}</changefreq>".format(str(changefreq))
  29. if priority is not None:
  30. url += "<priority>{}</priority>".format(str(priority))
  31. if len(image_data) > 0:
  32. for image in image_data:
  33. for arg in image:
  34. image_xml = ""
  35. if 'src' in arg: image_xml += "<image:loc>{}</image:loc>".format(arg['src'])
  36. if 'title' in arg: image_xml += "<image:title>{}</image:title>".format(arg['title'])
  37. if 'caption' in arg: image_xml += "<image:caption>{}</image:caption>".format(arg['caption'])
  38. if 'geo_location' in arg: image_xml += "<image:geo_location>{}</image:geo_location>".format(arg['geo_location'])
  39. if 'license' in arg: image_xml += "<image:license>{}</image:license>".format(arg['license'])
  40. url += "<image:image>{}</image:image>".format(image_xml)
  41. await writer('<url>{}</url>\n'.format(url))
  42. await aiodf.fsync()
  43. await writer('</urlset>')
  44. await aiodf.fsync()