|
|
- import asyncio
- from aiofile import AIOFile, Reader, Writer
- import logging
- from datetime import datetime, timezone, timedelta
-
class XMLWriter:
    """Asynchronously write a sitemap ``<urlset>`` document to a file.

    The output declares the sitemaps.org 0.9 namespace and the Google image
    sitemap extension namespace, then contains one ``<url>`` element per
    entry passed to :meth:`write`.
    """

    # Format used to parse incoming timestamps (the HTTP-date layout, e.g.
    # "Wed, 21 Oct 2015 07:28:00 GMT").
    _TIMESTAMP_FORMAT = "%a, %d %b %Y %H:%M:%S %Z"

    # (key looked up in an image dict, XML tag it is written as), in the
    # order the tags are emitted.
    _IMAGE_FIELDS = (
        ("src", "image:loc"),
        ("title", "image:title"),
        ("caption", "image:caption"),
        ("geo_location", "image:geo_location"),
        ("license", "image:license"),
    )

    def __init__(self, filename: str):
        """Remember the path of the sitemap file to be written.

        Args:
            filename: Path of the output file; it is opened in ``'w'`` mode
                by :meth:`write`.
        """
        self.filename = filename

    async def write(self, urls, timezone_offset):
        """Write all *urls* to ``self.filename`` as a sitemap XML document.

        Args:
            urls: Iterable of ``(location, info)`` pairs. ``info`` is indexed
                as ``info[1]`` timestamp, ``info[2]`` changefreq, ``info[3]``
                priority and ``info[4]`` image data (``info[0]`` is not read
                here). Timestamp, changefreq and priority may be ``None`` to
                omit the corresponding element. Values are expected to be
                raw (not already XML-escaped); escaping is applied here.
            timezone_offset: Offset from UTC, in hours, of the timezone in
                which ``<lastmod>`` values are expressed.

        Raises:
            ValueError: If a timestamp does not match the expected format.
        """
        # Loop-invariant: build the target timezone once.
        target_tz = timezone(timedelta(hours=timezone_offset))

        async with AIOFile(self.filename, 'w') as aiodf:
            writer = Writer(aiodf)
            await writer('<?xml version="1.0" encoding="utf-8"?>\n')
            await writer(
                '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
                ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
                ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'
                ' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">\n')
            await aiodf.fsync()

            for data in urls:
                entry = self._format_url_entry(data, target_tz)
                await writer('<url>{}</url>\n'.format(entry))
            await aiodf.fsync()

            await writer('</urlset>')
            await aiodf.fsync()

    @staticmethod
    def _format_url_entry(data, target_tz):
        """Return the inner XML of one ``<url>`` element for *data*.

        Args:
            data: A ``(location, info)`` pair as described in :meth:`write`.
            target_tz: ``tzinfo`` in which ``<lastmod>`` is expressed.

        Returns:
            The concatenated child elements, without the ``<url>`` wrapper.
        """
        # Function-scope import keeps the file's import block untouched.
        from xml.sax.saxutils import escape

        def element(tag, value):
            # Text content must be entity-escaped ('&', '<', '>'), otherwise
            # e.g. a query string with '&' yields a malformed document.
            return "<{0}>{1}</{0}>".format(tag, escape(str(value)))

        info = data[1]
        timestamp = info[1]
        changefreq = info[2]
        priority = info[3]
        image_data = info[4]

        parts = [element("loc", data[0])]

        if timestamp is not None:
            # strptime() with %Z returns a *naive* datetime; calling
            # astimezone() on it directly would interpret it as the machine's
            # local time. HTTP-date timestamps are GMT, so attach UTC first.
            # NOTE(review): assumes timestamps are always GMT/UTC -- confirm
            # against the producer of `urls`.
            parsed = datetime.strptime(timestamp, XMLWriter._TIMESTAMP_FORMAT)
            localized = parsed.replace(tzinfo=timezone.utc).astimezone(tz=target_tz)
            parts.append(element("lastmod", localized.isoformat()))

        if changefreq is not None:
            parts.append(element("changefreq", changefreq))

        if priority is not None:
            parts.append(element("priority", priority))

        # `image_data` is iterated as a collection of groups, each of which
        # yields dict-like image descriptions; one <image:image> element is
        # emitted per description. Empty or None image data emits nothing.
        for image in image_data or ():
            for arg in image:
                image_xml = "".join(
                    element(tag, arg[key])
                    for key, tag in XMLWriter._IMAGE_FIELDS
                    if key in arg
                )
                parts.append("<image:image>{}</image:image>".format(image_xml))

        return "".join(parts)
|