diff --git a/pysitemap/crawler.py b/pysitemap/crawler.py index d726a6b..2101cce 100644 --- a/pysitemap/crawler.py +++ b/pysitemap/crawler.py @@ -101,7 +101,11 @@ class Crawler: self.errlog("Error {} at url {}".format(response.status_code, url)) return - tree = html.fromstring(response.text) + try: + tree = html.fromstring(response.text) + except ValueError as e: + self.errlog(repr(e)) + tree = html.fromstring(response.content) for link_tag in tree.findall('.//a'): link = link_tag.attrib.get('href', '') newurl = urlparse.urljoin(self.url, link) diff --git a/setup.py b/setup.py index 41be5e6..c6b0ecb 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setup( version=get_version( major=0, minor=5, - build=1, + build=2, ), packages=find_packages(exclude=EXCLUDE_FROM_PACKAGES), include_package_data=True,