Browse Source

0.2.8

pysitemap-python-2.7
Kamo Petrosyan 9 years ago
parent
commit
347b4f7380
4 changed files with 8 additions and 3750 deletions
  1. +3
    -3
      pysitemap/crawler.py
  2. +4
    -6
      run.py
  3. +1
    -1
      setup.py
  4. +0
    -3740
      sitemap.xml

+ 3
- 3
pysitemap/crawler.py View File

@ -21,7 +21,7 @@ class Crawler:
self.urls = set([url])
self.visited = set([url])
self.exts = ['htm', 'php']
self.allowed_regex = '(\w+)\.((?!htm)(?!rar)\w+)$'
self.allowed_regex = '\.((?!htm)(?!php)\w+)$'
def set_exts(self, exts):
self.exts = exts
@ -33,7 +33,7 @@ class Crawler:
allowed_regex = ''
for ext in self.exts:
allowed_regex += '(!{})'.format(ext)
self.allowed_regex = '(\w+)\.({}\w+)$'.format(allowed_regex)
self.allowed_regex = '\.({}\w+)$'.format(allowed_regex)
def crawl(self):
self.regex = re.compile(self.allowed_regex)
@ -70,7 +70,7 @@ class Crawler:
def write_xml(self):
of = open(self.outputfile, 'w')
of.write('<?xml version="1.0" encoding="utf-8"?><!--Generated by Screaming Frog SEO Spider 2,55-->\n')
of.write('<?xml version="1.0" encoding="utf-8"?>\n')
of.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n')
url_str = '<url><loc>{}</loc></url>\n'
while self.visited:


+ 4
- 6
run.py View File

@ -1,15 +1,13 @@
import pysitemap
import datetime
"""
Example script
"""
if __name__=='__main__':
url = 'http://www.techelec.ru/' # URL to start crawling from
url = 'http://www.ltsvet.ru/' # URL to start crawling from
logfile = 'errlog.log' # path to logfile
oformat = 'xml' # output format
crawl = pysitemap.Crawler(url=url, logfile=logfile, oformat=oformat)
print datetime.datetime.now()
crawl.crawl()
print datetime.datetime.now()
outputfile = '/srv/www/site/sitemap.xml' # path to output file
crawl = pysitemap.Crawler(url=url, logfile=logfile, oformat=oformat, outputfile=outputfile)
crawl.crawl()

+ 1
- 1
setup.py View File

@ -13,7 +13,7 @@ setup(
version=get_version(
major=0,
minor=2,
build=3,
build=8,
),
packages=find_packages(exclude=EXCLUDE_FROM_PACKAGES),
include_package_data=True,


+ 0
- 3740
sitemap.xml
File diff suppressed because it is too large
View File


Loading…
Cancel
Save