I have not tested this!
I must admit that I have not tried using Django models in Scrapy, but it says:
To process the deleted.rss feed, you can use an XMLFeedSpider (see the Scrapy documentation). Something along these lines:
from scrapy import log
from scrapy.contrib.spiders import XMLFeedSpider
from myproject.items import DeletedUrlItem
class MySpider(XMLFeedSpider):
    """Spider that walks the deleted-URLs RSS feed and emits one item per entry.

    The feed listed in ``start_urls`` is iterated node by node; each node
    whose tag matches ``itertag`` is handed to :meth:`parse_node`.
    """

    domain_name = 'example.com'
    start_urls = ['http://www.example.com/deleted.rss']
    # Feed iteration settings consumed by XMLFeedSpider.
    iterator = 'iternodes'
    itertag = 'item'

    def parse_node(self, response, node):
        """Build a ``DeletedUrlItem`` from a single feed node.

        Args:
            response: The response the node was taken from (unused here).
            node: Selector for the current ``itertag`` element.

        Returns:
            A ``DeletedUrlItem`` whose ``url`` field holds the extracted data.
        """
        # A fresh item per node: the original subscripted its second
        # parameter while reading from an undefined ``node`` name.
        item = DeletedUrlItem()
        # '#path/to/url' is a placeholder; replace it with the real path to
        # the URL inside each feed entry.
        # NOTE(review): extract() presumably returns a list of matches -
        # confirm that consumers of item['url'] expect that.
        item['url'] = node.select('#path/to/url').extract()
        return item
# Module-level instance of the MySpider class.
# NOTE(review): presumably this is how the targeted Scrapy release discovers
# the spider - confirm against the installed version.
SPIDER = MySpider()
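This is not a finished spider, but IIRC RSS files are plain XML. I don't know what deleted.rss looks like, but you should be able to work out how to extract the URLs from the XML. The example imports myproject.items.DeletedUrlItem, so you need to create DeletedUrlItem, with something like this: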
DeletedUrlItem:
class DeletedUrlItem(Item):
    """Scraped item that declares a single field, ``url``."""

    # The only declared field; stores whatever is assigned under 'url'.
    url = Field()
Call the Django Model API from a Scrapy ItemPipeline - working with the model directly rather than going through DjangoItem:
from scrapy.core.exceptions import DropItem
import django.Model.yourModel
class DeleteUrlPipeline(object):
    """Item pipeline that deletes the database row matching each item's URL.

    NOTE(review): the bare name ``yourModel`` is a placeholder for the real
    Django model and must be importable under that name in this module.
    """

    def process_item(self, spider, item):
        """Delete the stored record for ``item['url']`` and drop the item.

        Args:
            spider: The spider that produced the item (unused).
            item: Scraped item carrying a ``url`` field.

        Returns:
            The item unchanged when it carries no URL, so that any later
            pipeline stage still receives it.

        Raises:
            DropItem: After the matching record has been deleted, or when
                no record exists for the URL.

        NOTE(review): the (spider, item) argument order is kept as written;
        confirm it matches the installed Scrapy release.
        """
        url = item['url']
        if not url:
            # Nothing to delete; pass the item through instead of
            # implicitly returning None.
            return item

        try:
            stale = yourModel.objects.get(url=url)
        except yourModel.DoesNotExist:
            # The feed may list URLs that were never stored; that should not
            # crash the pipeline.
            raise DropItem("Not in database: %s" % item)

        stale.delete()
        raise DropItem("Deleted: %s" % item)
Note the call to delete_item.delete().
, , :-), , .