Source code for scrapy.downloadermiddlewares.cookies

import logging
from collections import defaultdict

from scrapy.exceptions import NotConfigured
from scrapy.http import Response
from scrapy.http.cookies import CookieJar
from scrapy.utils.python import to_unicode


# Module-level logger named after this module; the middleware's debug helpers
# (_debug_cookie / _debug_set_cookie) emit their messages through it.
logger = logging.getLogger(__name__)


class CookiesMiddleware:
    """This middleware enables working with sites that need cookies.

    One cookie jar is kept per value of ``request.meta["cookiejar"]``
    (requests without that key share the jar stored under ``None``).
    Requests and responses whose ``meta`` has a truthy
    ``dont_merge_cookies`` are passed through untouched.
    """

    def __init__(self, debug=False):
        """Create the middleware.

        :param debug: when true, sent ``Cookie`` and received ``Set-Cookie``
            headers are logged at DEBUG level.
        """
        # Jars are created lazily, the first time a jar key is looked up.
        self.jars = defaultdict(CookieJar)
        self.debug = debug

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from crawler settings.

        :raises NotConfigured: if the ``COOKIES_ENABLED`` setting is false.
        """
        if not crawler.settings.getbool('COOKIES_ENABLED'):
            raise NotConfigured
        return cls(crawler.settings.getbool('COOKIES_DEBUG'))

    def process_request(self, request, spider):
        """Store the request's cookies in its jar and set the Cookie header.

        Always returns ``None``.
        """
        if request.meta.get('dont_merge_cookies', False):
            return

        jar = self.jars[request.meta.get("cookiejar")]
        for cookie in self._get_request_cookies(jar, request):
            jar.set_cookie_if_ok(cookie, request)

        # Any Cookie header already present on the request is discarded;
        # the header is then rebuilt from the jar's contents.
        request.headers.pop('Cookie', None)
        jar.add_cookie_header(request)
        self._debug_cookie(request, spider)

    def process_response(self, request, response, spider):
        """Record the response's ``Set-Cookie`` headers in the request's jar.

        Returns ``response`` unchanged.
        """
        if request.meta.get('dont_merge_cookies', False):
            return response

        # extract cookies from Set-Cookie and drop invalid/expired cookies
        jar = self.jars[request.meta.get("cookiejar")]
        jar.extract_cookies(response, request)
        self._debug_set_cookie(response, spider)

        return response

    def _debug_cookie(self, request, spider):
        """Log the ``Cookie`` headers of ``request`` when debugging is on."""
        if not self.debug:
            return
        cl = [to_unicode(c, errors='replace')
              for c in request.headers.getlist('Cookie')]
        if cl:
            cookies = "\n".join("Cookie: {}\n".format(c) for c in cl)
            msg = "Sending cookies to: {}\n{}".format(request, cookies)
            logger.debug(msg, extra={'spider': spider})

    def _debug_set_cookie(self, response, spider):
        """Log the ``Set-Cookie`` headers of ``response`` when debugging is on."""
        if not self.debug:
            return
        cl = [to_unicode(c, errors='replace')
              for c in response.headers.getlist('Set-Cookie')]
        if cl:
            cookies = "\n".join("Set-Cookie: {}\n".format(c) for c in cl)
            msg = "Received cookies from: {}\n{}".format(response, cookies)
            logger.debug(msg, extra={'spider': spider})

    @staticmethod
    def _cookie_text(value):
        """Return ``value`` with ``bytes`` decoded as UTF-8; others unchanged."""
        # Without this, '%s' % b'x' would render as "b'x'" in the cookie string.
        if isinstance(value, bytes):
            return value.decode('utf-8', errors='replace')
        return value

    def _format_cookie(self, cookie):
        """Render a cookie dict as a ``Set-Cookie``-style string.

        :param cookie: mapping with ``name`` and ``value`` keys and optional
            ``path`` / ``domain`` keys; values may be ``str`` or ``bytes``.
        :returns: ``"name=value"`` followed by ``"; Path=..."`` and
            ``"; Domain=..."`` for each of those keys that is present and
            truthy.
        :raises KeyError: if ``name`` or ``value`` is missing.
        """
        text = self._cookie_text
        cookie_str = '%s=%s' % (text(cookie['name']), text(cookie['value']))

        if cookie.get('path', None):
            cookie_str += '; Path=%s' % text(cookie['path'])
        if cookie.get('domain', None):
            cookie_str += '; Domain=%s' % text(cookie['domain'])

        return cookie_str

    def _get_request_cookies(self, jar, request):
        """Turn ``request.cookies`` into cookie objects via ``jar``.

        ``request.cookies`` may be a ``{name: value}`` dict or an iterable of
        cookie dicts as accepted by :meth:`_format_cookie`.
        """
        if not request.cookies:
            # Nothing to parse: skip building the synthetic response.
            return []

        if isinstance(request.cookies, dict):
            cookie_list = [
                {'name': k, 'value': v}
                for k, v in request.cookies.items()
            ]
        else:
            cookie_list = request.cookies

        # The jar parses cookies from response headers, so the request's
        # cookies are presented to it as Set-Cookie headers of a fake response.
        cookies = [self._format_cookie(x) for x in cookie_list]
        headers = {'Set-Cookie': cookies}
        response = Response(request.url, headers=headers)

        return jar.make_cookies(response, request)