Module `nse_daily.bse`

Expand source code

# get bse daily bhav
# https://www.bseindia.com/download/BhavCopy/Equity/EQ130122_CSV.ZIP
from datetime import datetime, timedelta
from typing import Optional, Any

import requests
import os
from pathlib import Path
from requests.adapters import HTTPAdapter
from requests.packages.urllib3 import Retry
from numpy import random
from time import sleep
from multiprocessing.pool import ThreadPool

from nse_daily.common import _get_exception, _errorify
from fake_useragent import UserAgent


class BSEDaily(object):
    """
    BSE Daily
    """

    def __init__(self,
                 default_date_pattern: Optional[str] = '%Y%m%d',
                 file_pattern: Optional[str] = "EQ{date_part}_CSV.ZIP",
                 file_date_part_format: Optional[str] = '%d%m%y',
                 uri_pattern: Optional[str] = "https://www.bseindia.com/download/BhavCopy/Equity/{file_name}",
                 download_path: Optional[str] = None
                 ):
        """
        BSE Daily bhav copy can be downloaded from the following
            URI: https://www.bseindia.com/download/BhavCopy/Equity/EQ130122_CSV.ZIP
        Parameters are set by default to match the above URI.
        If the URI Changes, please change the parameters below, to avoid breaking of code parameters.
        :param default_date_pattern: The default input date pattern to be used for parsing dates passed to the functions
        :param file_pattern: The pattern of the BSE Daily bhav file, i.e. for EQ130122_CSV.zip pass in EQ{date_part}_CSV.ZIP
        :param file_date_part_format: The date format of the date part in the DSE Daily bhav file pattern i.e. for 130122 pass in %d%m%y
        :param uri_pattern: The uri from where the BSE Daily bhav copy needs to be downloaded
        :param download_path: The local filesystem path where the BSE Daily bhav copy will be downloaded
        """
        self.default_date_pattern = default_date_pattern
        self.file_pattern = file_pattern
        self.file_date_part_format = file_date_part_format
        self.uri_pattern = uri_pattern
        # self.uri_yy_mm_format = uri_yy_mm_format
        self.download_path = download_path
        if self.download_path is None or str(self.download_path).strip() == '':
            appdir = str(Path.cwd())
            self.download_path = os.path.join(appdir, 'downloads')
        self._create_session()

    def _create_session(self):
        """
        Function to create and set the requests.Session
        :return:
        """
        self.session = requests.Session()
        retry = Retry(total=5,
                      read=5,
                      connect=5,
                      status_forcelist=(500, 502, 504),
                      method_whitelist=frozenset(['GET', 'POST']),
                      backoff_factor=1
                      )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
        ua = UserAgent()
        self.session.headers.update({"User-Agent": str(ua.chrome)})

    def _check_reponse(self, response: requests.Response):
        try:
            response.raise_for_status()
            return True
        except requests.exceptions.HTTPError:
            print('HTTP Error %s', response.reason)
            print(response.text)
            return False

    def _download_by_date(self, file_date: datetime) -> (str, Any):
        """
        Internal function to request the download for a single date. Function has a random uniform distribution sleep
        time between 1 to 3 seconds to avoid getting blocked during multiple concurrent requests. Function also
        checks and skips the download if date is a weekend.
        :param file_date: The date for which the download is being requested
        :return: (file_date, download_file_path)
        """
        ################################################################################
        # Adding random sleep time to avoid being blocked for multiple requests
        sleep(random.uniform(1, 3))
        #########################################################################
        daynum = file_date.weekday()
        if daynum >= 5:
            print("{} is weekend, file skipped".format(file_date.strftime(self.default_date_pattern)))
            return file_date, None
        file_date_str = file_date.strftime(self.file_date_part_format).upper()
        # nse_yy_mm = file_date.strftime(self.uri_yy_mm_format).upper()
        file_name = self.file_pattern.format(date_part=file_date_str)
        uri = self.uri_pattern.format(file_name=file_name)
        download_file_path = os.path.join(self.download_path, file_name)
        if not os.path.exists(self.download_path):
            os.makedirs(self.download_path)
        print(uri)
        response = self.session.request(method='GET', url=uri, allow_redirects=True)
        # r = requests.get(nse_uri, allow_redirects=True)
        status = self._check_reponse(response)
        if not status:
            return file_date, None
        content_type = response.headers.get('content-type')
        print(content_type)

        if content_type in ['application/zip', 'application/x-zip-compressed', 'application/x-7z-compressed',
                            'text/csv', 'application/gzip', 'application/x-tar', 'text/plain']:
            with open(download_file_path, 'wb') as file_pointer:
                file_pointer.write(response.content)
            print("{} download complete".format(file_name))
        else:
            e = _errorify("INVALID_CONTENT_TYPE",f"content-type {content_type} being returned is not supported..")
            raise Exception(e)
        return file_date, download_file_path

    def download_by_date(self, date_str, date_format: Optional[str] = '%Y%m%d'):
        """
        Function to download the BSE Daily bhav copy for a date
        :param date_str: Input date string i.e. '20210105' for 5th Jab 2021
        :param date_format: The date format of the input date string, default = '%Y%m%d'
        :return:
        """
        try:
            file_date = datetime.strptime(date_str, date_format)
            return self._download_by_date(file_date)
        except:
            e = _get_exception()
            raise Exception(e)

    def download_by_date_range(self, date_start: str, date_end: str, date_format: Optional[str] = '%Y%m%d',
                               num_workers: Optional[int] = 1):
        """
        Function to download multiple Daily BSE bhav copies for a date range

        :param str date_start: The start date of the date range
        :param str date_end: The end date of the date range
        :param str date_format: The format of the input dates, default='%Y%m%d'
        :param str num_workers: The number of workers to be utilized to get the files, default=1
        :return:
        """
        try:
            start = datetime.strptime(date_start, date_format)
            end = datetime.strptime(date_end, date_format)
            l_dates = [start + timedelta(days=x) for x in range(0, (end - start).days)]
            tpool = ThreadPool(processes=num_workers)
            l_e = tpool.map(self._download_by_date, l_dates)
            return l_e
        except:
            e = _get_exception()
            raise Exception(e)

Classes

class BSEDaily (default_date_pattern: Optional[str] = '%Y%m%d', file_pattern: Optional[str] = 'EQ{date_part}_CSV.ZIP', file_date_part_format: Optional[str] = '%d%m%y', uri_pattern: Optional[str] = 'https://www.bseindia.com/download/BhavCopy/Equity/{file_name}', download_path: Optional[str] = None)

BSE Daily

BSE Daily bhav copy can be downloaded from the following URI: https://www.bseindia.com/download/BhavCopy/Equity/EQ130122_CSV.ZIP Parameters are set by default to match the above URI. If the URI Changes, please change the parameters below, to avoid breaking of code parameters. :param default_date_pattern: The default input date pattern to be used for parsing dates passed to the functions :param file_pattern: The pattern of the BSE Daily bhav file, i.e. for EQ130122_CSV.zip pass in EQ{date_part}_CSV.ZIP :param file_date_part_format: The date format of the date part in the DSE Daily bhav file pattern i.e. for 130122 pass in %d%m%y :param uri_pattern: The uri from where the BSE Daily bhav copy needs to be downloaded :param download_path: The local filesystem path where the BSE Daily bhav copy will be downloaded

Expand source code

class BSEDaily(object):
    """
    BSE Daily
    """

    def __init__(self,
                 default_date_pattern: Optional[str] = '%Y%m%d',
                 file_pattern: Optional[str] = "EQ{date_part}_CSV.ZIP",
                 file_date_part_format: Optional[str] = '%d%m%y',
                 uri_pattern: Optional[str] = "https://www.bseindia.com/download/BhavCopy/Equity/{file_name}",
                 download_path: Optional[str] = None
                 ):
        """
        BSE Daily bhav copy can be downloaded from the following
            URI: https://www.bseindia.com/download/BhavCopy/Equity/EQ130122_CSV.ZIP
        Parameters are set by default to match the above URI.
        If the URI Changes, please change the parameters below, to avoid breaking of code parameters.
        :param default_date_pattern: The default input date pattern to be used for parsing dates passed to the functions
        :param file_pattern: The pattern of the BSE Daily bhav file, i.e. for EQ130122_CSV.zip pass in EQ{date_part}_CSV.ZIP
        :param file_date_part_format: The date format of the date part in the DSE Daily bhav file pattern i.e. for 130122 pass in %d%m%y
        :param uri_pattern: The uri from where the BSE Daily bhav copy needs to be downloaded
        :param download_path: The local filesystem path where the BSE Daily bhav copy will be downloaded
        """
        self.default_date_pattern = default_date_pattern
        self.file_pattern = file_pattern
        self.file_date_part_format = file_date_part_format
        self.uri_pattern = uri_pattern
        # self.uri_yy_mm_format = uri_yy_mm_format
        self.download_path = download_path
        if self.download_path is None or str(self.download_path).strip() == '':
            appdir = str(Path.cwd())
            self.download_path = os.path.join(appdir, 'downloads')
        self._create_session()

    def _create_session(self):
        """
        Function to create and set the requests.Session
        :return:
        """
        self.session = requests.Session()
        retry = Retry(total=5,
                      read=5,
                      connect=5,
                      status_forcelist=(500, 502, 504),
                      method_whitelist=frozenset(['GET', 'POST']),
                      backoff_factor=1
                      )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
        ua = UserAgent()
        self.session.headers.update({"User-Agent": str(ua.chrome)})

    def _check_reponse(self, response: requests.Response):
        try:
            response.raise_for_status()
            return True
        except requests.exceptions.HTTPError:
            print('HTTP Error %s', response.reason)
            print(response.text)
            return False

    def _download_by_date(self, file_date: datetime) -> (str, Any):
        """
        Internal function to request the download for a single date. Function has a random uniform distribution sleep
        time between 1 to 3 seconds to avoid getting blocked during multiple concurrent requests. Function also
        checks and skips the download if date is a weekend.
        :param file_date: The date for which the download is being requested
        :return: (file_date, download_file_path)
        """
        ################################################################################
        # Adding random sleep time to avoid being blocked for multiple requests
        sleep(random.uniform(1, 3))
        #########################################################################
        daynum = file_date.weekday()
        if daynum >= 5:
            print("{} is weekend, file skipped".format(file_date.strftime(self.default_date_pattern)))
            return file_date, None
        file_date_str = file_date.strftime(self.file_date_part_format).upper()
        # nse_yy_mm = file_date.strftime(self.uri_yy_mm_format).upper()
        file_name = self.file_pattern.format(date_part=file_date_str)
        uri = self.uri_pattern.format(file_name=file_name)
        download_file_path = os.path.join(self.download_path, file_name)
        if not os.path.exists(self.download_path):
            os.makedirs(self.download_path)
        print(uri)
        response = self.session.request(method='GET', url=uri, allow_redirects=True)
        # r = requests.get(nse_uri, allow_redirects=True)
        status = self._check_reponse(response)
        if not status:
            return file_date, None
        content_type = response.headers.get('content-type')
        print(content_type)

        if content_type in ['application/zip', 'application/x-zip-compressed', 'application/x-7z-compressed',
                            'text/csv', 'application/gzip', 'application/x-tar', 'text/plain']:
            with open(download_file_path, 'wb') as file_pointer:
                file_pointer.write(response.content)
            print("{} download complete".format(file_name))
        else:
            e = _errorify("INVALID_CONTENT_TYPE",f"content-type {content_type} being returned is not supported..")
            raise Exception(e)
        return file_date, download_file_path

    def download_by_date(self, date_str, date_format: Optional[str] = '%Y%m%d'):
        """
        Function to download the BSE Daily bhav copy for a date
        :param date_str: Input date string i.e. '20210105' for 5th Jab 2021
        :param date_format: The date format of the input date string, default = '%Y%m%d'
        :return:
        """
        try:
            file_date = datetime.strptime(date_str, date_format)
            return self._download_by_date(file_date)
        except:
            e = _get_exception()
            raise Exception(e)

    def download_by_date_range(self, date_start: str, date_end: str, date_format: Optional[str] = '%Y%m%d',
                               num_workers: Optional[int] = 1):
        """
        Function to download multiple Daily BSE bhav copies for a date range

        :param str date_start: The start date of the date range
        :param str date_end: The end date of the date range
        :param str date_format: The format of the input dates, default='%Y%m%d'
        :param str num_workers: The number of workers to be utilized to get the files, default=1
        :return:
        """
        try:
            start = datetime.strptime(date_start, date_format)
            end = datetime.strptime(date_end, date_format)
            l_dates = [start + timedelta(days=x) for x in range(0, (end - start).days)]
            tpool = ThreadPool(processes=num_workers)
            l_e = tpool.map(self._download_by_date, l_dates)
            return l_e
        except:
            e = _get_exception()
            raise Exception(e)

Methods

def download_by_date(self, date_str, date_format: Optional[str] = '%Y%m%d')

Function to download the BSE Daily bhav copy for a date :param date_str: Input date string i.e. '20210105' for 5th Jab 2021 :param date_format: The date format of the input date string, default = '%Y%m%d' :return:

Expand source code

def download_by_date(self, date_str, date_format: Optional[str] = '%Y%m%d'):
    """
    Function to download the BSE Daily bhav copy for a date
    :param date_str: Input date string i.e. '20210105' for 5th Jab 2021
    :param date_format: The date format of the input date string, default = '%Y%m%d'
    :return:
    """
    try:
        file_date = datetime.strptime(date_str, date_format)
        return self._download_by_date(file_date)
    except:
        e = _get_exception()
        raise Exception(e)

def download_by_date_range(self, date_start: str, date_end: str, date_format: Optional[str] = '%Y%m%d', num_workers: Optional[int] = 1)

Function to download multiple Daily BSE bhav copies for a date range

:param str date_start: The start date of the date range :param str date_end: The end date of the date range :param str date_format: The format of the input dates, default='%Y%m%d' :param str num_workers: The number of workers to be utilized to get the files, default=1 :return:

Expand source code

def download_by_date_range(self, date_start: str, date_end: str, date_format: Optional[str] = '%Y%m%d',
                           num_workers: Optional[int] = 1):
    """
    Function to download multiple Daily BSE bhav copies for a date range

    :param str date_start: The start date of the date range
    :param str date_end: The end date of the date range
    :param str date_format: The format of the input dates, default='%Y%m%d'
    :param str num_workers: The number of workers to be utilized to get the files, default=1
    :return:
    """
    try:
        start = datetime.strptime(date_start, date_format)
        end = datetime.strptime(date_end, date_format)
        l_dates = [start + timedelta(days=x) for x in range(0, (end - start).days)]
        tpool = ThreadPool(processes=num_workers)
        l_e = tpool.map(self._download_by_date, l_dates)
        return l_e
    except:
        e = _get_exception()
        raise Exception(e)