From 09d644b54bc441b4e3338d9d295631492b275d8e Mon Sep 17 00:00:00 2001 From: Aleksei Semenov Date: Fri, 24 Mar 2023 07:47:28 -0300 Subject: [PATCH 1/3] Migrate tests to python 3.11 Use unittest from standard lib as main lib for testing Move .py file with fixtures to subdir to avoid relative import (tests) Update requirements: use newer versions of numpy and pandas Fix bug with deprecated module collections.Container --- .gitignore | 1 + finam/utils.py | 4 +- requirements.txt | 5 +- tests/__init__.py | 57 ---------------------- tests/fixtures/__init__.py | 59 +++++++++++++++++++++++ tests/tests_integration.py | 13 ++--- tests/tests_unit.py | 99 +++++++++++++++++++------------------- 7 files changed, 121 insertions(+), 117 deletions(-) create mode 100644 tests/fixtures/__init__.py diff --git a/.gitignore b/.gitignore index 7bef278..0049299 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ build/ finam_export.egg-info/ .devcontainer .vscode +.venv \ No newline at end of file diff --git a/finam/utils.py b/finam/utils.py index a33d76b..480e35a 100644 --- a/finam/utils.py +++ b/finam/utils.py @@ -1,6 +1,6 @@ import re -import collections import six +from collections.abc import Container from operator import attrgetter from urllib.request import Request @@ -10,7 +10,7 @@ def is_container(val): - return isinstance(val, collections.Container)\ + return isinstance(val, Container)\ and not isinstance(val, six.string_types) \ and not isinstance(val, bytes) diff --git a/requirements.txt b/requirements.txt index 6c7c5f7..4393239 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,9 @@ -pandas==1.3.1 +numpy==1.24.2 +pandas==1.4.2 requests==2.24.0 click==7.1.2 click-datetime==0.2 # dev -nose -mock coverage urltools parameterized diff --git a/tests/__init__.py b/tests/__init__.py index 4da2d97..e69de29 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,57 +0,0 @@ -# coding: utf-8 -from glob import glob -from io import open -from collections import 
namedtuple -import os.path - -import urltools - -from finam.config import FINAM_CHARSET -from finam.utils import smart_encode - -Contract = namedtuple('Contract', ('id', 'code', 'name')) -SBER = Contract(3, 'SBER', u'Сбербанк') -MICEX = Contract(13851, 'MICEX', u'ММВБ') - -# 10:00 - 18:40 -SHARES_SESSION_MINUTES = 60 * 8 + 40 - - -class FixtureRegistry(object): - - """ - Fixtures holder for easier access in tests - - Note it would return bytes in cp1251 - as any reply from finam.ru export tool would do - """ - - __SPLIT_SUFFIX = '__split' - - def __init__(self): - path = os.path.dirname(os.path.abspath(__file__)) - items = {} - # glob omits .* so it's used instead of listdir - for fixture in glob(os.path.join(path, 'fixtures', '*')): - name, _ = os.path.splitext(os.path.basename(fixture)) - if name in items: - raise RuntimeError('Duplicate fixture name for {} in {}' - .format(fixture, path)) - with open(fixture, 'r', encoding=FINAM_CHARSET) as f: - data = f.read() - items[name] = data - items[name + self.__SPLIT_SUFFIX] = data.split('\n') - self._fixtures = items - - def __getattr__(self, key): - return self._fixtures[key] - - -fixtures = FixtureRegistry() - -# 2.x <-> 3.x compatibility -startswith_compat = type(u'').startswith - - -def urls_equal(url1, url2): - return urltools.normalize(url1) == urltools.normalize(url2) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000..afe3ba4 --- /dev/null +++ b/tests/fixtures/__init__.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from glob import glob +from io import open +from collections import namedtuple +import os.path + +import urltools + +from finam.config import FINAM_CHARSET +from finam.utils import smart_encode + +Contract = namedtuple('Contract', ('id', 'code', 'name')) +SBER = Contract(3, 'SBER', u'Сбербанк') +MICEX = Contract(13851, 'MICEX', u'ММВБ') + +# 10:00 - 18:40 +SHARES_SESSION_MINUTES = 60 * 8 + 40 + + +class FixtureRegistry(object): + + """ + Fixtures holder for 
easier access in tests + + Note it would return bytes in cp1251 + as any reply from finam.ru export tool would do + """ + + __SPLIT_SUFFIX = '__split' + + def __init__(self): + path = os.path.dirname(os.path.abspath(__file__)) + items = {} + # glob omits .* so it's used instead of listdir + for fixture in glob(os.path.join(path, '*')): + name, ext = os.path.splitext(os.path.basename(fixture)) + if name in items: + raise RuntimeError('Duplicate fixture name for {} in {}' + .format(fixture, path)) + if ext not in ['.csv', '.js', '.html']: + continue + with open(fixture, 'r', encoding=FINAM_CHARSET) as f: + data = f.read() + items[name] = data + items[name + self.__SPLIT_SUFFIX] = data.split('\n') + self._fixtures = items + + def __getattr__(self, key): + return self._fixtures[key] + + +fixtures = FixtureRegistry() + +# 2.x <-> 3.x compatibility +startswith_compat = type(u'').startswith + + +def urls_equal(url1, url2): + return urltools.normalize(url1) == urltools.normalize(url2) diff --git a/tests/tests_integration.py b/tests/tests_integration.py index 0ea454f..8194d1d 100644 --- a/tests/tests_integration.py +++ b/tests/tests_integration.py @@ -1,15 +1,16 @@ +import unittest from datetime import datetime, date from parameterized import parameterized from finam import Exporter, Market, Timeframe -from . 
import SBER, SHARES_SESSION_MINUTES +from fixtures import SBER, SHARES_SESSION_MINUTES -class TestIntegration(object): +class TestIntegration(unittest.TestCase): - @parameterized([ + @parameterized.expand([ (date(2015, 1, 1), date(2016, 1, 1), Timeframe.DAILY), (date(2016, 1, 1), date(2018, 1, 1), Timeframe.MINUTES1), ]) @@ -27,7 +28,7 @@ def test_basic(self, start_date, end_date, timeframe): '', '', '', '', ''] - @parameterized([ + @parameterized.expand([ (date(2018, 1, 1), date(2018, 1, 1), Timeframe.DAILY), ]) def test_blank(self, start_date, end_date, timeframe): @@ -41,7 +42,7 @@ def test_blank(self, start_date, end_date, timeframe): '', '', '', '', ''] - @parameterized([ + @parameterized.expand([ (date(2016, 10, 27), date(2016, 10, 27)), (date(2020, 9, 7), date(2020, 9, 9)), ]) @@ -61,7 +62,7 @@ def test_ticks(self, start_date, end_date): '', ''] - @parameterized([ + @parameterized.expand([ (date(2018, 1, 1), date(2018, 1, 1)), ]) def test_ticks_blank(self, start_date, end_date): diff --git a/tests/tests_unit.py b/tests/tests_unit.py index e6dbc1c..4ab6bc3 100644 --- a/tests/tests_unit.py +++ b/tests/tests_unit.py @@ -1,9 +1,9 @@ import operator +import unittest +import unittest.mock as mock from datetime import date -import mock import pandas as pd -from nose.tools import assert_raises, assert_raises_regexp from parameterized import parameterized from finam import (Market, @@ -17,10 +17,10 @@ ExporterMetaFile) from finam.interval import split_interval -from . 
import fixtures, startswith_compat, SBER, MICEX +from fixtures import fixtures, startswith_compat, SBER, MICEX -class TestExporterMetaPage(object): +class TestExporterMetaPage(unittest.TestCase): def test_find_ok(self): fetcher = mock.MagicMock(return_value=fixtures.page_valid) @@ -30,11 +30,11 @@ def test_find_ok(self): def test_find_on_broken_page(self): fetcher = mock.MagicMock(return_value=fixtures.page_broken) - with assert_raises(FinamParsingError): + with self.assertRaises(FinamParsingError): ExporterMetaPage(fetcher).find_meta_file() -class TestExporterMetaFile(object): +class TestExporterMetaFile(unittest.TestCase): def test_parse_df_ok(self): fetcher = mock.MagicMock(return_value=fixtures.meta_valid__split) @@ -55,14 +55,14 @@ def test_parse_df_malformed_or_blank(self): for fixture in (fixtures.meta_malformed__split, fixtures.meta_blank__split): fetcher = mock.MagicMock(return_value=fixture) - meta_file = ExporterMetaFile('https://exampe.com', fetcher) - with assert_raises(FinamDownloadError): + meta_file = ExporterMetaFile('https://example.com', fetcher) + with self.assertRaises(FinamDownloadError): meta_file.parse_df() -class TestExporterMeta(object): +class TestExporterMeta(unittest.TestCase): - def setup(self): + def setUp(self): with mock.patch('finam.export.ExporterMetaPage'): fetcher = mock.MagicMock(return_value=fixtures.meta_valid__split) self._meta = ExporterMeta(lazy=False, fetcher=fetcher) @@ -85,7 +85,7 @@ def test_lookup_by_ids(self): def test_lookup_by_missing_id(self): MISSING_ID = self._meta.meta.index.values.max() + 1 - with assert_raises(FinamObjectNotFoundError): + with self.assertRaises(FinamObjectNotFoundError): self._meta.lookup(id_=MISSING_ID) def test_lookup_name_code_by_comparators(self): @@ -119,41 +119,42 @@ def test_lookup_by_market_and_codes(self): assert set(actual['market']) == {Market.SHARES} -@parameterized([ - (date(2016, 1, 1), date(2020, 1, 30), Timeframe.DAILY, - ((date(2016, 1, 1), date(2020, 1, 30)),)), - 
(date(2016, 1, 1), date(2016, 1, 1), Timeframe.MINUTES1, - ((date(2016, 1, 1), date(2016, 1, 1)),)), - (date(2016, 1, 1), date(2016, 1, 2), Timeframe.MINUTES1, - ((date(2016, 1, 1), date(2016, 1, 2)),)), - (date(2018, 1, 1), date(2020, 9, 15), Timeframe.MINUTES1, - ((date(2018, 1, 1), date(2018, 12, 31)), - (date(2019, 1, 1), date(2019, 12, 31)), - (date(2020, 1, 1), date(2020, 9, 15)),)), - (date(2019, 3, 1), date(2020, 3, 1), Timeframe.MINUTES1, - ((date(2019, 3, 1), date(2020, 2, 28)), - (date(2020, 2, 29), date(2020, 3, 1)),)), - (date(2018, 3, 1), date(2019, 3, 1), Timeframe.MINUTES1, - ((date(2018, 3, 1), date(2019, 2, 28)), - (date(2019, 3, 1), date(2019, 3, 1)),)), - (date(2019, 3, 1), date(2020, 2, 29), Timeframe.MINUTES1, - ((date(2019, 3, 1), date(2020, 2, 28)), - (date(2020, 2, 29), date(2020, 2, 29)),)), - (date(2016, 1, 1), date(2016, 1, 1), Timeframe.TICKS, - ((date(2016, 1, 1), date(2016, 1, 1)),)), - (date(2020, 2, 29), date(2020, 3, 1), Timeframe.TICKS, - ((date(2020, 2, 29), date(2020, 2, 29)), - (date(2020, 3, 1), date(2020, 3, 1)),)), - (date(2020, 1, 30), date(2020, 2, 1), Timeframe.TICKS, - ((date(2020, 1, 30), date(2020, 1, 30)), - (date(2020, 1, 31), date(2020, 1, 31)), - (date(2020, 2, 1), date(2020, 2, 1)),)), -]) -def test_split_interval(start_date, end_date, interval, expected): - actual = split_interval(start_date, end_date, interval) - assert expected == actual - - -def test_split_interval_validation(): - with assert_raises_regexp(ValueError, 'start_date must be'): - split_interval(date(2020, 1, 1), date(2010, 1, 1), Timeframe.DAILY) +class TestInterval(unittest.TestCase): + @parameterized.expand([ + (date(2016, 1, 1), date(2020, 1, 30), Timeframe.DAILY, + ((date(2016, 1, 1), date(2020, 1, 30)),)), + (date(2016, 1, 1), date(2016, 1, 1), Timeframe.MINUTES1, + ((date(2016, 1, 1), date(2016, 1, 1)),)), + (date(2016, 1, 1), date(2016, 1, 2), Timeframe.MINUTES1, + ((date(2016, 1, 1), date(2016, 1, 2)),)), + (date(2018, 1, 1), date(2020, 9, 
15), Timeframe.MINUTES1, + ((date(2018, 1, 1), date(2018, 12, 31)), + (date(2019, 1, 1), date(2019, 12, 31)), + (date(2020, 1, 1), date(2020, 9, 15)),)), + (date(2019, 3, 1), date(2020, 3, 1), Timeframe.MINUTES1, + ((date(2019, 3, 1), date(2020, 2, 28)), + (date(2020, 2, 29), date(2020, 3, 1)),)), + (date(2018, 3, 1), date(2019, 3, 1), Timeframe.MINUTES1, + ((date(2018, 3, 1), date(2019, 2, 28)), + (date(2019, 3, 1), date(2019, 3, 1)),)), + (date(2019, 3, 1), date(2020, 2, 29), Timeframe.MINUTES1, + ((date(2019, 3, 1), date(2020, 2, 28)), + (date(2020, 2, 29), date(2020, 2, 29)),)), + (date(2016, 1, 1), date(2016, 1, 1), Timeframe.TICKS, + ((date(2016, 1, 1), date(2016, 1, 1)),)), + (date(2020, 2, 29), date(2020, 3, 1), Timeframe.TICKS, + ((date(2020, 2, 29), date(2020, 2, 29)), + (date(2020, 3, 1), date(2020, 3, 1)),)), + (date(2020, 1, 30), date(2020, 2, 1), Timeframe.TICKS, + ((date(2020, 1, 30), date(2020, 1, 30)), + (date(2020, 1, 31), date(2020, 1, 31)), + (date(2020, 2, 1), date(2020, 2, 1)),)), + ]) + def test_split_interval(self, start_date, end_date, interval, expected): + actual = split_interval(start_date, end_date, interval) + assert expected == actual + + + def test_split_interval_validation(self): + with self.assertRaisesRegex(ValueError, 'start_date must be'): + split_interval(date(2020, 1, 1), date(2010, 1, 1), Timeframe.DAILY) From 0fd73a2463614697e7e3dae6ba4ca39bacf7797e Mon Sep 17 00:00:00 2001 From: Aleksei Semenov Date: Sat, 25 Mar 2023 13:13:03 -0300 Subject: [PATCH 2/3] Add new webdriver-based fetcher Use new fetcher for meta data downloading Sort imports in export.py module Update requirements.txt --- finam/export.py | 215 +++++++++++++++++++++++++++++++++-------------- requirements.txt | 8 +- 2 files changed, 158 insertions(+), 65 deletions(-) diff --git a/finam/export.py b/finam/export.py index 42e6bd3..7e8e405 100644 --- a/finam/export.py +++ b/finam/export.py @@ -1,30 +1,29 @@ -import time import datetime import logging import operator 
+import time from enum import IntEnum from io import StringIO +from typing import Type, Union from urllib.parse import urlencode from urllib.request import urlopen import pandas as pd from pandas.errors import ParserError +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.chrome.webdriver import WebDriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from webdriver_manager.chrome import ChromeDriverManager -from .utils import (is_container, - smart_decode, - build_trusted_request, - parse_script_link) from .const import Timeframe -from .exception import (FinamDownloadError, - FinamParsingError, - FinamObjectNotFoundError, - FinamTooLongTimeframeError, - FinamAlreadyInProgressError, - FinamThrottlingError) - - +from .exception import (FinamAlreadyInProgressError, FinamDownloadError, + FinamObjectNotFoundError, FinamParsingError, + FinamThrottlingError, FinamTooLongTimeframeError) from .interval import split_interval - +from .utils import (build_trusted_request, is_container, parse_script_link, + smart_decode) __all__ = ['Exporter', 'LookupComparator'] @@ -39,9 +38,10 @@ class LookupComparator(IntEnum): CONTAINS = 3 -def fetch_url(url, lines=False): +def fetch_url_urllib(url, lines=False): """ Fetches url from finam.ru + Since January 2023 this fetcher does not support fetching meta data """ logger.info('Fetching {}'.format(url)) request = build_trusted_request(url) @@ -59,13 +59,97 @@ def fetch_url(url, lines=False): raise FinamDownloadError('Unable to decode: {}'.format(e.message)) +def fetch_url_webdriver(url, lines=False) -> str: + """ + Fetches url from finam.ru + Selenium webdriver based method for meta data fetching + """ + logger.info('Fetching {}'.format(url)) + locator = (By.XPATH, "//*") + with FetchMetaWebriver() as fetcher: + fetcher.driver.get(url) + res = fetcher.wait.until( + lambda driver: 
driver.find_element(*locator).get_attribute('outerHTML') + ) + if lines: + res = res.encode('cp1252').decode('cp1251') + res = res.split('\n') + return res + return res + + +def use_fetcher_meta(cls: Type) -> Type: + """ + It is class decorator. + Use it to decorate all classes that should use webdriver for fetching + """ + FetchMetaWebriver.pages_to_load += 1 + return cls + + +class FetchMetaWebriver: + """ + This class provides a method for fetching meta data from the finam.ru website + The method is based on the Selenium webdriver and uses a cached webdriver stored as a class attribute driver + This caching saves around 1-2 seconds of loading time + The number of pages to download is dynamically calculated using a class decorator and stored in the pages_to_load attribute + The webdriver is automatically closed when all pages have been downloaded + """ + + driver: Union[WebDriver, None] = None + pages_to_load = 0 + timeout = 30 + wait: WebDriverWait + + def __enter__(self): + """ + Setup chrome driver with webdriver service + NB: + Using headless mode is not allowed by finam + If you are going to use this lib inside docker container you have to use virtual screen, e.g. 
xvfb + """ + logger.info('Meta data fetching started') + self.__class__.pages_to_load -= 1 + if self.__class__.driver: + return self + chromeService = Service(ChromeDriverManager().install()) + options = webdriver.ChromeOptions() + # Basic driver`s options + options.add_argument('--disable-translate') + options.add_argument('--disable-extensions') + options.add_argument('--disable-notifications') + # The following options is mandatory if you are going to run it in docker container + options.add_argument('--no-sandbox') + options.add_argument("--disable-gpu") + options.add_argument("--disable-dev-shm-usage") + # Disable images and css loading + prefs = { + "profile.managed_default_content_settings.images": 2, + "profile.managed_default_content_settings.stylesheets": 2, + } + options.add_experimental_option("prefs", prefs) + # Setup driver and cache it inside the class + self.__class__.driver = webdriver.Chrome(service=chromeService, options=options) + self.__class__.wait = WebDriverWait(self.__class__.driver, self.__class__.timeout) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if any((exc_type, exc_val, exc_tb)): + self.driver.quit() + logger.info(f'Meta data fetching failed. 
{exc_type}): {exc_val}') + if self.__class__.pages_to_load == 0: + self.driver.quit() + logger.info('Meta data fetching finished') + + +@use_fetcher_meta class ExporterMetaPage(object): FINAM_BASE = 'https://www.finam.ru' FINAM_ENTRY_URL = FINAM_BASE + '/profile/moex-akcii/gazprom/export/' FINAM_META_FILENAME = 'icharts.js' - def __init__(self, fetcher=fetch_url): + def __init__(self, fetcher=fetch_url_urllib): self._fetcher = fetcher def find_meta_file(self): @@ -83,16 +167,16 @@ def find_meta_file(self): try: url = parse_script_link(html, self.FINAM_META_FILENAME) except ValueError as e: - raise FinamParsingError('Unable to parse meta url from html: {}' - .format(e)) + raise FinamParsingError('Unable to parse meta url from html: {}'.format(e)) return self.FINAM_BASE + url +@use_fetcher_meta class ExporterMetaFile(object): FINAM_CATEGORIES = -1 - def __init__(self, url, fetcher=fetch_url): + def __init__(self, url, fetcher=fetch_url_urllib): self._url = url self._fetcher = fetcher @@ -112,10 +196,9 @@ def _parse_js_assignment(self, line): start_char, end_char = '[', ']' start_idx = line.find(start_char) end_idx = line.find(end_char) - if (start_idx == -1 or - end_idx == -1): + if start_idx == -1 or end_idx == -1: raise FinamDownloadError('Unable to parse line: {}'.format(line)) - items = line[start_idx + 1:end_idx] + items = line[start_idx + 1 : end_idx] # string items if items.startswith("'"): @@ -135,7 +218,7 @@ def _parse_js(self, data): """ cols = ('id', 'name', 'code', 'market') parsed = dict() - for idx, col in enumerate(cols[:len(cols)]): + for idx, col in enumerate(cols[: len(cols)]): parsed[col] = self._parse_js_assignment(data[idx]) df = pd.DataFrame(columns=cols, data=parsed) df['market'] = df['market'].astype(int) @@ -153,8 +236,7 @@ def parse_df(self): class ExporterMeta(object): - - def __init__(self, lazy=True, fetcher=fetch_url): + def __init__(self, lazy=True, fetcher=fetch_url_urllib): self._meta = None self._fetcher = fetcher if not lazy: @@ 
-192,8 +274,7 @@ def _apply_filter(self, col, val, comparator): op = 'startswith' else: op = 'contains' - expr = self._combine_filters( - map(getattr(self._meta[col].str, op), val), operator.or_) + expr = self._combine_filters(map(getattr(self._meta[col].str, op), val), operator.or_) return expr def _combine_filters(self, filters, op): @@ -203,9 +284,15 @@ def _combine_filters(self, filters, op): result = op(result, filter_) return result - def lookup(self, id_=None, code=None, name=None, market=None, - name_comparator=LookupComparator.CONTAINS, - code_comparator=LookupComparator.EQUALS): + def lookup( + self, + id_=None, + code=None, + name=None, + market=None, + name_comparator=LookupComparator.CONTAINS, + code_comparator=LookupComparator.EQUALS, + ): """ Looks up contracts matching specified combinations of requirements If multiple requirements are specified they will be ANDed @@ -214,17 +301,18 @@ def lookup(self, id_=None, code=None, name=None, market=None, may appear in different markets """ if not any((id_, code, name, market)): - raise ValueError('Either id or code or name or market' - ' must be specified') + raise ValueError('Either id or code or name or market' ' must be specified') self._load() filters = [] # applying filters - filter_groups = (('id', id_, LookupComparator.EQUALS), - ('code', code, code_comparator), - ('name', name, name_comparator), - ('market', market, LookupComparator.EQUALS)) + filter_groups = ( + ('id', id_, LookupComparator.EQUALS), + ('code', code, code_comparator), + ('name', name, name_comparator), + ('market', market, LookupComparator.EQUALS), + ) for col, val, comparator in filter_groups: if val is not None: @@ -251,20 +339,21 @@ class Exporter(object): 'mstimever': '1', 'sep': '3', 'sep2': '1', - 'at': '1' + 'at': '1', } EMPTY_RESULT_NOT_TICKS = ';