Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 3 additions & 22 deletions healthtools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

# sites to be scraped
SITES = {
"DOCTORS": "http://medicalboard.co.ke/online-services/retention/?currpage={}",
"DOCTORS": "https://medicalboard.co.ke/online-services/retention/?currpage={}",
"FOREIGN_DOCTORS": "http://medicalboard.co.ke/online-services/foreign-doctors-license-register/?currpage={}",
"CLINICAL_OFFICERS": "http://clinicalofficerscouncil.org/online-services/retention/?currpage={}",
"TOKEN_URL": "http://api.kmhfl.health.go.ke/o/token/",
Expand All @@ -49,46 +49,27 @@
NHIF_SERVICES = ["inpatient", "outpatient", "outpatient-cs"]

# config logging

LOGGING = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"simple": {
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
"datefmt": "%Y-%m-%d %H:%M:%S"
},
"slack": {
"format": """
Location: %(module)s: %(funcName)s: %(lineno)d \n Time: %(asctime)s \n Message: %(message)s
""",
"datefmt": "%Y-%m-%d %H:%M:%S"
}
},

"handlers": {
"console": {
"class": "logging.StreamHandler",
"level": "DEBUG",
"formatter": "simple",
"stream": "ext://sys.stdout"
},

"slack_log": {
"level": "WARNING",
"api_key": os.getenv('SLACK_API_TOKEN', None),
"class": "slacker_log_handler.SlackerLogHandler",
"channel": os.getenv('SLACK_LOGGER_CHANNEL', None),
"username": "Scrapper Slack Logger",
"stack_trace": True,
"formatter": "slack",
"fail_silent": True #would not raise an error when api token is invalid
}

},

"root": {
"level": "INFO",
"handlers": ["console","slack_log"]
"handlers": ["console"]
}
}

4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ nose==1.3.7
packaging==16.8
pyparsing==2.2.0
python-dateutil==2.6.0
python-memcached==1.58
python3-memcached== 1.51
requests==2.13.0
requests-aws4auth==0.9
s3transfer==0.1.10
six==1.10.0
slack-logger==0.2.0
slackclient==1.0.6
slacker==0.9.42
slacker-log-handler==1.6.1
termcolor==1.1.0
urllib3==1.21.1
websocket-client==0.40.0
Expand Down
2 changes: 1 addition & 1 deletion runtime.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
python-2.7.13
python-3.6.2
35 changes: 33 additions & 2 deletions scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,25 @@ def setup_logging(default_level=logging.INFO):
logging.config.dictConfig(LOGGING)
except Exception as ex:
logging.basicConfig(level=default_level)


if __name__ == "__main__":
# Add a Slack log handler so WARNING-and-above records are also sent to the
# webhook. Nothing is configured when MORPH_WEBHOOK_URL is unset or empty.
SLACK_URL = os.getenv("MORPH_WEBHOOK_URL", None)
if SLACK_URL:
    # Imported lazily so slack_logger is only needed when a webhook is set.
    from slack_logger import SlackHandler, SlackFormatter
    # NOTE: this sets the level on `log` itself, not only on the Slack handler.
    log.setLevel(logging.WARNING)
    try:
        sh = SlackHandler(username='Scraper Logger', url=SLACK_URL)
        sh.setLevel(logging.WARNING)
        f = SlackFormatter()
        sh.setFormatter(f)
        log.addHandler(sh)
    except Exception as ex:
        # Best effort: scraping continues without Slack notifications.
        # The message needs a %s placeholder for the argument; without one,
        # logging fails to format the record and the exception text is lost.
        log.error('Unable to add slack_logger: %s', ex)

def scrapers():
'''
Function to run every scraper
'''
# Initialize the Scrapers
doctors_scraper = DoctorsScraper()
foreign_doctors_scraper = ForeignDoctorsScraper()
Expand Down Expand Up @@ -97,3 +113,18 @@ def setup_logging(default_level=logging.INFO):
scraper_stats.data_archive_key = "stats/stats-{}.json"
scraper_stats.archive_data(json.dumps(scraping_statistics))

# log warning when scraper ran more than 30 minutes
if(m >= 30):
log.warning('Scraper: {} ran for about {}'.format(scraper_id, time_taken))

# Script entry point: configure logging, then run scrapers() in a separate
# process and wait at most 30 minutes for it to finish.
if __name__ == "__main__":
setup_logging()
import multiprocessing
# Start the scrapers
scraping = multiprocessing.Process(target=scrapers)
scraping.start()
# join() is given a timeout in seconds; it returns once the child exits or
# after 30 minutes, whichever comes first. The child is not terminated here.
scraping.join(30 * 60)

# log a warning if scraping is still running after 30 minutes
# NOTE(review): scraper_id is not assigned anywhere in this block - confirm it
# exists at module scope in this process, otherwise this line raises NameError.
if scraping.is_alive():
log.warning('Scraper: {} is running for more than 30 minutes'.format(scraper_id))