# simple.py
  1. import logging
  2. logging.basicConfig(
  3. format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
  4. level=logging.DEBUG)
  5. import requests
  6. from settings import READER_PORT
  7. READ_API = 'http://127.0.0.1:{}/simple/details'.format(READER_PORT or 3000)
  8. TIMEOUT = 20
  9. def get_html(url):
  10. logging.info(f"Simple Scraper: {url}")
  11. details = get_details(url)
  12. if not details:
  13. return ''
  14. return details['content']
  15. def get_details(url):
  16. try:
  17. r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
  18. if r.status_code != 200:
  19. raise Exception('Bad response code ' + str(r.status_code))
  20. return r.json()
  21. except KeyboardInterrupt:
  22. raise
  23. except BaseException as e:
  24. logging.error('Problem getting article: {}'.format(str(e)))
  25. return None