server.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. import logging
  2. logging.basicConfig(
  3. format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
  4. level=logging.INFO)
  5. import gevent
  6. from gevent import monkey
  7. monkey.patch_all()
  8. from gevent.pywsgi import WSGIServer
  9. import copy
  10. import json
  11. import threading
  12. import traceback
  13. import time
  14. from datetime import datetime, timedelta
  15. from urllib.parse import urlparse, parse_qs
  16. import settings
  17. import database
  18. import search
  19. import feed
  20. from utils import gen_rand_id
  21. from flask import abort, Flask, request, render_template, stream_with_context, Response
  22. from werkzeug.exceptions import NotFound
  23. from flask_cors import CORS
  24. database.init()
  25. search.init()
  26. def new_id():
  27. nid = gen_rand_id()
  28. while database.get_story(nid):
  29. nid = gen_rand_id()
  30. return nid
  31. build_folder = '../webclient/build'
  32. flask_app = Flask(__name__, template_folder=build_folder, static_folder=build_folder, static_url_path='')
  33. cors = CORS(flask_app)
  34. @flask_app.route('/api')
  35. def api():
  36. skip = request.args.get('skip', 0)
  37. limit = request.args.get('limit', 20)
  38. stories = database.get_stories(skip=skip, limit=limit)
  39. res = Response(json.dumps({"stories": stories}))
  40. res.headers['content-type'] = 'application/json'
  41. return res
  42. @flask_app.route('/api/search', strict_slashes=False)
  43. def apisearch():
  44. q = request.args.get('q', '')
  45. skip = request.args.get('skip', 0)
  46. limit = request.args.get('limit', 20)
  47. if len(q) >= 3:
  48. results = search.search(q, skip=skip, limit=limit)
  49. else:
  50. results = []
  51. return dict(results=results)
  52. @flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
  53. def submit():
  54. try:
  55. url = request.form['url']
  56. nid = new_id()
  57. parse = urlparse(url)
  58. if 'news.ycombinator.com' in parse.hostname:
  59. source = 'hackernews'
  60. ref = parse_qs(parse.query)['id'][0]
  61. elif 'tildes.net' in parse.hostname and '~' in url:
  62. source = 'tildes'
  63. ref = parse.path.split('/')[2]
  64. elif 'lobste.rs' in parse.hostname and '/s/' in url:
  65. source = 'lobsters'
  66. ref = parse.path.split('/')[2]
  67. elif 'reddit.com' in parse.hostname and 'comments' in url:
  68. source = 'reddit'
  69. ref = parse.path.split('/')[4]
  70. elif settings.HOSTNAME in parse.hostname:
  71. raise Exception('Invalid URL')
  72. else:
  73. source = 'manual'
  74. ref = url
  75. existing = database.get_story_by_ref(ref)
  76. if existing:
  77. return {'nid': existing.sid}
  78. existing = database.get_story_by_url(url)
  79. if existing:
  80. return {'nid': existing.sid}
  81. else:
  82. story = dict(id=nid, ref=ref, source=source)
  83. valid = feed.update_story(story, is_manual=True)
  84. if valid:
  85. database.put_story(story)
  86. search.put_story(story)
  87. return {'nid': nid}
  88. else:
  89. logging.info(str(story))
  90. raise Exception('Invalid article')
  91. except BaseException as e:
  92. logging.error('Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e)))
  93. print(traceback.format_exc())
  94. abort(400)
  95. @flask_app.route('/api/<sid>')
  96. def story(sid):
  97. story = database.get_story(sid)
  98. if story:
  99. related = []
  100. if story.meta['url']:
  101. related = database.get_stories_by_url(story.meta['url'])
  102. related = [r.meta for r in related]
  103. links = story.meta.get('meta_links', [])
  104. if links:
  105. links = [database.get_story_by_url(link) for link in links]
  106. links = list(filter(None, [l.meta if l else None for l in links]))
  107. res = Response(json.dumps({"story": story.data, "related": related, "links": links}))
  108. res.headers['content-type'] = 'application/json'
  109. return res
  110. else:
  111. return abort(404)
  112. @flask_app.route('/')
  113. @flask_app.route('/search')
  114. def index():
  115. return render_template('index.html',
  116. title='Feed',
  117. url=settings.HOSTNAME,
  118. description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode')
  119. @flask_app.route('/<sid>', strict_slashes=False)
  120. @flask_app.route('/<sid>/c', strict_slashes=False)
  121. def static_story(sid):
  122. try:
  123. return flask_app.send_static_file(sid)
  124. except NotFound:
  125. pass
  126. story = database.get_story(sid)
  127. if not story: return abort(404)
  128. story = story.data
  129. score = story['score']
  130. num_comments = story['num_comments']
  131. source = story['source']
  132. description = '{} point{}, {} comment{} on {}'.format(
  133. score, 's' if score != 1 else '',
  134. num_comments, 's' if num_comments != 1 else '',
  135. source)
  136. url = urlparse(story['url']).hostname or urlparse(story['link']).hostname or ''
  137. url = url.replace('www.', '')
  138. return render_template('index.html',
  139. title=story['title'],
  140. url=url,
  141. description=description)
  142. http_server = WSGIServer(('', settings.API_PORT or 33842), flask_app)
  143. def _add_new_refs():
  144. added = []
  145. for ref, source, urlref in feed.get_list():
  146. if database.get_story_by_ref(ref):
  147. continue
  148. try:
  149. nid = new_id()
  150. database.put_ref(ref, nid, source, urlref)
  151. logging.info('Added ref ' + ref)
  152. added.append(ref)
  153. gevent.sleep(1)
  154. except database.IntegrityError:
  155. #logging.info('Unable to add ref ' + ref)
  156. continue
  157. return added
  158. def _update_current_story(item):
  159. try:
  160. story = database.get_story(item['sid']).data
  161. except AttributeError:
  162. story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
  163. logging.info('Updating story: {}'.format(str(story['ref'])))
  164. valid = feed.update_story(story, urlref=item['urlref'])
  165. if valid:
  166. try:
  167. database.put_story(story)
  168. search.put_story(story)
  169. if story['source'] == 'manual':
  170. database.del_ref(item['ref'])
  171. logging.info('Removed manual ref {}'.format(item['ref']))
  172. except database.IntegrityError:
  173. logging.info('Unable to add story with ref ' + item['ref'])
  174. else:
  175. database.del_ref(item['ref'])
  176. logging.info('Removed ref {}'.format(item['ref']))
  177. def feed_thread():
  178. new_refs = []
  179. update_refs = []
  180. last_check = datetime.now() - timedelta(minutes=20)
  181. try:
  182. while True:
  183. # onboard new stories
  184. time_since_check = datetime.now() - last_check
  185. if not len(new_refs) and time_since_check > timedelta(minutes=15):
  186. added = _add_new_refs()
  187. ref_list = database.get_reflist()
  188. new_refs = list(filter(None, [i if i['ref'] in added else None for i in ref_list]))
  189. update_queue = list(filter(None, [i if i['ref'] not in added else None for i in ref_list]))
  190. current_queue_refs = [i['ref'] for i in update_refs]
  191. update_queue = list(filter(None, [i if i['ref'] not in current_queue_refs else None for i in update_queue]))
  192. update_refs += update_queue
  193. logging.info('Added {} new refs'.format(len(added)))
  194. logging.info('Have {} refs in update queue'.format(len(current_queue_refs)))
  195. logging.info('Fetched {} refs for update queue'.format(len(update_queue)))
  196. last_check = datetime.now()
  197. gevent.sleep(5)
  198. # update new stories
  199. if len(new_refs):
  200. item = new_refs.pop(0)
  201. logging.info('Processing new story ref {}'.format(item['ref']))
  202. _update_current_story(item)
  203. gevent.sleep(5)
  204. # update current stories
  205. if len(update_refs):
  206. item = update_refs.pop(0)
  207. logging.info('Processing existing story ref {}'.format(item['ref']))
  208. _update_current_story(item)
  209. gevent.sleep(5)
  210. gevent.sleep(10)
  211. except KeyboardInterrupt:
  212. logging.info('Ending feed thread...')
  213. except ValueError as e:
  214. logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
  215. http_server.stop()
  216. gevent.kill(feed_thread_ref)
  217. print('Starting Feed thread...')
  218. feed_thread_ref = gevent.spawn(feed_thread)
  219. print('Starting HTTP thread...')
  220. try:
  221. http_server.serve_forever()
  222. except KeyboardInterrupt:
  223. gevent.kill(feed_thread_ref)
  224. logging.info('Exiting...')