Browse Source

adjust caching.

Jason Schwarzenberger 3 months ago
parent
commit
406b7f05f9
4 changed files with 63 additions and 21 deletions
  1. +14 −3  routes/simple/get-content.js
  2. +17 −11 routes/simple/get-details.js
  3. +31 −6  routes/simple/get-telegraph-link.js
  4. +1 −1   utils/cache.js

+ 14 - 3
routes/simple/get-content.js

@@ -1,4 +1,5 @@
 const scraper = require("../../scraper/simple");
+const cache = require('../../utils/cache');
 
 module.exports = {
 	getContent,
@@ -6,13 +7,23 @@ module.exports = {
 
 async function getContent(req, res) {
 	const url = req.body.url;
+	const nocache = !!req.body.nocache;
 	if (!/https?:\/\/(www\.)?.*\/.*/i.test(url)) {
 		return res.status(400);
 	}
 	console.log(`[simple/html] for url ${url}`);
-	console.log('[simple/html] doing a declutter');
-	const readable = await scraper.getDetails(url);
-	console.log('[simple/html] have decluttered readable', !!readable);
+	let readable = cache.declutter.get(url);
+	if (nocache) {
+		readable = undefined;
+		console.log('[simple/html] no cache');
+	}
+	console.log('[simple/html] have cached readable', !!readable);
+	if (!readable) {
+		console.log('[simple/html] doing a declutter');
+		readable = await scraper.getDetails(url);
+		cache.declutter.set(url, readable);
+		console.log('[simple/html] have decluttered readable', !!readable);
+	}
 	if (!readable || !readable.content) {
 		return res.status(500);
 	}

+ 17 - 11
routes/simple/get-details.js

@@ -1,4 +1,5 @@
 const scraper = require("../../scraper/simple");
+const cache = require('../../utils/cache');
 
 module.exports = {
 	getDetails,
@@ -6,21 +7,26 @@ module.exports = {
 
 async function getDetails(req, res) {
 	const url = req.body.url;
+	const nocache = !!req.body.nocache;
 	if (!/https?:\/\/(www\.)?.*\/.*/i.test(url)) {
 		return res.status(400);
 	}
 	console.log(`[simple/details] for url ${url}`);
-	console.log('[simple/details] doing a declutter');
-	try {
-		const readable = await scraper.getDetails(url);
+	let readable = cache.declutter.get(url);
+	if (nocache) {
+		readable = undefined;
+		console.log('[simple/details] no cache');
+	}
+	console.log('[simple/details] have cached readable', !!readable);
+	if (!readable) {
+		console.log('[simple/details] doing a declutter');
+		readable = await scraper.getDetails(url);
+		cache.declutter.set(url, readable);
 		console.log('[simple/details] have decluttered readable', !!readable);
-		if (!readable) {
-			return res.status(500);
-		}
-		console.log('[simple/details] sent readable');
-		return res.send(readable);
-	} catch (e) {
-		console.error('[simple/details] have decluttered readable', e);
-		return res.sendStatus(500);
 	}
+	if (!readable) {
+		return res.status(500);
+	}
+	console.log('[simple/details] sent readable');
+	return res.send(readable);
 }

+ 31 - 6
routes/simple/get-telegraph-link.js

@@ -1,5 +1,6 @@
 const scraper = require("../../scraper/simple");
 const { publishReadable } = require("../../utils/publish-telegraph");
+const cache = require('../../utils/cache');
 
 module.exports = {
 	getTelegraphLink,
@@ -8,17 +9,41 @@ module.exports = {
 async function getTelegraphLink(req, res) {
 	const url = req.body.url;
 	const redirect = !!req.body.redirect;
+	const nocache = !!req.body.nocache;
 	try {
 		if (!/https?:\/\/(www\.)?.*\/.*/i.test(url)) {
 			return res.status(400);
 		}
 		console.log(`[simple/telegraph] for url ${url}`);
-		console.log('[simple/telegraph] doing a declutter');
-		const readable = await scraper.getDetails(url);
-		console.log('[simple/telegraph] have decluttered readable', !!readable);
-		console.log('[simple/telegraph] doing a page');
-		const page = await publishReadable(url, readable);
-		console.log('[simple/telegraph] have created page', !!page);
+		let page = cache.telegraph.get(url);
+		let readable = cache.declutter.get(url);
+		if (nocache) {
+			page = undefined;
+			readable = undefined;
+			console.log('[simple/telegraph] no cache');
+		}
+		console.log('[simple/telegraph] have cached page', !!page);
+		if (!page) {
+			console.log('[simple/telegraph] have cached readable', !!readable);
+			if (!readable) {
+				console.log('[simple/telegraph] doing a declutter');
+				readable = await scraper.getDetails(url);
+				cache.declutter.set(url, readable);
+				console.log('[simple/telegraph] have decluttered readable', !!readable);
+			}
+			console.log('[simple/telegraph] doing a page');
+			page = await publishReadable(url, readable);
+			console.log('[simple/telegraph] have created page', !!page);
+			if (page) {
+				cache.telegraph.set(url, {
+					author: page.author,
+					author_url: page.author_url,
+					description: page.description,
+					title: page.title,
+					url: page.url,
+				});
+			}
+		}
 		if (!page) {
 			return res.status(500);
 		}

+ 1 - 1
utils/cache.js

@@ -4,7 +4,7 @@ const MINUTE = 60;
 const HOUR = 60 * MINUTE;
 
 const telegraph = new NodeCache({ stdTTL: 24 * HOUR });
-const declutter = new NodeCache({ stdTTL: 30 * MINUTE });
+const declutter = new NodeCache({ stdTTL: 4 * HOUR });
 const comment = new NodeCache({ stdTTL: 30 * MINUTE });
 
 module.exports = {