Browse Source

play around with extracting m3u8 from websites.

Jason Schwarzenberger 1 year ago
parent
commit
5a6f519ab0
7 changed files with 290 additions and 15 deletions
  1. 19 12
      index.js
  2. 46 1
      public/style.css
  3. 36 0
      public/watch.html
  4. 77 0
      public/watch.js
  5. 0 0
      utils/config/sites.js
  6. 6 2
      utils/declutter.js
  7. 106 0
      utils/stream.js

+ 19 - 12
index.js

@@ -3,6 +3,7 @@ const bodyParser = require('body-parser');
 const path = require('path');
 
 const declutter = require('./utils/declutter');
+const getStreamUrl = require('./utils/stream');
 
 const port = process.env.NODE_PORT || 3000;
 const app = express();
@@ -14,9 +15,13 @@ app.get('/favicon.ico', async (req, res) => res.sendStatus(404));
 app.get('/loading.svg', async (req, res) => res.sendFile(path.join(__dirname, '/public/loading.svg')));
 app.get('/style.css', async (req, res) => res.sendFile(path.join(__dirname, '/public/style.css')));
 app.get('/script.js', async (req, res) => res.sendFile(path.join(__dirname, '/public/script.js')));
+app.get('/watch.js', async (req, res) => res.sendFile(path.join(__dirname, '/public/watch.js')));
+app.get('/watch', async (req, res) => res.sendFile(path.join(__dirname, '/public/watch.html')));
 app.get('/', async (req, res) => res.sendFile(path.join(__dirname, '/public/index.html')));
 
-const declutterRequest = async (res, url, redirect) => {
+app.post('/', async (req, res) => {
+  const url = req.body.url;
+  const redirect = !!req.body.redirect;
   try {
     if (!/https?:\/\/(www\.)?.*\/.*/i.test(url)) {
       return res.status(400);
@@ -33,18 +38,20 @@ const declutterRequest = async (res, url, redirect) => {
     console.error(e);
     return res.status(500);
   }
-};
-
-app.post('/', async (req, res) => await declutterRequest(res, req.body.url, !!req.body.redirect));
-
-app.get('*', async (req, res) => {
-  const queryString = Object.keys(req.query)
-    .map(k => `${k}=${req.query[k]}`)
-    .join('&');
-
-  const url = req.path.substring(1) + '?' + queryString;
+});
 
-  return await declutterRequest(res, url, false);
+app.post('/watch', async (req, res) => {
+  const url = req.body.url;
+  try {
+    const stream = await getStreamUrl(url);
+    return res.send(stream);
+  } catch (e) {
+    if (/timeout/i.test(e.message)) {
+      return res.status(504);
+    }
+    console.error(e);
+    return res.status(500);
+  }
 });
 
 app.listen(port, () => console.log(`Declutter app listening on port ${port}!`));

+ 46 - 1
public/style.css

@@ -1,10 +1,19 @@
 body {
   margin: 3.5rem auto 0.5rem;
-  max-width: 40rem;
+  width: 40rem;
   font: 1.2em/1.62 sans-serif;
   text-align: center;
 }
 
+.is-playing.is-fullwidth body {
+  width: 100%;
+}
+
+.is-dark {
+  background: #222;
+  color: #fff;
+}
+
 h1,
 h2 {
   margin: 0 auto;
@@ -62,6 +71,18 @@ button {
   vertical-align: middle;
 }
 
+.is-dark input {
+  background: #333;
+  color: #fff;
+}
+.is-dark button {
+  background: #222;
+  color: #fff;
+}
+.is-dark .loading {
+  filter: invert();
+}
+
 footer {
   margin: 0 auto 1.25rem;
   position: absolute;
@@ -102,3 +123,27 @@ footer p {
 .is-error form {
   margin-top: 5rem;
 }
+
+.is-playing body {
+  margin: 1rem auto;
+}
+
+.is-playing form,
+.player {
+  display: none;
+}
+
+.is-playing .player {
+  display: initial;
+
+  min-width: 100%;
+}
+
+.is-playing .video-js {
+  position: absolute;
+  bottom: 0;
+  left: 0;
+  right: 0;
+  height: 100%;
+  width: 100%;
+}

+ 36 - 0
public/watch.html

@@ -0,0 +1,36 @@
+<!DOCTYPE html>
+<html lang="en" class="is-dark is-fullwidth">
+
+<head>
+	<meta charset="utf-8">
+	<meta name="viewport" content="width=device-width, initial-scale=1">
+	<title>Declutter</title>
+	<link rel="stylesheet" href="/style.css" />
+	<link href="https://vjs.zencdn.net/7.6.0/video-js.css" rel="stylesheet">
+	<link href="https://cdnjs.cloudflare.com/ajax/libs/videojs-overlay/1.1.4/videojs-overlay.css" rel="stylesheet">
+	<script src="https://vjs.zencdn.net/ie8/1.1.2/videojs-ie8.min.js"></script>
+</head>
+
+<body>
+	<h1>Declutter</h1>
+	<h2>Watch <em>only</em> what matters.</h2>
+	<img class="loading" src="/loading.svg" alt="loading..." width="200" height="200" />
+	<h3 class="error">ERROR</h3>
+	<!-- /watch.js intercepts this form's submit event and posts the URL as JSON. -->
+	<form action="/watch" method="POST" accept-charset="UTF-8" autocomplete="off"><input name="url"
+			placeholder="Enter stream site link" pattern="^https?:\/\/(www\.)?.*" required /><button value="true"
+			name="redirect" type="submit">view</button>
+	</form>
+	<!-- Hidden by /style.css until the root element has the is-playing class. -->
+	<div class="player">
+		<video id='decluttered-video' class='video-js' controls preload='auto' data-setup='{}'>
+			<p class='vjs-no-js'>
+				To view this video please enable JavaScript, and consider upgrading to a web browser that
+				<a href='https://videojs.com/html5-video-support/' target='_blank' rel='noopener noreferrer'>supports HTML5 video</a>
+			</p>
+		</video>
+	</div>
+	<script src="https://vjs.zencdn.net/7.6.0/video.js"></script>
+	<script src="https://cdnjs.cloudflare.com/ajax/libs/videojs-overlay/1.1.4/videojs-overlay.min.js"></script>
+	<script src="/watch.js" type="text/javascript"></script>
+</body>
+
+</html>

+ 77 - 0
public/watch.js

@@ -0,0 +1,77 @@
+(function() {
+  document.querySelector('form').addEventListener('submit', e => {
+    e.preventDefault();
+    document.querySelector('html').classList.add('is-loading');
+    document.querySelector('html').classList.remove('is-error');
+    document.querySelector('html').classList.remove('is-playing');
+    videojs('decluttered-video') && videojs('decluttered-video').pause();
+    fetch('/watch', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({
+        url: e.target.url.value
+      })
+    })
+      .then(response => {
+        if (response.ok) {
+          return response.json();
+        }
+        throw response.statusText;
+      })
+      .then(details => {
+        console.log('stream', details);
+        const player = videojs('decluttered-video');
+        player.src({
+          src: details.streams[0] || ''
+        });
+        const overlay = `<div><h1>${[details.title, details.publisher]
+          .filter(f => !!f && !!f.trim())
+          .join(' &mdash; ')}</h2></div>`;
+        player.overlay({
+          overlays: [
+            {
+              start: 'loadstart',
+              content: overlay,
+              end: 'playing',
+              align: 'top'
+            },
+            {
+              start: 'pause',
+              content: overlay,
+              end: 'playing',
+              align: 'top'
+            }
+          ]
+        });
+        player.play();
+        document.querySelector('h1').innerText = details.title;
+        document.querySelector('html').classList.add('is-playing');
+        document.querySelector('html').classList.remove('is-loading');
+      })
+      .catch(status => {
+        videojs('decluttered-video') && videojs('decluttered-video').pause();
+        document.querySelector('.error').innerText = status;
+        document.querySelector('html').classList.add('is-error');
+        document.querySelector('html').classList.remove('is-loading');
+        document.querySelector('html').classList.remove('is-playing');
+      });
+  });
+
+  const query = window.location.search
+    .substring(1)
+    .split('&')
+    .reduce((m, p) => {
+      const [key, value] = p.split('=');
+      return {
+        ...m,
+        [key]: decodeURIComponent(value)
+      };
+    }, {});
+  if (!query.url) {
+    return;
+  }
+  document.querySelector('input[name="url"]').value = query.url;
+  document.querySelector('button').click();
+})();

utils/sites.js → utils/config/sites.js


+ 6 - 2
utils/declutter.js

@@ -7,7 +7,7 @@ const util = require('util');
 
 const domToNode = require('./dom-node');
 const telegraph = require('./telegraph');
-const sites = require('./sites');
+const sites = require('./config/sites');
 const bypass = require('./bypass');
 
 const cleanHtmlText = text => {
@@ -148,7 +148,11 @@ const UA = site => {
 
 module.exports = async url => {
   const extensions = await getExtensions();
-  const browser = await puppeteer.launch({ headless: true, args: [].concat(extensions) });
+  const browser = await puppeteer.launch({
+    args: [].concat(extensions),
+    executablePath: process.env.DECLUTTER_CHROME_PATH || undefined,
+    headless: true
+  });
   const tab = await browser.newPage();
   try {
     const site = sites.find(s => s.host.test(url));

+ 106 - 0
utils/stream.js

@@ -0,0 +1,106 @@
+const puppeteer = require('puppeteer');
+const path = require('path');
+const fs = require('fs');
+const util = require('util');
+const process = require('process');
+
+const domToNode = require('./dom-node');
+const telegraph = require('./telegraph');
+const bypass = require('./bypass');
+
+const fixRelativeLinks = async (tab, url) => {
+  return await tab.evaluate(url => {
+    const host = url
+      .split('/')
+      .slice(0, 3)
+      .join('/');
+
+    Array.from(document.querySelectorAll('[src^="/"]'))
+      .filter(e => e.attributes.src && /^\/[^\/]/.test(e.attributes.src.value))
+      .forEach(e => {
+        e.attributes.src.value = `${host}${e.attributes.src.value}`;
+      });
+    Array.from(document.querySelectorAll('[href^="/"]'))
+      .filter(e => e.attributes.href && /^\/[^\/]/.test(e.attributes.href.value))
+      .forEach(e => {
+        e.attributes.href.value = `${host}${e.attributes.href.value}`;
+      });
+  }, url);
+};
+
+const getExtensions = async () => {
+  const extensionDir = path.join(__dirname, '../extensions');
+  const exists = await util.promisify(fs.exists)(extensionDir);
+  if (!exists) {
+    return [];
+  }
+  const ls = await util.promisify(fs.readdir)(extensionDir);
+  const extensions = ls.map(name => '--load-extension=' + path.join(extensionDir, name));
+  return extensions;
+};
+
+module.exports = async url => {
+  if (/(master)?\.m3u8/i.test(url)) {
+    return {
+      title: 'm3u8 stream',
+      author: '',
+      publisher: url
+        .split('/')
+        .slice(0, 3)
+        .join('/'),
+      streams: [url]
+    };
+  }
+  const extensions = await getExtensions();
+  const browser = await puppeteer.launch({
+    args: [].concat(extensions),
+    executablePath:
+      process.env.DECLUTTER_CHROME_PATH || 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\Chrome.exe',
+    headless: true
+  });
+  const tab = await browser.newPage();
+  try {
+    const streams = [];
+
+    tab.on('request', request => {
+      if (/(master)?\.m3u8/.test(request.url())) {
+        streams.push(request.url());
+      }
+    });
+
+    await tab.setViewport({ width: 3840, height: 4096 });
+    await tab.setUserAgent(
+      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'
+    );
+    await tab.goto(url, {
+      timeout: 30000,
+      waitUntil: 'networkidle2'
+    });
+    await fixRelativeLinks(tab, url);
+    await tab.waitFor(extensions.length > 0 ? 3000 : 1000);
+
+    let { title, author, publisher } = await tab.evaluate(url => {
+      const $author = document.querySelector('meta[property="og:site_name"]');
+      const $title = document.querySelector('meta[property="og:title"]');
+      return {
+        title: $title && $title.content ? $title.content : '',
+        author: $author && $author.content ? $author.content : '',
+        publisher: new URL(url).host
+      };
+    }, url);
+
+    await tab.close();
+    await browser.close();
+
+    return {
+      title,
+      author,
+      publisher,
+      streams
+    };
+  } catch (e) {
+    await tab.close();
+    await browser.close();
+    throw e;
+  }
+};