From de1ffbaf5b1c2955660bdb9339910e2a2bca2694 Mon Sep 17 00:00:00 2001 From: maggievu Date: Wed, 13 Oct 2021 13:07:29 -0700 Subject: [PATCH 01/16] install cors-anywhere --- server/index.js | 10 ++++++++++ server/package.json | 1 + 2 files changed, 11 insertions(+) diff --git a/server/index.js b/server/index.js index e88ebda..3f37864 100644 --- a/server/index.js +++ b/server/index.js @@ -4,6 +4,7 @@ const scrape = require('website-scraper'); const puppeteer = require('puppeteer'); const path = require('path'); const fs = require('fs'); +const cors_proxy = require('cors-anywhere'); const PORT = 3001; @@ -12,6 +13,12 @@ const app = express(); app.use(express.static('public')); app.use(cors()); +const CORS_SERVER = cors_proxy.createServer({ + originWhitelist: [], + // requireHeader: [ 'origin', 'x-requested-with' ], + removeHeaders: [ 'cookie', 'cookie2' ] +}); + // endpoint to scrape the website and generate a thumb preview app.get('/website', (req, res) => { const { url, width, height } = req.query; @@ -32,6 +39,9 @@ app.get('/website', (req, res) => { filenameGenerator: `${pagePath}`, }; + req.url = `/${url}`; + CORS_SERVER.emit("request", req, res); + scrape(options).then(async (result) => { // get the screenshot with puppeteer after the scrape is complete const browser = await puppeteer.launch({ diff --git a/server/package.json b/server/package.json index 2155cb6..a15d664 100644 --- a/server/package.json +++ b/server/package.json @@ -10,6 +10,7 @@ "license": "ISC", "dependencies": { "cors": "^2.8.5", + "cors-anywhere": "^0.4.3", "express": "^4.17.1", "puppeteer": "^7.0.4", "website-scraper": "^4.2.3" From 5919bdafca64e7e6a5c7fbdd09b4cfcd222af43f Mon Sep 17 00:00:00 2001 From: Kristian Hein Date: Wed, 13 Oct 2021 15:26:13 -0700 Subject: [PATCH 02/16] testing --- client/src/App.js | 60 +++++++----- client/src/setupProxy.js | 22 ++--- server/index.js | 201 ++++++++++++++++++++++++--------------- server/package-lock.json | 55 +++++++++++ server/package.json | 1 + 5 files changed, 225 insertions(+), 114 deletions(-) diff --git a/client/src/App.js b/client/src/App.js index 96914d7..83fcf53 100644 --- a/client/src/App.js +++ b/client/src/App.js @@ -10,32 +10,40 @@ function App() { const [pdfBlob, setPdfBlob] = useState(null); const loadURL = (url, width, height) => { - setShow(true); - fetch( - `http://127.0.0.1:3001/website?url=${url}&width=${width}&height=${height}` - ) - .then(async (response) => { - if (response.ok) { - const json = await response.json(); - if (json.data.url) { - json.data.url = `http://localhost:3000/redirect/${json.data.url}`; - } - setResponse({ - url: json.data.url, - width, - height, - thumb: json.data.thumb, - origUrl: url - }); - setShow(false); - } - }) - .catch((err) => { - setShow(false); - setFetchError( - 'Trouble fetching the URL, please make sure the server is running. `cd server && npm start`' - ); - }); + // setShow(true); + setResponse({ + url: 'http://localhost:8080/www.google.ca', + origUrl: 'http://localhost:8080/www.google.ca', + width: 1800, + height: 1100, + thumb: '', + }) + + // fetch( + // `http://127.0.0.1:3001/website?url=${url}&width=${width}&height=${height}` + // ) + // .then(async (response) => { + // if (response.ok) { + // const json = await response.json(); + // if (json.data.url) { + // json.data.url = `http://localhost:3000/redirect/${json.data.url}`; + // } + // setResponse({ + // url: json.data.url, + // width, + // height, + // thumb: json.data.thumb, + // origUrl: url + // }); + // setShow(false); + // } + // }) + // .catch((err) => { + // setShow(false); + // setFetchError( + // 'Trouble fetching the URL, please make sure the server is running. `cd server && npm start`' + // ); + // }); }; const downloadPDF = () => { diff --git a/client/src/setupProxy.js b/client/src/setupProxy.js index a024ce8..18e8ae2 100644 --- a/client/src/setupProxy.js +++ b/client/src/setupProxy.js @@ -1,12 +1,12 @@ -const { createProxyMiddleware } = require('http-proxy-middleware'); +// const { createProxyMiddleware } = require('http-proxy-middleware'); -module.exports = function (app) { - app.use( - '/redirect', - createProxyMiddleware({ - target: 'http://localhost:3001', - changeOrigin: true, - pathRewrite: { '^/redirect': '' }, - }) - ); -}; +// module.exports = function (app) { +// app.use( +// '/redirect', +// createProxyMiddleware({ +// target: 'http://localhost:3001', +// changeOrigin: true, +// pathRewrite: { '^/redirect': '' }, +// }) +// ); +// }; diff --git a/server/index.js b/server/index.js index 3f37864..7e09b22 100644 --- a/server/index.js +++ b/server/index.js @@ -1,10 +1,12 @@ const express = require('express'); const cors = require('cors'); const scrape = require('website-scraper'); -const puppeteer = require('puppeteer'); +// const puppeteer = require('puppeteer'); const path = require('path'); const fs = require('fs'); const cors_proxy = require('cors-anywhere'); +var http = require('http'); +var httpProxy = require('http-proxy'); const PORT = 3001; @@ -13,91 +15,136 @@ const app = express(); app.use(express.static('public')); app.use(cors()); -const CORS_SERVER = cors_proxy.createServer({ - originWhitelist: [], - // requireHeader: [ 'origin', 'x-requested-with' ], - removeHeaders: [ 'cookie', 'cookie2' ] +// // Listen on a specific host via the HOST environment variable +// var host = process.env.HOST || '0.0.0.0'; +// // Listen on a specific port via the PORT environment variable +// var port = process.env.PORT || 8080; + +// const CORS_SERVER = cors_proxy.createServer({ +// originWhitelist: [], +// // requireHeader: [ 'origin', 'x-requested-with' ], +// removeHeaders: [ 'cookie', 'cookie2' ] +// }).listen(port, host, function() { +// console.log('Running CORS Anywhere on ' + host + ':' + port); +// }); + + + +// var proxy = httpProxy.createProxyServer(options); // See (†) +// http.createServer(function(req, res) { +// proxy.web(req, res, { target: 'http://localhost:8080' }); +// }); + +// + + +// +// Create a proxy server with custom application logic +// +var proxy = httpProxy.createProxyServer({}); + +// Create your custom server and just call `proxy.web()` to proxy +// a web request to the target passed in the options +// also you can use `proxy.ws()` to proxy a websockets request +// +var server = http.createServer(function(req, res) { + // You can define here your custom logic to handle the request + // and then proxy the request. + proxy.web(req, res, { target: 'http://127.0.0.1:5050' }); }); +console.log("listening on port 5050") +server.listen(5050); + // endpoint to scrape the website and generate a thumb preview app.get('/website', (req, res) => { - const { url, width, height } = req.query; - const timestamp = Date.now(); - if (!url) { - res.status(400).json({ - status: 'Bad Request', - data: 'Please provide URL of the website you want to scrape as a query parameter.', - }); - } - const urlToConvert = new URL(url); - const pagePath = `${urlToConvert.hostname}${timestamp}`; - const directory = path.resolve(__dirname, `./public/${pagePath}`); - - const options = { - urls: [url], - directory, - filenameGenerator: `${pagePath}`, - }; - - req.url = `/${url}`; - CORS_SERVER.emit("request", req, res); - - scrape(options).then(async (result) => { - // get the screenshot with puppeteer after the scrape is complete - const browser = await puppeteer.launch({ - defaultViewport: { - width: Number(width), - height: Number(height), - }, - }); - const page = await browser.newPage(); - await page.goto(`http://127.0.0.1:${PORT}/${pagePath}/index.html`); - const thumbPath = path.resolve(__dirname, `./public/${pagePath}/thumb.png`); - await page.screenshot({ - path: thumbPath, - }); - - // read the file from the filepath and respond to server with URL and thumb - await fs.readFile(thumbPath, { encoding: 'base64' }, (err, data) => { - if (err) throw err; - const prefix = 'data:image/png;base64,'; - res.status(200).json({ - status: 'success', - data: { - url: `${pagePath}/index.html`, - thumb: prefix + data, - }, - }); - }); - - await browser.close(); - }); -}); - -app.get('/getpdf', async (req, res) => { - const { url, width, height } = req.query; - const pagePath = path.resolve(__dirname, `./public/pdf/html.pdf`); - const browser = await puppeteer.launch({ - defaultViewport: { - width: Number(width), - height: Number(height), + const { url } = req.query; + res.status(200).json({ + status: 'success', + data: { + url, + // thumb: prefix + data, + thumb: '', }, }); - const page = await browser.newPage(); - await page.goto(`http://127.0.0.1:${PORT}/${url}/index.html`); - await page.pdf({ - path: pagePath, - width: Number(width), - height: Number(height), - printBackground: true, - pageRanges: '1', - }); - await browser.close(); - // read the file from the filepath and respond to server - res.sendFile(pagePath); + + // const timestamp = Date.now(); + // if (!url) { + // res.status(400).json({ + // status: 'Bad Request', + // data: 'Please provide URL of the website you want to scrape as a query parameter.', + // }); + // } + // const urlToConvert = new URL(url); + // const pagePath = `${urlToConvert.hostname}${timestamp}`; + // const directory = path.resolve(__dirname, `./public/${pagePath}`); + + // const options = { + // urls: [url], + // directory, + // filenameGenerator: `${pagePath}`, + // }; + + // req.url = `/${url}`; + // // CORS_SERVER.emit("request", req, res); + + // scrape(options).then(async (result) => { + // // get the screenshot with puppeteer after the scrape is complete + // const browser = await puppeteer.launch({ + // defaultViewport: { + // width: Number(width), + // height: Number(height), + // }, + // }); + // const page = await browser.newPage(); + // await page.goto(`http://127.0.0.1:${PORT}/${pagePath}/index.html`); + // const thumbPath = path.resolve(__dirname, `./public/${pagePath}/thumb.png`); + // await page.screenshot({ + // path: thumbPath, + // }); + + // // read the file from the filepath and respond to server with URL and thumb + // await fs.readFile(thumbPath, { encoding: 'base64' }, (err, data) => { + // if (err) throw err; + // const prefix = 'data:image/png;base64,'; + // res.status(200).json({ + // status: 'success', + // data: { + // url: `${pagePath}/index.html`, + // thumb: prefix + data, + // }, + // }); + // }); + + // await browser.close(); + // }); }); +// app.get('/getpdf', async (req, res) => { +// const { url, width, height } = req.query; +// const pagePath = path.resolve(__dirname, `./public/pdf/html.pdf`); +// const browser = await puppeteer.launch({ +// defaultViewport: { +// width: Number(width), +// height: Number(height), +// }, +// }); +// const page = await browser.newPage(); +// await page.goto(`http://127.0.0.1:${PORT}/${url}/index.html`); +// await page.pdf({ +// path: pagePath, +// width: Number(width), +// height: Number(height), +// printBackground: true, +// pageRanges: '1', +// }); +// await browser.close(); + +// // read the file from the filepath and respond to server +// res.sendFile(pagePath); +// }); + app.listen(PORT, () => { console.log(`Server is now live at ${PORT}`); }); diff --git a/server/package-lock.json b/server/package-lock.json index 36f1632..3d9ad9b 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -268,6 +268,41 @@ "vary": "^1" } }, + "cors-anywhere": { + "version": "0.4.4", + "resolved": "https://registry.npmjs.org/cors-anywhere/-/cors-anywhere-0.4.4.tgz", + "integrity": "sha512-8OBFwnzMgR4mNrAeAyOLB2EruS2z7u02of2bOu7i9kKYlZG+niS7CTHLPgEXKWW2NAOJWRry9RRCaL9lJRjNqg==", + "requires": { + "http-proxy": "1.11.1", + "proxy-from-env": "0.0.1" + }, + "dependencies": { + "eventemitter3": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-1.2.0.tgz", + "integrity": "sha1-HIaZHYFq0eUEdQ5zh0Ik7PO+xQg=" + }, + "http-proxy": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.11.1.tgz", + "integrity": "sha1-cd9VdX6ALVjqgQ3yJEAZ3aBa6F0=", + "requires": { + "eventemitter3": "1.x.x", + "requires-port": "0.x.x" + } + }, + "proxy-from-env": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-0.0.1.tgz", + "integrity": "sha1-snxJRunm1dutt1mKZDXTAUxM/Uk=" + }, + "requires-port": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-0.0.1.tgz", + "integrity": "sha1-S0QUQR2d98hVmV3YmajHiilRwW0=" + } + } + }, "css-select": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", @@ -525,6 +560,11 @@ "path-exists": "^4.0.0" } }, + "follow-redirects": { + "version": "1.14.4", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.4.tgz", + "integrity": "sha512-zwGkiSXC1MUJG/qmeIFH2HBJx9u0V46QGUe3YR1fXG8bXQxq7fLj0RjLZQ5nubr9qNJUZrH+xUcwXEoXNpfS+g==" + }, "forever-agent": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", @@ -640,6 +680,16 @@ "toidentifier": "1.0.0" } }, + "http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "requires": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + } + }, "http-signature": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz", @@ -1146,6 +1196,11 @@ } } }, + "requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha1-kl0mAdOaxIXgkc8NpcbmlNw9yv8=" + }, "rimraf": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", diff --git a/server/package.json b/server/package.json index a15d664..e4b645d 100644 --- a/server/package.json +++ b/server/package.json @@ -12,6 +12,7 @@ "cors": "^2.8.5", "cors-anywhere": "^0.4.3", "express": "^4.17.1", + "http-proxy": "^1.18.1", "puppeteer": "^7.0.4", "website-scraper": "^4.2.3" }, From 95f86eaf05310bc9a2ec08020877426b07c83bcb Mon Sep 17 00:00:00 2001 From: Kristian Hein Date: Thu, 14 Oct 2021 01:35:57 -0700 Subject: [PATCH 03/16] working proxy to pdftron --- server/index.js | 481 +++++++++++++++++++++++++++------------ server/package-lock.json | 22 +- server/package.json | 1 + 3 files changed, 355 insertions(+), 149 deletions(-) diff --git a/server/index.js b/server/index.js index 7e09b22..4a7ec10 100644 --- a/server/index.js +++ b/server/index.js @@ -1,150 +1,347 @@ -const express = require('express'); -const cors = require('cors'); -const scrape = require('website-scraper'); -// const puppeteer = require('puppeteer'); -const path = require('path'); -const fs = require('fs'); -const cors_proxy = require('cors-anywhere'); -var http = require('http'); -var httpProxy = require('http-proxy'); +// const express = require('express'); +// const cors = require('cors'); +// const scrape = require('website-scraper'); +// // const puppeteer = require('puppeteer'); +// // const path = require('path'); +// // const fs = require('fs'); +// // const cors_proxy = require('cors-anywhere'); + +// const http = require('http'); +// const httpProxy = require('http-proxy'); -const PORT = 3001; +// const proxy = httpProxy.createProxyServer({}); +// http.createServer(function(req, res) { +// console.log('Request', req.method, req.url, `${req.protocol}://${req.hostname}`); +// proxy.web(req, res, { target: 'http://info.cern.ch/' }); +// }).listen(3000); -const app = express(); +// // (async () => { +// // // const express = require('express'); +// // const httpProxy = require('http-proxy'); -app.use(express.static('public')); -app.use(cors()); +// // // Create a proxy and listen on port 3000 +// // const proxy = httpProxy.createProxyServer({}); +// // const app = express(); +// // app.get('*', function(req, res) { +// // // Prints "Request GET https://httpbin.org/get?answer=42" +// // console.log('Request', req.method, req.url); +// // proxy.web(req, res, { target: `${req.protocol}://${req.hostname}` }); +// // }); +// // const server = await app.listen(3000); -// // Listen on a specific host via the HOST environment variable -// var host = process.env.HOST || '0.0.0.0'; -// // Listen on a specific port via the PORT environment variable -// var port = process.env.PORT || 8080; +// // const axios = require('axios'); +// // const res = await axios.get('http://httpbin.org/get?answer=42', { +// // // `proxy` means the request actually goes to the server listening +// // // on localhost:3000, but the request says it is meant for +// // // 'http://httpbin.org/get?answer=42' +// // proxy: { +// // host: 'localhost', +// // port: 3000 +// // } +// // }); +// // console.log(res.data); +// // })(); -// const CORS_SERVER = cors_proxy.createServer({ -// originWhitelist: [], -// // requireHeader: [ 'origin', 'x-requested-with' ], -// removeHeaders: [ 'cookie', 'cookie2' ] -// }).listen(port, host, function() { -// console.log('Running CORS Anywhere on ' + host + ':' + port); -// }); +// // var http = require('http'); +// // var httpProxy = require('http-proxy'); +// // const PORT = 3001; -// var proxy = httpProxy.createProxyServer(options); // See (†) -// http.createServer(function(req, res) { -// proxy.web(req, res, { target: 'http://localhost:8080' }); -// }); - -// - - -// -// Create a proxy server with custom application logic -// -var proxy = httpProxy.createProxyServer({}); - -// Create your custom server and just call `proxy.web()` to proxy -// a web request to the target passed in the options -// also you can use `proxy.ws()` to proxy a websockets request -// -var server = http.createServer(function(req, res) { - // You can define here your custom logic to handle the request - // and then proxy the request. - proxy.web(req, res, { target: 'http://127.0.0.1:5050' }); -}); - -console.log("listening on port 5050") -server.listen(5050); - -// endpoint to scrape the website and generate a thumb preview -app.get('/website', (req, res) => { - const { url } = req.query; - res.status(200).json({ - status: 'success', - data: { - url, - // thumb: prefix + data, - thumb: '', - }, - }); - - - // const timestamp = Date.now(); - // if (!url) { - // res.status(400).json({ - // status: 'Bad Request', - // data: 'Please provide URL of the website you want to scrape as a query parameter.', - // }); - // } - // const urlToConvert = new URL(url); - // const pagePath = `${urlToConvert.hostname}${timestamp}`; - // const directory = path.resolve(__dirname, `./public/${pagePath}`); - - // const options = { - // urls: [url], - // directory, - // filenameGenerator: `${pagePath}`, - // }; - - // req.url = `/${url}`; - // // CORS_SERVER.emit("request", req, res); - - // scrape(options).then(async (result) => { - // // get the screenshot with puppeteer after the scrape is complete - // const browser = await puppeteer.launch({ - // defaultViewport: { - // width: Number(width), - // height: Number(height), - // }, - // }); - // const page = await browser.newPage(); - // await page.goto(`http://127.0.0.1:${PORT}/${pagePath}/index.html`); - // const thumbPath = path.resolve(__dirname, `./public/${pagePath}/thumb.png`); - // await page.screenshot({ - // path: thumbPath, - // }); - - // // read the file from the filepath and respond to server with URL and thumb - // await fs.readFile(thumbPath, { encoding: 'base64' }, (err, data) => { - // if (err) throw err; - // const prefix = 'data:image/png;base64,'; - // res.status(200).json({ - // status: 'success', - // data: { - // url: `${pagePath}/index.html`, - // thumb: prefix + data, - // }, - // }); - // }); - - // await browser.close(); - // }); -}); - -// app.get('/getpdf', async (req, res) => { -// const { url, width, height } = req.query; -// const pagePath = path.resolve(__dirname, `./public/pdf/html.pdf`); -// const browser = await puppeteer.launch({ -// defaultViewport: { -// width: Number(width), -// height: Number(height), -// }, -// }); -// const page = await browser.newPage(); -// await page.goto(`http://127.0.0.1:${PORT}/${url}/index.html`); -// await page.pdf({ -// path: pagePath, -// width: Number(width), -// height: Number(height), -// printBackground: true, -// pageRanges: '1', -// }); -// await browser.close(); - -// // read the file from the filepath and respond to server -// res.sendFile(pagePath); -// }); - -app.listen(PORT, () => { - console.log(`Server is now live at ${PORT}`); -}); +// // const app = express(); + +// // app.use(express.static('public')); +// // app.use(cors()); + + +// // var http = require('http'), +// // httpProxy = require('http-proxy'); + +// // // +// // // Create a proxy server with custom application logic +// // // +// // var proxy = httpProxy.createProxyServer({}); + +// // // +// // // Create your custom server and just call `proxy.web()` to proxy +// // // a web request to the target passed in the options +// // // also you can use `proxy.ws()` to proxy a websockets request +// // // +// // var server = http.createServer(function(req, res) { +// // // You can define here your custom logic to handle the request +// // // and then proxy the request. +// // proxy.web(req, res, { target: 'http://www.google.ca' }); +// // }); + +// // console.log("listening on port 5050") +// // server.listen(5050); + +// // // var http = require('http'), +// // // httpProxy = require('http-proxy'); + +// // // var addresses = [ +// // // { +// // // host: '127.0.0.1', +// // // port: 8000 +// // // }, +// // // { +// // // host: '127.0.0.1', +// // // port: 8001 +// // // }, +// // // { +// // // host: '127.0.0.1', +// // // port: 8002 +// // // }, +// // // { +// // // host: '127.0.0.1', +// // // port: 8003 +// // // } +// // // ]; + +// // // // +// // // // Create your target server +// // // // +// // // var server = http.createServer(function (req, res) { +// // // addresses = addresses.concat(addresses.splice(0, 1)); +// // // console.log(addresses); +// // // var target = { target: addresses }; +// // // // +// // // // Create your proxy server and set the target in the options. +// // // // +// // // // var proxyServer = httpProxy.createProxyServer(target); + +// // // res.writeHead(200, { 'Content-Type': 'application/json' }); +// // // res.write('request successfully proxied!' + '\n' + JSON.stringify(req.headers, true, 2)); +// // // res.end(); +// // // }) + +// // // server.listen(9000); + +// // // // Listen on a specific host via the HOST environment variable +// // // var host = process.env.HOST || '0.0.0.0'; +// // // // Listen on a specific port via the PORT environment variable +// // // var port = process.env.PORT || 8080; + +// // // const CORS_SERVER = cors_proxy.createServer({ +// // // originWhitelist: [], +// // // // requireHeader: [ 'origin', 'x-requested-with' ], +// // // removeHeaders: [ 'cookie', 'cookie2' ] +// // // }).listen(port, host, function() { +// // // console.log('Running CORS Anywhere on ' + host + ':' + port); +// // // }); + + + +// // // var proxy = httpProxy.createProxyServer(options); // See (†) +// // // http.createServer(function(req, res) { +// // // proxy.web(req, res, { target: 'http://localhost:8080' }); +// // // }); + +// // // // + + +// // // // +// // // // Create a proxy server with custom application logic +// // // // +// // // var proxy = httpProxy.createProxyServer({}); + +// // // // Create your custom server and just call `proxy.web()` to proxy +// // // // a web request to the target passed in the options +// // // // also you can use `proxy.ws()` to proxy a websockets request +// // // // +// // // var server = http.createServer(function(req, res) { +// // // // You can define here your custom logic to handle the request +// // // // and then proxy the request. +// // // proxy.web(req, res, { target: 'http://127.0.0.1:5050' }); +// // // }); + +// // // console.log("listening on port 5050") +// // // server.listen(5050); + +// // // var httpProxy = require('http-proxy'); +// // // var url = require('url'); + +// // // // var proxy = httpProxy.createProxyServer({}); + +// // // // http.createServer(function(req, res) { +// // // // proxy.web(req, res, { target: 'http://mytarget.com:8080' }); +// // // // }); + + +// // // var proxy = httpProxy.createProxyServer({ +// // // target: 'http://localhost:9000', +// // // secure: false, +// // // // changeOrigin: true, +// // // xfwd: true, +// // // // autoRewrite: true +// // // }); + +// // // proxy.on('error', function (err) { +// // // console.log(err); +// // // console.log('Listening... [press Control-C to exit]'); +// // // }) + +// // // console.log('sdkfjsdlfk'); +// // // proxy.on("proxyReq", function(proxyReq, req, _, options) { +// // // console.log('bheljkrelrjelwr'); +// // // }); + + +// // // endpoint to scrape the website and generate a thumb preview +// // app.get('/website', (req, res) => { +// // const { url } = req.query; +// // res.status(200).json({ +// // status: 'success', +// // data: { +// // url, +// // // thumb: prefix + data, +// // thumb: '', +// // }, +// // }); + + +// // // const timestamp = Date.now(); +// // // if (!url) { +// // // res.status(400).json({ +// // // status: 'Bad Request', +// // // data: 'Please provide URL of the website you want to scrape as a query parameter.', +// // // }); +// // // } +// // // const urlToConvert = new URL(url); +// // // const pagePath = `${urlToConvert.hostname}${timestamp}`; +// // // const directory = path.resolve(__dirname, `./public/${pagePath}`); + +// // // const options = { +// // // urls: [url], +// // // directory, +// // // filenameGenerator: `${pagePath}`, +// // // }; + +// // // req.url = `/${url}`; +// // // // CORS_SERVER.emit("request", req, res); + +// // // scrape(options).then(async (result) => { +// // // // get the screenshot with puppeteer after the scrape is complete +// // // const browser = await puppeteer.launch({ +// // // defaultViewport: { +// // // width: Number(width), +// // // height: Number(height), +// // // }, +// // // }); +// // // const page = await browser.newPage(); +// // // await page.goto(`http://127.0.0.1:${PORT}/${pagePath}/index.html`); +// // // const thumbPath = path.resolve(__dirname, `./public/${pagePath}/thumb.png`); +// // // await page.screenshot({ +// // // path: thumbPath, +// // // }); + +// // // // read the file from the filepath and respond to server with URL and thumb +// // // await fs.readFile(thumbPath, { encoding: 'base64' }, (err, data) => { +// // // if (err) throw err; +// // // const prefix = 'data:image/png;base64,'; +// // // res.status(200).json({ +// // // status: 'success', +// // // data: { +// // // url: `${pagePath}/index.html`, +// // // thumb: prefix + data, +// // // }, +// // // }); +// // // }); + +// // // await browser.close(); +// // // }); +// // }); + +// // // app.get('/getpdf', async (req, res) => { +// // // const { url, width, height } = req.query; +// // // const pagePath = path.resolve(__dirname, `./public/pdf/html.pdf`); +// // // const browser = await puppeteer.launch({ +// // // defaultViewport: { +// // // width: Number(width), +// // // height: Number(height), +// // // }, +// // // }); +// // // const page = await browser.newPage(); +// // // await page.goto(`http://127.0.0.1:${PORT}/${url}/index.html`); +// // // await page.pdf({ +// // // path: pagePath, +// // // width: Number(width), +// // // height: Number(height), +// // // printBackground: true, +// // // pageRanges: '1', +// // // }); +// // // await browser.close(); + +// // // // read the file from the filepath and respond to server +// // // res.sendFile(pagePath); +// // // }); + +// // app.listen(PORT, () => { +// // console.log(`Server is now live at ${PORT}`); +// // }); + +// TAKEN FROM: https://stackoverflow.com/a/63602976 +var express = require('express') +var app = express() +var https = require('https'); +var http = require('http'); +const { response } = require('express'); + + +app.use('/', function(clientRequest, clientResponse) { + var url; + // url = 'https://www.google.com' + console.log('----', clientRequest.hostname, clientRequest.url); + url = 'https://www.pdftron.com'; + var parsedHost = url.split('/').splice(2).splice(0, 1).join('/') + var parsedPort; + var parsedSSL; + if (url.startsWith('https://')) { + parsedPort = 443 + parsedSSL = https + } else if (url.startsWith('http://')) { + parsedPort = 80 + parsedSSL = http + } + var options = { + hostname: parsedHost, + port: parsedPort, + path: clientRequest.url, + method: clientRequest.method, + headers: { + 'User-Agent': clientRequest.headers['user-agent'] + } + }; + + var serverRequest = parsedSSL.request(options, function(serverResponse) { + var body = ''; + if (String(serverResponse.headers['content-type']).indexOf('text/html') !== -1) { + serverResponse.on('data', function(chunk) { + body += chunk; + }); + + serverResponse.on('end', function() { + // Make changes to HTML files when they're done being read. + body = body.replace(`example`, `Cat!` ); + + clientResponse.writeHead(serverResponse.statusCode, serverResponse.headers); + clientResponse.end(body); + }); + } + else { + serverResponse.pipe(clientResponse, { + end: true + }); + // Can be undefined + if (serverResponse.headers['content-type']) { + clientResponse.contentType(serverResponse.headers['content-type']) + } + } + }); + + serverRequest.end(); + }); + + + app.listen(3000) + console.log('Running on 0.0.0.0:3000') \ No newline at end of file diff --git a/server/package-lock.json b/server/package-lock.json index 3d9ad9b..2a47149 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -95,6 +95,14 @@ "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.10.1.tgz", "integrity": "sha512-zg7Hz2k5lI8kb7U32998pRRFin7zJlkfezGJjUc2heaD4Pw2wObakCDVzkKztTm/Ln7eiVvYsjqak0Ed4LkMDA==" }, + "axios": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.23.0.tgz", + "integrity": "sha512-NmvAE4i0YAv5cKq8zlDoPd1VLKAqX5oLuZKs8xkJa4qi6RGn0uhCYFjWtHHC9EM/MwOwYWOs53W+V0aqEXq1sg==", + "requires": { + "follow-redirects": "^1.14.4" + } + }, "balanced-match": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", @@ -581,9 +589,9 @@ } }, "forwarded": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", - "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=" + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==" }, "fresh": { "version": "0.5.2", @@ -1065,11 +1073,11 @@ "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==" }, "proxy-addr": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz", - "integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==", + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", "requires": { - "forwarded": "~0.1.2", + "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, diff --git a/server/package.json b/server/package.json index e4b645d..ed5c8d5 100644 --- a/server/package.json +++ b/server/package.json @@ -9,6 +9,7 @@ "author": "Andrey Safonov", "license": "ISC", "dependencies": { + "axios": "^0.23.0", "cors": "^2.8.5", "cors-anywhere": "^0.4.3", "express": "^4.17.1", From 1fe054522a8278aa0ab5e54671db0dc1aab0c1e5 Mon Sep 17 00:00:00 2001 From: Kristian Hein Date: Fri, 15 Oct 2021 15:21:43 -0700 Subject: [PATCH 04/16] more testing --- client/src/App.js | 6 +- client/src/setupProxy.js | 12 -- server/index.js | 296 ++------------------------------------- 3 files changed, 16 insertions(+), 298 deletions(-) delete mode 100644 client/src/setupProxy.js diff --git a/client/src/App.js b/client/src/App.js index 83fcf53..0f2a3dc 100644 --- a/client/src/App.js +++ b/client/src/App.js @@ -12,8 +12,10 @@ function App() { const loadURL = (url, width, height) => { // setShow(true); setResponse({ - url: 'http://localhost:8080/www.google.ca', - origUrl: 'http://localhost:8080/www.google.ca', + // url: 'http://localhost:8080/www.google.ca', + url: 'http://localhost:3000', + // origUrl: 'http://localhost:8080/www.google.ca', + origUrl: 'http://localhost:3000', width: 1800, height: 1100, thumb: '', diff --git a/client/src/setupProxy.js b/client/src/setupProxy.js deleted file mode 100644 index 18e8ae2..0000000 --- a/client/src/setupProxy.js +++ /dev/null @@ -1,12 +0,0 @@ -// const { createProxyMiddleware } = require('http-proxy-middleware'); - -// module.exports = function (app) { -// app.use( -// '/redirect', -// createProxyMiddleware({ -// target: 'http://localhost:3001', -// changeOrigin: true, -// pathRewrite: { '^/redirect': '' }, -// }) -// ); -// }; diff --git a/server/index.js b/server/index.js index 4a7ec10..596a178 100644 --- a/server/index.js +++ b/server/index.js @@ -1,285 +1,3 @@ -// const express = require('express'); -// const cors = require('cors'); -// const scrape = require('website-scraper'); -// // const puppeteer = require('puppeteer'); -// // const path = require('path'); -// // const fs = require('fs'); -// // const cors_proxy = require('cors-anywhere'); - -// const http = require('http'); -// const httpProxy = require('http-proxy'); - -// const proxy = httpProxy.createProxyServer({}); -// http.createServer(function(req, res) { -// console.log('Request', req.method, req.url, `${req.protocol}://${req.hostname}`); -// proxy.web(req, res, { target: 'http://info.cern.ch/' }); -// }).listen(3000); - -// // (async () => { -// // // const express = require('express'); -// // const httpProxy = require('http-proxy'); - -// // // Create a proxy and listen on port 3000 -// // const proxy = httpProxy.createProxyServer({}); -// // const app = express(); -// // app.get('*', function(req, res) { -// // // Prints "Request GET https://httpbin.org/get?answer=42" -// // console.log('Request', req.method, req.url); -// // proxy.web(req, res, { target: `${req.protocol}://${req.hostname}` }); -// // }); -// // const server = await app.listen(3000); - -// // const axios = require('axios'); -// // const res = await axios.get('http://httpbin.org/get?answer=42', { -// // // `proxy` means the request actually goes to the server listening -// // // on localhost:3000, but the request says it is meant for -// // // 'http://httpbin.org/get?answer=42' -// // proxy: { -// // host: 'localhost', -// // port: 3000 -// // } -// // }); -// // console.log(res.data); -// // })(); - - -// // var http = require('http'); -// // var httpProxy = require('http-proxy'); - -// // const PORT = 3001; - -// // const app = express(); - -// // app.use(express.static('public')); -// // app.use(cors()); - - -// // var http = require('http'), -// // httpProxy = require('http-proxy'); - -// // // -// // // Create a proxy server with custom application logic -// // // -// // var proxy = httpProxy.createProxyServer({}); - -// // // -// // // Create your custom server and just call `proxy.web()` to proxy -// // // a web request to the target passed in the options -// // // also you can use `proxy.ws()` to proxy a websockets request -// // // -// // var server = http.createServer(function(req, res) { -// // // You can define here your custom logic to handle the request -// // // and then proxy the request. -// // proxy.web(req, res, { target: 'http://www.google.ca' }); -// // }); - -// // console.log("listening on port 5050") -// // server.listen(5050); - -// // // var http = require('http'), -// // // httpProxy = require('http-proxy'); - -// // // var addresses = [ -// // // { -// // // host: '127.0.0.1', -// // // port: 8000 -// // // }, -// // // { -// // // host: '127.0.0.1', -// // // port: 8001 -// // // }, -// // // { -// // // host: '127.0.0.1', -// // // port: 8002 -// // // }, -// // // { -// // // host: '127.0.0.1', -// // // port: 8003 -// // // } -// // // ]; - -// // // // -// // // // Create your target server -// // // // -// // // var server = http.createServer(function (req, res) { -// // // addresses = addresses.concat(addresses.splice(0, 1)); -// // // console.log(addresses); -// // // var target = { target: addresses }; -// // // // -// // // // Create your proxy server and set the target in the options. -// // // // -// // // // var proxyServer = httpProxy.createProxyServer(target); - -// // // res.writeHead(200, { 'Content-Type': 'application/json' }); -// // // res.write('request successfully proxied!' + '\n' + JSON.stringify(req.headers, true, 2)); -// // // res.end(); -// // // }) - -// // // server.listen(9000); - -// // // // Listen on a specific host via the HOST environment variable -// // // var host = process.env.HOST || '0.0.0.0'; -// // // // Listen on a specific port via the PORT environment variable -// // // var port = process.env.PORT || 8080; - -// // // const CORS_SERVER = cors_proxy.createServer({ -// // // originWhitelist: [], -// // // // requireHeader: [ 'origin', 'x-requested-with' ], -// // // removeHeaders: [ 'cookie', 'cookie2' ] -// // // }).listen(port, host, function() { -// // // console.log('Running CORS Anywhere on ' + host + ':' + port); -// // // }); - - - -// // // var proxy = httpProxy.createProxyServer(options); // See (†) -// // // http.createServer(function(req, res) { -// // // proxy.web(req, res, { target: 'http://localhost:8080' }); -// // // }); - -// // // // - - -// // // // -// // // // Create a proxy server with custom application logic -// // // // -// // // var proxy = httpProxy.createProxyServer({}); - -// // // // Create your custom server and just call `proxy.web()` to proxy -// // // // a web request to the target passed in the options -// // // // also you can use `proxy.ws()` to proxy a websockets request -// // // // -// // // var server = http.createServer(function(req, res) { -// // // // You can define here your custom logic to handle the request -// // // // and then proxy the request. -// // // proxy.web(req, res, { target: 'http://127.0.0.1:5050' }); -// // // }); - -// // // console.log("listening on port 5050") -// // // server.listen(5050); - -// // // var httpProxy = require('http-proxy'); -// // // var url = require('url'); - -// // // // var proxy = httpProxy.createProxyServer({}); - -// // // // http.createServer(function(req, res) { -// // // // proxy.web(req, res, { target: 'http://mytarget.com:8080' }); -// // // // }); - - -// // // var proxy = httpProxy.createProxyServer({ -// // // target: 'http://localhost:9000', -// // // secure: false, -// // // // changeOrigin: true, -// // // xfwd: true, -// // // // autoRewrite: true -// // // }); - -// // // proxy.on('error', function (err) { -// // // console.log(err); -// // // console.log('Listening... [press Control-C to exit]'); -// // // }) - -// // // console.log('sdkfjsdlfk'); -// // // proxy.on("proxyReq", function(proxyReq, req, _, options) { -// // // console.log('bheljkrelrjelwr'); -// // // }); - - -// // // endpoint to scrape the website and generate a thumb preview -// // app.get('/website', (req, res) => { -// // const { url } = req.query; -// // res.status(200).json({ -// // status: 'success', -// // data: { -// // url, -// // // thumb: prefix + data, -// // thumb: '', -// // }, -// // }); - - -// // // const timestamp = Date.now(); -// // // if (!url) { -// // // res.status(400).json({ -// // // status: 'Bad Request', -// // // data: 'Please provide URL of the website you want to scrape as a query parameter.', -// // // }); -// // // } -// // // const urlToConvert = new URL(url); -// // // const pagePath = `${urlToConvert.hostname}${timestamp}`; -// // // const directory = path.resolve(__dirname, `./public/${pagePath}`); - -// // // const options = { -// // // urls: [url], -// // // directory, -// // // filenameGenerator: `${pagePath}`, -// // // }; - -// // // req.url = `/${url}`; -// // // // CORS_SERVER.emit("request", req, res); - -// // // scrape(options).then(async (result) => { -// // // // get the screenshot with puppeteer after the scrape is complete -// // // const browser = await puppeteer.launch({ -// // // defaultViewport: { -// // // width: Number(width), -// // // height: Number(height), -// // // }, -// // // }); -// // // const page = await browser.newPage(); -// // // await page.goto(`http://127.0.0.1:${PORT}/${pagePath}/index.html`); -// // // const thumbPath = path.resolve(__dirname, `./public/${pagePath}/thumb.png`); -// // // await page.screenshot({ -// // // path: thumbPath, -// // // }); - -// // // // read the file from the filepath and respond to server with URL and thumb -// // // await fs.readFile(thumbPath, { encoding: 'base64' }, (err, data) => { -// // // if (err) throw err; -// // // const prefix = 'data:image/png;base64,'; -// // // res.status(200).json({ -// // // status: 'success', -// // // data: { -// // // url: `${pagePath}/index.html`, -// // // thumb: prefix + data, -// // // }, -// // // }); -// // // }); - -// // // await browser.close(); -// // // }); -// // }); - -// // // app.get('/getpdf', async (req, res) => { -// // // const { url, width, height } = req.query; -// // // const pagePath = path.resolve(__dirname, `./public/pdf/html.pdf`); -// // // const browser = await puppeteer.launch({ -// // // defaultViewport: { -// // // width: Number(width), -// // // height: Number(height), -// // // }, -// // // }); -// // // const page = await browser.newPage(); -// // // await page.goto(`http://127.0.0.1:${PORT}/${url}/index.html`); -// // // await page.pdf({ -// // // path: pagePath, -// // // width: Number(width), -// // // height: Number(height), -// // // printBackground: true, -// // // pageRanges: '1', -// // // }); -// // // await browser.close(); - -// // // // read the file from the filepath and respond to server -// // // res.sendFile(pagePath); -// // // }); - -// // app.listen(PORT, () => { -// // console.log(`Server is now live at ${PORT}`); -// // }); - // TAKEN FROM: https://stackoverflow.com/a/63602976 var express = require('express') var app = express() @@ -287,13 +5,19 @@ var https = require('https'); var http = require('http'); const { response } = require('express'); +// app.get('/', function (req, res) { +// // console.log('hello world'); +// // res.send('root') +// }) app.use('/', function(clientRequest, clientResponse) { var url; + url = 'https://www.teamliquid.com/' // url = 'https://www.google.com' - console.log('----', clientRequest.hostname, clientRequest.url); - url = 'https://www.pdftron.com'; + console.log('----', clientRequest.hostname, clientRequest.url, clientRequest.originalUrl, '--', clientRequest.baseUrl, clientRequest.headers.location); + // url = 'https://www.pdftron.com'; var parsedHost = url.split('/').splice(2).splice(0, 1).join('/') + console.log('parsedHost', parsedHost); var parsedPort; var parsedSSL; if (url.startsWith('https://')) { @@ -314,6 +38,10 @@ app.use('/', function(clientRequest, clientResponse) { }; var serverRequest = parsedSSL.request(options, function(serverResponse) { + // console.log('headers------', serverResponse.headers); + // Delete 'x-frame-options': 'SAMEORIGIN' + // so that the page can be loaded in an iframe + delete serverResponse.headers['x-frame-options']; var body = ''; if (String(serverResponse.headers['content-type']).indexOf('text/html') !== -1) { serverResponse.on('data', function(chunk) { From 808b327a8f9da5ef04ae6f659446e887d0a4732c Mon Sep 17 00:00:00 2001 From: Kristian Hein Date: Fri, 22 Oct 2021 10:18:48 -0700 Subject: [PATCH 05/16] testing some more stuff --- client/src/App.js | 2 +- client/src/components/navigation/Nav.js | 4 ++-- server/index.js | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/client/src/App.js b/client/src/App.js index 0f2a3dc..b7cd90d 100644 --- a/client/src/App.js +++ b/client/src/App.js @@ -17,7 +17,7 @@ function App() { // origUrl: 'http://localhost:8080/www.google.ca', origUrl: 'http://localhost:3000', width: 1800, - height: 1100, + height: 8000, thumb: '', }) diff --git a/client/src/components/navigation/Nav.js b/client/src/components/navigation/Nav.js index 14b51ba..3d5148b 100644 --- a/client/src/components/navigation/Nav.js +++ b/client/src/components/navigation/Nav.js @@ -15,7 +15,7 @@ import './Nav.css'; const Nav = ({ handleSubmit, fetchError, showSpinner, handleDownload }) => { const [url, setUrl] = useState(''); const [width, setWidth] = useState(1000); - const [height, setHeight] = useState(2000); + const [height, setHeight] = useState(8000); const [error, setError] = useState(false); return ( @@ -54,7 +54,7 @@ const Nav = ({ handleSubmit, fetchError, showSpinner, handleDownload }) => { onChange={(e) => { setHeight(e.target.value); }} - placeholder="2000" + placeholder="8000" value={height} /> - -