From 504a0e806666ebda2b595b2886bf61502906b8dd Mon Sep 17 00:00:00 2001
From: Dean Sheather <dean@deansheather.com>
Date: Sun, 18 Dec 2016 15:22:53 +1000
Subject: [PATCH] added scanner logic and SQS stuff, untested

---
 .eslintrc.json     |  18 ++---
 LICENSE            |   2 +-
 index.js           | 167 +++++++++++++++++++++++++++++++++++++++++++--
 lib/S3.js          |   9 +++
 lib/SQS.js         |   9 +++
 lib/freshclam.js   |  21 ++++++
 lib/getfile.js     |  11 ---
 lib/refreshclam.js |  15 ----
 lib/scanner.js     |  22 ------
 package.json       |  23 ++++---
 10 files changed, 226 insertions(+), 71 deletions(-)
 create mode 100644 lib/S3.js
 create mode 100644 lib/SQS.js
 create mode 100644 lib/freshclam.js
 delete mode 100644 lib/getfile.js
 delete mode 100644 lib/refreshclam.js
 delete mode 100644 lib/scanner.js

diff --git a/.eslintrc.json b/.eslintrc.json
index 8f22562..a9c0046 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -1,11 +1,11 @@
 {
-    "extends": "standard",
-    "installedESLint": true,
-    "plugins": [
-        "standard",
-        "promise"
-    ],
-    "rules": {
-        "semi": [2, "always"]
-    }
+  "extends": "standard",
+  "installedESLint": true,
+  "plugins": [
+    "standard",
+    "promise"
+  ],
+  "rules": {
+    "semi": [2, "always"]
+  }
 }
diff --git a/LICENSE b/LICENSE
index bcaeea6..f1c803a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2016 OwO.Whats-Th.is?
+Copyright (c) 2016 Dean Sheather
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/index.js b/index.js
index d598a77..eba88d2 100644
--- a/index.js
+++ b/index.js
@@ -1,5 +1,162 @@
-module.exports = {
-    getFile: require('./lib/getfile'),
-    scanner: require('./lib/scanner'),
-    refreshclam: require('./lib/refreshclam')
-}
\ No newline at end of file
+// Required modules
+const clam = require('clamscan')();
+const crypto = require('crypto');
+const freshclam = require('./lib/freshclam.js');
+const fs = require('fs');
+const path = require('path');
+const S3 = require('./lib/S3.js');
+const SQS = require('./lib/SQS.js');
+// NOTE: ./lib/scanner.js is deleted by this commit, so it must not be required
+
+// Fail fast when a required environment variable is missing or empty
+for (const env of [
+  'AWS_ACCESSKEY',
+  'AWS_SECRETKEY',
+  'AWS_SQSURL'
+]) {
+  if (!process.env[env]) {
+    throw new Error(`missing required environment variable "${env}"`);
+  }
+}
+
+// TODO: loop freshclam
+
+/**
+ * Handle all errors. For now this only logs, so failures are never silent.
+ * @param {Error} error
+ */
+function handleError (error) {
+  console.error(error); // TODO: proper error handling/reporting
+}
+
+/**
+ * Long-poll SQS (up to 10 messages) and run every message through the scan
+ * pipeline. Failures go to handleError and polling always continues.
+ */
+function pollSQS () {
+  SQS.receiveMessage({
+    MaxNumberOfMessages: 10,
+    QueueUrl: process.env['AWS_SQSURL'],
+    WaitTimeSeconds: 20
+  }, function (err, data) {
+    if (err) {
+      handleError(err);
+      // Retry after a short pause so a persistent failure cannot busy-loop
+      return void setTimeout(pollSQS, 1000);
+    }
+    // receiveMessage reports messages under `Messages` (absent when empty)
+    const promises = ((data && data.Messages) || []).map(msg =>
+      Promise.resolve(msg.Body)
+      .then(JSON.parse)
+      .then(msgBody => ({ msg, msgBody }))
+      .then(getObject)
+      .then(writeTempFile)
+      .then(clamScan)
+      .then(unlinkTempFile)
+      // .then(fireNotification)
+      .then(deleteInfectedFromS3)
+      .then(deleteSQSMessage)
+      .catch(handleError)
+    );
+
+    Promise.all(promises).catch(handleError).then(() => setImmediate(pollSQS));
+  });
+}
+
+/**
+ * Get object from S3, promisified; fills data.Bucket, data.Key and data.Body.
+ * @param {Object} data msgBody: S3 event or one record (assumed - confirm)
+ * @return {Promise<Object, Error>}
+ */
+function getObject (data) {
+  return new Promise((resolve, reject) => {
+    const record = (data.msgBody.Records || [data.msgBody])[0];
+    data.Bucket = record.s3.bucket.name;
+    data.Key = decodeURIComponent(record.s3.object.key.replace(/\+/g, ' ')); // event keys are URL-encoded
+    S3.getObject({ Bucket: data.Bucket, Key: data.Key }, (err, res) => {
+      if (err) return reject(err);
+      data.Body = Buffer.from(res.Body);
+      resolve(data);
+    });
+  });
+}
+
+/**
+ * Generate a random key that keeps temporary file names unique.
+ * @return {string} 6 character key.
+ */
+function generateRandomKey () {
+  // 3 CSPRNG bytes -> 6 hex chars; a timestamp seed collides within one batch
+  return crypto.randomBytes(3).toString('hex');
+}
+
+/**
+ * Persist data.Body to a scratch file under ./_temp/<Bucket> for scanning.
+ */
+function writeTempFile (data) {
+  return new Promise((resolve, reject) => {
+    // Random prefix + sanitised object key give the on-disk file name
+    const safeName = generateRandomKey() + data.Key.replace(/[^a-z0-9_.-]/gi, '_');
+    const target = path.join('.', '_temp', data.Bucket, safeName);
+    const onWritten = (writeErr) => {
+      if (writeErr) return reject(writeErr);
+      data.filepath = target;
+      resolve(data);
+    };
+    fs.writeFile(target, data.Body, onWritten);
+  });
+}
+
+/**
+ * Run ClamAV over data.filepath and store the verdict in data.isInfected.
+ */
+function clamScan (data) {
+  // The middle callback argument is ignored, as before
+  const scan = (resolve, reject) => clam.is_infected(data.filepath, (scanErr, _file, infected) => {
+    if (scanErr) return reject(scanErr);
+    data.isInfected = infected;
+    resolve(data);
+  });
+  return new Promise(scan);
+}
+
+/**
+ * Unlink temporary file (data.filepath, as set by writeTempFile).
+ */
+function unlinkTempFile (data) {
+  return new Promise((resolve, reject) => {
+    fs.unlink(data.filepath, (err) => {
+      if (err) return reject(err);
+      resolve(data);
+    });
+  });
+}
+
+/**
+ * Delete infected files from S3; clean files pass through untouched.
+ */
+function deleteInfectedFromS3 (data) {
+  return new Promise((resolve, reject) => {
+    if (!data.isInfected) return resolve(data);
+    S3.deleteObject({ Bucket: data.Bucket, Key: data.Key }, (err, res) => {
+      if (err) return reject(err);
+      data.wasPermanentlyDeletedFromS3 = !res.DeleteMarker; // a delete marker means versions remain
+      resolve(data);
+    });
+  });
+}
+
+/**
+ * Delete processed SQS message, identified by data.msg.ReceiptHandle.
+ * Resolves with the same data object when the callback reports no error.
+ */
+function deleteSQSMessage (data) {
+  return new Promise((resolve, reject) => {
+    const QueueUrl = process.env['AWS_SQSURL'];
+    const request = { QueueUrl, ReceiptHandle: data.msg.ReceiptHandle };
+    SQS.deleteMessage(request, (deleteErr) => {
+      if (deleteErr) return reject(deleteErr);
+      resolve(data);
+    });
+  });
+}
diff --git a/lib/S3.js b/lib/S3.js
new file mode 100644
index 0000000..b2249f1
--- /dev/null
+++ b/lib/S3.js
@@ -0,0 +1,9 @@
+// Required modules
+const AWS = require('aws-sdk');
+
+// Shared S3 client, authenticated with the keys taken from the environment
+const { AWS_ACCESSKEY, AWS_SECRETKEY } = process.env;
+const options = { apiVersion: '2006-03-01' };
+options.accessKeyId = AWS_ACCESSKEY;
+options.secretAccessKey = AWS_SECRETKEY;
+module.exports = new AWS.S3(options);
diff --git a/lib/SQS.js b/lib/SQS.js
new file mode 100644
index 0000000..4956cb5
--- /dev/null
+++ b/lib/SQS.js
@@ -0,0 +1,9 @@
+// Required modules
+const AWS = require('aws-sdk');
+
+// Shared SQS client, authenticated with the keys taken from the environment
+const { AWS_ACCESSKEY, AWS_SECRETKEY } = process.env;
+const options = { apiVersion: '2012-11-05' };
+options.accessKeyId = AWS_ACCESSKEY;
+options.secretAccessKey = AWS_SECRETKEY;
+module.exports = new AWS.SQS(options);
diff --git a/lib/freshclam.js b/lib/freshclam.js
new file mode 100644
index 0000000..6504690
--- /dev/null
+++ b/lib/freshclam.js
@@ -0,0 +1,21 @@
+// Required modules
+const exec = require('child_process').exec;
+const debug = require('debug')('scanner:freshclam');
+
+/**
+ * Run `freshclam` with child_process.spawn (exec accepts neither an argument
+ * array nor `stdio`) in order to refresh the ClamAV virus database.
+ * @return {Promise<undefined, Error>}
+ */
+module.exports = () => {
+  return new Promise((resolve, reject) => {
+    debug('updating virus database using freshclam');
+    require('child_process').spawn('freshclam', [], { stdio: 'inherit' })
+      .on('error', reject)
+      .on('exit', code => {
+        if (code !== 0) return void reject(new Error(`freshclam exited with code ${code}`));
+        debug('finished updating virus database');
+        resolve();
+      });
+  });
+};
diff --git a/lib/getfile.js b/lib/getfile.js
deleted file mode 100644
index 4ab1ee8..0000000
--- a/lib/getfile.js
+++ /dev/null
@@ -1,11 +0,0 @@
-module.exports = function getFile (S3, key) {
-  return new Promise((resolve, reject) => {
-    S3.getObject({
-      Bucket: `${process.env.SERVICE}-filestore-${process.env.STAGE}-1`,
-      Key: key
-    }, (err, file) => {
-      if (err) return void reject(err);
-      resolve(file);
-    });
-  });
-};
diff --git a/lib/refreshclam.js b/lib/refreshclam.js
deleted file mode 100644
index deea5ec..0000000
--- a/lib/refreshclam.js
+++ /dev/null
@@ -1,15 +0,0 @@
-const exec = require('child_process').exec;
-const debug = require('debug')('scanner');
-
-module.exports = function refresh () {
-  return new Promise((resolve, reject) => {
-    debug('Updating virus database');
-    const proc = exec('freshclam', [], {stdio: 'inherit'});
-    proc.on('error', reject);
-    proc.on('exit', code => {
-      if (code !== 0) return void reject(new Error(`Clamscan exited with code ${code}`));
-      debug('Finished updating');
-      resolve();
-    });
-  });
-};
diff --git a/lib/scanner.js b/lib/scanner.js
deleted file mode 100644
index 60b1838..0000000
--- a/lib/scanner.js
+++ /dev/null
@@ -1,22 +0,0 @@
-const clam = require('clamscan')();
-const fs = require('fs');
-const path = require('path');
-const getFile = require('./getfile');
-
-module.exports = function scanFile (notif, S3) {
-  return new Promise((resolve, reject) => {
-    const key = notif.Records[0].s3.object.key;
-    getFile(S3, key).then(file => {
-      const filepath = path.resolve(path.join(__dirname, '/files/', key));
-      fs.writeFile(filepath, file.body, (err) => {
-        if (err) return void reject(err);
-        clam.is_infected(filepath, (err, _, isInfected) => {
-          fs.unlink(filepath, (unlinkErr) => {
-            if (err || unlinkErr) return void reject(err || unlinkErr);
-            resolve({infected: isInfected});
-          });
-        });
-      });
-    }, reject);
-  });
-};
diff --git a/package.json b/package.json
index 03806f2..ec412e5 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
-  "name": "whats-a-virus",
+  "name": "s3-scanner",
   "version": "0.0.1",
-  "description": "Whats this, a virus? Scanner built for whats-th.is file uploader.",
+  "description": "Node.js microservice to process events from S3 over SNS and scan new objects.",
   "main": "index.js",
   "scripts": {
     "test": "echo \"Error: no test specified\" && exit 1"
@@ -14,22 +14,29 @@
     "av",
     "antivirus",
     "clamscan",
-    "clamav"
+    "clamav",
+    "s3",
+    "simple storage service",
+    "simple-storage-service",
+    "aws",
+    "amazon web services",
+    "amazon-web-services"
   ],
-  "author": "aurieh",
+  "author": "Aurieh",
   "license": "MIT",
   "bugs": {
     "url": "https://github.com/whats-this/scanner/issues"
   },
   "homepage": "https://github.com/whats-this/scanner#readme",
+  "dependencies": {
+    "aws-sdk": "^2.7.15",
+    "clamscan": "^0.8.4",
+    "debug": "^2.4.4"
+  },
   "devDependencies": {
     "eslint": "^3.12.1",
     "eslint-config-standard": "^6.2.1",
     "eslint-plugin-promise": "^3.4.0",
     "eslint-plugin-standard": "^2.0.1"
-  },
-  "dependencies": {
-    "clamscan": "^0.8.4",
-    "debug": "^2.4.4"
   }
 }
-- 
GitLab