From 04650b9becdf11e070b69d52e62ca9e23ffc49e8 Mon Sep 17 00:00:00 2001
From: Dean Sheather <dean@deansheather.com>
Date: Sun, 18 Dec 2016 18:58:39 +1000
Subject: [PATCH] small fixes + Dockerfile

---
 Dockerfile | 33 ++++++++++++++++++++++++
 index.js   | 73 ++++++++++++++++++++++++++++++++++++++----------------
 lib/SQS.js |  1 +
 3 files changed, 86 insertions(+), 21 deletions(-)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0cd2b89
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,33 @@
+FROM buildpack-deps:jessie
+
+MAINTAINER Dean Sheather <dean@deansheather.com>
+
+# add Node.js repository to apt
+RUN curl -sL https://deb.nodesource.com/setup_6.x | bash -
+
+# install Node.js and ClamAV; clean the apt caches in the same RUN so the
+# removed files are not kept in an earlier image layer
+RUN apt-get update \
+    && apt-get install -y -qq --force-yes nodejs \
+                                          clamav \
+                                          clamav-freshclam \
+                                          ca-certificates \
+    && apt-get clean \
+    && rm -rf /var/lib/apt
+
+# update virus database using freshclam
+RUN freshclam
+
+# copy source files into container
+COPY index.js src/
+COPY package.json src/
+COPY lib/ src/lib
+
+# run the remaining instructions from the source directory
+WORKDIR src/
+
+# update NPM dependencies
+RUN npm install
+
+# start the consumer
+CMD ["node", "index.js"]
diff --git a/index.js b/index.js
index eba88d2..2eabc71 100644
--- a/index.js
+++ b/index.js
@@ -1,16 +1,15 @@
 // Required modules
 const clam = require('clamscan')();
-const crypto = require('crypto');
-const freshclam = require('./lib/freshclam.js');
+const debug = require('debug')('scanner:do');
 const fs = require('fs');
 const path = require('path');
 const S3 = require('./lib/S3.js');
 const SQS = require('./lib/SQS.js');
-const scanner = require('./lib/scanner.js');
 
 // Check for required environment variables
 for (let env of [
   'AWS_ACCESSKEY',
+  'AWS_REGION',
   'AWS_SECRETKEY',
   'AWS_SQSURL'
 ]) {
@@ -19,6 +18,11 @@ for (let env of [
   }
 }
 
+// Create _temp folder
+if (!fs.existsSync('_temp')) {
+  fs.mkdirSync('_temp');
+}
+
 // TODO: loop freshclam
 
 /**
@@ -27,6 +31,7 @@ for (let env of [
  */
 function handleError (error) {
   // TODO: error handling
+  console.error(error);
 }
 
 /**
@@ -38,13 +43,15 @@ function pollSQS () {
     QueueUrl: process.env['AWS_SQSURL'],
     WaitTimeSeconds: 20
   }, function (err, data) {
-    if (err) return handleError(error);
+    if (err) return handleError(err);
+    debug(`received data from SQS, ${(data.Messages || []).length} records`);
     let promises = [];
-    (data.Records || []).forEach(msg => {
+    (data.Messages || []).forEach(msg => {
       promises.push(
         Promise.resolve(msg)
         .then(msg => JSON.parse(msg.Body))
-        .then(body => { msg: msg, msgBody: body })
+        .then(body => { return { msg: msg, msgBody: body }; })
+        .then(validateMessageBody)
         .then(getObject)
         .then(writeTempFile)
         .then(clamScan)
@@ -63,16 +70,39 @@ function pollSQS () {
   });
 }
 
+// Start the loop
+pollSQS();
+
+/**
+ * Validate incoming SQS message body. A message that fails validation is passed
+ * to deleteSQSMessage before the returned promise is rejected.
+ * @param {Object} data - `{ msg, msgBody }`; msgBody is the JSON-parsed message body.
+ * @return {Promise<Object>} Resolves with `data`; rejects with an Error, or with
+ *   `[error, deleteError]` when deleteSQSMessage itself fails.
+ */
+function validateMessageBody (data) {
+  // eslint-disable-next-line promise/param-names
+  return new Promise((resolve, _reject) => {
+    const reject = err => deleteSQSMessage(data).then(() => _reject(err)).catch(e => _reject([err, e]));
+    if (data.msgBody.Event === 's3:TestEvent') return reject(new Error('test event'));
+    if (!Array.isArray(data.msgBody.Records)) return reject(new Error('invalid S3 message structure'));
+    if (data.msgBody.Records.length !== 1) return reject(new Error('records count !== 1'));
+    for (const item of data.msgBody.Records) {
+      if (typeof item !== 'object' || item === null) return reject(new Error('invalid item in records'));
+      if (typeof item.eventName !== 'string' || item.eventName.indexOf('ObjectCreated') === -1) return reject(new Error('invalid event type on record'));
+    }
+    resolve(data);
+  });
+}
+
 /**
  * Get object from S3, promisified.
- * @param {Object} params
- * @return {Promise<Object, Error>}
  */
 function getObject (data) {
   return new Promise((resolve, reject) => {
     S3.getObject({
-      Bucket: body.s3.bucket.name,
-      Key: body.s3.object.Key
+      Bucket: data.msgBody.Records[0].s3.bucket.name,
+      Key: data.msgBody.Records[0].s3.object.key
     }, (err, res) => {
       if (err) return reject(err);
       data.Body = new Buffer(res.Body);
@@ -81,22 +111,23 @@ function getObject (data) {
   });
 }
 
-/**
- * Generate random key.
- * @return {string} 6 character key.
- */
-function generateRandomKey () {
-  const seed = String(Math.floor(Math.random() * 10) + Date.now());
-  return crypto.createHash('md5').update(seed).digest('hex').substr(2, 6);
-}
-
 /**
  * Create a temporary file on disk for scanning.
  */
 function writeTempFile (data) {
   return new Promise((resolve, reject) => {
     // Construct the filepath (including random key)
-    const filepath = path.join('.', '_temp', data.Bucket, generateRandomKey() + data.Key.replace(/[^a-z0-9_.-]/gi, '_'));
+    const filepath = path.join(
+      '.',
+      '_temp',
+      data.msgBody.Records[0].s3.bucket.name,
+      data.msg.MessageId + data.msgBody.Records[0].s3.object.key.replace(/[^a-z0-9_.-]/gi, '_')
+    );
+
+    // Create bucket folder
+    if (!fs.existsSync('_temp/' + data.msgBody.Records[0].s3.bucket.name)) {
+      fs.mkdirSync('_temp/' + data.msgBody.Records[0].s3.bucket.name);
+    }
 
     // Write the file
     fs.writeFile(filepath, data.Body, err => {
@@ -125,7 +156,7 @@ function clamScan (data) {
  */
 function unlinkTempFile (data) {
   return new Promise((resolve, reject) => {
-    fs.unlink(filepath, (err) => {
+    fs.unlink(data.filepath, (err) => {
       if (err) return reject(err);
       resolve(data);
     });
diff --git a/lib/SQS.js b/lib/SQS.js
index 4956cb5..f4a4dcc 100644
--- a/lib/SQS.js
+++ b/lib/SQS.js
@@ -5,5 +5,6 @@ const AWS = require('aws-sdk');
 module.exports = new AWS.SQS({
   apiVersion: '2012-11-05',
   accessKeyId: process.env['AWS_ACCESSKEY'],
+  region: process.env['AWS_REGION'],
   secretAccessKey: process.env['AWS_SECRETKEY']
 });
-- 
GitLab