Skip to content

Commit

Permalink
Merge pull request #56 from digital-land/checkURLs
Browse files Browse the repository at this point in the history
Add ability to validate data from URLs
  • Loading branch information
GeorgeGoodall authored Feb 12, 2024
2 parents ba9bc48 + c0f35b1 commit daf3703
Show file tree
Hide file tree
Showing 22 changed files with 855 additions and 373 deletions.
6 changes: 2 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@
"build": "npm run scss && cp -R src/assets/downloadable/. public/downloadable",
"scss": "sass --quiet-deps --load-path=./ src/assets/scss:public/stylesheets",
"scss:watch": "sass --quiet-deps --load-path=./ --watch src/assets/scss:public/stylesheets",
"test": "npm run test:unit && npm run test:integration && npm run test:contract && npm run test:acceptance",
"test": "npm run test:unit && npm run test:acceptance",
"test:unit": "vitest run test/unit",
"test:integration": "vitest run test/integration",
"test:contract": "vitest run test/contract",
"test:watch": "vitest test/unit test/integration test/contract",
"test:coverage": "vitest test/unit test/integration test/contract --coverage",
"test:acceptance": "playwright test",
"test:acceptance:ui": "playwright test --ui",
"test:acceptance:codegen": "playwright codegen http://localhost:3000",
"test:acceptance:codegen": "playwright codegen http://localhost:5000",
"lint": "standard",
"lint:fix": "standard --fix"
},
Expand Down
241 changes: 55 additions & 186 deletions src/controllers/uploadController.js
Original file line number Diff line number Diff line change
@@ -1,228 +1,97 @@
'use strict'
import multer from 'multer'
import axios from 'axios'
import fs from 'fs/promises'
import { lookup } from 'mime-types'
import PageController from './pageController.js'
import config from '../../config/index.js'

import { severityLevels } from '../utils/utils.js'
import logger from '../utils/logger.js'
import hash from '../utils/hasher.js'

// Multer instance configured with the uploads/ directory as its destination.
const upload = multer({ dest: 'uploads/' })

// When NODE_ENV is "test" the locally configured API url is used instead of the main one.
const isTestEnvironment = process.env.NODE_ENV === 'test'
const apiEndpoint = isTestEnvironment ? config.api.localUrl : config.api.url

// Validation URL: the selected API base followed by the configured validation endpoint.
const apiRoute = apiEndpoint + config.api.validationEndpoint

class UploadController extends PageController {
middlewareSetup () {
super.middlewareSetup()
this.use('/upload', upload.single('datafile'))
}
// Validation URL exposed on the controller: the module-level API base url plus the configured validation endpoint.
apiRoute = apiEndpoint + config.api.validationEndpoint

async get (req, res, next) {
req.form.options.validationError = this.validationErrorMessage
super.get(req, res, next)
}

async post (req, res, next) {
this.validationErrorMessage = undefined
if (req.file !== undefined) {
req.body.datafile = req.file
let jsonResult = {}
try {
jsonResult = await this.validateFile({
...req.file,
filePath: req.file.path,
fileName: req.file.originalname,
dataset: req.sessionModel.get('dataset'),
dataSubject: req.sessionModel.get('data-subject'),
// organisation: 'local-authority-eng:CAT', // ToDo: this needs to be dynamic, not collected in the prototype, should it be?
sessionId: await hash(req.sessionID),
ipAddress: await hash(req.ip)
})
if (jsonResult) {
if (jsonResult.error) {
this.validationError('apiError', jsonResult.message, {}, req)
} else {
try {
this.errorCount = jsonResult['issue-log'].filter(issue => issue.severity === severityLevels.error).length + jsonResult['column-field-log'].filter(log => log.missing).length
req.body.validationResult = jsonResult
} catch (error) {
this.validationError('apiError', 'Error parsing api response error count', error, req)
}
}
} else {
this.validationError('apiError', 'Nothing returned from the api', null, req)
}
} catch (error) {
logger.error('Error uploading file', error)
if (error.code === 'ECONNREFUSED') {
this.validationError('apiError', 'Unable to reach the api', error, req)
} else if (error.code === 'ECONNABORTED') {
this.validationError('apiError', 'Gateway Timeout', error, req)
} else {
switch (error.response.status) {
case 400:
this.validationError('apiError', 'Bad request sent to the api', error, req)
break
case 404:
this.validationError('apiError', 'Validation endpoint not found', error, req)
break
case 500:
this.validationError('apiError', 'Internal Server Error', error, req)
break
case 504:
this.validationError('apiError', 'Gateway Timeout', error, req)
break
default:
this.validationError('apiError', 'Error uploading file', error, req)
}
}
}
}

// delete the file from the uploads folder
if (req.file && req.file.path) { fs.unlink(req.file.path) }

super.post(req, res, next)
}

// Clears the validation error message stored on this controller.
resetValidationErrorMessage () {
this.validationErrorMessage = undefined
}

validationError (type, message, errorObject, req) {
logger.error({ type, message, errorObject })
req.body.validationResult = { error: true, message, errorObject }
this.validationErrorMessage = message
}

async validateFile (datafile) {
if (
!UploadController.extensionIsValid(datafile) ||
!UploadController.sizeIsValid(datafile) ||
!UploadController.fileNameIsntTooLong(datafile) ||
!UploadController.fileNameIsValid(datafile) ||
!UploadController.fileNameDoesntContainDoubleExtension(datafile) ||
!UploadController.fileMimeTypeIsValid(datafile) ||
!UploadController.fileMimeTypeMatchesExtension(datafile)
) {
return false
}

const { filePath, fileName, dataset, dataSubject, organisation, sessionId, ipAddress } = datafile

const formData = new FormData()
formData.append('dataset', dataset)
formData.append('collection', dataSubject)
formData.append('organisation', organisation)
formData.append('sessionId', sessionId)
formData.append('ipAddress', ipAddress)

const file = new Blob([await fs.readFile(filePath)], { type: lookup(filePath) })

formData.append('upload_file', file, fileName)

const result = await axios.post(apiRoute, formData, { timeout: config.api.requestTimeout })

return result.data
}

static resultIsValid (validationResult) {
return validationResult ? !validationResult.error : false
}

static extensionIsValid (datafile) {
const allowedExtensions = ['csv', 'xls', 'xlsx', 'json', 'geojson', 'gml', 'gpkg', 'sqlite3']

const parts = datafile.originalname.split('.')

const extension = parts[parts.length - 1]
if (!allowedExtensions.includes(extension)) {
return false
}

return true
}

static sizeIsValid (datafile) {
const maxSize = 10 * 1024 * 1024 // 10MB

if (datafile.size > maxSize) {
return false
}

return true
}

static fileNameIsntTooLong (datafile) {
const maxSize = 255 // Maximum filename size
if (datafile.originalname.length > maxSize) {
return false
}
return true
}

// Returns false when the original file name contains any of the characters < > : " / \ | ? * and true otherwise.
// NOTE(review): no closing brace for this method is visible before the next method header — the listing looks like interleaved diff lines; confirm against the repository.
static fileNameIsValid (datafile) {
const invalidCharacters = /[<>:"/\\|?*]/
if (invalidCharacters.test(datafile.originalname)) {
return false
}
return true
// Returns true when the error count recorded on this controller is greater than zero.
hasErrors () {
return this.errorCount > 0
}

// Returns false when splitting the original file name on "." yields more than two parts.
// NOTE(review): the remainder of this method appears after handleValidationResult below — the listing looks like interleaved diff lines; confirm against the repository.
static fileNameDoesntContainDoubleExtension (datafile) {
const parts = datafile.originalname.split('.')
if (parts.length > 2) {
return false
// Stores the outcome of an API response on the request:
// - no response: records an apiError ("Nothing returned from the api")
// - response flagged with `error`: records an apiError carrying the response message
// - otherwise: sets this.errorCount and stores the response on req.body.validationResult
handleValidationResult (jsonResult, req) {
if (jsonResult) {
if (jsonResult.error) {
this.validationError('apiError', jsonResult.message, {}, req)
} else {
try {
// errors = issues with error severity + column-field-log entries flagged as missing
this.errorCount = jsonResult['issue-log'].filter(issue => issue.severity === severityLevels.error).length + jsonResult['column-field-log'].filter(log => log.missing).length
req.body.validationResult = jsonResult
} catch (error) {
this.validationError('apiError', 'Error parsing api response error count', error, req)
}
}
} else {
this.validationError('apiError', 'Nothing returned from the api', null, req)
}
return true
}

// Returns false when datafile.mimetype is not one of the MIME types listed below.
// NOTE(review): the remainder of this method appears after handleApiError below — the listing looks like interleaved diff lines; confirm against the repository.
static fileMimeTypeIsValid (datafile) {
const allowedMimeTypes = [
'text/csv',
'application/vnd.ms-excel',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/json',
'application/vnd.geo+json',
'application/gml+xml',
'application/gpkg',
'application/geopackage+sqlite3',
'application/octet-stream' // This is a catch all for when the mime type is not recognised
]
if (!allowedMimeTypes.includes(datafile.mimetype)) {
return false
// Logs a failed API call and records an apiError on the request.
// The message depends on error.code and, for ERR_BAD_REQUEST, on error.response.status.
handleApiError (error, req) {
logger.error('Error uploading file', error)

if (error.code === 'ERR_BAD_REQUEST') {
// NOTE(review): axios appears to report 5xx responses with code ERR_BAD_RESPONSE, so the 500/504 cases may never match here — confirm
switch (error.response.status) {
case 400:
// uses the error message supplied in the API response body
this.validationError('apiError', error.response.data.detail.errMsg, error, req)
break
case 404:
this.validationError('apiError', 'Validation endpoint not found', error, req)
break
case 500:
this.validationError('apiError', 'Internal Server Error', error, req)
break
case 504:
this.validationError('apiError', 'Gateway Timeout', error, req)
break
default:
this.validationError('apiError', 'Error uploading file', error, req)
}
} else if (error.code === 'ECONNREFUSED') { // this indicates the api is down
this.validationError('apiError', 'Unable to reach the api', error, req)
} else if (error.code === 'ECONNABORTED') { // treated as a timeout
this.validationError('apiError', 'Gateway Timeout', error, req)
} else {
this.validationError('apiError', 'Error uploading file', error, req)
}
return true
}

static fileMimeTypeMatchesExtension (datafile) {
const parts = datafile.originalname.split('.')
const extension = parts[parts.length - 1]

if (datafile.mimetype === 'application/octet-stream') {
return true
}

const mimeTypes = {
csv: 'text/csv',
xls: 'application/vnd.ms-excel',
xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
json: 'application/json',
geojson: 'application/vnd.geo+json',
gml: 'application/gml+xml',
gpkg: 'application/gpkg',
sqlite: 'application/geopackage+sqlite3'
}

if (mimeTypes[extension] !== datafile.mimetype) {
return false
}

return true
}
// Builds a FormData carrying dataset, collection (taken from dataSubject), organisation, sessionId and ipAddress.
// NOTE(review): the hasErrors header appears before this method's return statement — the listing looks like interleaved diff lines; confirm against the repository.
constructBaseFormData ({ dataset, dataSubject, organisation, sessionId, ipAddress }) {
const formData = new FormData()
formData.append('dataset', dataset)
formData.append('collection', dataSubject)
formData.append('organisation', organisation)
formData.append('sessionId', sessionId)
formData.append('ipAddress', ipAddress)

// Returns true when the error count recorded on this controller is greater than zero.
hasErrors () {
return this.errorCount > 0
return formData
}
}

Expand Down
Loading

0 comments on commit daf3703

Please sign in to comment.