From 66c3ba719b311ee34049d6d1a9963f2b1c9d199c Mon Sep 17 00:00:00 2001
From: Matthew Hilton
Date: Mon, 16 Sep 2024 16:02:38 +1000
Subject: [PATCH] WIP azure rest API blob store wrapper

---
 classes/azure_blob_file_system.php            |  35 ++
 classes/local/store/azure_blob/client.php     | 334 +++++++++++++++
 .../local/store/azure_blob/file_system.php    |  48 +++
 .../local/store/azure_blob/stream_wrapper.php | 379 ++++++++++++++++++
 4 files changed, 796 insertions(+)
 create mode 100644 classes/azure_blob_file_system.php
 create mode 100644 classes/local/store/azure_blob/client.php
 create mode 100644 classes/local/store/azure_blob/file_system.php
 create mode 100644 classes/local/store/azure_blob/stream_wrapper.php

diff --git a/classes/azure_blob_file_system.php b/classes/azure_blob_file_system.php
new file mode 100644
index 00000000..7fb04265
--- /dev/null
+++ b/classes/azure_blob_file_system.php
@@ -0,0 +1,35 @@
+<?php
+// This file is part of Moodle - http://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
+
+/**
+ * File system for Azure Blob Storage.
+ *
+ * @package tool_objectfs
+ * @author Matthew Hilton
+ * @copyright Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+
+namespace tool_objectfs;
+
+use tool_objectfs\local\store\azure_blob\file_system;
+
+/**
+ * Azure blob file system header; all behaviour is inherited from the azure_blob store file system.
+ */
+class azure_blob_file_system extends file_system {
+
+}
diff --git a/classes/local/store/azure_blob/client.php b/classes/local/store/azure_blob/client.php
new file mode 100644
index 00000000..3eddad99
--- /dev/null
+++ b/classes/local/store/azure_blob/client.php
@@ -0,0 +1,334 @@
+<?php
+// This file is part of Moodle - http://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
+
+namespace tool_objectfs\local\store\azure_blob;
+
+use core\lang_string;
+use GuzzleHttp\Psr7\Utils;
+use local_azureblobstorage\api;
+use stdClass;
+use tool_objectfs\local\store\object_client_base;
+
+/**
+ * Azure Blob Storage client.
+ *
+ * @package tool_objectfs
+ * @author Matthew Hilton
+ * @copyright Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+class client extends object_client_base {
+
+    /** @var api $client Azure rest API interface. */
+    protected $client;
+
+    /** @var string Container name, used for constructing paths */
+    protected $container;
+
+    /**
+     * The azure client constructor.
+     *
+     * @param \stdClass $config
+     */
+    public function __construct($config) {
+        global $CFG;
+        // NOTE(review): this autoloader path points at local/azure_storage, while the api class used
+        // below is imported from local_azureblobstorage - confirm which plugin should gate availability.
+        $this->autoloader = $CFG->dirroot . '/local/azure_storage/vendor/autoload.php';
+
+        if ($this->get_availability() && !empty($config)) {
+            $this->client = new api(
+                $config->azure_accountname,
+                $config->azure_container,
+                $this->clean_sastoken($config->azure_sastoken)
+            );
+            $this->container = $config->azure_container;
+        } else {
+            parent::__construct($config);
+        }
+    }
+
+    /**
+     * Sets the StreamWrapper to allow accessing the remote content via a blob:// path.
+     */
+    public function register_stream_wrapper() {
+        if ($this->get_availability()) {
+            stream_wrapper::register($this->client);
+        } else {
+            parent::register_stream_wrapper();
+        }
+    }
+
+    /**
+     * Returns azure fullpath to use with php file functions.
+     *
+     * @param string $contenthash contenthash used as key in azure.
+     * @return string fullpath to azure object.
+     */
+    public function get_fullpath_from_hash($contenthash) {
+        $filepath = $this->get_filepath_from_hash($contenthash);
+        return "blob://$this->container/$filepath";
+    }
+
+    /**
+     * Deletes file (blob) in azure blob storage.
+     *
+     * Not implemented yet: this is currently a no-op.
+     *
+     * @param string $fullpath path to azure blob.
+     */
+    public function delete_file($fullpath) {
+        // TODO get hash from fullpath.
+        // TODO call $client->delete_blob($hash).
+    }
+
+    /**
+     * Moves file (blob) within azure blob storage.
+     *
+     * The source is only removed once the copy has succeeded, so a failed copy never loses the blob.
+     *
+     * @param string $currentpath current path to file to be moved.
+     * @param string $destinationpath destination path to file.
+     */
+    public function rename_file($currentpath, $destinationpath) {
+        if (copy($currentpath, $destinationpath)) {
+            $this->delete_file($currentpath);
+        }
+    }
+
+    /**
+     * Returns relative path to blob from fullpath to use with php file functions.
+     *
+     * @param string $fullpath full path to azure blob.
+     * @return string relative path to azure blob.
+     */
+    public function get_relative_path_from_fullpath($fullpath) {
+        $relativepath = str_replace("blob://$this->container/", '', $fullpath);
+
+        return $relativepath;
+    }
+
+    /**
+     * Returns a context for the stream that is seekable.
+     *
+     * @return resource
+     */
+    public function get_seekable_stream_context() {
+        $context = stream_context_create([
+            'blob' => [
+                'seekable' => true,
+            ],
+        ]);
+        return $context;
+    }
+
+    /**
+     * TODO this might not be necessary anymore ?
+     *
+     * Trim a leading '?' character from the sas token.
+     *
+     * @param string $sastoken
+     * @return bool|string
+     */
+    private function clean_sastoken($sastoken) {
+        if (substr($sastoken, 0, 1) === '?') {
+            $sastoken = substr($sastoken, 1);
+        }
+
+        return $sastoken;
+    }
+
+    /**
+     * Gets the md5 for a file that is currently stored in Azure.
+     * Generally this is used for upload verification.
+     *
+     * @param string $contenthash
+     * @return string|false MD5 hash, or false if the blob properties could not be fetched.
+     */
+    private function get_md5_from_hash($contenthash) {
+        try {
+            $key = $this->get_filepath_from_hash($contenthash);
+
+            // The api is constructed with the container, and its other calls (put_blob, url_stat) pass
+            // only the key. NOTE(review): confirm against the api signature.
+            $result = $this->client->get_blob_properties($key)->wait();
+
+            // TODO catch different exception ?
+        } catch (\MicrosoftAzure\Storage\Common\Exceptions\ServiceException $e) {
+            return false;
+        }
+
+        // TODO get the proper header name.
+        $contentmd5 = $result['x-ms-contentmd5'];
+
+        // TODO should we leave as base64 or decode it ??
+        if ($contentmd5) {
+            $md5 = bin2hex(base64_decode($contentmd5));
+        } else {
+            $md5 = trim($result->getETag(), '"'); // Strip quotation marks.
+        }
+
+        return $md5;
+    }
+
+    /**
+     * Verifies the object using the md5 of the local file vs blob storage.
+     *
+     * @param string $contenthash
+     * @param string $localpath
+     *
+     * @return bool True only if both md5s are available and identical.
+     */
+    public function verify_object($contenthash, $localpath) {
+        $localmd5 = md5_file($localpath);
+        $remotemd5 = $this->get_md5_from_hash($contenthash);
+
+        // Both lookups return false on failure. A loose comparison would treat '' and false as equal
+        // and report a failed lookup as a verified upload, so bail out explicitly and compare strictly.
+        if ($localmd5 === false || $remotemd5 === false) {
+            return false;
+        }
+
+        return $localmd5 === $remotemd5;
+    }
+
+    /**
+     * Return filepath from the content hash.
+     *
+     * @param string $contenthash
+     *
+     * @return string
+     */
+    protected function get_filepath_from_hash($contenthash) {
+        $l1 = $contenthash[0] . $contenthash[1];
+        $l2 = $contenthash[2] . $contenthash[3];
+        return "$l1/$l2/$contenthash";
+    }
+
+    /**
+     * Tests connection by trying to create a test blob.
+     *
+     * @return stdClass
+     */
+    public function test_connection() {
+        $connection = new \stdClass();
+        $connection->success = true;
+        $connection->details = '';
+
+        try {
+            $md5 = hex2bin(md5('connection_check_file'));
+            $this->client->put_blob('connection_check_file', Utils::streamFor('connection_check_file'), $md5);
+
+            // TODO catch different exceptions
+        } catch (\MicrosoftAzure\Storage\Common\Exceptions\ServiceException $e) {
+            $connection->success = false;
+            $connection->details = $this->get_exception_details($e);
+        } catch (\GuzzleHttp\Exception\ConnectException $e) {
+            $connection->success = false;
+            $connection->details = $e->getMessage();
+        }
+
+        return $connection;
+    }
+
+    /**
+     * Tests permissions by trying to create, get and delete a blob.
+     * @param mixed $testdelete
+     *
+     * @return stdClass
+     */
+    public function test_permissions($testdelete) {
+        // TODO redo this and make it neat and tidy.
+        // TODO also support when objectfs deletion is disabled, don't check deletion.
+
+        $permissions = new \stdClass();
+        $permissions->success = true;
+        $permissions->messages = [];
+
+        // try {
+        //     $result = $this->client->createBlockBlob($this->container, 'permissions_check_file', 'permissions_check_file');
+        // } catch (\MicrosoftAzure\Storage\Common\Exceptions\ServiceException $e) {
+        //     $details = $this->get_exception_details($e);
+        //     $permissions->messages[get_string('settings:writefailure', 'tool_objectfs') . $details] = 'notifyproblem';
+        //     $permissions->success = false;
+        // }
+
+        // try {
+        //     $result = $this->client->getBlob($this->container, 'permissions_check_file');
+        // } catch (\MicrosoftAzure\Storage\Common\Exceptions\ServiceException $e) {
+        //     $errorcode = $this->get_body_error_code($e);
+
+        //     // Write could have failed.
+        //     if ($errorcode !== 'BlobNotFound') {
+        //         $details = $this->get_exception_details($e);
+        //         $permissions->messages[get_string('settings:permissionreadfailure', 'tool_objectfs') . $details] = 'notifyproblem';
+        //         $permissions->success = false;
+        //     }
+        // }
+
+        // try {
+        //     $result = $this->client->deleteBlob($this->container, 'permissions_check_file');
+        //     $permissions->messages[get_string('settings:deletesuccess', 'tool_objectfs')] = 'warning';
+        //     $permissions->success = false;
+        // } catch (\MicrosoftAzure\Storage\Common\Exceptions\ServiceException $e) {
+        //     $errorcode = $this->get_body_error_code($e);
+
+        //     // Something else went wrong.
+        //     if ($errorcode !== 'AuthorizationPermissionMismatch') {
+        //         $details = $this->get_exception_details($e);
+        //         $permissions->messages[get_string('settings:deleteerror', 'tool_objectfs') . $details] = 'notifyproblem';
+        //         $permissions->success = false;
+        //     }
+        // }
+
+        // if ($permissions->success) {
+        //     $permissions->messages[get_string('settings:permissioncheckpassed', 'tool_objectfs')] = 'notifysuccess';
+        // }
+
+        return $permissions;
+    }
+
+    /**
+     * Azure settings form with the following elements:
+     *
+     * Storage account name.
+     * Container name.
+     * Shared Access Signature.
+     *
+     * @param admin_settingpage $settings
+     * @param \stdClass $config
+     * @return admin_settingpage
+     */
+    public function define_client_section($settings, $config) {
+
+        $settings->add(new \admin_setting_heading('tool_objectfs/azure',
+            new lang_string('settings:azure:header', 'tool_objectfs'), $this->define_client_check()));
+
+        $settings->add(new \admin_setting_configtext('tool_objectfs/azure_accountname',
+            new lang_string('settings:azure:accountname', 'tool_objectfs'),
+            new lang_string('settings:azure:accountname_help', 'tool_objectfs'), ''));
+
+        $settings->add(new \admin_setting_configtext('tool_objectfs/azure_container',
+            new lang_string('settings:azure:container', 'tool_objectfs'),
+            new lang_string('settings:azure:container_help', 'tool_objectfs'), ''));
+
+        $settings->add(new \admin_setting_configpasswordunmask('tool_objectfs/azure_sastoken',
+            new lang_string('settings:azure:sastoken', 'tool_objectfs'),
+            new lang_string('settings:azure:sastoken_help', 'tool_objectfs'), ''));
+
+        return $settings;
+    }
+}
diff --git a/classes/local/store/azure_blob/file_system.php b/classes/local/store/azure_blob/file_system.php
new file mode 100644
index 00000000..add65a92
--- /dev/null
+++ b/classes/local/store/azure_blob/file_system.php
@@ -0,0 +1,48 @@
+<?php
+// This file is part of Moodle - http://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
+
+/**
+ * File system for Azure Blob Storage.
+ *
+ * @package tool_objectfs
+ * @author Matthew Hilton
+ * @copyright Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+
+namespace tool_objectfs\local\store\azure_blob;
+
+defined('MOODLE_INTERNAL') || die();
+
+use tool_objectfs\local\store\object_file_system;
+
+require_once($CFG->dirroot . '/admin/tool/objectfs/lib.php');
+
+/**
+ * Object file system whose external client is the azure_blob store client.
+ */
+class file_system extends object_file_system {
+
+    /**
+     * Creates the external client for this store.
+     * @param mixed $config
+     *
+     * @return client
+     */
+    protected function initialise_external_client($config) {
+        return new client($config);
+    }
+}
diff --git a/classes/local/store/azure_blob/stream_wrapper.php b/classes/local/store/azure_blob/stream_wrapper.php
new file mode 100644
index 00000000..6195a59f
--- /dev/null
+++ b/classes/local/store/azure_blob/stream_wrapper.php
@@ -0,0 +1,379 @@
+<?php
+// This file is part of Moodle - http://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
+
+namespace tool_objectfs\local\store\azure_blob;
+
+use local_azureblobstorage\api;
+use Psr\Http\Message\StreamInterface;
+
+/**
+ * Azure Blob Storage stream wrapper to use "blob://container/key" files with PHP.
+ *
+ * Implementation references,
+ * https://github.com/aws/aws-sdk-php/blob/master/src/S3/StreamWrapper.php
+ * https://phpazure.codeplex.com/SourceControl/latest#trunk/library/Microsoft/WindowsAzure/Storage/Blob/Stream.php
+ *
+ * NOTE(review): this class calls several helpers that it does not define yet (getClient, getOption,
+ * getSize, getStatTemplate, withPath, boolCall, triggerError, openReadStream, openAppendStream and
+ * openWriteStream), so the wrapper cannot run until they are added.
+ *
+ * @package tool_objectfs
+ * @author Nicholas Hoobin
+ * @author Matthew Hilton
+ * @copyright Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+class stream_wrapper {
+
+    /** @var resource|null Stream context (this is set by PHP) */
+    public $context;
+
+    /** @var StreamInterface Underlying stream resource */
+    private $body;
+
+    /** @var int Size of the body that is opened */
+    private $size;
+
+    /** @var array Hash of opened stream parameters */
+    private $params = [];
+
+    /** @var string Mode in which the stream was opened */
+    private $mode;
+
+    /** @var string The opened protocol (e.g. "blob") */
+    private $protocol = 'blob';
+
+    /** @var resource Hash resource that is sent when flushing the file to Azure. */
+    private $hash;
+
+    /** @var bool records whether the file was readable when validating the stream_handle */
+    private $readable = true;
+
+    /**
+     * Register the 'blob://' stream wrapper, replacing any wrapper already registered for the protocol.
+     *
+     * @param api $client Client to use with the stream wrapper
+     * @param string $protocol Protocol to register as.
+     */
+    public static function register(api $client, $protocol = 'blob') {
+        if (in_array($protocol, stream_get_wrappers(), true)) {
+            stream_wrapper_unregister($protocol);
+        }
+
+        stream_wrapper_register($protocol, get_called_class(), STREAM_IS_URL);
+        $default = stream_context_get_options(stream_context_get_default());
+        $default[$protocol]['client'] = $client;
+        stream_context_set_default($default);
+    }
+
+    /**
+     * Stream does not support casting.
+     *
+     * @param mixed $castas
+     * @return bool false
+     */
+    public function stream_cast($castas): bool {
+        return false;
+    }
+
+    /**
+     * Close the stream
+     * @return void
+     */
+    public function stream_close() {
+        $this->body = null;
+        $this->hash = null;
+    }
+
+    /**
+     * Opens the stream
+     *
+     * @param string $path Path of stream, usually blob://container/hash
+     * @param string $mode one of fopen modes, see https://www.php.net/manual/en/function.fopen.php
+     * @param int $options unused
+     * @param string $openedpath unused
+     *
+     * @return bool
+     */
+    public function stream_open($path, $mode, $options, &$openedpath): bool {
+        // Select the protocol from the path, usually blob
+        $this->initprotocol($path);
+
+        // TODO this just needs the hash, we already store the container in the client now.
+        $this->params = $this->getcontainerkey($path);
+
+        // Strip the binary/text flags so that e.g. 'rb' is handled as 'r'.
+        $this->mode = rtrim($mode, 'bt');
+
+        // Validate the path for the given mode.
+        if ($errors = $this->validate($path, $this->mode)) {
+            return $this->triggerError($errors);
+        }
+
+        $this->hash = hash_init('md5');
+
+        return $this->boolCall(function () {
+            switch ($this->mode) {
+                case 'r':
+                    return $this->openReadStream();
+                case 'a':
+                    return $this->openAppendStream();
+                default:
+                    return $this->openWriteStream();
+            }
+        });
+    }
+
+    /**
+     * Returns true if nothing more to read (eof i.e. end of file).
+     * @return bool
+     */
+    public function stream_eof(): bool {
+        return $this->body->eof();
+    }
+
+    /**
+     * Flushes (closes) the stream.
+     * In our case, this will upload the temporary file to Azure as a blob.
+     *
+     * @return bool True if successful, else false.
+     */
+    public function stream_flush(): bool {
+        // Cannot flush a read only stream.
+        if ($this->mode == 'r') {
+            return false;
+        }
+
+        // Go to start of the temporary file stream ($this->body).
+        if ($this->body->isSeekable()) {
+            $this->body->seek(0);
+        }
+
+        // Calculate the final md5 of the file, used for upload integrity checking.
+        $hash = hash_final($this->hash);
+        $md5 = hex2bin($hash);
+        $params = $this->getoptions(true);
+
+        return $this->boolCall(function () use ($params, $md5) {
+            $this->getClient()->put_blob($params['Key'], $this->body, $md5);
+            return true;
+        });
+    }
+
+    /**
+     * Reads the stream by the given byte amount/count.
+     * @param int $count Number of bytes to read
+     * @return string
+     */
+    public function stream_read($count) {
+        // If the file isn't readable, we need to return no content. Azure can emit XML here otherwise.
+        return $this->readable ? $this->body->read($count) : '';
+    }
+
+    /**
+     * Go to specific position in stream.
+     *
+     * @param int $offset
+     * @param int $whence
+     *
+     * @return bool
+     */
+    public function stream_seek($offset, $whence = SEEK_SET): bool {
+        return !$this->body->isSeekable()
+            ? false
+            : $this->boolCall(function () use ($offset, $whence) {
+                $this->body->seek($offset, $whence);
+                return true;
+            });
+    }
+
+    /**
+     * Return current position of stream
+     * @return int
+     */
+    public function stream_tell(): int {
+        return $this->body->tell();
+    }
+
+    /**
+     * Write data to stream
+     *
+     * @param string $data
+     * @return int Number of bytes written
+     */
+    public function stream_write($data): int {
+        hash_update($this->hash, $data);
+        return $this->body->write($data);
+    }
+
+    /**
+     * Get information about the current stream
+     * @return array
+     */
+    public function stream_stat(): array {
+        $stat = $this->getStatTemplate();
+        $stat[7] = $stat['size'] = $this->getSize();
+        $stat[2] = $stat['mode'] = $this->mode;
+
+        return $stat;
+    }
+
+    /**
+     * Get information about a filepath.
+     *
+     * Provides information for is_dir, is_file, filesize, etc. Works on
+     * buckets, keys, and prefixes.
+     * @link http://www.php.net/manual/en/streamwrapper.url-stat.php
+     *
+     * @param string $path
+     * @param mixed $flags
+     *
+     * @return mixed
+     */
+    public function url_stat($path, $flags) {
+        $stat = $this->getStatTemplate();
+
+        try {
+            $params = $this->withPath($path);
+
+            // TODO get size and lastmodified from blob properties
+            $bp = $this->getclient()->get_blob_properties($params['Key'])->wait();
+
+            // TODO double check right key in $bp.
+            $stat['size'] = $stat[7] = $bp['Content-Length'];
+
+            // Set the modification time and last modified to the Last-Modified header.
+            // TODO double check right key in $bp.
+            $lastmodified = $bp['Last-Modified'];
+
+            $stat['mtime'] = $stat[9] = $lastmodified;
+            $stat['ctime'] = $stat[10] = $lastmodified;
+
+            // Regular file with 0777 access - see "man 2 stat".
+            $stat['mode'] = $stat[2] = 0100777;
+
+            return $stat;
+
+            // TODO different ex catch
+            // Fully qualified: a bare ServiceException would resolve inside this namespace and never match.
+        } catch (\MicrosoftAzure\Storage\Common\Exceptions\ServiceException $ex) {
+            // The specified blob does not exist.
+            return false;
+        }
+    }
+
+    /**
+     * Splits a path of the form protocol://container/key into its container and key.
+     * @param string $path
+     *
+     * @return array With 'Container' and 'Key' entries; 'Key' is null when the path has no key part.
+     */
+    private function getcontainerkey($path) {
+        // Remove the protocol.
+        $parts = explode('://', $path);
+        // Get the container, key.
+        $parts = explode('/', $parts[1], 2);
+
+        return [
+            'Container' => $parts[0],
+            'Key' => isset($parts[1]) ? $parts[1] : null
+        ];
+    }
+
+    /**
+     * Get the stream context options available to the current stream
+     *
+     * @param bool $removecontextdata Set to true to remove contextual kvp's
+     *                                like 'client' from the result.
+     *
+     * @return array
+     */
+    private function getoptions($removecontextdata = false) {
+        // Context is not set when doing things like stat.
+        if ($this->context === null) {
+            $options = [];
+        } else {
+            $options = stream_context_get_options($this->context);
+            $options = isset($options[$this->protocol])
+                ? $options[$this->protocol]
+                : [];
+        }
+
+        $default = stream_context_get_options(stream_context_get_default());
+        $default = isset($default[$this->protocol])
+            ? $default[$this->protocol]
+            : [];
+        $result = $this->params + $options + $default;
+
+        if ($removecontextdata) {
+            unset($result['client'], $result['seekable']);
+        }
+
+        return $result;
+    }
+
+    /**
+     * Validates the provided stream arguments for fopen and returns an array
+     * of errors.
+     * @param string $path
+     * @param string $mode
+     *
+     * @return string[] Error messages, empty when the arguments are valid.
+     */
+    private function validate($path, $mode) {
+        $errors = [];
+
+        if (!$this->getOption('Key')) {
+            $errors[] = 'Cannot open a bucket. You must specify a path in the '
+                . 'form of blob://container/key';
+        }
+
+        if (!in_array($mode, ['r', 'w', 'a', 'x'], true)) {
+            $errors[] = "Mode not supported: {$mode}. "
+                . "Use one 'r', 'w', 'a', or 'x'.";
+        }
+
+        // Mode 'x' creates a new file for writing, so opening must fail when the blob already exists.
+        // Option keys are case sensitive, so the key must be read as 'Key' (see getcontainerkey()).
+        if ($mode == 'x' &&
+            // TODO implement blob_exists either in this class or in sdk class.
+            $this->getclient()->blob_exists($this->getOption('Key'))
+        ) {
+            $errors[] = "{$path} already exists on Azure Blob Storage";
+        }
+
+        // When using mode 'r' we should validate the file exists before opening a handle on it.
+        if ($mode == 'r' &&
+            // TODO implement blob_exists either in this class or in sdk class.
+            !$this->getclient()->blob_exists($this->getOption('Key'))
+        ) {
+            $errors[] = "{$path} does not exist on Azure Blob Storage";
+            $this->readable = false;
+        }
+
+        return $errors;
+    }
+
+    /**
+     * Parse the protocol out of the given path, falling back to 'blob' when it is empty.
+     *
+     * @param string $path
+     */
+    private function initprotocol($path) {
+        $parts = explode('://', $path, 2);
+        $this->protocol = $parts[0] ?: 'blob';
+    }
+}
\ No newline at end of file