From 075b285f15ee1a59c17ef6209515ba1c13549785 Mon Sep 17 00:00:00 2001 From: Jeremy Daer Date: Sun, 3 Mar 2024 21:00:01 -0800 Subject: [PATCH] Respect MIME type aliases * Warns when extending a type with preexisting extensions, parents, etc. * Warns when extending an aliased type. * MimeType.canonicalize type, instead_of: old to replace a canonical type and make it an alias. Common problem with types like WAV with multiple competing types, RFCs that aren't actually followed, and browser support trumping all. Allows us to override Tika with what browsers actually do. --- lib/marcel/magic.rb | 61 +++++++++++- lib/marcel/mime_type.rb | 38 +++++-- lib/marcel/mime_type/definitions.rb | 30 +++--- lib/marcel/tables.rb | 143 +++++++++++++++++++++++++++ script/generate_tables.rb | 7 ++ test/declared_type_test.rb | 5 + test/illustrator_test.rb | 5 + test/magic_and_declared_type_test.rb | 8 +- test/magic_and_name_test.rb | 2 +- test/magic_test.rb | 50 +++++++++- test/name_test.rb | 2 +- test/test_helper.rb | 6 ++ 12 files changed, 327 insertions(+), 30 deletions(-) diff --git a/lib/marcel/magic.rb b/lib/marcel/magic.rb index 6797ee4..18e6673 100644 --- a/lib/marcel/magic.rb +++ b/lib/marcel/magic.rb @@ -25,25 +25,58 @@ def initialize(type) # Option keys: # * :extensions: String list or single string of file extensions # * :parents: String list or single string of parent mime types + # * :aliases: String list or single string of aliased mime types # * :magic: Mime magic specification # * :comment: Comment string def self.add(type, options) extensions = [options[:extensions]].flatten.compact + extensions.each {|ext| EXTENSIONS[ext] = type } TYPE_EXTS[type] = extensions + + TYPE_ALIASES.delete(type) + [options[:aliases]].flatten.compact.each do |aliased| + TYPE_ALIASES[aliased] = type + end + parents = [options[:parents]].flatten.compact TYPE_PARENTS[type] = parents unless parents.empty? 
- extensions.each {|ext| EXTENSIONS[ext] = type } + MAGIC.unshift [type, options[:magic]] if options[:magic] end - # Removes a mime type from the dictionary. You might want to do this if + # Override the canonical MIME type with an alias or subtype. + def self.canonicalize(type, instead_of:) + raise ArgumentError, "#{instead_of} is an alias, not canonical" if TYPE_ALIASES[instead_of] + + # Remove the alias or subtype first + remove(type) + + # Replace the old canonical + EXTENSIONS.select { |_, t| t == instead_of }.each_key do |ext| + EXTENSIONS[ext] = type + end + + TYPE_ALIASES.select { |_, t| t == instead_of }.each_key do |aliased| + TYPE_ALIASES[aliased] = type + end + + TYPE_PARENTS[type] = TYPE_PARENTS.delete(instead_of) + + MAGIC.select { |t, _| t == instead_of }.each { |pair| pair[0] = type } + + # Alias the old canonical + TYPE_ALIASES[instead_of] = type + end + + # Removes a mime type from the dictionary. You might want to do this if # you're seeing impossible conflicts (for instance, application/x-gmc-link). - # * type: The mime type to remove. All associated extensions and magic are removed too. + # * type: The mime type to remove. def self.remove(type) - EXTENSIONS.delete_if {|ext, t| t == type } - MAGIC.delete_if {|t, m| t == type } + EXTENSIONS.delete_if { |ext, t| t == type } + MAGIC.delete_if { |t, m| t == type } TYPE_EXTS.delete(type) TYPE_PARENTS.delete(type) + TYPE_ALIASES.delete_if { |aliased, canonical| aliased == type || canonical == type } end # Returns true if type is a text format @@ -64,11 +97,24 @@ def extensions TYPE_EXTS[type] || [] end + def canonical + if to = TYPE_ALIASES[type] + self.class.new(to) + else + self + end + end + # Get mime comment def comment nil # deprecated end + # Lookup canonical mime type by mime type string + def self.by_type(type) + new(type.downcase).canonical if type + end + # Lookup mime type by file extension def self.by_extension(ext) ext = ext.to_s.downcase @@ -111,9 +157,14 @@ def hash alias == eql? 
def self.child?(child, parent) + child, parent = canonical(child), canonical(parent) child == parent || TYPE_PARENTS[child]&.any? {|p| child?(p, parent) } end + def self.canonical(aliased_type) + by_type(aliased_type)&.type + end + def self.magic_match(io, method) return magic_match(StringIO.new(io.to_s), method) unless io.respond_to?(:read) diff --git a/lib/marcel/mime_type.rb b/lib/marcel/mime_type.rb index 23da698..5ccc7bd 100644 --- a/lib/marcel/mime_type.rb +++ b/lib/marcel/mime_type.rb @@ -5,10 +5,37 @@ class MimeType BINARY = "application/octet-stream" class << self - def extend(type, extensions: [], parents: [], magic: nil) - extensions = (Array(extensions) + Array(Marcel::TYPE_EXTS[type])).uniq - parents = (Array(parents) + Array(Marcel::TYPE_PARENTS[type])).uniq - Magic.add(type, extensions: extensions, magic: magic, parents: parents) + def canonicalize(type, instead_of:) + Magic.canonicalize type, instead_of: instead_of + end + + def extend(type, extensions: nil, aliases: nil, parents: nil, magic: nil) + extensions = Array(extensions) + if extensions.any? && extensions.sort == Array(Marcel::TYPE_EXTS[type]).sort + warn "#{type} already has extensions #{extensions.inspect}" + end + extensions |= Array(Marcel::TYPE_EXTS[type]) + + aliases = Array(aliases) + existing_aliases = Marcel::TYPE_ALIASES.select { |_, t| t == type }.keys + if aliases.any? && aliases.sort == existing_aliases.sort + warn "#{type} already has aliases #{aliases.inspect}" + end + aliases |= existing_aliases + + parents = Array(parents) + if parents.any? && parents.sort == Array(Marcel::TYPE_PARENTS[type]).sort + warn "#{type} already has parents #{parents.inspect}" + end + parents |= Array(Marcel::TYPE_PARENTS[type]) + + magic = Array(magic) + existing_magic = Marcel::MAGIC.select { |t, _| t == type }.flat_map(&:last) + if magic.any? 
&& magic == existing_magic + warn "#{type} already has magic matchers #{magic.inspect}" + end + + Magic.add type, extensions: extensions, magic: (magic unless magic.empty?), aliases: aliases, parents: parents end # Returns the most appropriate content type for the given file. @@ -32,7 +59,6 @@ def for(pathname_or_io = nil, name: nil, extension: nil, declared_type: nil) end private - def for_data(pathname_or_io) if pathname_or_io with_io(pathname_or_io) do |io| @@ -60,7 +86,7 @@ def for_extension(extension) end def for_declared_type(declared_type) - type = parse_media_type(declared_type) + type = Marcel::Magic.canonical(parse_media_type(declared_type)) # application/octet-stream is treated as an undeclared/missing type, # allowing the type to be inferred from the filename. If there's no diff --git a/lib/marcel/mime_type/definitions.rb b/lib/marcel/mime_type/definitions.rb index 65db8bb..7d9d403 100644 --- a/lib/marcel/mime_type/definitions.rb +++ b/lib/marcel/mime_type/definitions.rb @@ -28,30 +28,30 @@ Marcel::MimeType.extend "application/vnd.ms-powerpoint.template.macroenabled.12", parents: "application/vnd.openxmlformats-officedocument.presentationml.presentation" Marcel::MimeType.extend "application/vnd.ms-powerpoint.slideshow.macroenabled.12", parents: "application/vnd.openxmlformats-officedocument.presentationml.presentation" -Marcel::MimeType.extend "application/vnd.apple.pages", extensions: %w( pages ), parents: "application/zip" -Marcel::MimeType.extend "application/vnd.apple.numbers", extensions: %w( numbers ), parents: "application/zip" -Marcel::MimeType.extend "application/vnd.apple.keynote", extensions: %w( key ), parents: "application/zip" -Marcel::MimeType.extend "audio/aac", extensions: %w( aac ), parents: "audio/x-aac" 
-Marcel::MimeType.extend("audio/ogg", extensions: %w( ogg oga ), magic: [[0, 'OggS', [[29, 'vorbis']]]]) +# Upstream aliases to application/x-x509-cert. Override with a ;format=pem subtype. +Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem" -Marcel::MimeType.extend "image/vnd.dwg", magic: [[0, "AC10"]] +Marcel::MimeType.extend "audio/mpc", magic: [[0, "MPCKSH"]], extensions: %w( mpc ) +Marcel::MimeType.extend "audio/ogg", extensions: %w( ogg oga ), magic: [[0, 'OggS', [[29, 'vorbis']]]] +Marcel::MimeType.canonicalize "audio/aac", instead_of: "audio/x-aac" +Marcel::MimeType.canonicalize "audio/flac", instead_of: "audio/x-flac" +Marcel::MimeType.canonicalize "audio/x-wav", instead_of: "audio/vnd.wave" -Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem" +Marcel::MimeType.extend "image/vnd.dwg", magic: [[0, "AC10"]] -Marcel::MimeType.extend "image/avif", magic: [[4, "ftypavif"]], extensions: %w( avif ) -Marcel::MimeType.extend "image/heif", magic: [[4, "ftypmif1"]], extensions: %w( heif ) -Marcel::MimeType.extend "image/heic", magic: [[4, "ftypheic"]], extensions: %w( heic ) +Marcel::MimeType.extend "image/avif", magic: [[4, "ftypavif"]] +Marcel::MimeType.extend "image/heif", magic: [[4, "ftypmif1"]] +Marcel::MimeType.extend "image/heic", magic: [[4, "ftypheic"]] Marcel::MimeType.extend "image/x-raw-sony", extensions: %w( arw ), parents: "image/tiff" -Marcel::MimeType.extend "image/x-raw-canon", extensions: %w( cr2 crw ), parents: "image/tiff" +Marcel::MimeType.extend "image/x-raw-canon", parents: "image/tiff" Marcel::MimeType.extend "video/mp4", magic: [[4, "ftypisom"], [4, "ftypM4V "]], extensions: %w( mp4 m4v ) -Marcel::MimeType.extend "audio/flac", magic: [[0, 'fLaC']], extensions: %w( flac ), parents: "audio/x-flac" 
-Marcel::MimeType.extend "audio/x-wav", magic: [[0, 'RIFF', [[8, 'WAVE']]]], extensions: %w( wav ), parents: "audio/vnd.wav" -Marcel::MimeType.extend "audio/mpc", magic: [[0, "MPCKSH"]], extensions: %w( mpc ) - Marcel::MimeType.extend "font/ttf", magic: [[0, "\x00\x01\x00\x00"]], extensions: %w( ttf ttc ) Marcel::MimeType.extend "font/otf", magic: [[0, "OTTO"]], extensions: %w( otf ), parents: "font/ttf" Marcel::MimeType.extend "application/vnd.adobe.flash.movie", magic: [[0, "FWS"], [0, "CWS"]], extensions: %w( swf ) diff --git a/lib/marcel/tables.rb b/lib/marcel/tables.rb index 52e0290..2a3de0d 100644 --- a/lib/marcel/tables.rb +++ b/lib/marcel/tables.rb @@ -2148,6 +2148,149 @@ module Marcel 'video/x-sgi-movie' => %w(movie), 'x-conference/x-cooltalk' => %w(ice), # Cooltalk Audio } + TYPE_ALIASES = { + 'application/bat' => 'application/x-bat', + 'application/x-coreldraw' => 'application/coreldraw', + 'application/x-cdr' => 'application/coreldraw', + 'application/cdr' => 'application/coreldraw', + 'image/x-cdr' => 'application/coreldraw', + 'image/cdr' => 'application/coreldraw', + 'application/x-setupscript' => 'application/inf', + 'application/x-wine-extension-inf' => 'application/inf', + 'application/x-javascript' => 'application/javascript', + 'text/javascript' => 'application/javascript', + 'application/x-java-vm' => 'application/java-vm', + 'application/x-java' => 'application/java-vm', + 'application/mac-binhex' => 'application/mac-binhex40', + 'application/binhex' => 'application/mac-binhex40', + 'application/vnd.ms-word' => 'application/msword', + 'application/x-ogg' => 'audio/vorbis', + 'application/msonenote' => 'application/onenote', + 'application/x-pdf' => 'application/pdf', + 'application/pgp' => 'application/pgp-encrypted', + 'text/rss' => 'application/rss+xml', + 'text/rtf' => 'application/rtf', + 'application/smil' => 'application/smil+xml', + 'application/x-kchart' => 'application/vnd.kde.kchart', + 'application/x-kpresenter' => 
'application/vnd.kde.kpresenter', + 'application/x-kspread' => 'application/vnd.kde.kspread', + 'application/x-kword' => 'application/vnd.kde.kword', + 'application/x-koan' => 'application/vnd.koan', + 'application/x-123' => 'application/vnd.lotus-1-2-3', + 'application/x-mif' => 'application/vnd.mif', + 'application/x-frame' => 'application/vnd.mif', + 'application/msexcel' => 'application/vnd.ms-excel', + 'application/mspowerpoint' => 'application/vnd.ms-powerpoint', + 'application/ms-tnef' => 'application/vnd.ms-tnef', + 'application/oxps' => 'application/vnd.ms-xpsdocument', + 'application/x-vnd.oasis.opendocument.chart' => 'application/vnd.oasis.opendocument.chart', + 'application/x-vnd.oasis.opendocument.chart-template' => 'application/vnd.oasis.opendocument.chart-template', + 'application/vnd.oasis.opendocument.database' => 'application/vnd.oasis.opendocument.base', + 'application/x-vnd.oasis.opendocument.formula' => 'application/vnd.oasis.opendocument.formula', + 'application/x-vnd.oasis.opendocument.formula-template' => 'application/vnd.oasis.opendocument.formula-template', + 'application/x-vnd.oasis.opendocument.graphics' => 'application/vnd.oasis.opendocument.graphics', + 'application/x-vnd.oasis.opendocument.graphics-template' => 'application/vnd.oasis.opendocument.graphics-template', + 'application/x-vnd.oasis.opendocument.image' => 'application/vnd.oasis.opendocument.image', + 'application/x-vnd.oasis.opendocument.image-template' => 'application/vnd.oasis.opendocument.image-template', + 'application/x-vnd.oasis.opendocument.presentation' => 'application/vnd.oasis.opendocument.presentation', + 'application/x-vnd.oasis.opendocument.presentation-template' => 'application/vnd.oasis.opendocument.presentation-template', + 'application/x-vnd.oasis.opendocument.spreadsheet' => 'application/vnd.oasis.opendocument.spreadsheet', + 'application/x-vnd.oasis.opendocument.spreadsheet-template' => 'application/vnd.oasis.opendocument.spreadsheet-template', + 
'application/x-vnd.oasis.opendocument.text' => 'application/vnd.oasis.opendocument.text', + 'application/x-vnd.oasis.opendocument.text-master' => 'application/vnd.oasis.opendocument.text-master', + 'application/x-vnd.oasis.opendocument.text-template' => 'application/vnd.oasis.opendocument.text-template', + 'application/x-vnd.oasis.opendocument.text-web' => 'application/vnd.oasis.opendocument.text-web', + 'application/x-vnd.sun.xml.writer' => 'application/vnd.sun.xml.writer', + 'application/vnd.ms-visio' => 'application/vnd.visio', + 'image/x-targa' => 'image/x-tga', + 'application/x-unix-archive' => 'application/x-archive', + 'application/x-arj-compressed' => 'application/x-arj', + 'application/x-dbm' => 'application/x-berkeley-db', + 'application/vnd.debian.binary-package' => 'application/x-debian-package', + 'application/x-Gnumeric-spreadsheet' => 'application/x-gnumeric', + 'application/x-gzip' => 'application/gzip', + 'application/x-gunzip' => 'application/gzip', + 'application/gzipped' => 'application/gzip', + 'application/gzip-compressed' => 'application/gzip', + 'application/x-gzip-compressed' => 'application/gzip', + 'gzip/document' => 'application/gzip', + 'application/x-windows-installer' => 'application/x-ms-installer', + 'application/x-msi' => 'application/x-ms-installer', + 'application/x-rar' => 'application/x-rar-compressed', + 'text/x-tex' => 'application/x-tex', + 'text/x-texinfo' => 'application/x-texinfo', + 'application/x-x509-ca-cert' => 'application/x-x509-cert', + 'application/x-x509-user-cert' => 'application/x-x509-cert', + 'text/xml' => 'application/xml', + 'application/x-xml' => 'application/xml', + 'text/x-dtd' => 'application/xml-dtd', + 'text/xml-external-parsed-entity' => 'application/xml-external-parsed-entity', + 'text/xsl' => 'application/xslt+xml', + 'application/x-zip-compressed' => 'application/zip', + 'application/x-deflate' => 'application/zlib', + 'audio/x-m4a' => 'audio/mp4', + 'audio/x-mp4a' => 'audio/mp4', + 'audio/x-mpeg' 
=> 'audio/mpeg', + 'audio/x-ogg-flac' => 'audio/x-oggflac', + 'audio/x-ogg-pcm' => 'audio/x-oggpcm', + 'application/x-speex' => 'audio/speex', + 'audio/aiff' => 'audio/x-aiff', + 'audio/x-realaudio' => 'audio/x-pn-realaudio', + 'audio/x-wav' => 'audio/vnd.wave', + 'audio/wave' => 'audio/vnd.wave', + 'audio/wav' => 'audio/vnd.wave', + 'image/x-bmp' => 'image/bmp', + 'image/x-ms-bmp' => 'image/bmp', + 'image/x-emf' => 'image/emf', + 'application/x-emf' => 'image/emf', + 'application/x-ms-emz' => 'image/x-emf-compressed', + 'image/hevc' => 'image/heic', + 'image/hevc-sequence' => 'image/heic-sequence', + 'video/jpm' => 'image/jpm', + 'image/ntf' => 'image/nitf', + 'image/x-psd' => 'image/vnd.adobe.photoshop', + 'application/photoshop' => 'image/vnd.adobe.photoshop', + 'image/x-dwg' => 'image/vnd.dwg', + 'application/acad' => 'image/vnd.dwg', + 'application/x-acad' => 'image/vnd.dwg', + 'application/autocad_dwg' => 'image/vnd.dwg', + 'application/dwg' => 'image/vnd.dwg', + 'application/x-dwg' => 'image/vnd.dwg', + 'application/x-autocad' => 'image/vnd.dwg', + 'drawing/dwg' => 'image/vnd.dwg', + 'image/x-icon' => 'image/vnd.microsoft.icon', + 'image/x-dcx' => 'image/vnd.zbrush.dcx', + 'image/x-pcx' => 'image/vnd.zbrush.pcx', + 'image/x-pc-paintbrush' => 'image/vnd.zbrush.pcx', + 'image/x-wmf' => 'image/wmf', + 'application/x-msmetafile' => 'image/wmf', + 'image/x-jb2' => 'image/x-jbig2', + 'image/xcf' => 'image/x-xcf', + 'application/x-mimearchive' => 'multipart/related', + 'message/rfc2557' => 'multipart/related', + 'drawing/x-dwf' => 'model/vnd.dwf', + 'text/x-asm' => 'text/x-assembly', + 'application/x-troff' => 'text/troff', + 'application/x-troff-man' => 'text/troff', + 'application/x-troff-me' => 'text/troff', + 'application/x-troff-ms' => 'text/troff', + 'text/x-c' => 'text/x-csrc', + 'text/x-java' => 'text/x-java-source', + 'text/x-properties' => 'text/x-java-properties', + 'text/properties' => 'text/x-java-properties', + 'application/x-httpd-jsp' => 
'text/x-jsp', + 'application/matlab-mat' => 'application/x-matlab-data', + 'application/x-tcl' => 'text/x-tcl', + 'video/x-daala' => 'video/daala', + 'video/x-theora' => 'video/theora', + 'video/x-ogg-uvs' => 'video/x-ogguvs', + 'video/x-ogg-yuv' => 'video/x-oggyuv', + 'video/x-ogg-rgb' => 'video/x-oggrgb', + 'video/avi' => 'video/x-msvideo', + 'video/msvideo' => 'video/x-msvideo', + 'application/font-woff' => 'font/woff', + 'application/font-woff2' => 'font/woff2', + } TYPE_PARENTS = { 'application/bizagi-modeler' => %w(application/zip), 'application/dash+xml' => %w(application/xml), diff --git a/script/generate_tables.rb b/script/generate_tables.rb index 5ff10e3..27a632c 100755 --- a/script/generate_tables.rb +++ b/script/generate_tables.rb @@ -128,6 +128,7 @@ def get_matches(mime, parent) extensions = {} types = {} +aliases = {} magics = [] ARGV.each do |path| @@ -137,6 +138,7 @@ def get_matches(mime, parent) (doc/'mime-info/mime-type').each do |mime| comments = Hash[*(mime/'_comment').map {|comment| [comment['xml:lang'], comment.inner_text] }.flatten] type = mime['type'] + (mime/'alias').each { |x| aliases[x['type']] = type } subclass = (mime/'sub-class-of').map{|x| x['type']} exts = (mime/'glob').map{|x| x['pattern'] =~ /^\*\.([^\[\]]+)$/ ? 
$1.downcase : nil }.compact (mime/'magic').each do |magic| @@ -222,6 +224,11 @@ def get_matches(mime, parent) puts " '#{key}' => %w(#{exts}),#{comment}" end puts " }" +puts " TYPE_ALIASES = {" +aliases.each do |aliased, type| + puts " '#{aliased}' => '#{type}'," +end +puts " }" puts " TYPE_PARENTS = {" types.keys.sort.each do |key| parents = types[key][1].sort.join(' ') diff --git a/test/declared_type_test.rb b/test/declared_type_test.rb index d1feeac..0cf607e 100644 --- a/test/declared_type_test.rb +++ b/test/declared_type_test.rb @@ -19,4 +19,9 @@ class Marcel::MimeType::DeclaredTypeTest < Marcel::TestCase test "ignores charset declarations" do assert_equal "text/html", Marcel::MimeType.for(declared_type: "text/html; charset=utf-8") end + + test "resolves declared type to a canonical MIME type" do + aliased, canonical = Marcel::TYPE_ALIASES.first + assert_equal canonical, Marcel::MimeType.for(declared_type: aliased) + end end diff --git a/test/illustrator_test.rb b/test/illustrator_test.rb index 1f75805..bb96cff 100644 --- a/test/illustrator_test.rb +++ b/test/illustrator_test.rb @@ -2,6 +2,11 @@ require 'rack' class Marcel::MimeType::IllustratorTest < Marcel::TestCase + test ".ai uploaded as application/illustrator" do + file = files("name/application/illustrator/illustrator.ai") + assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/illustrator") + end + test ".ai uploaded as application/postscript" do file = files("name/application/illustrator/illustrator.ai") assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/postscript") diff --git a/test/magic_and_declared_type_test.rb b/test/magic_and_declared_type_test.rb index de3a189..b9efa9a 100644 --- a/test/magic_and_declared_type_test.rb +++ b/test/magic_and_declared_type_test.rb @@ -3,8 +3,14 @@ class Marcel::MimeType::MagicAndDeclaredTypeTest < Marcel::TestCase 
each_content_type_fixture('name') do |file, name, content_type| - test "correctly returns #{content_type} for #{name} given both file and declared type" do + test "detects #{content_type} given magic bytes from #{name} and declared type" do assert_equal content_type, Marcel::MimeType.for(file, declared_type: content_type) end + + ALIASED[content_type].each do |aliased| + test "detects #{content_type} given magic bytes from #{name} and aliased type #{aliased}" do + assert_equal content_type, Marcel::MimeType.for(file, declared_type: aliased) + end + end end end diff --git a/test/magic_and_name_test.rb b/test/magic_and_name_test.rb index a686a1a..8cae5f4 100644 --- a/test/magic_and_name_test.rb +++ b/test/magic_and_name_test.rb @@ -6,7 +6,7 @@ class Marcel::MimeType::MagicAndNameTest < Marcel::TestCase # the file contents and the name. In some cases, the file contents will point to a # generic type, while the name will choose a more specific subclass each_content_type_fixture('name') do |file, name, content_type| - test "correctly returns #{content_type} for #{name} given both file and name" do + test "detects #{content_type} given filename #{name} and its magic bytes" do assert_equal content_type, Marcel::MimeType.for(file, name: name) end end diff --git a/test/magic_test.rb b/test/magic_test.rb index 14d11e2..06fdebd 100644 --- a/test/magic_test.rb +++ b/test/magic_test.rb @@ -6,16 +6,57 @@ class Marcel::MimeType::MagicTest < Marcel::TestCase # has more specific subclasses (such as application/zip), these subclasses cannot usually # be recognised by magic alone; their name is also needed to correctly identify them. 
each_content_type_fixture('magic') do |file, name, content_type| - test "gets type for #{content_type} by using only magic bytes #{name}" do + test "detects #{content_type} given magic bytes from #{name}" do assert_equal content_type, Marcel::MimeType.for(file) end end + test "switch canonical type" do + Marcel::Magic.add('canonical/type', aliases: 'alias/type', extensions: %w[ canonical ], parents: 'canonical/parent', magic: [[0, 'magic']]) + assert Marcel::Magic.child?('canonical/type', 'canonical/parent') + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/type') + assert_equal 'canonical/type', Marcel::Magic.by_extension('canonical').type + assert_equal 'canonical/type', Marcel::Magic.by_magic('magic').type + + Marcel::Magic.canonicalize('alias/type', instead_of: 'canonical/type') + assert Marcel::Magic.child?('alias/type', 'canonical/parent') + assert_equal 'alias/type', Marcel::Magic.canonical('alias/type') + assert_equal 'alias/type', Marcel::Magic.canonical('canonical/type') + assert_equal 'alias/type', Marcel::Magic.by_extension('canonical').type + assert_equal 'alias/type', Marcel::Magic.by_magic('magic').type + end + test "add and remove type" do Marcel::Magic.add('application/x-my-thing', extensions: 'mtg', parents: 'application/json') Marcel::Magic.remove('application/x-my-thing') end + test "removing alias" do + Marcel::Magic.add('canonical/type', aliases: 'alias/type') + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/type') + + Marcel::Magic.remove('alias/type') + assert_equal 'alias/type', Marcel::Magic.canonical('alias/type') + end + + test "removing canonical removes aliases" do + Marcel::Magic.add('canonical/type', aliases: %w[ alias/one alias/two ]) + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/one') + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/two') + + Marcel::Magic.remove('canonical/type') + assert_equal 'alias/one', Marcel::Magic.canonical('alias/one') + assert_equal 
'alias/two', Marcel::Magic.canonical('alias/two') + end + + test "adding type removes existing alias" do + Marcel::Magic.add('canonical/type', aliases: 'alias/type') + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/type') + + Marcel::Magic.add('alias/type', comment: "overrides old alias") + assert_equal 'alias/type', Marcel::Magic.canonical('alias/type') + end + test "#extensions" do json = Marcel::Magic.by_extension('json') assert_equal ['json'], json.extensions @@ -25,4 +66,11 @@ class Marcel::MimeType::MagicTest < Marcel::TestCase assert Marcel::Magic.child?('text/csv', 'text/plain') refute Marcel::Magic.child?('text/plain', 'text/csv') end + + test "child? with aliases" do + Marcel::Magic.add('canonical/parent', aliases: 'alias/parent') + Marcel::Magic.add('canonical/child', aliases: 'alias/child', parents: 'canonical/parent') + + assert Marcel::Magic.child?('alias/child', 'alias/parent') + end end diff --git a/test/name_test.rb b/test/name_test.rb index 030046e..287e35f 100644 --- a/test/name_test.rb +++ b/test/name_test.rb @@ -3,7 +3,7 @@ class Marcel::MimeType::NameTest < Marcel::TestCase each_content_type_fixture('name') do |file, name, content_type| - test "gets type for #{content_type} by filename from #{name}" do + test "detects #{content_type} given filename #{name}" do assert_equal content_type, Marcel::MimeType.for(name: name) end end diff --git a/test/test_helper.rb b/test/test_helper.rb index 8721e03..e4b0c06 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -4,6 +4,12 @@ require 'debug' rescue nil +# Simplify testing. No need for reverse mapping at runtime. +ALIASED = Hash.new { |h, k| h[k] = [] } +Marcel::TYPE_ALIASES.each do |aliased, type| + ALIASED[type] << aliased +end + class Marcel::TestCase < Minitest::Test class << self def setup(&block)