From f4dc218c399dea89fcf6c530923516b5f43138ae Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Wed, 7 Dec 2022 16:06:17 -0800 Subject: [PATCH 01/11] Issue #75: remove some unused method temps --- src/FileSystem-Tests-Core/FileReferenceTest.class.st | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/FileSystem-Tests-Core/FileReferenceTest.class.st b/src/FileSystem-Tests-Core/FileReferenceTest.class.st index c2ca5a5..18d0b34 100644 --- a/src/FileSystem-Tests-Core/FileReferenceTest.class.st +++ b/src/FileSystem-Tests-Core/FileReferenceTest.class.st @@ -1164,7 +1164,7 @@ FileReferenceTest >> testReadStreamIfAbsent [ { #category : 'tests' } FileReferenceTest >> testRelativeTo [ - | alpha beta reference path result | + | alpha beta | alpha := sandbox / 'alpha'. beta := alpha / 'beta'. self @@ -1326,7 +1326,7 @@ FileReferenceTest >> testWithExtension [ { #category : 'tests' } FileReferenceTest >> testWithoutExtension [ - | reference result | + | reference | reference := sandbox / 'alpha.beta.gamma'. reference := reference withoutExtension. self From 8cd2604bc95a33c603cf2c922408f73f8b2fe1ba Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Fri, 9 Dec 2022 17:40:26 -0800 Subject: [PATCH 02/11] Issue #79: checkout ZnCharacterStreamTests >> testUpToAll is passing, ZnCharacterStreamTests >> testUpToAllTwice is not ... --- .../CharacterCollection.extension.st | 11 +- .../Zn8BITEncoder.class.st | 122 ++++++++++++++++++ .../ZnBufferedReadStream.class.st | 16 ++- .../Zn8BITCharacterEncoderTests.class.st | 37 ++++++ .../ZnCharacterEncoderTests.class.st | 49 ++++--- .../ZnCharacterStreamTests.class.st | 106 +++++++++++++-- .../ZnUTF8CharacterEncoderTests.class.st | 30 +++++ 7 files changed, 329 insertions(+), 42 deletions(-) create mode 100644 src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st create mode 100644 src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st create mode 100644 src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st diff --git a/src/FileSystem-GemStone-Kernel/CharacterCollection.extension.st b/src/FileSystem-GemStone-Kernel/CharacterCollection.extension.st index b9eceb8..3ead192 100644 --- a/src/FileSystem-GemStone-Kernel/CharacterCollection.extension.st +++ b/src/FileSystem-GemStone-Kernel/CharacterCollection.extension.st @@ -20,12 +20,15 @@ CharacterCollection >> asResolvedBy: aFileSystem [ { #category : '*filesystem-gemstone-kernel' } CharacterCollection >> asZnCharacterEncoder [ "Return a ZnCharacterEncoder instance using the receiver as identifier" - + " 'UTF-8' asZnCharacterEncoder " - - ((self select: [ :each | each isAlphaNumeric ]) asLowercase) = 'utf8' ifFalse: [ self error: 'Only utf8 encoding supported']. - ^ ZnUTF8Encoder new + (self select: [ :each | each isAlphaNumeric ]) asLowercase = 'utf8' + ifTrue: [ ^ ZnUTF8Encoder new ] + ifFalse: [ + (self select: [ :each | each isAlphaNumeric ]) asLowercase = '8bit' + ifFalse: [ self error: 'only 8bit or utf8 encoding supported' ] ]. + ^ Zn8BITEncoder new ] { #category : '*filesystem-gemstone-kernel' } diff --git a/src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st b/src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st new file mode 100644 index 0000000..bf4c281 --- /dev/null +++ b/src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st @@ -0,0 +1,122 @@ +" +Part of FileSystem + +========= + +I implement the encoding and decoding of Extended ASCII (8 bit character encoding) that produces instances of class String. + +The encoding is consistent with topaz 'fileformat 8BIT' (see section 1.3 Handling text outside the ASCII range in the topaz manual[1] for more details). + +[1] https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/GS64-Topaz-3.6.htm?https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/1-Tutorial.htm#pgfId-1130673 +" +Class { + #name : 'Zn8BITEncoder', + #superclass : 'ZnCharacterEncoder', + #classVars : [ + 'Default' + ], + #category : 'Zinc-Character-Encoding-Core' +} + +{ #category : 'accessing' } +Zn8BITEncoder class >> default [ + "Return a cached instance of the most commonly used encoder, + which is faster than going via #newForEncoding: that does a subclass search" + + ^ Default ifNil: [ Default := self new ] +] + +{ #category : 'accessing' } +Zn8BITEncoder class >> handlesEncoding: string [ + "Return true when my instances handle the encoding described by string" + + ^ (self canonicalEncodingIdentifier: string) = '8bit' +] + +{ #category : 'accessing' } +Zn8BITEncoder class >> knownEncodingIdentifiers [ + ^ #( #'8bit' ) +] + +{ #category : 'instance creation' } +Zn8BITEncoder class >> newForEncoding: string [ + "No further parametrization needed" + + ^ self new +] + +{ #category : 'converting' } +Zn8BITEncoder >> backOnStream: stream [ + "Move back one character on stream" + + stream position = 0 + ifTrue: [Error signal: 'Cannot move backward past the start of the stream.']. + stream skip: -1 +] + +{ #category : 'convenience' } +Zn8BITEncoder >> decodeAsCodePoints: bytes [ + "Decode bytes and return the resulting code points" + + ^ String withBytes: bytes +] + +{ #category : 'convenience' } +Zn8BITEncoder >> decodeBytes: bytes [ + "Decode bytes and return the resulting string" + + ^ String withBytes: bytes +] + +{ #category : 'converting' } +Zn8BITEncoder >> encodedByteCountFor: character [ + "Return how many bytes are needed to encode character" + + ^ 1 +] + +{ #category : 'convenience' } +Zn8BITEncoder >> encodeString: string [ + "Encode string and return the resulting Utf8 instance" + + ^ string asByteArray +] + +{ #category : 'accessing' } +Zn8BITEncoder >> identifier [ + ^ #'8bit' +] + +{ #category : 'converting' } +Zn8BITEncoder >> nextCodePointFromStream: stream [ + "Read and return the next integer code point from stream" + + ^ stream next +] + +{ #category : 'converting' } +Zn8BITEncoder >> nextFromStream: stream [ + "Read and return the next character from stream" + + ^ Character codePoint: stream next +] + +{ #category : 'converting' } +Zn8BITEncoder >> nextPutCodePoint: codePoint toStream: stream [ + "Write the encoding for Integer code point to stream" + + ^ stream nextPut: (Character codePoint: codePoint) +] + +{ #category : 'convenience' } +Zn8BITEncoder >> readInto: string startingAt: offset count: requestedCount fromStream: stream [ + "Read requestedCount characters into string starting at offset, + returning the number read, there could be less available when stream is atEnd." + + | stringBuffer | + stringBuffer := string. + offset to: offset + requestedCount - 1 do: [ :index | + stream atEnd ifTrue: [ ^ index - offset ]. + stringBuffer codePointAt: index put: (self nextCodePointFromStream: stream)]. + ^ requestedCount +] diff --git a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st index 614f8fa..adda150 100644 --- a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st @@ -407,9 +407,9 @@ ZnBufferedReadStream >> upTo: value [ This could be further optimzed." ^ self collectionSpecies - streamContents: [ :writeStream | | element | - [ self atEnd or: [ (element := self next) = value ] ] whileFalse: [ - writeStream nextPut: element ] ] + streamContents: [ :writeStream | + [ self atEnd or: [ (self peek) = value ] ] whileFalse: [ + writeStream nextPut: self next ] ] ] { #category : 'accessing' } @@ -417,11 +417,15 @@ ZnBufferedReadStream >> upToAll: aCollection [ "Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream." | startPos endMatch result x | +aCollection isEmpty ifTrue: [ ^aCollection ]. startPos := self position. +"upTo: will stop before aCollection first" x := self upTo: aCollection first. -self atEnd ifTrue: [ ^ x ]. -2 to: aCollection size do: [:i | - self peek = (aCollection at: i) +(self atEnd or: [aCollection size = 1 ]) + ifTrue: [ ^ x ]. +self next. "move past the matching char from upTo:" +2 to: aCollection size do: [:i | | y | + (y := self peek) = (aCollection at: i) ifTrue: [ self next ] ifFalse: [ self position: startPos. ^ self upToEnd ] ]. diff --git a/src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st b/src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st new file mode 100644 index 0000000..a4616a9 --- /dev/null +++ b/src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st @@ -0,0 +1,37 @@ +Class { + #name : 'Zn8BITCharacterEncoderTests', + #superclass : 'ZnCharacterEncoderTests', + #category : 'Zinc-Character-Encoding-Tests' +} + +{ #category : 'private' } +Zn8BITCharacterEncoderTests >> _encoder [ + + ^ Zn8BITEncoder new +] + +{ #category : 'private' } +Zn8BITCharacterEncoderTests >> _encoderId [ + ^ #'8bit' +] + +{ #category : 'private' } +Zn8BITCharacterEncoderTests >> decodeBytes: bytes with: encoder [ + | input | + input := bytes readStream. + ^ String streamContents: [ :stream | + [ input atEnd ] whileFalse: [ + stream nextPut: (encoder nextFromStream: input) ] ] +] + +{ #category : 'testing' } +Zn8BITCharacterEncoderTests >> testByteEncoding [ + | encoder bytes string | + encoder := self _encoder. + string := '123AbC', (Character codePoint: 128), (Character codePoint: 255), (Character codePoint: 150), (Character codePoint: 192), (Character codePoint: 224). + bytes := encoder encodeString: string. + self assert: (bytes decodeWith: encoder) equals: (encoder decodeBytes: bytes). + self assert: (bytes decodeWith: self _encoderId) equals: (encoder decodeBytes: bytes). + self assert: (String withBytes: bytes) equals: string. + self assert: string asByteArray equals: bytes. +] diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st index 918789c..ad1e215 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st @@ -34,6 +34,33 @@ ZnCharacterEncoderTests class >> unicodeCharacterSource [ ^ ($A to: $Z), ($a to: $z), ($0 to: $9), '.-_/*+=|,;?!$&<>^%#', ' ', 'éèçüäßñα', '€∏' ] +{ #category : 'Testing' } +ZnCharacterEncoderTests class >> isAbstract [ + "Override to true if a TestCase subclass is Abstract and should not have + TestCase instances built from it" + + ^ self sunitName = #'ZnCharacterEncoderTests' +] + +{ #category : 'private' } +ZnCharacterEncoderTests >> _encoderId [ + self subclassResponsibility +] + +{ #category : 'private' } +ZnCharacterEncoderTests >> _sourceClass [ + self subclassResponsibility +] + +{ #category : 'public' } +ZnCharacterEncoderTests >> assert: anObject unicodeEquals: otherObj [ + "allow comparison of unicode and legacy strings" + + self + assert: (anObject _unicodeEqual: otherObj) + description: anObject printString , ' is not equal to ' , otherObj printString. +] + { #category : 'public' } ZnCharacterEncoderTests >> assertCharacterCollection: anObject equals: otherObj [ "allow comparison between unitcode and legacy strings in legacy mode" @@ -45,17 +72,7 @@ ZnCharacterEncoderTests >> assertCharacterCollection: anObject equals: otherObj { #category : 'private' } ZnCharacterEncoderTests >> decodeBytes: bytes with: encoder [ -true - ifTrue: [ - "GemStone does not support streamed decoding ... hack for tests" - ^ bytes decodeFromUTF8 - ] ifFalse: [ - - | input | - input := bytes readStream. - ^ String streamContents: [ :stream | - [ input atEnd ] whileFalse: [ - stream nextPut: (encoder nextFromStream: input) ] ] ] + self subclassResponsibility ] { #category : 'private' } @@ -70,16 +87,6 @@ ZnCharacterEncoderTests >> encodeString: string with: encoder [ encoder nextPut: each toStream: stream ] ] ] -{ #category : 'testing' } -ZnCharacterEncoderTests >> testByteDecoding [ - | encoder bytes | - encoder := ZnUTF8Encoder new. - bytes := encoder encodeString: 'élève en Français'. - self assert: (bytes decodeWith: encoder) equals: (encoder decodeBytes: bytes). - self assert: (bytes decodeWith: #utf8) equals: (encoder decodeBytes: bytes). - self assert: bytes utf8Decoded equals: (encoder decodeBytes: bytes) -] - { #category : 'testing' } ZnCharacterEncoderTests >> testCodePointEncodingDecoding [ | encoder input output | diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st index cdd96f1..2b3c56f 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st @@ -11,12 +11,35 @@ Class { #category : 'Zinc-Character-Encoding-Tests' } +{ #category : 'testing' } +ZnCharacterStreamTests >> assertUpTo: array [ + | encodingStream | + encodingStream := array first class == String + ifTrue: [ self eightBitReadStreamOn: array first ] + ifFalse: [ self utf8ReadStreamOn: array first ]. + self assert: (array first readStream upTo: array second) equals: array third. + self assert: (encodingStream upTo: array second) equals: array third +] + { #category : 'testing' } ZnCharacterStreamTests >> assertUpToAll: array [ - | utf8Stream | - utf8Stream := self utf8ReadStreamOn: array first. + | encodingStream | + encodingStream := array first class == String + ifTrue: [ self eightBitReadStreamOn: array first ] + ifFalse: [ self utf8ReadStreamOn: array first ]. self assert: (array first readStream upToAll: array second) equals: array third. - self assert: (utf8Stream upToAll: array second) equals: array third + self assert: (encodingStream upToAll: array second) equals: array third +] + +{ #category : 'testing' } +ZnCharacterStreamTests >> eightBitReadStreamOn: string [ + | bytes stream | + bytes := Zn8BITEncoder new encodeString: string. + stream := ZnBufferedReadStream on: (ZnCharacterReadStream + on: bytes readStreamPortable + encoding: #'8bit'). + stream sizeBuffer: string size. + ^stream ] { #category : 'testing' } @@ -56,7 +79,6 @@ ZnCharacterStreamTests >> testNextLine [ { #category : 'testing' } ZnCharacterStreamTests >> testPeek [ | string bytes readStream | - true ifTrue: [ "bypass" ^ self ]. string := 'élève en Français'. bytes := ZnUTF8Encoder new encodeString: string. readStream := ZnBufferedReadStream on: (ZnCharacterReadStream on: bytes readStreamPortable). @@ -113,9 +135,62 @@ ZnCharacterStreamTests >> testSimpleUTF8WriteStream [ equals: bytes asByteArray ] +{ #category : 'testing' } +ZnCharacterStreamTests >> testUpTo [ + | char1 char2 string1 string2 | + char1 := (Character codePoint: 257). + string1 := '', char1. "DoubleByteString" + char2 := (Character codePoint:16rffff1). + string2 := '', char2. "QuadByteString" + { + "extended ASCII String" + {'' . $ß . '' } . + {'ß' . $ß . '' } . + {'ße' . $ß . '' } . + {'ß' . $e . 'ß' } . + {'ße' . $e . 'ß' } . + {'ßen' . $e . 'ß' } . + {'ißen' . $e . 'iß' } . + {'iß' . $ß . 'i' } . + {'iße' . $ß . 'i' } . + + "DoubleByteString" + {'' . char1 . '' } . + {string1 . char1 . '' } . + {(string1, 'e') . char1 . '' } . + {string1 . $e . string1 } . + {(string1, 'e') . $e . string1 } . + {(string1, 'en') . $e . string1 } . + {'i', string1, 'en' . $e . 'i', string1 } . + {'i', string1 . char1 . 'i' } . + {'i', string1, 'e' . char1 . 'i' } . + + "QuadByteString" + {'' . char2 . '' } . + {string2 . char2 . '' } . + {(string2, 'e') . char2 . '' } . + {string2 . $e . string2 } . + {(string2, 'e') . $e . string2 } . + {(string2, 'en') . $e . string2 } . + {'i', string2, 'en' . $e . 'i', string2 }. + {'i', string2 . char2 . 'i' } . + {'i', string2, 'e' . char2 . 'i' } . + + "ASCII String" + {'a' . $a . '' } . + {'a' . $b . 'a' } . + {'ab' . $a . '' } . + {'ab' . $b . 'a' } . + {'ab' . $c . 'ab' } . + {'abc' . $a . '' } . + {'abc' . $b . 'a' } . + {'abc' . $c . 'ab' } . + {'abc' . $d . 'abc' } . + } do: [ :array | self assertUpTo: array ] +] + { #category : 'testing' } ZnCharacterStreamTests >> testUpToAll [ - true ifTrue: [ "upToAll: not supported for buffered streams ... skip test for now" ^ self ]. #( ('' '' '') ('' 'ß' '') @@ -136,6 +211,7 @@ ZnCharacterStreamTests >> testUpToAll [ ('ißend' 'en' 'iß') ('iß' 'ß' 'i') ('iße' 'ß' 'i') + ('eißen' 'ße' 'ei') ('eißen' 'ßend' 'eißen') ('abcdefgh' 'cd' 'ab') ('a' '' '') @@ -159,15 +235,23 @@ ZnCharacterStreamTests >> testUpToAll [ { #category : 'testing' } ZnCharacterStreamTests >> testUpToAllTwice [ - | utf8Stream stream | - true ifTrue: [ "upToAll: not supported for buffered streams ... skip test for now" ^ self ]. - utf8Stream := self utf8ReadStreamOn: 'eißendeße'. + | string utf8Stream stream eightBitStream a b | + + string := 'eißendeße'. + stream := string readStreamPortable. + self assert: (stream upToAll: 'ße') equals: 'ei'. + self assert: (stream upToAll: 'ße') equals: 'nde'. + + utf8Stream := self utf8ReadStreamOn: string. self assert: (utf8Stream upToAll: 'ße') equals: 'ei'. self assert: (utf8Stream upToAll: 'ße') equals: 'nde'. - stream := 'eißendeße' readStreamPortable. - self assert: (stream upToAll: 'ße') equals: 'ei'. - self assert: (stream upToAll: 'ße') equals: 'nde' + a := 'ABC', (Character codePoint: 128), (Character codePoint: 255). + b := '', (Character codePoint: 150), (Character codePoint: 192), (Character codePoint: 224). + eightBitStream := self eightBitReadStreamOn: ( a, '123', b, '123'). + self assert: (eightBitStream upToAll: '123') equals: a. + self assert: (eightBitStream upToAll: '123') equals: b + ] { #category : 'testing' } diff --git a/src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st new file mode 100644 index 0000000..863b32f --- /dev/null +++ b/src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st @@ -0,0 +1,30 @@ +Class { + #name : 'ZnUTF8CharacterEncoderTests', + #superclass : 'ZnCharacterEncoderTests', + #category : 'Zinc-Character-Encoding-Tests' +} + +{ #category : 'private' } +ZnUTF8CharacterEncoderTests >> _encoder [ + + ^ ZnUTF8Encoder new +] + +{ #category : 'private' } +ZnUTF8CharacterEncoderTests >> _encoderId [ + ^ #utf8 +] + +{ #category : 'private' } +ZnUTF8CharacterEncoderTests >> _sourceClass [ + ^ Unicode7 +] + +{ #category : 'private' } +ZnUTF8CharacterEncoderTests >> decodeBytes: bytes with: encoder [ + | input | + input := bytes readStream. + ^ String streamContents: [ :stream | + [ input atEnd ] whileFalse: [ + stream nextPut: (encoder nextFromStream: input) ] ] +] From 762fd3f6cc6c5668d17ab115f1f3b4ba06d4d318 Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Sat, 10 Dec 2022 13:44:35 -0800 Subject: [PATCH 03/11] Issue #79: #testUpToAll wasn't testing utf8 encoded streams, unlike #testUpToAllTwice, so added utf8 encoded streams to #testUpToAll (and boom) also updated #testUpToAll samples to include strings from #testUpToAllTwice ... failing tests --- .../ZnCharacterStreamTests.class.st | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st index 2b3c56f..527803d 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st @@ -24,11 +24,11 @@ ZnCharacterStreamTests >> assertUpTo: array [ { #category : 'testing' } ZnCharacterStreamTests >> assertUpToAll: array [ | encodingStream | - encodingStream := array first class == String - ifTrue: [ self eightBitReadStreamOn: array first ] - ifFalse: [ self utf8ReadStreamOn: array first ]. self assert: (array first readStream upToAll: array second) equals: array third. - self assert: (encodingStream upToAll: array second) equals: array third + encodingStream := self utf8ReadStreamOn: array first . + self assert: (encodingStream upToAll: array second) equals: array third. + encodingStream := self eightBitReadStreamOn: array first. + self assert: (encodingStream upToAll: array second) equals: array third. ] { #category : 'testing' } @@ -213,6 +213,7 @@ ZnCharacterStreamTests >> testUpToAll [ ('iße' 'ß' 'i') ('eißen' 'ße' 'ei') ('eißen' 'ßend' 'eißen') + ('eißendeße' 'ße' 'ei') ('abcdefgh' 'cd' 'ab') ('a' '' '') ('a' 'a' '') @@ -239,11 +240,15 @@ ZnCharacterStreamTests >> testUpToAllTwice [ string := 'eißendeße'. stream := string readStreamPortable. - self assert: (stream upToAll: 'ße') equals: 'ei'. - self assert: (stream upToAll: 'ße') equals: 'nde'. - + eightBitStream := self eightBitReadStreamOn: string. utf8Stream := self utf8ReadStreamOn: string. + + self assert: (stream upToAll: 'ße') equals: 'ei'. + self assert: (eightBitStream upToAll: 'ße') equals: 'ei'. self assert: (utf8Stream upToAll: 'ße') equals: 'ei'. + + self assert: (stream upToAll: 'ße') equals: 'nde'. + self assert: (eightBitStream upToAll: 'ße') equals: 'nde'. self assert: (utf8Stream upToAll: 'ße') equals: 'nde'. a := 'ABC', (Character codePoint: 128), (Character codePoint: 255). From 6115ecba2e1b34ab6ec80e5ff2539282f1c6e63a Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Sat, 10 Dec 2022 16:51:29 -0800 Subject: [PATCH 04/11] Issue #79: added new test ZnCharacterStreamTests >> testUtf8EncodingStreamPosition to illustrate what I think should be a bug --- .../ZnCharacterStreamTests.class.st | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st index 527803d..060b78b 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st @@ -25,10 +25,11 @@ ZnCharacterStreamTests >> assertUpTo: array [ ZnCharacterStreamTests >> assertUpToAll: array [ | encodingStream | self assert: (array first readStream upToAll: array second) equals: array third. - encodingStream := self utf8ReadStreamOn: array first . - self assert: (encodingStream upToAll: array second) equals: array third. encodingStream := self eightBitReadStreamOn: array first. self assert: (encodingStream upToAll: array second) equals: array third. + encodingStream := self utf8ReadStreamOn: array first . + self assert: (encodingStream upToAll: array second) equals: array third. + ] { #category : 'testing' } @@ -135,6 +136,18 @@ ZnCharacterStreamTests >> testSimpleUTF8WriteStream [ equals: bytes asByteArray ] +{ #category : 'testing' } +ZnCharacterStreamTests >> testUtf8EncodingStreamPosition [ + | string bytes stream res | + string := 'eißendeße'. + bytes := ZnUTF8Encoder new encodeString: string. + stream := (ZnCharacterReadStream on: bytes readStreamPortable). + res := stream next; next; next. + self assert: res equals: $ß. + self assert: stream halt position equals: 3. + +] + { #category : 'testing' } ZnCharacterStreamTests >> testUpTo [ | char1 char2 string1 string2 | From 69e837d28922bd8d0a4502640e3495323a544ba1 Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Sat, 10 Dec 2022 17:20:22 -0800 Subject: [PATCH 05/11] Issue #79: remove halt from ZnCharacterStreamTests >> testUtf8EncodingStreamPosition --- .../ZnCharacterStreamTests.class.st | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st index 060b78b..7a50bbf 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st @@ -144,7 +144,7 @@ ZnCharacterStreamTests >> testUtf8EncodingStreamPosition [ stream := (ZnCharacterReadStream on: bytes readStreamPortable). res := stream next; next; next. self assert: res equals: $ß. - self assert: stream halt position equals: 3. + self assert: stream position equals: 3. ] From a8bd0a0674a9e9dd5206241419a7a92cdea7b9c2 Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Mon, 12 Dec 2022 09:53:39 -0800 Subject: [PATCH 06/11] Issue #79: add ZnCharacterStreamTests >> test8BitEncodingStreamPosition test for completeness, reorder the samples in ZnCharacterStreamTests >> testUpToAll and fix some regressions that snuck in while working on utf8 variant --- .../ZnBufferedReadStream.class.st | 18 +++-- .../ZnCharacterStreamTests.class.st | 80 ++++++++++++------- 2 files changed, 62 insertions(+), 36 deletions(-) diff --git a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st index adda150..00f16bc 100644 --- a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st @@ -403,13 +403,12 @@ ZnBufferedReadStream >> uint8 [ { #category : 'accessing' } ZnBufferedReadStream >> upTo: value [ "Read upto but not including value and return them as a collection. - If value is not found, return the entire contents of the stream. - This could be further optimzed." + If value is not found, return the entire contents of the stream." ^ self collectionSpecies - streamContents: [ :writeStream | - [ self atEnd or: [ (self peek) = value ] ] whileFalse: [ - writeStream nextPut: self next ] ] + streamContents: [ :writeStream | | ch | + [ self atEnd or: [ (ch := self next) = value ] ] whileFalse: [ + writeStream nextPut: ch ] ] ] { #category : 'accessing' } @@ -421,9 +420,12 @@ aCollection isEmpty ifTrue: [ ^aCollection ]. startPos := self position. "upTo: will stop before aCollection first" x := self upTo: aCollection first. -(self atEnd or: [aCollection size = 1 ]) - ifTrue: [ ^ x ]. -self next. "move past the matching char from upTo:" +self atEnd + ifTrue: [ + aCollection size <= 1 + ifTrue: [ ^ x ]. + self position: startPos. + ^ self upToEnd]. 2 to: aCollection size do: [:i | | y | (y := self peek) = (aCollection at: i) ifTrue: [ self next ] diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st index 7a50bbf..aba8dca 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st @@ -112,6 +112,18 @@ ZnCharacterStreamTests >> testReadStream [ self assert: stream peek isNil ] +{ #category : 'testing' } +ZnCharacterStreamTests >> testReadUpTo [ + | string | + string := '0123456789'. + {(self eightBitReadStreamOn: string). + (self utf8ReadStreamOn: string)} + do: [ :stream | + self assert: (stream upTo: $5) equals: '01234'. + self assert: stream upToEnd equals: '6789'. + self assert: stream atEnd ] +] + { #category : 'testing' } ZnCharacterStreamTests >> testSimpleUTF8ReadStream [ | string bytes stream | @@ -136,18 +148,6 @@ ZnCharacterStreamTests >> testSimpleUTF8WriteStream [ equals: bytes asByteArray ] -{ #category : 'testing' } -ZnCharacterStreamTests >> testUtf8EncodingStreamPosition [ - | string bytes stream res | - string := 'eißendeße'. - bytes := ZnUTF8Encoder new encodeString: string. - stream := (ZnCharacterReadStream on: bytes readStreamPortable). - res := stream next; next; next. - self assert: res equals: $ß. - self assert: stream position equals: 3. - -] - { #category : 'testing' } ZnCharacterStreamTests >> testUpTo [ | char1 char2 string1 string2 | @@ -206,6 +206,22 @@ ZnCharacterStreamTests >> testUpTo [ ZnCharacterStreamTests >> testUpToAll [ #( ('' '' '') + ('a' '' '') + ('a' 'a' '') + ('a' 'b' 'a') + ('ab' '' '') + ('ab' 'a' '') + ('ab' 'b' 'a') + ('ab' 'c' 'ab') + ('ab' 'ab' '') + ('abc' '' '') + ('abc' 'a' '') + ('abc' 'b' 'a') + ('abc' 'c' 'ab') + ('abc' 'd' 'abc') + ('abc' 'ab' '') + ('abc' 'bc' 'a') + ('abc' 'cd' 'abc') ('' 'ß' '') ('' 'ße' '') ('ß' '' '') @@ -228,22 +244,6 @@ ZnCharacterStreamTests >> testUpToAll [ ('eißen' 'ßend' 'eißen') ('eißendeße' 'ße' 'ei') ('abcdefgh' 'cd' 'ab') - ('a' '' '') - ('a' 'a' '') - ('a' 'b' 'a') - ('ab' '' '') - ('ab' 'a' '') - ('ab' 'b' 'a') - ('ab' 'c' 'ab') - ('ab' 'ab' '') - ('abc' '' '') - ('abc' 'a' '') - ('abc' 'b' 'a') - ('abc' 'c' 'ab') - ('abc' 'd' 'abc') - ('abc' 'ab' '') - ('abc' 'bc' 'a') - ('abc' 'cd' 'abc') ) do: [ :array | self assertUpToAll: array ] ] @@ -272,6 +272,30 @@ ZnCharacterStreamTests >> testUpToAllTwice [ ] +{ #category : 'testing' } +ZnCharacterStreamTests >> test8BitEncodingStreamPosition [ + | string bytes stream res | + string := 'eiSendeSe'. + bytes := Zn8BITEncoder new encodeString: string. + stream := (ZnCharacterReadStream on: bytes readStreamPortable). + res := stream next; next; next. + self assert: res equals: $S. + self assert: stream position equals: 3. + +] + +{ #category : 'testing' } +ZnCharacterStreamTests >> testUtf8EncodingStreamPosition [ + | string bytes stream res | + string := 'eißendeße'. + bytes := ZnUTF8Encoder new encodeString: string. + stream := (ZnCharacterReadStream on: bytes readStreamPortable). + res := stream next; next; next. + self assert: res equals: $ß. + self assert: stream position equals: 3. + +] + { #category : 'testing' } ZnCharacterStreamTests >> testUTF8ReadStreamReadInto [ | string bytes stream buffer | From 421a19e8bda5ccf3bb1f0f915181f6e107cf07ae Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Mon, 12 Dec 2022 09:59:33 -0800 Subject: [PATCH 07/11] Issue #75: add the notion of stringClass (Unicode7 or String) to ZnCharacterReadStream and ZnCharacterReadWriteStream, needed to allow end user to choose between Unicode and Legacy stream flavors --- .../AbstractFileReference.class.st | 5 ++- .../ZnCharacterReadStream.class.st | 24 +++++++++++- .../ZnCharacterReadWriteStream.class.st | 37 +++++++++++++++++-- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/src/FileSystem-Core/AbstractFileReference.class.st b/src/FileSystem-Core/AbstractFileReference.class.st index 084c0ec..1f3d9da 100644 --- a/src/FileSystem-Core/AbstractFileReference.class.st +++ b/src/FileSystem-Core/AbstractFileReference.class.st @@ -743,10 +743,13 @@ AbstractFileReference >> readStreamDo: doBlock ifAbsent: absentBlock [ { #category : 'streams' } AbstractFileReference >> readStreamEncoded: anEncoding [ - ^ ZnCharacterReadStream on: self binaryReadStream encoding: anEncoding + stringClass: + (String isInUnicodeComparisonMode + ifTrue: [ Unicode7 ] + ifFalse: [ String ]) ] { #category : 'streams' } diff --git a/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st b/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st index 0871396..9fbebb1 100644 --- a/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st @@ -13,12 +13,24 @@ Part of Zinc HTTP Components. Class { #name : 'ZnCharacterReadStream', #superclass : 'ZnEncodedReadStream', + #instVars : [ + 'stringClass' + ], #category : 'Zinc-Character-Encoding-Core' } +{ #category : 'instance creation' } +ZnCharacterReadStream class >> on: wrappedStream encoding: encoding stringClass: stringClass [ + ^ self new + on: wrappedStream; + encoding: encoding; + stringClass: stringClass; + yourself +] + { #category : 'accessing' } ZnCharacterReadStream >> collectionSpecies [ - ^ String + ^ self stringClass ] { #category : 'accessing' } @@ -91,6 +103,16 @@ ZnCharacterReadStream >> readInto: collection startingAt: offset count: requeste ] +{ #category : 'accessing' } +ZnCharacterReadStream >> stringClass [ + ^ stringClass ifNil: [ stringClass := String ] +] + +{ #category : 'accessing' } +ZnCharacterReadStream >> stringClass: object [ + stringClass := object +] + { #category : 'accessing' } ZnCharacterReadStream >> upToAll: aCollection [ "Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream." diff --git a/src/Zinc-Character-Encoding-Core/ZnCharacterReadWriteStream.class.st b/src/Zinc-Character-Encoding-Core/ZnCharacterReadWriteStream.class.st index a2a44b2..cdc920c 100644 --- a/src/Zinc-Character-Encoding-Core/ZnCharacterReadWriteStream.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnCharacterReadWriteStream.class.st @@ -10,7 +10,8 @@ Class { #superclass : 'Object', #instVars : [ 'readStream', - 'writeStream' + 'writeStream', + 'stringClass' ], #category : 'Zinc-Character-Encoding-Core' } @@ -23,6 +24,11 @@ ZnCharacterReadWriteStream class >> on: wrappedStream encoding: encoding [ yourself ] +{ #category : 'instance creation' } +ZnCharacterReadWriteStream class >> on: wrappedStream encoding: encoding stringClass: stringClass [ + ^ self new on: wrappedStream encoding: encoding stringClass: stringClass +] + { #category : 'accessing' } ZnCharacterReadWriteStream >> atEnd [ @@ -42,7 +48,7 @@ ZnCharacterReadWriteStream >> closed [ { #category : 'accessing' } ZnCharacterReadWriteStream >> collectionSpecies [ - ^ String + ^ self stringClass ] { #category : 'accessing' } @@ -91,7 +97,22 @@ ZnCharacterReadWriteStream >> nextPutAll: aString [ ZnCharacterReadWriteStream >> on: aStream encoding: encoding [ | encoder | encoder := encoding asZnCharacterEncoder. - readStream := ZnCharacterReadStream on: aStream encoding: encoder. + readStream := ZnCharacterReadStream + on: aStream + encoding: encoder + stringClass: self stringClass. + writeStream := ZnCharacterWriteStream on: aStream encoding: encoder +] + +{ #category : 'instance creation' } +ZnCharacterReadWriteStream >> on: aStream encoding: encoding stringClass: aStringClass [ + | encoder | + self stringClass: aStringClass. + encoder := encoding asZnCharacterEncoder. + readStream := ZnCharacterReadStream + on: aStream + encoding: encoder + stringClass: aStringClass. writeStream := ZnCharacterWriteStream on: aStream encoding: encoder ] @@ -143,6 +164,16 @@ ZnCharacterReadWriteStream >> space [ writeStream space ] +{ #category : 'accessing' } +ZnCharacterReadWriteStream >> stringClass [ + ^ stringClass ifNil: [ stringClass := String ] +] + +{ #category : 'accessing' } +ZnCharacterReadWriteStream >> stringClass: object [ + stringClass := object +] + { #category : 'accessing' } ZnCharacterReadWriteStream >> upToAll: aCollection [ "Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream." From 431588741c8435e4a497ec3b7cf35e9440b0c005 Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Mon, 12 Dec 2022 11:30:05 -0800 Subject: [PATCH 08/11] Issue #75: turn ZnCharacterStreamTests into ZnAbstractCharacterStreamTests so we can test ZnCharacterStream with Unicode7 and String stringClass --- ...> ZnAbstractCharacterStreamTests.class.st} | 100 ++++++++++++------ .../ZnLegacyCharacterStreamTests.class.st | 10 ++ .../ZnUnicodeCharacterStreamTests.class.st | 10 ++ 3 files changed, 86 insertions(+), 34 deletions(-) rename src/Zinc-Character-Encoding-Tests/{ZnCharacterStreamTests.class.st => ZnAbstractCharacterStreamTests.class.st} (81%) create mode 100644 src/Zinc-Character-Encoding-Tests/ZnLegacyCharacterStreamTests.class.st create mode 100644 src/Zinc-Character-Encoding-Tests/ZnUnicodeCharacterStreamTests.class.st diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st similarity index 81% rename from src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st rename to src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st index aba8dca..fce5087 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st @@ -6,34 +6,60 @@ Part of FileSystem Test cases for ZnCharacterStream " Class { - #name : 'ZnCharacterStreamTests', + #name : 'ZnAbstractCharacterStreamTests', #superclass : 'TestCase', #category : 'Zinc-Character-Encoding-Tests' } +{ #category : 'Testing' } +ZnAbstractCharacterStreamTests class >> isAbstract [ + "Override to true if a TestCase subclass is Abstract and should not have + TestCase instances built from it" + + ^ self sunitName = #'ZnAbstractCharacterStreamTests' +] + { #category : 'testing' } -ZnCharacterStreamTests >> assertUpTo: array [ - | encodingStream | - encodingStream := array first class == String - ifTrue: [ self eightBitReadStreamOn: array first ] - ifFalse: [ self utf8ReadStreamOn: array first ]. +ZnAbstractCharacterStreamTests >> assertUpTo: theArray [ + | array encodingStream | + array := self convertStringsToStringClass: theArray. self assert: (array first readStream upTo: array second) equals: array third. + encodingStream := self eightBitReadStreamOn: array first. + self assert: (encodingStream upTo: array second) equals: array third. + encodingStream := self utf8ReadStreamOn: array first. self assert: (encodingStream upTo: array second) equals: array third ] { #category : 'testing' } -ZnCharacterStreamTests >> assertUpToAll: array [ - | encodingStream | +ZnAbstractCharacterStreamTests >> assertUpToAll: theArray [ + | array encodingStream | + array := self convertStringsToStringClass: theArray. self assert: (array first readStream upToAll: array second) equals: array third. encodingStream := self eightBitReadStreamOn: array first. self assert: (encodingStream upToAll: array second) equals: array third. encodingStream := self utf8ReadStreamOn: array first . self assert: (encodingStream upToAll: array second) equals: array third. +] +{ #category : 'private' } +ZnAbstractCharacterStreamTests >> convertStringsToStringClass: theArray [ + | literalSringClass | + literalSringClass := '' class. + ^ theArray + collect: [ :str | + str class == Character + ifTrue: [ str ] + ifFalse: [ + literalSringClass == self stringClass + ifTrue: [ str ] + ifFalse: [ + self stringClass == Unicode7 + ifTrue: [ str decodeFromUTF8 ] + ifFalse: [ str asString ] ] ] ] ] { #category : 'testing' } -ZnCharacterStreamTests >> eightBitReadStreamOn: string [ +ZnAbstractCharacterStreamTests >> eightBitReadStreamOn: string [ | bytes stream | bytes := Zn8BITEncoder new encodeString: string. stream := ZnBufferedReadStream on: (ZnCharacterReadStream @@ -43,8 +69,25 @@ ZnCharacterStreamTests >> eightBitReadStreamOn: string [ ^stream ] +{ #category : 'private' } +ZnAbstractCharacterStreamTests >> stringClass [ + ^ self subclassResponsibility +] + { #category : 'testing' } -ZnCharacterStreamTests >> testNextLine [ +ZnAbstractCharacterStreamTests >> test8BitEncodingStreamPosition [ + | string bytes stream res | + string := 'eiSendeSe'. + bytes := Zn8BITEncoder new encodeString: string. + stream := (ZnCharacterReadStream on: bytes readStreamPortable). + res := stream next; next; next. + self assert: res equals: $S. + self assert: stream position equals: 3. + +] + +{ #category : 'testing' } +ZnAbstractCharacterStreamTests >> testNextLine [ | stream | stream := ZnCharacterReadStream on: 'abc' asByteArray readStreamPortable. self assert: stream nextLine equals: 'abc'. @@ -78,7 +121,7 @@ ZnCharacterStreamTests >> testNextLine [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testPeek [ +ZnAbstractCharacterStreamTests >> testPeek [ | string bytes readStream | string := 'élève en Français'. bytes := ZnUTF8Encoder new encodeString: string. @@ -92,7 +135,7 @@ ZnCharacterStreamTests >> testPeek [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testReadStream [ +ZnAbstractCharacterStreamTests >> testReadStream [ | stream | stream := ZnBufferedReadStream on: (ZnCharacterReadStream on: 'ABC' asByteArray readStreamPortable). stream sizeBuffer: 3. @@ -113,7 +156,7 @@ ZnCharacterStreamTests >> testReadStream [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testReadUpTo [ +ZnAbstractCharacterStreamTests >> testReadUpTo [ | string | string := '0123456789'. {(self eightBitReadStreamOn: string). @@ -125,7 +168,7 @@ ZnCharacterStreamTests >> testReadUpTo [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testSimpleUTF8ReadStream [ +ZnAbstractCharacterStreamTests >> testSimpleUTF8ReadStream [ | string bytes stream | string := 'élève en Français'. bytes := ZnUTF8Encoder new encodeString: string. @@ -137,7 +180,7 @@ ZnCharacterStreamTests >> testSimpleUTF8ReadStream [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testSimpleUTF8WriteStream [ +ZnAbstractCharacterStreamTests >> testSimpleUTF8WriteStream [ | string bytes stream | string := 'élève en Français'. bytes := ZnUTF8Encoder new encodeString: string. @@ -149,7 +192,7 @@ ZnCharacterStreamTests >> testSimpleUTF8WriteStream [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testUpTo [ +ZnAbstractCharacterStreamTests >> testUpTo [ | char1 char2 string1 string2 | char1 := (Character codePoint: 257). string1 := '', char1. "DoubleByteString" @@ -203,7 +246,7 @@ ZnCharacterStreamTests >> testUpTo [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testUpToAll [ +ZnAbstractCharacterStreamTests >> testUpToAll [ #( ('' '' '') ('a' '' '') @@ -248,7 +291,7 @@ ZnCharacterStreamTests >> testUpToAll [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testUpToAllTwice [ +ZnAbstractCharacterStreamTests >> testUpToAllTwice [ | string utf8Stream stream eightBitStream a b | string := 'eißendeße'. @@ -273,19 +316,7 @@ ZnCharacterStreamTests >> testUpToAllTwice [ ] { #category : 'testing' } -ZnCharacterStreamTests >> test8BitEncodingStreamPosition [ - | string bytes stream res | - string := 'eiSendeSe'. - bytes := Zn8BITEncoder new encodeString: string. - stream := (ZnCharacterReadStream on: bytes readStreamPortable). - res := stream next; next; next. - self assert: res equals: $S. - self assert: stream position equals: 3. - -] - -{ #category : 'testing' } -ZnCharacterStreamTests >> testUtf8EncodingStreamPosition [ +ZnAbstractCharacterStreamTests >> testUtf8EncodingStreamPosition [ | string bytes stream res | string := 'eißendeße'. bytes := ZnUTF8Encoder new encodeString: string. @@ -297,7 +328,7 @@ ZnCharacterStreamTests >> testUtf8EncodingStreamPosition [ ] { #category : 'testing' } -ZnCharacterStreamTests >> testUTF8ReadStreamReadInto [ +ZnAbstractCharacterStreamTests >> testUTF8ReadStreamReadInto [ | string bytes stream buffer | string := 'élève en Français'. bytes := ZnUTF8Encoder new encodeString: string. @@ -318,12 +349,13 @@ ZnCharacterStreamTests >> testUTF8ReadStreamReadInto [ ] { #category : 'testing' } -ZnCharacterStreamTests >> utf8ReadStreamOn: string [ +ZnAbstractCharacterStreamTests >> utf8ReadStreamOn: string [ | bytes stream | bytes := ZnUTF8Encoder new encodeString: string. stream := ZnBufferedReadStream on: (ZnCharacterReadStream on: bytes readStreamPortable - encoding: #utf8). + encoding: #utf8 + stringClass: self stringClass). stream sizeBuffer: string size. ^stream ] diff --git a/src/Zinc-Character-Encoding-Tests/ZnLegacyCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnLegacyCharacterStreamTests.class.st new file mode 100644 index 0000000..01a1c22 --- /dev/null +++ b/src/Zinc-Character-Encoding-Tests/ZnLegacyCharacterStreamTests.class.st @@ -0,0 +1,10 @@ +Class { + #name : 'ZnLegacyCharacterStreamTests', + #superclass : 'ZnAbstractCharacterStreamTests', + #category : 'Zinc-Character-Encoding-Tests' +} + +{ #category : 'private' } +ZnLegacyCharacterStreamTests >> stringClass [ + ^ String +] diff --git a/src/Zinc-Character-Encoding-Tests/ZnUnicodeCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnUnicodeCharacterStreamTests.class.st new file mode 100644 index 0000000..77e8467 --- /dev/null +++ b/src/Zinc-Character-Encoding-Tests/ZnUnicodeCharacterStreamTests.class.st @@ -0,0 +1,10 @@ +Class { + #name : 'ZnUnicodeCharacterStreamTests', + #superclass : 'ZnAbstractCharacterStreamTests', + #category : 'Zinc-Character-Encoding-Tests' +} + +{ #category : 'private' } +ZnUnicodeCharacterStreamTests >> stringClass [ + ^ Unicode7 +] From 5096874b6b16604c5cd51d577194cc150e298d95 Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Tue, 13 Dec 2022 13:14:25 -0800 Subject: [PATCH 09/11] Issue #75: another round of changes to resolve the string comparison issues; testUpTo and testUpToAll tests are still not passing --- .../Zn8BITEncoder.class.st | 11 +-- .../ZnBufferedReadStream.class.st | 2 +- .../ZnCharacterEncoder.class.st | 32 ++++++- .../ZnCharacterReadStream.class.st | 5 ++ .../ZnUTF8Encoder.class.st | 9 +- .../Zn8BITCharacterEncoderTests.class.st | 5 +- ... ZnAbstractCharacterEncoderTests.class.st} | 70 +++++++-------- .../ZnAbstractCharacterStreamTests.class.st | 85 +++++++++++-------- .../ZnUTF8CharacterEncoderTests.class.st | 10 +-- 9 files changed, 136 insertions(+), 93 deletions(-) rename src/Zinc-Character-Encoding-Tests/{ZnCharacterEncoderTests.class.st => ZnAbstractCharacterEncoderTests.class.st} (83%) diff --git a/src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st b/src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st index bf4c281..a972d90 100644 --- a/src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st +++ b/src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st @@ -39,10 +39,11 @@ Zn8BITEncoder class >> knownEncodingIdentifiers [ ] { #category : 'instance creation' } -Zn8BITEncoder class >> newForEncoding: string [ - "No further parametrization needed" - - ^ self new +Zn8BITEncoder class >> newForEncoding: string stringClass: stringClass [ + "Return a new character encoder object for an encoding described by string. + Search for a subclass that handles it and delegate (subclassResponsibility)." + + ^ self new stringClass: stringClass ] { #category : 'converting' } @@ -65,7 +66,7 @@ Zn8BITEncoder >> decodeAsCodePoints: bytes [ Zn8BITEncoder >> decodeBytes: bytes [ "Decode bytes and return the resulting string" - ^ String withBytes: bytes + ^ self stringClass withBytes: bytes ] { #category : 'converting' } diff --git a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st index 00f16bc..863ab5d 100644 --- a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st @@ -68,7 +68,7 @@ ZnBufferedReadStream >> closed [ ZnBufferedReadStream >> collectionSpecies [ ^ stream isBinary ifTrue: [ ByteArray ] - ifFalse: [ String ] + ifFalse: [ stream stringClass ] ] { #category : 'accessing' } diff --git a/src/Zinc-Character-Encoding-Core/ZnCharacterEncoder.class.st b/src/Zinc-Character-Encoding-Core/ZnCharacterEncoder.class.st index 8d37c50..8ac5990 100644 --- a/src/Zinc-Character-Encoding-Core/ZnCharacterEncoder.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnCharacterEncoder.class.st @@ -41,6 +41,9 @@ Part of Zinc HTTP Components. Class { #name : 'ZnCharacterEncoder', #superclass : 'Object', + #instVars : [ + 'stringClass' + ], #category : 'Zinc-Character-Encoding-Core' } @@ -70,12 +73,25 @@ ZnCharacterEncoder class >> knownEncodingIdentifiers [ ZnCharacterEncoder class >> newForEncoding: string [ "Return a new character encoder object for an encoding described by string. Search for a subclass that handles it and delegate (subclassResponsibility)." + + ^ self + newForEncoding: string + stringClass: + (String isInUnicodeComparisonMode + ifTrue: [ Unicode7 ] + ifFalse: [ String ]) +] + +{ #category : 'instance creation' } +ZnCharacterEncoder class >> newForEncoding: string stringClass: stringClass [ + "Return a new character encoder object for an encoding described by string. + Search for a subclass that handles it and delegate (subclassResponsibility)." | concreteSubclass | concreteSubclass := self allSubclasses detect: [ :each | each handlesEncoding: string ] ifNone: [ ^ self error: 'The ', string printString, ' is not currently supported.' ]. - ^ concreteSubclass newForEncoding: string + ^ concreteSubclass newForEncoding: string stringClass: stringClass ] { #category : 'converting' } @@ -157,3 +173,17 @@ ZnCharacterEncoder >> nextPut: character toStream: stream [ self nextPutCodePoint: character asInteger toStream: stream ] + +{ #category : 'accessing' } +ZnCharacterEncoder >> stringClass [ + ^ stringClass + ifNil: [ + stringClass := String isInUnicodeComparisonMode + ifTrue: [ Unicode7 ] + ifFalse: [ String ] ] +] + +{ #category : 'accessing' } +ZnCharacterEncoder >> stringClass: object [ + stringClass := object +] diff --git a/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st b/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st index 9fbebb1..fb30f4b 100644 --- a/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st @@ -33,6 +33,11 @@ ZnCharacterReadStream >> collectionSpecies [ ^ self stringClass ] +{ #category : 'accessing' } +ZnCharacterReadStream >> encoder [ + ^ encoder ifNil: [ encoder := super encoder stringClass: self stringClass ] +] + { #category : 'accessing' } ZnCharacterReadStream >> match: subCollection [ "Set the access position of the receiver to be past the next occurrence of the subCollection. Answer whether subCollection is found. No wildcards, and case does matter." diff --git a/src/Zinc-Character-Encoding-Core/ZnUTF8Encoder.class.st b/src/Zinc-Character-Encoding-Core/ZnUTF8Encoder.class.st index 76282bc..19bf04f 100644 --- a/src/Zinc-Character-Encoding-Core/ZnUTF8Encoder.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnUTF8Encoder.class.st @@ -42,10 +42,11 @@ ZnUTF8Encoder class >> knownEncodingIdentifiers [ ] { #category : 'instance creation' } -ZnUTF8Encoder class >> newForEncoding: string [ - "No further parametrization needed" - - ^ self new +ZnUTF8Encoder class >> newForEncoding: string stringClass: stringClass [ + "Return a new character encoder object for an encoding described by string. + Search for a subclass that handles it and delegate (subclassResponsibility)." + + ^ self new stringClass: stringClass ] { #category : 'converting' } diff --git a/src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st b/src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st index a4616a9..264eda9 100644 --- a/src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/Zn8BITCharacterEncoderTests.class.st @@ -1,13 +1,12 @@ Class { #name : 'Zn8BITCharacterEncoderTests', - #superclass : 'ZnCharacterEncoderTests', + #superclass : 'ZnAbstractCharacterEncoderTests', #category : 'Zinc-Character-Encoding-Tests' } { #category : 'private' } Zn8BITCharacterEncoderTests >> _encoder [ - - ^ Zn8BITEncoder new + ^ Zn8BITEncoder new stringClass: self _sourceClass ] { #category : 'private' } diff --git a/src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterEncoderTests.class.st similarity index 83% rename from src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st rename to src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterEncoderTests.class.st index ad1e215..a19581c 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterEncoderTests.class.st @@ -6,23 +6,31 @@ Part of FileSystem Test cases for character encoding " Class { - #name : 'ZnCharacterEncoderTests', + #name : 'ZnAbstractCharacterEncoderTests', #superclass : 'TestCase', #category : 'Zinc-Character-Encoding-Tests' } { #category : 'accessing' } -ZnCharacterEncoderTests class >> asciiCharacterSource [ +ZnAbstractCharacterEncoderTests class >> asciiCharacterSource [ ^ ($A to: $Z), ($a to: $z), ($0 to: $9), '.-_/*+=|,;?!$&<>^%#', ' ' ] +{ #category : 'Testing' } +ZnAbstractCharacterEncoderTests class >> isAbstract [ + "Override to true if a TestCase subclass is Abstract and should not have + TestCase instances built from it" + + ^ self sunitName = #'ZnAbstractCharacterEncoderTests' +] + { #category : 'accessing' } -ZnCharacterEncoderTests class >> latin1CharacterSource [ +ZnAbstractCharacterEncoderTests class >> latin1CharacterSource [ ^ ($A to: $Z), ($a to: $z), ($0 to: $9), '.-_/*+=|,;?!$&<>^%#', ' ', 'éèçüäßñ' ] { #category : 'accessing' } -ZnCharacterEncoderTests class >> stringOfSize: size fromSource: source [ +ZnAbstractCharacterEncoderTests class >> stringOfSize: size fromSource: source [ "self stringOfSize: 1024 fromSource: self unicodeCharacterSource" ^ String new: size streamContents: [ :out | @@ -30,30 +38,24 @@ ZnCharacterEncoderTests class >> stringOfSize: size fromSource: source [ ] { #category : 'accessing' } -ZnCharacterEncoderTests class >> unicodeCharacterSource [ +ZnAbstractCharacterEncoderTests class >> unicodeCharacterSource [ ^ ($A to: $Z), ($a to: $z), ($0 to: $9), '.-_/*+=|,;?!$&<>^%#', ' ', 'éèçüäßñα', '€∏' ] -{ #category : 'Testing' } -ZnCharacterEncoderTests class >> isAbstract [ - "Override to true if a TestCase subclass is Abstract and should not have - TestCase instances built from it" - - ^ self sunitName = #'ZnCharacterEncoderTests' -] - { #category : 'private' } -ZnCharacterEncoderTests >> _encoderId [ +ZnAbstractCharacterEncoderTests >> _encoderId [ self subclassResponsibility ] { #category : 'private' } -ZnCharacterEncoderTests >> _sourceClass [ - self subclassResponsibility +ZnAbstractCharacterEncoderTests >> _sourceClass [ + ^ String isInUnicodeComparisonMode + ifTrue: [ Unicode7 ] + ifFalse: [ String ] ] { #category : 'public' } -ZnCharacterEncoderTests >> assert: anObject unicodeEquals: otherObj [ +ZnAbstractCharacterEncoderTests >> assert: anObject unicodeEquals: otherObj [ "allow comparison of unicode and legacy strings" self @@ -62,7 +64,7 @@ ZnCharacterEncoderTests >> assert: anObject unicodeEquals: otherObj [ ] { #category : 'public' } -ZnCharacterEncoderTests >> assertCharacterCollection: anObject equals: otherObj [ +ZnAbstractCharacterEncoderTests >> assertCharacterCollection: anObject equals: otherObj [ "allow comparison between unitcode and legacy strings in legacy mode" self @@ -71,12 +73,12 @@ ZnCharacterEncoderTests >> assertCharacterCollection: anObject equals: otherObj ] { #category : 'private' } -ZnCharacterEncoderTests >> decodeBytes: bytes with: encoder [ +ZnAbstractCharacterEncoderTests >> decodeBytes: bytes with: encoder [ self subclassResponsibility ] { #category : 'private' } -ZnCharacterEncoderTests >> encodeString: string with: encoder [ +ZnAbstractCharacterEncoderTests >> encodeString: string with: encoder [ " ^ ByteArray streamContents: [ :stream | stream nextPutAll: string encodeAsUTF8 ] @@ -88,7 +90,7 @@ ZnCharacterEncoderTests >> encodeString: string with: encoder [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testCodePointEncodingDecoding [ +ZnAbstractCharacterEncoderTests >> testCodePointEncodingDecoding [ | encoder input output | input := {}. 'Düsseldorf Königsallee' do: [:each | input add: each codePoint ]. @@ -104,7 +106,7 @@ ZnCharacterEncoderTests >> testCodePointEncodingDecoding [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testConvencienceMethods [ +ZnAbstractCharacterEncoderTests >> testConvencienceMethods [ | encoder string | encoder := ZnUTF8Encoder new. string := 'élève en Français'. @@ -116,7 +118,7 @@ ZnCharacterEncoderTests >> testConvencienceMethods [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testKnownEncodingIdentifiers [ +ZnAbstractCharacterEncoderTests >> testKnownEncodingIdentifiers [ | all minimal asciiString | all := ZnCharacterEncoder knownEncodingIdentifiers asSet. minimal := #(utf8) asSet. @@ -134,7 +136,7 @@ ZnCharacterEncoderTests >> testKnownEncodingIdentifiers [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testNextPutAllStartingAtToStream [ +ZnAbstractCharacterEncoderTests >> testNextPutAllStartingAtToStream [ | encoder | encoder := ZnUTF8Encoder new. #( 'ccc' 'ççç' 'c' 'ç' 'çc' 'cç' 'çç' ) do: [ :each | @@ -152,7 +154,7 @@ ZnCharacterEncoderTests >> testNextPutAllStartingAtToStream [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testReadIntoStartingAtCountFromStream [ +ZnAbstractCharacterEncoderTests >> testReadIntoStartingAtCountFromStream [ | encoder | encoder := ZnUTF8Encoder new. #( 'ccc' 'ççç' 'c' 'ç' 'çc' 'cç' 'çç' ) do: [ :each | @@ -172,7 +174,7 @@ ZnCharacterEncoderTests >> testReadIntoStartingAtCountFromStream [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testReadIntoStartingAtCountFromStreamAtEnd [ +ZnAbstractCharacterEncoderTests >> testReadIntoStartingAtCountFromStreamAtEnd [ | input encoder bytes readStream string read | encoder := ZnUTF8Encoder new. input := 'élève'. @@ -185,7 +187,7 @@ ZnCharacterEncoderTests >> testReadIntoStartingAtCountFromStreamAtEnd [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testReadIntoStartingAtCountFromStreamWithOffset [ +ZnAbstractCharacterEncoderTests >> testReadIntoStartingAtCountFromStreamWithOffset [ | input encoder bytes readStream string read | encoder := ZnUTF8Encoder new. input := '_élève_'. @@ -207,7 +209,7 @@ ZnCharacterEncoderTests >> testReadIntoStartingAtCountFromStreamWithOffset [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testStringEncoding [ +ZnAbstractCharacterEncoderTests >> testStringEncoding [ | encoder string | encoder := ZnUTF8Encoder new. string := 'élève en Français'. @@ -218,7 +220,7 @@ ZnCharacterEncoderTests >> testStringEncoding [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testUTF8Back [ +ZnAbstractCharacterEncoderTests >> testUTF8Back [ | encoder stream | encoder := ZnUTF8Encoder new. stream := (encoder encodeString: 'Les élèves Françaises') readStream. @@ -233,7 +235,7 @@ ZnCharacterEncoderTests >> testUTF8Back [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testUTF8Boundaries [ +ZnAbstractCharacterEncoderTests >> testUTF8Boundaries [ "Test encoding and decoding of the characters at the boundaries between 1, 2, 3, and 4 multi-byte sequences. Values taken from http://en.wikipedia.org/wiki/Utf8#Description with the new RFC 3629 limit" @@ -252,7 +254,7 @@ ZnCharacterEncoderTests >> testUTF8Boundaries [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testUTF8Encoder [ +ZnAbstractCharacterEncoderTests >> testUTF8Encoder [ "The examples are taken from http://en.wikipedia.org/wiki/UTF-8#Description" | encoder inputBytes outputBytes inputString outputString | @@ -266,7 +268,7 @@ ZnCharacterEncoderTests >> testUTF8Encoder [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testUTF8EncoderAuto [ +ZnAbstractCharacterEncoderTests >> testUTF8EncoderAuto [ | encoder inputString bytes outputString | encoder := ZnUTF8Encoder new. inputString := String withAll: ((1 to: 3072) collect: [ :each | Character codePoint: each ]). @@ -276,7 +278,7 @@ ZnCharacterEncoderTests >> testUTF8EncoderAuto [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testUTF8EncoderByteCount [ +ZnAbstractCharacterEncoderTests >> testUTF8EncoderByteCount [ | encoder | encoder := ZnUTF8Encoder new. self assert: (encoder encodedByteCountFor: $$) = 1. @@ -286,7 +288,7 @@ ZnCharacterEncoderTests >> testUTF8EncoderByteCount [ ] { #category : 'testing' } -ZnCharacterEncoderTests >> testUTF8EncoderWide [ +ZnAbstractCharacterEncoderTests >> testUTF8EncoderWide [ | encoder | encoder := ZnUTF8Encoder new. { 'abc'. 'élève en Français'. 'Pra-ská' copy at: 4 put: (Character codePoint: 382); yourself. '' } diff --git a/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st index fce5087..954976f 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st @@ -43,30 +43,37 @@ ZnAbstractCharacterStreamTests >> assertUpToAll: theArray [ { #category : 'private' } ZnAbstractCharacterStreamTests >> convertStringsToStringClass: theArray [ - | literalSringClass | - literalSringClass := '' class. ^ theArray collect: [ :str | str class == Character ifTrue: [ str ] ifFalse: [ - literalSringClass == self stringClass - ifTrue: [ str ] - ifFalse: [ - self stringClass == Unicode7 - ifTrue: [ str decodeFromUTF8 ] - ifFalse: [ str asString ] ] ] ] + self stringClass == Unicode7 + ifTrue: [ (str encodeWith: #utf8) decodeFromUTF8 ] + ifFalse: [ str asString ] ] ] +] + +{ #category : 'private' } +ZnAbstractCharacterStreamTests >> convertStringToStringClass: aString [ + ^ self stringClass == Unicode7 + ifTrue: [ (aString encodeWith: #utf8) decodeFromUTF8 ] + ifFalse: [ aString asString ] ] { #category : 'testing' } ZnAbstractCharacterStreamTests >> eightBitReadStreamOn: string [ | bytes stream | - bytes := Zn8BITEncoder new encodeString: string. - stream := ZnBufferedReadStream on: (ZnCharacterReadStream - on: bytes readStreamPortable - encoding: #'8bit'). + bytes := Zn8BITEncoder new + stringClass: self stringClass; + encodeString: string. + stream := ZnBufferedReadStream + on: + (ZnCharacterReadStream + on: bytes readStreamPortable + encoding: #'8bit' + stringClass: self stringClass). stream sizeBuffer: string size. - ^stream + ^ stream ] { #category : 'private' } @@ -158,12 +165,12 @@ ZnAbstractCharacterStreamTests >> testReadStream [ { #category : 'testing' } ZnAbstractCharacterStreamTests >> testReadUpTo [ | string | - string := '0123456789'. + string := self convertStringToStringClass: '0123456789'. {(self eightBitReadStreamOn: string). (self utf8ReadStreamOn: string)} do: [ :stream | - self assert: (stream upTo: $5) equals: '01234'. - self assert: stream upToEnd equals: '6789'. + self assert: (stream upTo: $5) equals: (self convertStringToStringClass: '01234'). + self assert: stream upToEnd equals: (self convertStringToStringClass: '6789'). self assert: stream atEnd ] ] @@ -194,10 +201,10 @@ ZnAbstractCharacterStreamTests >> testSimpleUTF8WriteStream [ { #category : 'testing' } ZnAbstractCharacterStreamTests >> testUpTo [ | char1 char2 string1 string2 | - char1 := (Character codePoint: 257). - string1 := '', char1. "DoubleByteString" + char1 := (Character codePoint: 257). + string1 := self convertStringToStringClass: '', char1. "DoubleByteString" char2 := (Character codePoint:16rffff1). - string2 := '', char2. "QuadByteString" + string2 := self convertStringToStringClass: '', char2. "QuadByteString" { "extended ASCII String" {'' . $ß . '' } . @@ -294,24 +301,24 @@ ZnAbstractCharacterStreamTests >> testUpToAll [ ZnAbstractCharacterStreamTests >> testUpToAllTwice [ | string utf8Stream stream eightBitStream a b | - string := 'eißendeße'. + string := self convertStringToStringClass: 'eißendeße'. stream := string readStreamPortable. eightBitStream := self eightBitReadStreamOn: string. utf8Stream := self utf8ReadStreamOn: string. - self assert: (stream upToAll: 'ße') equals: 'ei'. - self assert: (eightBitStream upToAll: 'ße') equals: 'ei'. - self assert: (utf8Stream upToAll: 'ße') equals: 'ei'. + self assert: (stream upToAll: (self convertStringToStringClass: 'ße')) equals: (self convertStringToStringClass: 'ei'). + self assert: (eightBitStream upToAll: (self convertStringToStringClass: 'ße')) equals: (self convertStringToStringClass: 'ei'). + self assert: (utf8Stream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'ei'). - self assert: (stream upToAll: 'ße') equals: 'nde'. - self assert: (eightBitStream upToAll: 'ße') equals: 'nde'. - self assert: (utf8Stream upToAll: 'ße') equals: 'nde'. + self assert: (stream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'nde'). + self assert: (eightBitStream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'nde'). + self assert: (utf8Stream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'nde'). - a := 'ABC', (Character codePoint: 128), (Character codePoint: 255). - b := '', (Character codePoint: 150), (Character codePoint: 192), (Character codePoint: 224). - eightBitStream := self eightBitReadStreamOn: ( a, '123', b, '123'). - self assert: (eightBitStream upToAll: '123') equals: a. - self assert: (eightBitStream upToAll: '123') equals: b + a := (self convertStringToStringClass: 'ABC'), (Character codePoint: 128), (Character codePoint: 255). + b := (self convertStringToStringClass: ''), (Character codePoint: 150), (Character codePoint: 192), (Character codePoint: 224). + eightBitStream := self eightBitReadStreamOn: { a. (self convertStringToStringClass: '123'). b. (self convertStringToStringClass: '123')}. + self assert: (eightBitStream upToAll: (self convertStringToStringClass: '123')) equals: a. + self assert: (eightBitStream upToAll: (self convertStringToStringClass: '123')) equals: b ] @@ -351,11 +358,15 @@ ZnAbstractCharacterStreamTests >> testUTF8ReadStreamReadInto [ { #category : 'testing' } ZnAbstractCharacterStreamTests >> utf8ReadStreamOn: string [ | bytes stream | - bytes := ZnUTF8Encoder new encodeString: string. - stream := ZnBufferedReadStream on: (ZnCharacterReadStream - on: bytes readStreamPortable - encoding: #utf8 - stringClass: self stringClass). + bytes := ZnUTF8Encoder new + stringClass: self stringClass; + encodeString: string. + stream := ZnBufferedReadStream + on: + (ZnCharacterReadStream + on: bytes readStreamPortable + encoding: #'utf8' + stringClass: self stringClass). stream sizeBuffer: string size. - ^stream + ^ stream ] diff --git a/src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st index 863b32f..ad54369 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnUTF8CharacterEncoderTests.class.st @@ -1,13 +1,12 @@ Class { #name : 'ZnUTF8CharacterEncoderTests', - #superclass : 'ZnCharacterEncoderTests', + #superclass : 'ZnAbstractCharacterEncoderTests', #category : 'Zinc-Character-Encoding-Tests' } { #category : 'private' } ZnUTF8CharacterEncoderTests >> _encoder [ - - ^ ZnUTF8Encoder new + ^ ZnUTF8Encoder new stringClass: self _sourceClass ] { #category : 'private' } @@ -15,11 +14,6 @@ ZnUTF8CharacterEncoderTests >> _encoderId [ ^ #utf8 ] -{ #category : 'private' } -ZnUTF8CharacterEncoderTests >> _sourceClass [ - ^ Unicode7 -] - { #category : 'private' } ZnUTF8CharacterEncoderTests >> decodeBytes: bytes with: encoder [ | input | From f88a0a78c80356f24e6b890c4a76f9d922a782be Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Tue, 13 Dec 2022 15:36:28 -0800 Subject: [PATCH 10/11] Issue #75: add tests for the full range of string sizes --- .../ZnAbstractCharacterStreamTests.class.st | 72 +++++++++++++++---- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st index 954976f..a86dcc6 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st @@ -24,8 +24,10 @@ ZnAbstractCharacterStreamTests >> assertUpTo: theArray [ | array encodingStream | array := self convertStringsToStringClass: theArray. self assert: (array first readStream upTo: array second) equals: array third. - encodingStream := self eightBitReadStreamOn: array first. - self assert: (encodingStream upTo: array second) equals: array third. + ({String . Unicode7} includes: (array at: 1) class) + ifTrue: [ + encodingStream := self eightBitReadStreamOn: array first. + self assert: (encodingStream upTo: array second) equals: array third ]. encodingStream := self utf8ReadStreamOn: array first. self assert: (encodingStream upTo: array second) equals: array third ] @@ -35,10 +37,13 @@ ZnAbstractCharacterStreamTests >> assertUpToAll: theArray [ | array encodingStream | array := self convertStringsToStringClass: theArray. self assert: (array first readStream upToAll: array second) equals: array third. - encodingStream := self eightBitReadStreamOn: array first. - self assert: (encodingStream upToAll: array second) equals: array third. - encodingStream := self utf8ReadStreamOn: array first . - self assert: (encodingStream upToAll: array second) equals: array third. + ({String. + Unicode7} includes: (array at: 1) class) + ifTrue: [ + encodingStream := self eightBitReadStreamOn: array first. + self assert: (encodingStream upToAll: array second) equals: array third ]. + encodingStream := self utf8ReadStreamOn: array first. + self assert: (encodingStream upToAll: array second) equals: array third ] { #category : 'private' } @@ -82,15 +87,20 @@ ZnAbstractCharacterStreamTests >> stringClass [ ] { #category : 'testing' } -ZnAbstractCharacterStreamTests >> test8BitEncodingStreamPosition [ - | string bytes stream res | - string := 'eiSendeSe'. - bytes := Zn8BITEncoder new encodeString: string. - stream := (ZnCharacterReadStream on: bytes readStreamPortable). +ZnAbstractCharacterStreamTests >> test8BitEncodingStreamPositionForString [ + | string char bytes stream res | + string := 'eißendeße'. + char := $ß. + bytes := Zn8BITEncoder new + stringClass: self stringClass; + encodeString: string. + stream := ZnCharacterReadStream + on: bytes readStreamPortable + encoding: #'8bit' + stringClass: self stringClass. res := stream next; next; next. - self assert: res equals: $S. + self assert: res equals: char. self assert: stream position equals: 3. - ] { #category : 'testing' } @@ -323,7 +333,41 @@ ZnAbstractCharacterStreamTests >> testUpToAllTwice [ ] { #category : 'testing' } -ZnAbstractCharacterStreamTests >> testUtf8EncodingStreamPosition [ +ZnAbstractCharacterStreamTests >> testUtf8EncodingStreamPositionForDoubleByteString [ + | string char bytes stream res | + char := (Character codePoint: 257). + string := self convertStringToStringClass: ('ei', char). + bytes := ZnUTF8Encoder new + stringClass: self stringClass; + encodeString: string. + stream := ZnCharacterReadStream + on: bytes readStreamPortable + encoding: #'utf8' + stringClass: self stringClass. + res := stream next; next; next. + self assert: res equals: char. + self assert: stream position equals: 3. +] + +{ #category : 'testing' } +ZnAbstractCharacterStreamTests >> testUtf8EncodingStreamPositionForQuadByteString [ + | string char bytes stream res | + char := (Character codePoint:16rffff1). + string := self convertStringToStringClass: ('ei', char). + bytes := ZnUTF8Encoder new + stringClass: self stringClass; + encodeString: string. + stream := ZnCharacterReadStream + on: bytes readStreamPortable + encoding: #'utf8' + stringClass: self stringClass. + res := stream next; next; next. + self assert: res equals: char. + self assert: stream position equals: 3. +] + +{ #category : 'testing' } +ZnAbstractCharacterStreamTests >> testUtf8EncodingStreamPositionForString [ | string bytes stream res | string := 'eißendeße'. bytes := ZnUTF8Encoder new encodeString: string. From 9e50db4bb494c72b633c22d23c0598bf4bfbd987 Mon Sep 17 00:00:00 2001 From: Dale Henrichs Date: Wed, 14 Dec 2022 09:16:12 -0800 Subject: [PATCH 11/11] Issue #75: the current set of failing tests (for ZnUnicodeCharacterStreamTests and ZnLegacyCharacterStreamTests: testUpToAll, testUpToAllTwice and testUtf8EncodingStreamPositionFor...) are all apparently due to Issue #80 --- .../ZnBufferedReadStream.class.st | 8 +++++++- .../ZnAbstractCharacterStreamTests.class.st | 10 ++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st index 863ab5d..7732384 100644 --- a/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st +++ b/src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st @@ -68,7 +68,13 @@ ZnBufferedReadStream >> closed [ ZnBufferedReadStream >> collectionSpecies [ ^ stream isBinary ifTrue: [ ByteArray ] - ifFalse: [ stream stringClass ] + ifFalse: [ + (stream respondsTo: #'stringClass') + ifTrue: [ stream stringClass ] + ifFalse: [ + String isInUnicodeComparisonMode + ifTrue: [ Unicode7 ] + ifFalse: [ String ] ] ] ] { #category : 'accessing' } diff --git a/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st index a86dcc6..5ff7b15 100644 --- a/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st +++ b/src/Zinc-Character-Encoding-Tests/ZnAbstractCharacterStreamTests.class.st @@ -309,19 +309,21 @@ ZnAbstractCharacterStreamTests >> testUpToAll [ { #category : 'testing' } ZnAbstractCharacterStreamTests >> testUpToAllTwice [ - | string utf8Stream stream eightBitStream a b | + | string utf8Stream stream eightBitStream a b use8BitStream | string := self convertStringToStringClass: 'eißendeße'. + use8BitStream := {String . Unicode7} includes: string class. + stream := string readStreamPortable. - eightBitStream := self eightBitReadStreamOn: string. + use8BitStream ifTrue: [ eightBitStream := self eightBitReadStreamOn: string ]. utf8Stream := self utf8ReadStreamOn: string. self assert: (stream upToAll: (self convertStringToStringClass: 'ße')) equals: (self convertStringToStringClass: 'ei'). - self assert: (eightBitStream upToAll: (self convertStringToStringClass: 'ße')) equals: (self convertStringToStringClass: 'ei'). + use8BitStream ifTrue: [ self assert: (eightBitStream upToAll: (self convertStringToStringClass: 'ße')) equals: (self convertStringToStringClass: 'ei') ]. self assert: (utf8Stream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'ei'). self assert: (stream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'nde'). - self assert: (eightBitStream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'nde'). + use8BitStream ifTrue: [ self assert: (eightBitStream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'nde') ]. self assert: (utf8Stream upToAll: (self convertStringToStringClass:'ße')) equals: (self convertStringToStringClass: 'nde'). a := (self convertStringToStringClass: 'ABC'), (Character codePoint: 128), (Character codePoint: 255).