Skip to content

Commit

Permalink
Issue #79: checkout ZnCharacterStreamTests >> testUpToAll is passing,…
Browse files Browse the repository at this point in the history
… ZnCharacterStreamTests >> testUpToAllTwice is not ...
  • Loading branch information
dalehenrich committed Dec 10, 2022
1 parent f4dc218 commit 8cd2604
Show file tree
Hide file tree
Showing 7 changed files with 329 additions and 42 deletions.
11 changes: 7 additions & 4 deletions src/FileSystem-GemStone-Kernel/CharacterCollection.extension.st
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ CharacterCollection >> asResolvedBy: aFileSystem [
{ #category : '*filesystem-gemstone-kernel' }
CharacterCollection >> asZnCharacterEncoder [
"Return a ZnCharacterEncoder instance using the receiver as identifier"

" 'UTF-8' asZnCharacterEncoder "

((self select: [ :each | each isAlphaNumeric ]) asLowercase) = 'utf8' ifFalse: [ self error: 'Only utf8 encoding supported'].
^ ZnUTF8Encoder new

(self select: [ :each | each isAlphaNumeric ]) asLowercase = 'utf8'
ifTrue: [ ^ ZnUTF8Encoder new ]
ifFalse: [
(self select: [ :each | each isAlphaNumeric ]) asLowercase = '8bit'
ifFalse: [ self error: 'only 8bit or utf8 encoding supported' ] ].
^ Zn8BITEncoder new
]

{ #category : '*filesystem-gemstone-kernel' }
Expand Down
122 changes: 122 additions & 0 deletions src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"
Part of FileSystem
=========
I implement the encoding and decoding of Extended ASCII (8 bit character encoding) that produces instances of class String.
Every character is encoded as exactly one byte (see #encodedByteCountFor:) and every element read from a stream is decoded into exactly one character (see #nextFromStream:).
The encoding is consistent with topaz 'fileformat 8BIT' (see section 1.3 Handling text outside the ASCII range in the topaz manual[1] for more details).
The class variable Default caches the instance answered by the class-side #default.
[1] https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/GS64-Topaz-3.6.htm?https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/1-Tutorial.htm#pgfId-1130673
"
Class {
#name : 'Zn8BITEncoder',
#superclass : 'ZnCharacterEncoder',
#classVars : [
'Default'
],
#category : 'Zinc-Character-Encoding-Core'
}

{ #category : 'accessing' }
Zn8BITEncoder class >> default [
	"Answer the shared encoder instance, creating it and caching it in the
	class variable Default the first time it is requested.
	The original author describes this as faster than going via #newForEncoding:,
	which does a subclass search."

	Default isNil
		ifTrue: [ Default := self new ].
	^ Default
]

{ #category : 'accessing' }
Zn8BITEncoder class >> handlesEncoding: string [
	"Answer true when the canonical form of the encoding identifier string is '8bit',
	i.e. when my instances handle the encoding it describes"

	| canonicalId |
	canonicalId := self canonicalEncodingIdentifier: string.
	^ canonicalId = '8bit'
]

{ #category : 'accessing' }
Zn8BITEncoder class >> knownEncodingIdentifiers [
"Answer the encoding identifiers known to this class: the single symbol #'8bit'"

^ #( #'8bit' )
]

{ #category : 'instance creation' }
Zn8BITEncoder class >> newForEncoding: string [
"Answer a new instance. The argument string is not used here:
no further parametrization is needed for this encoding"

^ self new
]

{ #category : 'converting' }
Zn8BITEncoder >> backOnStream: stream [
	"Step stream back by one character, which for this encoding is one position.
	Signal an Error when stream is already at position 0."

	| atStart |
	atStart := stream position = 0.
	atStart
		ifTrue: [ Error signal: 'Cannot move backward past the start of the stream.' ].
	stream skip: -1
]

{ #category : 'convenience' }
Zn8BITEncoder >> decodeAsCodePoints: bytes [
"Decode bytes and return the result of String withBytes: bytes.
NOTE(review): this is the same expression that #decodeBytes: evaluates, so the answer
is a String rather than a collection of Integer code points as the selector
suggests - confirm that callers expect this."

^ String withBytes: bytes
]

{ #category : 'convenience' }
Zn8BITEncoder >> decodeBytes: bytes [
"Decode bytes and return the resulting string, built with String withBytes: bytes"

^ String withBytes: bytes
]

{ #category : 'converting' }
Zn8BITEncoder >> encodedByteCountFor: character [
"Return how many bytes are needed to encode character.
The answer is always 1; the argument is not inspected."

^ 1
]

{ #category : 'convenience' }
Zn8BITEncoder >> encodeString: string [
"Encode string and return the result of string asByteArray.
NOTE(review): presumably a ByteArray holding one byte per character; nothing here
checks that every character of string fits in 8 bits - confirm against callers."

^ string asByteArray
]

{ #category : 'accessing' }
Zn8BITEncoder >> identifier [
"Answer the symbol that identifies this encoding"

^ #'8bit'
]

{ #category : 'converting' }
Zn8BITEncoder >> nextCodePointFromStream: stream [
"Read and return the next integer code point from stream.
The element answered by stream next is returned unchanged
(assumes stream yields Integer byte values - TODO confirm)."

^ stream next
]

{ #category : 'converting' }
Zn8BITEncoder >> nextFromStream: stream [
	"Consume the next element of stream and answer the Character
	whose code point is that element"

	| byte |
	byte := stream next.
	^ Character codePoint: byte
]

{ #category : 'converting' }
Zn8BITEncoder >> nextPutCodePoint: codePoint toStream: stream [
"Write the encoding for Integer code point to stream and return the result of #nextPut:.
The code point is converted to a Character before it is written.
NOTE(review): a Character, not an Integer byte value, is handed to stream, and nothing
here checks that codePoint is below 256 - confirm that the target streams accept this."

^ stream nextPut: (Character codePoint: codePoint)
]

{ #category : 'convenience' }
Zn8BITEncoder >> readInto: string startingAt: offset count: requestedCount fromStream: stream [
	"Fill string, beginning at index offset, with up to requestedCount characters
	decoded from stream. Answer how many characters were actually stored, which is
	smaller than requestedCount when stream reaches its end first."

	0 to: requestedCount - 1 do: [ :readSoFar |
		"readSoFar is also the count of characters already stored"
		stream atEnd
			ifTrue: [ ^ readSoFar ].
		string
			codePointAt: offset + readSoFar
			put: (self nextCodePointFromStream: stream) ].
	^ requestedCount
]
16 changes: 10 additions & 6 deletions src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -407,21 +407,25 @@ ZnBufferedReadStream >> upTo: value [
This could be further optimized."

^ self collectionSpecies
streamContents: [ :writeStream | | element |
[ self atEnd or: [ (element := self next) = value ] ] whileFalse: [
writeStream nextPut: element ] ]
streamContents: [ :writeStream |
[ self atEnd or: [ (self peek) = value ] ] whileFalse: [
writeStream nextPut: self next ] ]
]

{ #category : 'accessing' }
ZnBufferedReadStream >> upToAll: aCollection [
"Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream."

| startPos endMatch result x |
aCollection isEmpty ifTrue: [ ^aCollection ].
startPos := self position.
"upTo: will stop before aCollection first"
x := self upTo: aCollection first.
self atEnd ifTrue: [ ^ x ].
2 to: aCollection size do: [:i |
self peek = (aCollection at: i)
(self atEnd or: [aCollection size = 1 ])
ifTrue: [ ^ x ].
self next. "move past the matching char from upTo:"
2 to: aCollection size do: [:i | | y |
(y := self peek) = (aCollection at: i)
ifTrue: [ self next ]
ifFalse: [ self position: startPos.
^ self upToEnd ] ].
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"
I hold the tests for Zn8BITEncoder.
I supply the encoder under test (#_encoder), its identifier (#_encoderId) and a streamed implementation of #decodeBytes:with: to the tests inherited from ZnCharacterEncoderTests.
"
Class {
#name : 'Zn8BITCharacterEncoderTests',
#superclass : 'ZnCharacterEncoderTests',
#category : 'Zinc-Character-Encoding-Tests'
}

{ #category : 'private' }
Zn8BITCharacterEncoderTests >> _encoder [
"Answer a fresh instance of the encoder under test"

^ Zn8BITEncoder new
]

{ #category : 'private' }
Zn8BITCharacterEncoderTests >> _encoderId [
"Answer the encoding identifier of the encoder under test"

^ #'8bit'
]

{ #category : 'private' }
Zn8BITCharacterEncoderTests >> decodeBytes: bytes with: encoder [
	"Answer the String obtained by asking encoder for one character at a time
	from a read stream over bytes until that stream is exhausted"

	^ String streamContents: [ :decoded | | source |
		source := bytes readStream.
		[ source atEnd not ] whileTrue: [
			decoded nextPut: (encoder nextFromStream: source) ] ]
]

{ #category : 'testing' }
Zn8BITCharacterEncoderTests >> testByteEncoding [
	"Encode a string mixing ASCII characters with characters in the 128..255 range,
	then check that decoding via the encoder instance, via the encoder identifier
	and via String withBytes: all agree, and that the encoded bytes equal
	the string's asByteArray"

	| codec encoded text |
	codec := self _encoder.
	text := '123AbC', (Character codePoint: 128), (Character codePoint: 255), (Character codePoint: 150), (Character codePoint: 192), (Character codePoint: 224).
	encoded := codec encodeString: text.
	self
		assert: (encoded decodeWith: codec) equals: (codec decodeBytes: encoded);
		assert: (encoded decodeWith: self _encoderId) equals: (codec decodeBytes: encoded);
		assert: (String withBytes: encoded) equals: text;
		assert: text asByteArray equals: encoded
]
49 changes: 28 additions & 21 deletions src/Zinc-Character-Encoding-Tests/ZnCharacterEncoderTests.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,33 @@ ZnCharacterEncoderTests class >> unicodeCharacterSource [
^ ($A to: $Z), ($a to: $z), ($0 to: $9), '.-_/*+=|,;?!$&<>^%#', ' ', 'éèçüäßñα', '€∏'
]

{ #category : 'Testing' }
ZnCharacterEncoderTests class >> isAbstract [
"Override to true if a TestCase subclass is Abstract and should not have
TestCase instances built from it.
Answers true only when the receiver's sunitName is #ZnCharacterEncoderTests;
presumably subclasses answer a different sunitName and are therefore concrete."

^ self sunitName = #'ZnCharacterEncoderTests'
]

{ #category : 'private' }
ZnCharacterEncoderTests >> _encoderId [
"Subclasses must answer the encoding identifier of the encoder they test"

self subclassResponsibility
]

{ #category : 'private' }
ZnCharacterEncoderTests >> _sourceClass [
"Subclasses must implement this.
NOTE(review): presumably answers the class used to build test source strings - confirm against senders."

self subclassResponsibility
]

{ #category : 'public' }
ZnCharacterEncoderTests >> assert: anObject unicodeEquals: otherObj [
	"Assert that anObject and otherObj compare equal under #_unicodeEqual:,
	which allows comparison of unicode and legacy strings.
	On failure the description shows the printString of both objects."

	| matches message |
	matches := anObject _unicodeEqual: otherObj.
	message := anObject printString , ' is not equal to ' , otherObj printString.
	self assert: matches description: message
]

{ #category : 'public' }
ZnCharacterEncoderTests >> assertCharacterCollection: anObject equals: otherObj [
"allow comparison between unitcode and legacy strings in legacy mode"
Expand All @@ -45,17 +72,7 @@ ZnCharacterEncoderTests >> assertCharacterCollection: anObject equals: otherObj

{ #category : 'private' }
ZnCharacterEncoderTests >> decodeBytes: bytes with: encoder [
true
ifTrue: [
"GemStone does not support streamed decoding ... hack for tests"
^ bytes decodeFromUTF8
] ifFalse: [

| input |
input := bytes readStream.
^ String streamContents: [ :stream |
[ input atEnd ] whileFalse: [
stream nextPut: (encoder nextFromStream: input) ] ] ]
self subclassResponsibility
]

{ #category : 'private' }
Expand All @@ -70,16 +87,6 @@ ZnCharacterEncoderTests >> encodeString: string with: encoder [
encoder nextPut: each toStream: stream ] ]
]

{ #category : 'testing' }
ZnCharacterEncoderTests >> testByteDecoding [
	"Encode a French sample string with a UTF-8 encoder and check that decoding
	the bytes via the encoder instance, via the #utf8 identifier and via
	#utf8Decoded all agree with the encoder's own #decodeBytes:"

	| codec encoded |
	codec := ZnUTF8Encoder new.
	encoded := codec encodeString: 'élève en Français'.
	self
		assert: (encoded decodeWith: codec) equals: (codec decodeBytes: encoded);
		assert: (encoded decodeWith: #utf8) equals: (codec decodeBytes: encoded);
		assert: encoded utf8Decoded equals: (codec decodeBytes: encoded)
]

{ #category : 'testing' }
ZnCharacterEncoderTests >> testCodePointEncodingDecoding [
| encoder input output |
Expand Down
Loading

0 comments on commit 8cd2604

Please sign in to comment.