Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 75: add stringClass attributes to FileSystem #82

Merged
merged 11 commits into from
Dec 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/FileSystem-Core/AbstractFileReference.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -743,10 +743,13 @@ AbstractFileReference >> readStreamDo: doBlock ifAbsent: absentBlock [

{ #category : 'streams' }
AbstractFileReference >> readStreamEncoded: anEncoding [
	"Answer a ZnCharacterReadStream layered over my binary read stream that decodes with anEncoding.
	The stream is asked to build Unicode7 results when String is in Unicode comparison mode, and String results otherwise."

	| byteStream resultClass |
	byteStream := self binaryReadStream.
	resultClass := String isInUnicodeComparisonMode
		ifTrue: [ Unicode7 ]
		ifFalse: [ String ].
	^ ZnCharacterReadStream
		on: byteStream
		encoding: anEncoding
		stringClass: resultClass
]

{ #category : 'streams' }
Expand Down
11 changes: 7 additions & 4 deletions src/FileSystem-GemStone-Kernel/CharacterCollection.extension.st
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ CharacterCollection >> asResolvedBy: aFileSystem [
{ #category : '*filesystem-gemstone-kernel' }
CharacterCollection >> asZnCharacterEncoder [
	"Return a ZnCharacterEncoder instance using the receiver as identifier.
	The receiver is normalized by keeping only its alphanumeric characters and lowercasing them,
	so 'UTF-8', 'utf8' and 'Utf_8' all select the same encoder.
	Answers a ZnUTF8Encoder for 'utf8' and a Zn8BITEncoder for '8bit'; any other identifier signals an error."

	" 'UTF-8' asZnCharacterEncoder "

	| identifier |
	"Normalize once instead of re-filtering the receiver for every comparison."
	identifier := (self select: [ :each | each isAlphaNumeric ]) asLowercase.
	identifier = 'utf8'
		ifTrue: [ ^ ZnUTF8Encoder new ].
	identifier = '8bit'
		ifFalse: [ self error: 'only 8bit or utf8 encoding supported' ].
	^ Zn8BITEncoder new
]

{ #category : '*filesystem-gemstone-kernel' }
Expand Down
4 changes: 2 additions & 2 deletions src/FileSystem-Tests-Core/FileReferenceTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ FileReferenceTest >> testReadStreamIfAbsent [
{ #category : 'tests' }
FileReferenceTest >> testRelativeTo [

| alpha beta reference path result |
| alpha beta |
alpha := sandbox / 'alpha'.
beta := alpha / 'beta'.
self
Expand Down Expand Up @@ -1326,7 +1326,7 @@ FileReferenceTest >> testWithExtension [
{ #category : 'tests' }
FileReferenceTest >> testWithoutExtension [

| reference result |
| reference |
reference := sandbox / 'alpha.beta.gamma'.
reference := reference withoutExtension.
self
Expand Down
123 changes: 123 additions & 0 deletions src/Zinc-Character-Encoding-Core/Zn8BITEncoder.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"
Part of FileSystem

=========

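I implement the encoding and decoding of Extended ASCII (an 8-bit character encoding in which every character occupies exactly one byte). Decoding produces instances of my configured stringClass, which is String unless String is in Unicode comparison mode or another class has been supplied.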

The encoding is consistent with topaz 'fileformat 8BIT' (see section 1.3 Handling text outside the ASCII range in the topaz manual[1] for more details).

[1] https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/GS64-Topaz-3.6.htm?https://downloads.gemtalksystems.com/docs/GemStone64/3.6.x/GS64-Topaz-3.6/1-Tutorial.htm#pgfId-1130673
"
Class {
#name : 'Zn8BITEncoder',
#superclass : 'ZnCharacterEncoder',
#classVars : [
'Default'
],
#category : 'Zinc-Character-Encoding-Core'
}

{ #category : 'accessing' }
Zn8BITEncoder class >> default [
	"Answer the shared encoder kept in the Default class variable, creating it on first use.
	Using this cached instance avoids the subclass search performed by #newForEncoding:."

	Default isNil
		ifTrue: [ Default := self new ].
	^ Default
]

{ #category : 'accessing' }
Zn8BITEncoder class >> handlesEncoding: string [
	"Answer true when the canonical form of string names the 8bit encoding served by my instances."

	| canonical |
	canonical := self canonicalEncodingIdentifier: string.
	^ canonical = '8bit'
]

{ #category : 'accessing' }
Zn8BITEncoder class >> knownEncodingIdentifiers [
"Answer a literal Array holding the single encoding identifier, the Symbol 8bit, that this class declares."
^ #( #'8bit' )
]

{ #category : 'instance creation' }
Zn8BITEncoder class >> newForEncoding: string stringClass: stringClass [
	"Answer a new instance of the receiver configured to build its decoded results with stringClass.
	The string argument is not consulted: no subclass search happens here, the receiver itself is instantiated."

	"The cascade ends with yourself so the new encoder is answered regardless of what the setter returns."
	^ self new
		stringClass: stringClass;
		yourself
]

{ #category : 'converting' }
Zn8BITEncoder >> backOnStream: stream [
	"Step stream back by one character, which for this encoding is a single position.
	An Error is signalled when stream is already at position 0."

	| currentPosition |
	currentPosition := stream position.
	currentPosition = 0
		ifTrue: [ Error signal: 'Cannot move backward past the start of the stream.' ].
	stream skip: -1
]

{ #category : 'convenience' }
Zn8BITEncoder >> decodeAsCodePoints: bytes [
"Decode bytes and return the result of sending withBytes: to the class String.
NOTE(review): the selector promises code points, yet a String is answered and the configured stringClass is not consulted (compare decodeBytes:) - confirm this is what callers expect."

^ String withBytes: bytes
]

{ #category : 'convenience' }
Zn8BITEncoder >> decodeBytes: bytes [
	"Answer the string obtained by asking my stringClass to build an instance from bytes."

	| resultClass |
	resultClass := self stringClass.
	^ resultClass withBytes: bytes
]

{ #category : 'converting' }
Zn8BITEncoder >> encodedByteCountFor: character [
"Return how many bytes are needed to encode character.
The answer is always 1; the argument itself is not inspected."

^ 1
]

{ #category : 'convenience' }
Zn8BITEncoder >> encodeString: string [
"Encode string by sending it asByteArray and return the result (not a Utf8 instance).
NOTE(review): presumably this yields one byte per character; how characters with code points above 255 behave is not visible here - confirm."

^ string asByteArray
]

{ #category : 'accessing' }
Zn8BITEncoder >> identifier [
"Answer the Symbol that identifies this encoding; it is the same symbol listed by the class-side knownEncodingIdentifiers."
^ #'8bit'
]

{ #category : 'converting' }
Zn8BITEncoder >> nextCodePointFromStream: stream [
"Read and return the next integer code point from stream.
The next element of stream is answered unchanged; no multi-byte decoding takes place."

^ stream next
]

{ #category : 'converting' }
Zn8BITEncoder >> nextFromStream: stream [
	"Consume one element from stream and answer the Character whose code point is that element."

	| codePoint |
	codePoint := stream next.
	^ Character codePoint: codePoint
]

{ #category : 'converting' }
Zn8BITEncoder >> nextPutCodePoint: codePoint toStream: stream [
"Write the encoding for Integer code point to stream and answer the result of nextPut:.
The code point is wrapped in a Character before being handed to the stream.
NOTE(review): nextCodePointFromStream: answers the raw stream element, so reading and writing are not symmetric - confirm the target stream accepts Characters and that code points above 255 cannot reach this method."

^ stream nextPut: (Character codePoint: codePoint)
]

{ #category : 'convenience' }
Zn8BITEncoder >> readInto: string startingAt: offset count: requestedCount fromStream: stream [
	"Fill string with up to requestedCount characters decoded from stream, storing the first one at index offset.
	Answer how many characters were stored; this is less than requestedCount when stream reaches its end early."

	0 to: requestedCount - 1 do: [ :stored |
		"stored is the number of characters already written when this iteration starts"
		stream atEnd
			ifTrue: [ ^ stored ].
		string
			codePointAt: offset + stored
			put: (self nextCodePointFromStream: stream) ].
	^ requestedCount
]
30 changes: 21 additions & 9 deletions src/Zinc-Character-Encoding-Core/ZnBufferedReadStream.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ ZnBufferedReadStream >> closed [
ZnBufferedReadStream >> collectionSpecies [
	"Answer the class used to collect elements read from me.
	A binary wrapped stream yields ByteArray. Otherwise the wrapped stream's own stringClass is used when it
	understands #stringClass; failing that, Unicode7 when String is in Unicode comparison mode, else String."

	stream isBinary
		ifTrue: [ ^ ByteArray ].
	(stream respondsTo: #'stringClass')
		ifTrue: [ ^ stream stringClass ].
	^ String isInUnicodeComparisonMode
		ifTrue: [ Unicode7 ]
		ifFalse: [ String ]
]

{ #category : 'accessing' }
Expand Down Expand Up @@ -403,25 +409,31 @@ ZnBufferedReadStream >> uint8 [
{ #category : 'accessing' }
ZnBufferedReadStream >> upTo: value [
	"Read up to but not including value and return the elements read as a collection of my collectionSpecies.
	The occurrence of value itself is consumed but not included in the result.
	If value is not found, return the entire remaining contents of the stream."

	^ self collectionSpecies
		streamContents: [ :writeStream | | ch |
			"The assignment inside the condition consumes one element per test, including the terminating value."
			[ self atEnd or: [ (ch := self next) = value ] ] whileFalse: [
				writeStream nextPut: ch ] ]
]

{ #category : 'accessing' }
ZnBufferedReadStream >> upToAll: aCollection [
"Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream."

| startPos endMatch result x |
aCollection isEmpty ifTrue: [ ^aCollection ].
startPos := self position.
"upTo: will stop before aCollection first"
x := self upTo: aCollection first.
self atEnd ifTrue: [ ^ x ].
2 to: aCollection size do: [:i |
self peek = (aCollection at: i)
self atEnd
ifTrue: [
aCollection size <= 1
ifTrue: [ ^ x ].
self position: startPos.
^ self upToEnd].
2 to: aCollection size do: [:i | | y |
(y := self peek) = (aCollection at: i)
ifTrue: [ self next ]
ifFalse: [ self position: startPos.
^ self upToEnd ] ].
Expand Down
32 changes: 31 additions & 1 deletion src/Zinc-Character-Encoding-Core/ZnCharacterEncoder.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ Part of Zinc HTTP Components.
Class {
#name : 'ZnCharacterEncoder',
#superclass : 'Object',
#instVars : [
'stringClass'
],
#category : 'Zinc-Character-Encoding-Core'
}

Expand Down Expand Up @@ -70,12 +73,25 @@ ZnCharacterEncoder class >> knownEncodingIdentifiers [
ZnCharacterEncoder class >> newForEncoding: string [
	"Answer a new character encoder for the encoding described by string.
	The work is delegated to #newForEncoding:stringClass:, passing Unicode7 when String is in
	Unicode comparison mode and String otherwise."

	| defaultStringClass |
	defaultStringClass := String isInUnicodeComparisonMode
		ifTrue: [ Unicode7 ]
		ifFalse: [ String ].
	^ self newForEncoding: string stringClass: defaultStringClass
]

{ #category : 'instance creation' }
ZnCharacterEncoder class >> newForEncoding: string stringClass: stringClass [
	"Return a new character encoder object for an encoding described by string.
	Search my subclasses for the first one that handles the encoding and delegate to it,
	forwarding stringClass so the chosen encoder is configured with it.
	Signals an error when no subclass handles the encoding."

	| concreteSubclass |
	concreteSubclass := self allSubclasses
		detect: [ :each | each handlesEncoding: string ]
		ifNone: [ ^ self error: 'The ', string printString, ' is not currently supported.' ].
	^ concreteSubclass newForEncoding: string stringClass: stringClass
]

{ #category : 'converting' }
Expand Down Expand Up @@ -157,3 +173,17 @@ ZnCharacterEncoder >> nextPut: character toStream: stream [

self nextPutCodePoint: character asInteger toStream: stream
]

{ #category : 'accessing' }
ZnCharacterEncoder >> stringClass [
	"Answer the class configured for my decoded strings.
	When none has been set yet, remember and answer Unicode7 if String is in Unicode comparison mode, String otherwise."

	stringClass isNil
		ifTrue: [
			stringClass := String isInUnicodeComparisonMode
				ifTrue: [ Unicode7 ]
				ifFalse: [ String ] ].
	^ stringClass
]

{ #category : 'accessing' }
ZnCharacterEncoder >> stringClass: object [
"Store object as the class answered by the stringClass accessor.
There is no explicit return, so the receiver is answered; callers in this package chain on that result."
stringClass := object
]
29 changes: 28 additions & 1 deletion src/Zinc-Character-Encoding-Core/ZnCharacterReadStream.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,29 @@ Part of Zinc HTTP Components.
Class {
#name : 'ZnCharacterReadStream',
#superclass : 'ZnEncodedReadStream',
#instVars : [
'stringClass'
],
#category : 'Zinc-Character-Encoding-Core'
}

{ #category : 'instance creation' }
ZnCharacterReadStream class >> on: wrappedStream encoding: encoding stringClass: stringClass [
	"Answer a new instance reading from wrappedStream, configured with encoding and then with stringClass, in that order.
	NOTE(review): if encoding: installs an encoder eagerly, that encoder never receives stringClass, because the
	instance-side encoder accessor only propagates it when it creates the encoder itself - confirm against encoding:."

	| characterStream |
	characterStream := self new.
	characterStream on: wrappedStream.
	characterStream encoding: encoding.
	characterStream stringClass: stringClass.
	^ characterStream
]

{ #category : 'accessing' }
ZnCharacterReadStream >> collectionSpecies [
	"Answer the class used to collect the characters I read: my configured stringClass."

	^ self stringClass
]

{ #category : 'accessing' }
ZnCharacterReadStream >> encoder [
	"Answer my encoder. When none is set yet, take the one supplied by the superclass and configure it with my stringClass.
	NOTE(review): if the superclass answers a shared default encoder, sending it stringClass: changes that shared
	instance for every user - confirm what super encoder returns."

	encoder isNil
		ifTrue: [ encoder := super encoder stringClass: self stringClass ].
	^ encoder
]

{ #category : 'accessing' }
Expand Down Expand Up @@ -91,6 +108,16 @@ ZnCharacterReadStream >> readInto: collection startingAt: offset count: requeste

]

{ #category : 'accessing' }
ZnCharacterReadStream >> stringClass [
	"Answer the class used for the strings I produce, remembering String as the default when none has been set.
	NOTE(review): ZnCharacterEncoder >> stringClass defaults to Unicode7 in Unicode comparison mode whereas this
	default is always String - confirm the difference is intentional."

	stringClass isNil
		ifTrue: [ stringClass := String ].
	^ stringClass
]

{ #category : 'accessing' }
ZnCharacterReadStream >> stringClass: object [
"Store object as the class answered by the stringClass accessor, and therefore by collectionSpecies.
There is no explicit return, so the receiver is answered."
stringClass := object
]

{ #category : 'accessing' }
ZnCharacterReadStream >> upToAll: aCollection [
"Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of aCollection. If aCollection is not in the stream, answer the entire rest of the stream."
Expand Down
Loading