Merge pull request #1138 from nugu-developers/develop/1.9.3
Release to `1.9.3`
jayce1116 authored Mar 11, 2024
2 parents f94b851 + 65c8f0a commit 404818a
Showing 54 changed files with 539 additions and 190 deletions.
2 changes: 1 addition & 1 deletion JadeMarble.podspec
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = 'JadeMarble'
s.version = '1.9.2'
s.version = '1.9.3'
s.license = 'Apache License, Version 2.0'
s.summary = 'End Point Detector for NUGU ASR'
s.homepage = 'https://github.com/nugu-developers/nugu-ios'
46 changes: 26 additions & 20 deletions JadeMarble/Sources/TycheEndPointDetectorEngine.swift
@@ -28,6 +28,7 @@ public class TycheEndPointDetectorEngine {
private var flushedLength: Int = 0
private var flushLength: Int = 0
private var engineHandle: EpdHandle?
private var speexEncoder: SpeexEncoder?
public weak var delegate: TycheEndPointDetectorEngineDelegate?

#if DEBUG
@@ -97,44 +98,46 @@ public class TycheEndPointDetectorEngine {
return
}

let engineState = ptrPcmData.withMemoryRebound(to: UInt8.self, capacity: Int(buffer.frameLength*2)) { (ptrData) -> Int32 in
let (engineState, inputData) = ptrPcmData.withMemoryRebound(to: UInt8.self, capacity: Int(buffer.frameLength * 2)) { (ptrData) -> (Int32, Data) in
#if DEBUG
self.inputData.append(ptrData, count: Int(buffer.frameLength)*2)
self.inputData.append(ptrData, count: Int(buffer.frameLength) * 2)
#endif
let inputData = Data(bytes: ptrData, count: Int(buffer.frameLength) * 2)

// Calculate flushed audio frame length.
var adjustLength = 0
if self.flushedLength + Int(buffer.frameLength) <= self.flushLength {
self.flushedLength += Int(buffer.frameLength)
return -1
return (-1, inputData)
} else if self.flushedLength < self.flushLength {
self.flushedLength += Int(buffer.frameLength)
adjustLength = Int(buffer.frameLength) - (self.flushedLength - self.flushLength)
}

return epdClientChannelRUN(
let engineState = epdClientChannelRUN(
self.engineHandle,
ptrData,
myint(UInt32(buffer.frameLength) - UInt32(adjustLength))*2, // data length is double the frame length, because it is 16-bit audio data.
myint(UInt32(buffer.frameLength) - UInt32(adjustLength)) * 2, // data length is double the frame length, because it is 16-bit audio data.
0
)

return (engineState, inputData)
}
guard 0 <= engineState else { return }
guard .zero <= engineState else { return }

let length = epdClientChannelGetOutputDataSize(self.engineHandle)
if 0 < length {
let detectedBytes = UnsafeMutablePointer<Int8>.allocate(capacity: Int(length))
defer { detectedBytes.deallocate() }

let result = epdClientChannelGetOutputData(self.engineHandle, detectedBytes, length)
if 0 < result {
let detectedData = Data(bytes: detectedBytes, count: Int(result))
self.delegate?.tycheEndPointDetectorEngineDidExtract(speechData: detectedData)

#if DEBUG
self.outputData.append(detectedData)
#endif
}
guard let speexEncoder else {
log.error("SpeexEncoder is not exist. Please initDetectorEngine first.")
return
}

do {
let speexData = try speexEncoder.encode(data: inputData)
self.delegate?.tycheEndPointDetectorEngineDidExtract(speechData: speexData)
#if DEBUG
self.outputData.append(speexData)
#endif
} catch {
log.error("Failed to speex encoding, error: \(error)")
}

self.state = TycheEndPointDetectorEngine.State(engineState: engineState)
@@ -181,6 +184,7 @@ public class TycheEndPointDetectorEngine {
log.debug("engine is destroyed")
}

speexEncoder = nil
state = .idle
}

@@ -200,6 +204,8 @@
let modelPath = Bundle.module.url(forResource: "skt_epd_model", withExtension: "raw")!.path
#endif

let speexEncoder = SpeexEncoder(sampleRate: Int(sampleRate), inputType: EndPointDetectorConst.inputStreamType)
self.speexEncoder = speexEncoder
guard let epdHandle = epdClientChannelSTART(
modelPath,
myint(sampleRate),
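The substantive change in this file: instead of reading the detector's own output buffer via `epdClientChannelGetOutputData`, the engine now keeps a `SpeexEncoder` (created right before `epdClientChannelSTART` and released together with the EPD handle) and hands the Speex-encoded PCM input to the delegate. Below is a minimal sketch of that forwarding step, with a hypothetical stand-in protocol for the encoder, since nothing beyond `encode(data:)` is shown in this diff:

```swift
import Foundation

// Hypothetical stand-in protocol; only `encode(data:)` is assumed here,
// mirroring the single call made in the diff above.
protocol PCMChunkEncoding {
    func encode(data: Data) throws -> Data
}

// Sketch of the new forwarding path: every 16-bit PCM chunk fed to the EPD
// engine is also Speex-encoded and delivered to the caller, instead of
// reading the detector's own output buffer as before.
func forwardSpeechChunk(
    _ pcmChunk: Data,
    using encoder: PCMChunkEncoding,
    deliver: (Data) -> Void
) {
    do {
        let speexFrame = try encoder.encode(data: pcmChunk)
        deliver(speexFrame) // e.g. delegate?.tycheEndPointDetectorEngineDidExtract(speechData: speexFrame)
    } catch {
        print("Speex encoding failed: \(error)")
    }
}
```

The encode call sits on the same path that previously forwarded `detectedData`, so consumers of `tycheEndPointDetectorEngineDidExtract(speechData:)` now receive Speex frames rather than raw detector output.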
2 changes: 1 addition & 1 deletion KeenSense.podspec
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = 'KeenSense'
s.version = '1.9.2'
s.version = '1.9.3'
s.license = 'Apache License, Version 2.0'
s.summary = 'Key Word Detector for NUGU'
s.homepage = 'https://github.com/nugu-developers/nugu-ios'
2 changes: 1 addition & 1 deletion NuguAgents.podspec
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = 'NuguAgents'
s.version = '1.9.2'
s.version = '1.9.3'
s.license = 'Apache License, Version 2.0'
s.summary = 'Nugu Agents'
s.description = <<-DESC
@@ -64,6 +64,7 @@ final class AudioPlayer {
private var lastReportedOffset: Int = 0

private var lastDataAppended = false
private var canReportDelayEvent: Bool = true

init(directive: Downstream.Directive) throws {
payload = try JSONDecoder().decode(AudioPlayerPlayPayload.self, from: directive.payload)
@@ -259,7 +260,7 @@ private extension AudioPlayer {
seconds.isInfinite == false else {
return 0
}
return Int(ceil(seconds))
return Int(floor(seconds))
})
.filter { [weak self] offset in
guard let self = self else { return false }
@@ -277,7 +278,8 @@

// Check if there is any report target between last offset and current offset.
let offsetRange = (self.lastReportedOffset + 1...offset)
if delayReportTime > 0, offsetRange.contains(delayReportTime) {
if delayReportTime > 0, offsetRange.contains(delayReportTime), canReportDelayEvent {
self.canReportDelayEvent = false
self.progressDelegate?.audioPlayerDidReportDelay(self)
}
if intervalReportTime > 0, offsetRange.contains(intervalReportTime * (self.lastReportedOffset / intervalReportTime + 1)) {
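Two behavioural tweaks in this hunk: the reported offset is now rounded down (`floor`) instead of up, and the new `canReportDelayEvent` flag turns the delay report into a one-shot event, so `audioPlayerDidReportDelay` cannot fire twice if the report target keeps falling into the checked offset range. A minimal sketch of that latch follows; the surrounding class is hypothetical and only the flag name comes from the diff:

```swift
// Hypothetical reporter class illustrating the latch added above; the flag
// name is borrowed from the diff, the class itself is not part of NuguAgents.
final class DelayReportLatch {
    private var canReportDelayEvent = true

    /// - Parameters:
    ///   - offsetRange: offsets (in seconds) covered since the last reported offset.
    ///   - delayReportTime: offset at which the delay event is due; values <= 0 disable it.
    ///   - report: invoked at most once over the lifetime of this latch.
    func reportIfNeeded(offsetRange: ClosedRange<Int>, delayReportTime: Int, report: () -> Void) {
        guard delayReportTime > 0,
              offsetRange.contains(delayReportTime),
              canReportDelayEvent else { return }
        canReportDelayEvent = false   // latch: later matching ranges are ignored
        report()
    }
}
```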
@@ -27,7 +27,7 @@ import RxSwift

public final class AudioPlayerAgent: AudioPlayerAgentProtocol {
// CapabilityAgentable
public var capabilityAgentProperty: CapabilityAgentProperty = CapabilityAgentProperty(category: .audioPlayer, version: "1.8")
public var capabilityAgentProperty: CapabilityAgentProperty = CapabilityAgentProperty(category: .audioPlayer, version: "1.9")
private let playSyncProperty = PlaySyncProperty(layerType: .media, contextType: .sound)

// AudioPlayerAgentProtocol
@@ -68,7 +68,7 @@ public struct AudioPlayerPlaylist: Codable {
}

private enum CodingKeys: String, CodingKey {
case text = "text"
case text
case subText
case imageUrl
case badgeUrl
@@ -77,6 +77,7 @@
case token
case postback
case favorite
case libraryAvailable
}

public let text: TextObject?
@@ -88,6 +89,7 @@
public let token: String
public let postback: [String: AnyHashable]?
public var favorite: Favorite?
public let libraryAvailable: Bool?

public init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)
@@ -100,6 +102,7 @@
token = try container.decode(String.self, forKey: .token)
postback = try container.decodeIfPresent([String: AnyHashable].self, forKey: .postback)
favorite = try container.decodeIfPresent(Favorite.self, forKey: .favorite)
libraryAvailable = try container.decodeIfPresent(Bool.self, forKey: .libraryAvailable)
}

public func encode(to encoder: Encoder) throws {
@@ -21,5 +21,6 @@
public struct AudioPlayerSettingsTemplate: Decodable {
public let favorite: Bool?
public let `repeat`: AudioPlayerDisplayRepeat?
public let libraryAvailable: Bool?
public let shuffle: Bool?
}
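Both the playlist item above and this settings template gain an optional `libraryAvailable` flag, decoded with `decodeIfPresent` (or its synthesized equivalent), so older payloads that omit the key keep decoding. A small sketch of that behaviour, using a hypothetical trimmed-down model with only the fields needed to show it:

```swift
import Foundation

// Hypothetical trimmed-down model, only to show the optional key; the real
// AudioPlayerPlaylist item and AudioPlayerSettingsTemplate carry more fields.
struct PlaylistItemSample: Decodable {
    let token: String
    let libraryAvailable: Bool?
}

let withFlag = Data(#"{ "token": "item-1", "libraryAvailable": true }"#.utf8)
let withoutFlag = Data(#"{ "token": "item-2" }"#.utf8)

do {
    let a = try JSONDecoder().decode(PlaylistItemSample.self, from: withFlag)
    let b = try JSONDecoder().decode(PlaylistItemSample.self, from: withoutFlag)
    print(a.libraryAvailable ?? false)  // true
    print(b.libraryAvailable as Any)    // nil; older payloads without the key still decode
} catch {
    print("decoding failed: \(error)")
}
```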
@@ -29,7 +29,7 @@ extension ASRAgent {
let referrerDialogRequestId: String?

enum TypeInfo {
case recognize(initiator: ASRInitiator, options: ASROptions)
case recognize(initiator: ASRInitiator, options: ASROptions, service: [String: AnyHashable]?)
case responseTimeout
case listenTimeout
case stopRecognize
@@ -52,7 +52,7 @@ extension ASRAgent.Event: Eventable {
var payload: [String: AnyHashable] {
var payload: [String: AnyHashable?]
switch typeInfo {
case .recognize(let initiator, let options):
case .recognize(let initiator, let options, let service):
payload = [
"codec": "SPEEX",
"language": "KOR",
@@ -61,11 +61,13 @@
"playServiceId": dialogAttributes?["playServiceId"],
"domainTypes": dialogAttributes?["domainTypes"],
"asrContext": dialogAttributes?["asrContext"],
"service": service,
"timeout": [
"listen": options.timeout.truncatedMilliSeconds,
"maxSpeech": options.maxDuration.truncatedMilliSeconds,
"response": 10000
]
],
"requestType": options.requestType
]

if case let .wakeUpWord(keyword, _, start, end, detection) = initiator {
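With this change the `Recognize` event carries two extra fields: the `service` object passed by the caller (or taken from the ExpectSpeech payload) and a top-level `requestType` coming from `ASROptions`. An illustrative payload shape follows; all values below are made up, and the keys filled from dialog attributes (`playServiceId`, `domainTypes`, `asrContext`) are omitted:

```swift
// Illustrative payload shape only: every value is made up, and the dialog
// attribute keys from the real event are omitted for brevity.
let recognizePayload: [String: Any] = [
    "codec": "SPEEX",
    "language": "KOR",
    "service": ["name": "example-service"],  // new: forwarded as-is when provided
    "timeout": [
        "listen": 7_000,
        "maxSpeech": 10_000,
        "response": 10_000
    ],
    "requestType": "EXAMPLE_TYPE"            // new: taken from ASROptions.requestType
]
```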
@@ -29,10 +29,11 @@ import RxSwift

public final class ASRAgent: ASRAgentProtocol {
// CapabilityAgentable
// TODO: ASR interface version 1.1 -> ASR.Recognize(wakeup/power)
public var capabilityAgentProperty: CapabilityAgentProperty = CapabilityAgentProperty(category: .automaticSpeechRecognition, version: "1.7")
public var capabilityAgentProperty: CapabilityAgentProperty = CapabilityAgentProperty(category: .automaticSpeechRecognition, version: "1.8")
private let playSyncProperty = PlaySyncProperty(layerType: .asr, contextType: .sound)

public weak var delegate: ASRAgentDelegate?

// Private
private let focusManager: FocusManageable
private let contextManager: ContextManageable
@@ -93,15 +94,17 @@ public final class ASRAgent: ASRAgentProtocol {
log.error("ASR request: \(String(describing: asrRequest)), result: \(String(describing: asrResult))")
return
}
log.info("\(asrResult)")
log.info("asrResult: \(asrResult)")

// `ASRState` -> Event -> `expectSpeechDirective` -> `ASRAgentDelegate`
switch asrResult {
case .none:
asrState = .idle
expectSpeech = nil
case .partial:
break
case .complete:
asrState = .idle
expectSpeech = nil
case .cancel:
asrState = .idle
@@ -255,6 +258,8 @@ public extension ASRAgent {
public extension ASRAgent {
@discardableResult func startRecognition(
initiator: ASRInitiator,
service: [String: AnyHashable]?,
requestType: String?,
completion: ((StreamDataState) -> Void)?
) -> String {
log.debug("startRecognition, initiator: \(initiator)")
@@ -268,7 +273,13 @@
return
}

self.startRecognition(initiator: initiator, eventIdentifier: eventIdentifier, completion: completion)
startRecognition(
initiator: initiator,
eventIdentifier: eventIdentifier,
service: service,
requestType: requestType,
completion: completion
)
}

return eventIdentifier.dialogRequestId
@@ -445,7 +456,7 @@ private extension ASRAgent {
defer { completion(.finished) }

self?.asrDispatchQueue.sync { [weak self] in
guard let self = self else { return }
guard let self = self, let delegate = self.delegate else { return }
// e.g. stopRecognition called during TTS.
guard let expectSpeech = self.expectSpeech, expectSpeech.messageId == directive.header.messageId else {
log.info("Message id does not match")
@@ -456,9 +467,21 @@
log.warning("ExpectSpeech only allowed in IDLE or BUSY state.")
return
}
let service = expectSpeech.payload.service
guard service == nil || delegate.asrAgentWillStartExpectSpeech(service: service) else {
log.warning("ExpectSpeech service field is not nil. service: \(String(describing: service))")
self.asrResult = nil
return
}

self.asrState = .expectingSpeech
self.startRecognition(initiator: .expectSpeech, eventIdentifier: EventIdentifier(), completion: nil)
startRecognition(
initiator: .expectSpeech,
eventIdentifier: EventIdentifier(),
service: service,
requestType: options.requestType,
completion: nil
)
}
}
}
@@ -489,7 +512,7 @@ private extension ASRAgent {
case .partial:
self.asrResult = .partial(text: item.result ?? "", header: directive.header)
case .complete:
self.asrResult = .complete(text: item.result ?? "", header: directive.header)
self.asrResult = .complete(text: item.result ?? "", header: directive.header, requestType: item.requestType)
case .none:
self.asrResult = .none(header: directive.header)
case .error:
@@ -590,7 +613,7 @@ private extension ASRAgent {
}
upstreamDataSender.sendStream(
Event(
typeInfo: .recognize(initiator: asrRequest.initiator, options: asrRequest.options),
typeInfo: .recognize(initiator: asrRequest.initiator, options: asrRequest.options, service: asrRequest.service),
dialogAttributes: dialogAttributeStore.requestAttributes(key: expectSpeech?.messageId),
referrerDialogRequestId: asrRequest.referrerDialogRequestId
).makeEventMessage(
@@ -603,8 +626,6 @@
guard self?.asrRequest?.eventIdentifier == asrRequest.eventIdentifier else { return }

switch state {
case .finished:
self?.asrState = .idle
case .error(let error):
self?.asrResult = .error(error)
case .sent:
@@ -677,10 +698,12 @@ private extension ASRAgent {
func startRecognition(
initiator: ASRInitiator,
eventIdentifier: EventIdentifier,
service: [String: AnyHashable]?,
requestType: String?,
completion: ((StreamDataState) -> Void)?
) {
let semaphore = DispatchSemaphore(value: 0)
let options: ASROptions
var options: ASROptions
if let epd = self.expectSpeech?.payload.epd {
options = ASROptions(
maxDuration: epd.maxDuration ?? self.options.maxDuration,
@@ -692,11 +715,13 @@
} else {
options = self.options
}
options.updateRequestType(requestType)
asrRequest = ASRRequest(
eventIdentifier: eventIdentifier,
initiator: initiator,
options: options,
referrerDialogRequestId: expectSpeech?.dialogRequestId,
service: service,
completion: completion
)
self.contextManager.getContexts { [weak self] contextPayload in
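On the public API side, `startRecognition` gains the same `service` and `requestType` parameters and still returns the dialog request identifier. A hedged usage sketch; `.tap` is assumed to be a user-initiated `ASRInitiator` case, the service dictionary contents are placeholders, and passing `nil` for both new parameters keeps the previous behaviour:

```swift
import NuguAgents

// Hedged usage sketch: `.tap` is assumed to be a user-initiated ASRInitiator
// case in this SDK, and the service dictionary contents are placeholders.
func startServiceRecognition(with asrAgent: ASRAgent) -> String {
    asrAgent.startRecognition(
        initiator: .tap,
        service: ["name": "example-service"],  // hypothetical service payload
        requestType: nil,                      // nil: keep the default request type
        completion: { state in
            print("stream data state: \(state)")
        }
    )
}
```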
@@ -0,0 +1,29 @@
//
// ASRAgentDelegate.swift
// NuguAgents
//
// Created by Jaycesub on 17/04/2019.
// Copyright (c) 2024 SK Telecom Co., Ltd. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

import Foundation

public protocol ASRAgentDelegate: AnyObject {
/// Asks the delegate whether the ASRAgent should start recognition after receiving an expectSpeech directive.
/// Return `true` to start recognition regardless of any app-side condition on the service object.
/// - Parameter service: The service object included in the expectSpeech payload.
/// - Returns: `true` if the ASRAgent should start recognition after receiving the expectSpeech directive.
func asrAgentWillStartExpectSpeech(service: [String: AnyHashable]?) -> Bool
}
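The delegate is consulted from the ExpectSpeech handler only when the directive payload carries a non-nil `service` object, and returning `false` there cancels the follow-up recognition. A minimal conformance sketch:

```swift
import NuguAgents

// Minimal conformance sketch. The agent consults this delegate only when the
// ExpectSpeech payload carries a non-nil service object; returning false
// cancels the follow-up recognition.
final class SampleASRDelegate: ASRAgentDelegate {
    func asrAgentWillStartExpectSpeech(service: [String: AnyHashable]?) -> Bool {
        guard let service else { return true }  // not called with nil, but a safe default
        print("ExpectSpeech service: \(service)")
        return true                             // app-specific decision goes here; false cancels
    }
}
```

Note that the ExpectSpeech handler now also returns early when no delegate is set (the `guard let self = self, let delegate = self.delegate` line above), so apps that rely on ExpectSpeech-driven recognition need to assign `delegate` when adopting this release.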