-
Notifications
You must be signed in to change notification settings - Fork 55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add metadata api #267
Add metadata api #267
Changes from 7 commits
b2319c1
9d21e1b
c5d41a0
f918cba
787bb33
d310a79
8a50293
35babef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,7 @@ export class DocHandle<T> // | |
#machine: DocHandleXstateMachine<T> | ||
#timeoutDelay: number | ||
#remoteHeads: Record<StorageId, A.Heads> = {} | ||
#changeMetadata: ChangeMetadataFunction | ||
|
||
/** The URL of this document | ||
* | ||
|
@@ -54,18 +55,31 @@ export class DocHandle<T> // | |
/** @hidden */ | ||
constructor( | ||
public documentId: DocumentId, | ||
{ isNew = false, timeoutDelay = 60_000 }: DocHandleOptions = {} | ||
{ | ||
timeoutDelay = 60_000, | ||
changeMetadata: changeMetadataFunction = () => undefined, | ||
init = false, | ||
}: DocHandleOptions<T> = {} | ||
) { | ||
super() | ||
this.#timeoutDelay = timeoutDelay | ||
this.#changeMetadata = changeMetadataFunction | ||
this.#log = debug(`automerge-repo:dochandle:${this.documentId.slice(0, 5)}`) | ||
|
||
// initial doc | ||
let doc = A.init<T>() | ||
|
||
// Make an empty change so that we have something to save to disk | ||
if (isNew) { | ||
doc = A.emptyChange(doc, {}) | ||
if (init) { | ||
const options = init === true ? {} : init | ||
|
||
doc = A.emptyChange( | ||
doc, | ||
optionsWithGlobalMetadata( | ||
options, | ||
this.#changeMetadata(this.documentId) ?? {} | ||
) | ||
) | ||
} | ||
|
||
/** | ||
|
@@ -217,7 +231,7 @@ export class DocHandle<T> // | |
}) | ||
.start() | ||
|
||
this.#machine.send(isNew ? CREATE : FIND) | ||
this.#machine.send(init ? CREATE : FIND) | ||
} | ||
|
||
// PRIVATE | ||
|
@@ -340,7 +354,7 @@ export class DocHandle<T> // | |
} | ||
|
||
/** `change` is called by the repo when the document is changed locally */ | ||
change(callback: A.ChangeFn<T>, options: A.ChangeOptions<T> = {}) { | ||
change(callback: A.ChangeFn<T>, options: DocHandleChangeOptions<T> = {}) { | ||
if (!this.isReady()) { | ||
throw new Error( | ||
`DocHandle#${this.documentId} is not ready. Check \`handle.isReady()\` before accessing the document.` | ||
|
@@ -349,7 +363,14 @@ export class DocHandle<T> // | |
this.#machine.send(UPDATE, { | ||
payload: { | ||
callback: (doc: A.Doc<T>) => { | ||
return A.change(doc, options, callback) | ||
return A.change( | ||
doc, | ||
optionsWithGlobalMetadata( | ||
options, | ||
this.#changeMetadata(this.documentId) ?? {} | ||
), | ||
callback | ||
) | ||
}, | ||
}, | ||
}) | ||
|
@@ -362,7 +383,7 @@ export class DocHandle<T> // | |
changeAt( | ||
heads: A.Heads, | ||
callback: A.ChangeFn<T>, | ||
options: A.ChangeOptions<T> = {} | ||
options: DocHandleChangeOptions<T> = {} | ||
): string[] | undefined { | ||
if (!this.isReady()) { | ||
throw new Error( | ||
|
@@ -373,7 +394,15 @@ export class DocHandle<T> // | |
this.#machine.send(UPDATE, { | ||
payload: { | ||
callback: (doc: A.Doc<T>) => { | ||
const result = A.changeAt(doc, heads, options, callback) | ||
const result = A.changeAt( | ||
doc, | ||
heads, | ||
optionsWithGlobalMetadata( | ||
options, | ||
this.#changeMetadata(this.documentId) ?? {} | ||
), | ||
callback | ||
) | ||
resultHeads = result.newHeads | ||
return result.newDoc | ||
}, | ||
|
@@ -448,14 +477,83 @@ export class DocHandle<T> // | |
} | ||
} | ||
|
||
/**
 * Translates handle-level change options into the options object handed to Automerge.
 *
 * The handle-wide metadata is applied first and the per-change metadata second, so
 * entries passed with the individual change win over the handle-wide ones.
 * Whatever metadata remains is serialized as JSON into the `message` field; the
 * `time` collected during merging is forwarded as the `time` field.
 *
 * @param options - options passed by the caller of the change
 * @param globalMetadata - metadata supplied for every change on the handle
 * @returns change options with `time`, `message` and `patchCallback` set
 */
function optionsWithGlobalMetadata<T>(
  options: DocHandleChangeOptions<T>,
  globalMetadata: ChangeMetadata
): A.ChangeOptions<T> {
  const merged: MergedMetadata = { metadata: {} }

  // order matters: later sources overwrite entries written by earlier ones
  const sources = options.metadata
    ? [globalMetadata, options.metadata]
    : [globalMetadata]
  for (const source of sources) {
    mergeMetadata(merged, source)
  }

  // an empty metadata object produces no message at all
  const hasEntries = Object.keys(merged.metadata).length > 0

  return {
    time: merged.time,
    message: hasEntries ? JSON.stringify(merged.metadata) : undefined,
    patchCallback: options.patchCallback,
  }
}
|
||
/**
 * Copies every entry of `metadata` into `target`, overwriting entries with the same key.
 *
 * A numeric `time` entry is not stored with the other entries; it is assigned to
 * `target.time` instead.
 *
 * @param target - accumulator that is mutated in place
 * @param metadata - entries to merge into the accumulator
 * @throws Error if a value is not a number, string or boolean
 */
function mergeMetadata(target: MergedMetadata, metadata: ChangeMetadata) {
  for (const [key, value] of Object.entries(metadata)) {
    // remove time from metadata, because it can be stored more efficiently as a time delta
    // this will no longer be necessary once we have proper metadata support
    if (key === "time" && typeof value === "number") {
      target.time = value
      continue
    }

    // the static type already restricts values, but callers from plain JavaScript
    // (or casts) can still pass anything, so check at runtime as well
    const type = typeof value
    if (type !== "number" && type !== "string" && type !== "boolean") {
      throw new Error(
        `Only primitive values "number", "string" and "boolean" are allowed in metadata`
      )
    }

    target.metadata[key] = value
  }
}
|
||
// Accumulator that mergeMetadata fills in while combining metadata sources | ||
interface MergedMetadata { | ||
// primitive key/value entries; serialized to JSON for the change message when non-empty | ||
metadata: ChangeMetadata | ||
// numeric "time" entry pulled out of the metadata and forwarded as the `time` change option | ||
time?: number | ||
} | ||
|
||
// WRAPPER CLASS TYPES | ||
|
||
/** @hidden */ | ||
export interface DocHandleOptions { | ||
isNew?: boolean | ||
export interface DocHandleOptions<T> { | ||
timeoutDelay?: number | ||
changeMetadata?: ChangeMetadataFunction | ||
// set init to true or pass in initialization options to create a new empty document | ||
init?: boolean | DocHandleChangeOptions<T> | ||
} | ||
|
||
// Options accepted by DocHandle.change and DocHandle.changeAt, and by the `init` handle option | ||
// TODO: remove this type once we have real metadata on changes in automerge | ||
// as an interim solution we use the message attribute to store the metadata as a JSON string | ||
export interface DocHandleChangeOptions<T> { | ||
// metadata for this change; merged on top of the metadata returned by the handle's ChangeMetadataFunction | ||
metadata?: ChangeMetadata | ||
// passed through unchanged as the `patchCallback` change option | ||
patchCallback?: A.PatchCallback<T> | ||
} | ||
|
||
// Metadata attached to a change: a flat map of primitive values (number, string or boolean) | ||
// A numeric `time` entry is not kept with the other entries; it is passed on as the change's `time` option | ||
export type ChangeMetadata = Record<string, number | string | boolean> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking ahead to how we compress this I am not sure this API will do everything we need. When compressing this metadata we don't store the names of the fields, but instead an integer column ID. This means that the application will need to provide some mapping from a column ID to the name of the field in the metadata object. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I’d suggest that for app-defined metadata columns we identify the columns by name+type, rather than by a numeric column ID. That would simplify the API and only cost a few bytes more space. The question is how the type should be identified in the API. With a non-null value we could check if the value is an integer, string, or byte array, and assign it to the appropriate typed column. With a null value we could just treat it as absent, and any metadata columns that exist because of non-null values on other changes will just be filled in with null anyway. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah that makes sense. This would imply storing the union of every metadata key of every change in the document in a lookup table somewhere in the serialized document chunk, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I think so. The first time a change has a non-null value in its metadata, we create a column identified by its metadata key and the type of the value. The serialised document will have to store every metadata column that exists on any of the changes. 
Changes that don't mention a particular metadata column just fill it in with null, as is the behaviour for the Automerge-internal columns. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This does mean that if a user never puts anything except null as the value for a metadata key then we would have to do something like not write it to the document at all right (because we don't know what column type to write). This means it would not be possible to distinguish between a null value and a not-present value. Maybe we should say that you can't write `null`. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup agree, let's disallow nulls. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've been thinking a little bit more about this. At some point we're going to want to have some kind of squash/rebase workflow I think. In such a workflow we would need to decide what to do with the metadata on each change. I think ideally we would just encode all the metadata into the squashed change. This suggests to me that we should actually treat the metadata as a multimap, somewhat like the query parameters in a URL. @ept @paulsonnentag what do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What would a squash look like? Could a user still override the metadata to set it to something custom for the squashed change, or would it be purely mechanical that the metadata of the squashed changes would always be the union of the metadata of the individual changes? There was a problem hiding this comment. 
Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @alexjg Good point. I'd think that a squash commit would need to bring in custom logic for compacting the metadata: for example, we might not want to keep every single timestamp, but only the minimum and maximum among the timestamps in the squashed range. For authors we might want to keep the set of distinct users who have contributed at least one change, and for signatures we might want to keep the most recent signature per branch per signing key. This suggests to me that we can keep the data model for metadata on a single change simple (a single value per entry in the map), and figure out how to represent changes on squash commits once we get to that point. I just realised something: if we do author attribution using metadata on changes, it would probably not be possible to do attribution on a squash commit or shallow clone, because the per-change information is not available. On the other hand, if we do attribution by mapping actor IDs to user IDs, attribution should still be possible, because the squash should preserve opIds, and the actor-to-user mapping can be included in the squash. That would be an argument for using the actorIds for attribution. |
||
|
||
/** A function that defines default metadata for each change on the handle | ||
* | ||
* @remarks | ||
* This function can be defined globally on the {@link Repo} and is passed down to all {@link DocHandle}. | ||
* The metadata can be overridden by explicitly passing metadata in {@link DocHandle.change} or {@link DocHandle.changeAt}. | ||
* Returning `undefined` is treated the same as returning no metadata. | ||
* */ | ||
export type ChangeMetadataFunction = ( | ||
documentId: DocumentId | ||
) => ChangeMetadata | undefined | ||
|
||
export interface DocHandleMessagePayload { | ||
destinationId: PeerId | ||
documentId: DocumentId | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can also allow
Uint8Array
here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Although I guess we currently can't serialize that to JSON in a nice way so maybe we leave that for the future.