diff --git a/workflows/VGP-assembly-v2/Curation-TreeValGal/TreeValGal_optionalwindowmasker.ga b/workflows/VGP-assembly-v2/Curation-TreeValGal/TreeValGal_optionalwindowmasker.ga index 807dc5e66..a2fd66043 100644 --- a/workflows/VGP-assembly-v2/Curation-TreeValGal/TreeValGal_optionalwindowmasker.ga +++ b/workflows/VGP-assembly-v2/Curation-TreeValGal/TreeValGal_optionalwindowmasker.ga @@ -3,38 +3,36 @@ "annotation": "", "comments": [ { - "child_comments": [ - 1 - ], - "color": "black", + "color": "red", "data": { - "title": "" + "size": 2, + "text": "Notes on inputs for mashmap pafs:

mashmap settings varied here are % identity and match length - like steps 27-29 to cover a range since the evolutionary distances depend on the curator's choice of comparison organisms for some of them.

At this point, \"tuning\" for dotplots has aimed for a \"reasonable\" number of paf rows. Too many gets slow and dark. Too few and the patterns might be too faint to see clearly or when zoomed.

Need curator eyeballs and feedback for that.

Two different mashmap uses - for the selected related species sequence similarity patterns, and for independent haplotype or reference, if available

The 2 optional related species sequence similarity will depend on evolutionary distance. Discernable diagonal segments of mashmap hits become less likely - although they are seen with close relatives.  

There are two mashmaps to give some spread but more could be added. Nice thing about TreeValGal workflow is that adding tracks is not at all difficult. Adding a wider range of parameters makes sense - particularly once the curators start looking at what they are getting?

Need to use the track menu for the main reference vs external reference/haplotype tracks - off by default.

Again there's a spread of 3 mashmap settings to get different numbers of hits - 20-40k seems to be a sweet spot for plots - more gets slower and clumsy but could be important if high resolution needed.
" }, - "id": 0, + "id": 2, "position": [ - 10, - 40 + 0, + 257.03358562228095 ], "size": [ - 1430, - 140 + 1020, + 1030 ], - "type": "frame" + "type": "text" }, { - "color": "red", + "color": "blue", "data": { "size": 2, - "text": "Notes on inputs for mashmap pafs:

mashmap settings varied here are % identity and match length - like steps 27-29 to cover a range since the evolutionary distances depend on the curator's choice of comparison organisms for some of them.

At this point, \"tuning\" for dotplots has aimed for a \"reasonable\" number of paf rows. Too many gets slow and dark. Too few and the patterns might be too faint to see clearly or when zoomed.

Need curator eyeballs and feedback for that.

Two different mashmap uses - for the selected related species sequence similarity patterns, and for independent haplotype or reference, if available

The 2 optional related species sequence similarity will depend on evolutionary distance. Discernable diagonal segments of mashmap hits become less likely - although they are seen with close relatives.  

There are two mashmaps to give some spread but more could be added. Nice thing about TreeValGal workflow is that adding tracks is not at all difficult. Adding a wider range of parameters makes sense - particularly once the curators start looking at what they are getting?

Need to use the track menu for the main reference vs external reference/haplotype tracks - off by default.

Again there's a spread of 3 mashmap settings to get different numbers of hits - 20-40k seems to be a sweet spot for plots - more gets slower and clumsy but could be important if high resolution needed.
" + "text": "General:

Uses Delphine's method for optional inputs.

Mashmap reference uses repeatmasker masked sequence to filter noise from low complexity element matches, and \"orthologous\" mapping for both haplotypes and related species. Not sure what's optimal. Advice appreciated

JBrowse tracks can be off in the default view to make things less overwhelming. Need to open the track menu to see them. The independent haplotype paf tracks are hidden by default for example

The output reports on an assembly, optionally mat/pat haplotypes if contig names do not overlap - rubbish will be displayed if they do. With some pacbio reads, the main tracks can be produced. The annotation tracks require human choices for external genes and proteins, and comparison species for pafs.

Mashmap paf tracks are prepared for two comparison species references, and for an independent haplotype or reference genome.



" }, - "id": 2, + "id": 4, "position": [ - 0, - 257.03358562228095 + 20, + 1537.033585622281 ], "size": [ - 1020, - 1030 + 700, + 870 ], "type": "text" }, @@ -56,28 +54,30 @@ "type": "text" }, { + "child_comments": [ + 1 + ], "color": "black", "data": { - "size": 2, - "text": "Technical discussion document under development here:
https://docs.google.com/document/d/1YbQZsFC8oDJIGos5yLFbApumpK4yIz02qfwYEypDfI8" + "title": "" }, - "id": 1, + "id": 0, "position": [ - 20, - 80 + 10, + 40 ], "size": [ - 1380, - 80 + 1430, + 140 ], - "type": "text" + "type": "frame" }, { "child_steps": [ + 14, 25, 30, 15, - 14, 16, 17 ], @@ -96,17 +96,34 @@ ], "type": "frame" }, + { + "color": "black", + "data": { + "size": 2, + "text": "Technical discussion document under development here:
https://docs.google.com/document/d/1YbQZsFC8oDJIGos5yLFbApumpK4yIz02qfwYEypDfI8" + }, + "id": 1, + "position": [ + 20, + 80 + ], + "size": [ + 1380, + 80 + ], + "type": "text" + }, { "child_steps": [ 24, - 6, 12, 7, 10, 8, 11, 13, - 9 + 9, + 6 ], "color": "turquoise", "data": { @@ -122,23 +139,6 @@ 900 ], "type": "frame" - }, - { - "color": "blue", - "data": { - "size": 2, - "text": "General:

Uses Delphine's method for optional inputs.

Mashmap reference uses repeatmasker masked sequence to filter noise from low complexity element matches, and \"orthologous\" mapping for both haplotypes and related species. Not sure what's optimal. Advice appeciated

JBrowse tracks can be off in the default view to make things less overwhelming. Need to open the track menu to see them. The independent haplotype paf tracks are hidden by default for example

The output reports on an assembly, optionally mat/pat haplotypes if contig names do not overlap - rubbish will be displayed if they do. With some pacbio reads, the main tracks can be produced. The annotation tracks require human choices for external genes and proteins, and comparison species for pafs.

Mashmap paf tracks are prepared for two comparison species references, and for an independent haplotype or refererence genome.



" - }, - "id": 4, - "position": [ - 20, - 1537.033585622281 - ], - "size": [ - 700, - 870 - ], - "type": "text" } ], "creator": [ @@ -195,13 +195,7 @@ "type": "parameter_input", "uuid": "35d60a1d-85f2-4c8b-af5e-2ac2e8db6ef3", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "22941942-7c0b-44f2-845d-94089066ff4f" - } - ] + "workflow_outputs": [] }, "1": { "annotation": "This will be the sequence, divided into individual fasta contigs, on which tracks are arranged and displayed.", @@ -282,13 +276,7 @@ "type": "parameter_input", "uuid": "e57810b3-9f4f-477d-97f6-929f5dfca0e9", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "e49b8515-4e8f-4506-8b9a-5b3b0ff173fa" - } - ] + "workflow_outputs": [] }, "4": { "annotation": "dfam is problematic. Aves works for birds! That's why there's also a model free one - windowmasker - for comparison.", @@ -315,13 +303,7 @@ "type": "parameter_input", "uuid": "338f375e-10f9-4461-a938-44eca7c8f966", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "ca457060-575c-4e27-8b60-7b146d5697fd" - } - ] + "workflow_outputs": [] }, "5": { "annotation": "CCCTAA for the treeval test data or for vertebrates seems to work. 
This uses seqtk-telo and seems to give plausible results with the appropriate repeat element for vertebrates....", @@ -348,13 +330,7 @@ "type": "parameter_input", "uuid": "cb1c7f9f-1bca-43a8-a068-38b00bb37af6", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "7e0abb41-6311-4e67-8de6-6c0f9c1a4ea2" - } - ] + "workflow_outputs": [] }, "6": { "annotation": "", @@ -381,13 +357,7 @@ "type": "parameter_input", "uuid": "d1f53ba4-a094-44db-b4ce-2e534e0dab16", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "cd12ef9a-4ff5-4c29-ba21-0969f6da461c" - } - ] + "workflow_outputs": [] }, "7": { "annotation": "", @@ -414,13 +384,7 @@ "type": "data_input", "uuid": "00be13df-0a64-4ed0-87c7-3f97e9badf3e", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "9ed8a017-60e0-4f67-a25c-35a8600c087f" - } - ] + "workflow_outputs": [] }, "8": { "annotation": "", @@ -447,13 +411,7 @@ "type": "parameter_input", "uuid": "93f00f8e-2226-4ef0-aacf-f9ab84cca0d5", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "0a26ce63-af55-4912-a56f-5e04dd7ccd34" - } - ] + "workflow_outputs": [] }, "9": { "annotation": "", @@ -464,10 +422,10 @@ "inputs": [ { "description": "", - "name": "genes fasta in" + "name": "genes fasta input" } ], - "label": "genes fasta in", + "label": "genes fasta input", "name": "Input dataset", "outputs": [], "position": { @@ -480,13 +438,7 @@ "type": "data_input", "uuid": "05aeefc3-a4b1-4788-9d1b-095621fa1cb3", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "563e0ac9-a7ae-4909-8226-1060c1e58857" - } - ] + "workflow_outputs": [] }, "10": { "annotation": "", @@ -513,13 +465,7 @@ "type": "parameter_input", "uuid": "0235e72e-1526-4fa4-8601-d4eb5e15d7b4", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": 
"be386965-a5f1-4e47-be73-036e06455716" - } - ] + "workflow_outputs": [] }, "11": { "annotation": "", @@ -530,10 +476,10 @@ "inputs": [ { "description": "", - "name": "rna fasta in" + "name": "rna fasta input" } ], - "label": "rna fasta in", + "label": "rna fasta input", "name": "Input dataset", "outputs": [], "position": { @@ -546,13 +492,7 @@ "type": "data_input", "uuid": "b10094d0-c733-4825-bbec-cb56e46239c4", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "e6fcaede-2d4c-4f25-8296-f269666404de" - } - ] + "workflow_outputs": [] }, "12": { "annotation": "", @@ -579,13 +519,7 @@ "type": "parameter_input", "uuid": "4abbe4ea-96bf-436b-9e8b-24b24700f47e", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "2b922eee-d8e9-431a-9246-b9863d7ae121" - } - ] + "workflow_outputs": [] }, "13": { "annotation": "", @@ -596,15 +530,15 @@ "inputs": [ { "description": "", - "name": "AA fasta in" + "name": "AA fasta input" } ], - "label": "AA fasta in", + "label": "AA fasta input", "name": "Input dataset", "outputs": [], "position": { "left": 1350, - "top": 1730 + "top": 1730.40625 }, "tool_id": null, "tool_state": "{\"optional\": true, \"tag\": null}", @@ -612,13 +546,7 @@ "type": "data_input", "uuid": "0efaa539-d992-4816-bb1a-b1d704fef9d8", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "f66306c9-eb4a-4fea-97a3-df3e08468965" - } - ] + "workflow_outputs": [] }, "14": { "annotation": "", @@ -645,13 +573,7 @@ "type": "parameter_input", "uuid": "5421a1f3-648c-4994-a4bb-9d9721fdc80b", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "196c4974-e55b-4eb2-b9c7-2d1d4296b226" - } - ] + "workflow_outputs": [] }, "15": { "annotation": "", @@ -678,13 +600,7 @@ "type": "data_input", "uuid": "3864c5b2-08fd-40ee-9922-363960eeaf5e", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": 
"output", - "uuid": "9b1bba31-20e1-40c6-827f-b67d71246b94" - } - ] + "workflow_outputs": [] }, "16": { "annotation": "", @@ -711,13 +627,7 @@ "type": "parameter_input", "uuid": "86415a69-308a-4f72-af00-54a93c22680a", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "5633d6e2-a796-4866-9405-7e0e763ce941" - } - ] + "workflow_outputs": [] }, "17": { "annotation": "", @@ -2230,48 +2140,7 @@ "type": "subworkflow", "uuid": "4871e101-dadd-42a4-8523-0286d6c6a045", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "1:output", - "uuid": "86a66354-3131-44c8-9d32-fe033c534d33" - }, - { - "label": null, - "output_name": "2:output", - "uuid": "8bc050a6-17fc-4deb-929a-859a9f2ab5d9" - }, - { - "label": null, - "output_name": "3:output", - "uuid": "c6250f26-3208-4df9-9e13-abac28c738ab" - }, - { - "label": null, - "output_name": "4:output", - "uuid": "6d4a15eb-5729-4ade-ab31-66e82adf5fd5" - }, - { - "label": null, - "output_name": "5:output", - "uuid": "0fca515d-4d36-4102-a3bd-e810ccca20f4" - }, - { - "label": null, - "output_name": "6:output", - "uuid": "38fdd8d8-9476-4fba-b0b0-1c41a36b6ec4" - }, - { - "label": null, - "output_name": "7:output", - "uuid": "01149453-4bbe-4c21-803c-8cf15035c2ff" - }, - { - "label": null, - "output_name": "8:output", - "uuid": "6c01d677-b23a-4dab-bf8f-9994dcb848e3" - } - ] + "workflow_outputs": [] }, "25": { "annotation": "", @@ -3134,18 +3003,7 @@ "type": "subworkflow", "uuid": "64cf26e7-60c0-4b8a-af77-50f55ce7a035", "when": null, - "workflow_outputs": [ - { - "label": null, - "output_name": "0:output", - "uuid": "843ed348-6b5e-4d9f-aafe-a20064558380" - }, - { - "label": null, - "output_name": "4:output", - "uuid": "c66f7e28-79a1-43e6-a7fd-59680b3f8012" - } - ] + "workflow_outputs": [] }, "31": { "annotation": "", @@ -4499,6 +4357,6 @@ } }, "tags": [], - "uuid": "7cc58d30-31b2-4fe9-aae0-d73b75ad321c", - "version": 5 + "uuid": "824f3781-7284-4046-ab22-91f8aa1dab1c", + "version": 
6 } \ No newline at end of file