Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
python_graph_minhash
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Juhász Judit
python_graph_minhash
Commits
abbabdb3
Commit
abbabdb3
authored
1 year ago
by
Ligeti Balázs
Browse files
Options
Downloads
Patches
Plain Diff
Assembly query
parent
aa0396c9
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
bin/assembly_example.ipynb
+96
-0
96 additions, 0 deletions
bin/assembly_example.ipynb
with
96 additions
and
0 deletions
bin/assembly_example.ipynb
0 → 100644
+
96
−
0
View file @
abbabdb3
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "43ecd078-674a-4027-a226-f2ca8d7edb02",
"metadata": {},
"outputs": [],
"source": [
"# Dependencies\n",
"from os.path import join\n",
"from Bio import SeqIO\n",
"import pandas as pd\n"
]
},
{
"cell_type": "markdown",
"id": "1273f0c1-2851-4508-98d1-1098a5126e0e",
"metadata": {},
"source": [
"## Some functions without error handling\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4334e544-06de-41f2-8760-6b4d94604534",
"metadata": {},
"outputs": [],
"source": [
"def get_phage_filenames_fromdb(phage_mapping, query_srr):\n",
"\n",
" phage_name_list = list(phage_mapping[phage_mapping['SRR number'] == query_srr]['Phage name'])\n",
"\n",
"\n",
" return phage_name_list\n",
" \n",
"def get_phage_fasta_files(phage_name_list, phage_seqdir):\n",
"\n",
" phage_seq_paths = [join(phage_seqdir, f'{phage_name}.fa' )for phage_name in phage_name_list]\n",
"\n",
" return phage_seq_paths\n",
" \n",
"\n",
"def get_phage_sequences(phage_fasta_files):\n",
" ''' Loading the phage sequences with biopython'''\n",
"\n",
" phage_seqs = []\n",
" for phage_seq_file in phage_fasta_files:\n",
" act_phage_seqs = list(SeqIO.parse(phage_seq_file, \"fasta\"))\n",
" phage_seqs.extend(act_phage_seqs)\n",
" return phage_seqs\n",
"\n",
"def get_assembly_folder(run_id, assembly_batch_mapping, assembly_basedir):\n",
"\n",
" srr_batch_id = list(assembly_batch_mapping[assembly_batch_mapping['Run'] == run_id]['srr_batch'])[0]\n",
"\n",
" expected_assembly_folder = join(assembly_basedir, srr_batch_id, run_id)\n",
" \n",
" return expected_assembly_folder"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8730f244-ed8e-4ec4-bea9-30419b206614",
"metadata": {},
"outputs": [],
"source": [
"## Loading the database\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
%% Cell type:code id:43ecd078-674a-4027-a226-f2ca8d7edb02 tags:
```
python
# Dependencies
from
os.path
import
join
from
Bio
import
SeqIO
import
pandas
as
pd
```
%% Cell type:markdown id:1273f0c1-2851-4508-98d1-1098a5126e0e tags:
## Some functions without error handling
%% Cell type:code id:4334e544-06de-41f2-8760-6b4d94604534 tags:
```
python
def get_phage_filenames_fromdb(phage_mapping, query_srr):
    """Return the phage names recorded for one SRR run.

    Rows of ``phage_mapping`` whose 'SRR number' column equals ``query_srr``
    are selected and their 'Phage name' values are returned as a list
    (empty when no row matches).
    """
    is_query_run = phage_mapping['SRR number'] == query_srr
    matching_rows = phage_mapping[is_query_run]
    return list(matching_rows['Phage name'])
def get_phage_fasta_files(phage_name_list, phage_seqdir):
    """Build the FASTA path of every phage in ``phage_name_list``.

    Each path is ``phage_seqdir`` joined with '<phage name>.fa'; the output
    keeps the order of the input names.
    """
    fasta_paths = []
    for name in phage_name_list:
        fasta_paths.append(join(phage_seqdir, f'{name}.fa'))
    return fasta_paths
def get_phage_sequences(phage_fasta_files):
    """Load the records of every phage FASTA file with Biopython.

    Files are parsed in the given order with ``SeqIO.parse(..., "fasta")``
    and all of their records are returned in one flat list.
    """
    return [
        record
        for fasta_path in phage_fasta_files
        for record in SeqIO.parse(fasta_path, "fasta")
    ]
def get_assembly_folder(run_id, assembly_batch_mapping, assembly_basedir):
    """Return the expected assembly folder of a sequencing run.

    The batch of ``run_id`` is looked up in ``assembly_batch_mapping`` (rows
    whose 'Run' column equals ``run_id``, value of the 'srr_batch' column)
    and the folder is ``assembly_basedir/<srr_batch>/<run_id>``. When several
    rows match, the first one is used.

    Raises:
        IndexError: if no row of ``assembly_batch_mapping`` matches
            ``run_id``.
    """
    run_rows = assembly_batch_mapping[assembly_batch_mapping['Run'] == run_id]
    batch_ids = run_rows['srr_batch']

    if batch_ids.empty:
        # Same exception type a bare ``[0]`` lookup would raise, but the
        # message names the run that is missing from the mapping.
        raise IndexError(
            f"run {run_id!r} not found in the 'Run' column of the "
            "assembly batch mapping"
        )

    # Only the first match is needed; no need to materialise the column.
    srr_batch_id = batch_ids.iloc[0]
    return join(assembly_basedir, srr_batch_id, run_id)
```
%% Cell type:code id:8730f244-ed8e-4ec4-bea9-30419b206614 tags:
```
python
## Loading the database
```
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment