caterpillar/plugins/bio.py
2024-08-31 15:48:35 +09:00

109 lines
4.7 KiB
Python

#!/usr/bin/python3
#
# bio.py
# Biopython plugin for Caterpillar Proxy
#
# Euiseo Cha (Wonkwang University) <zeroday0619_dev@outlook.com>
# https://github.com/gnh1201/caterpillar
# Created at: 2024-07-02
# Updated at: 2024-07-02
#
from socket import socket
from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction
from base import Extension
def _analyze_sequence(sequence: str) -> dict[str, str]:
    """
    Run a fixed set of Biopython ``Seq`` transformations on a DNA sequence.

    The input string is wrapped in a ``Seq`` object and every result is
    converted back to ``str`` before being returned.

    :param sequence: DNA sequence (string) to be analyzed.
    :return: Dictionary with one entry per transformation, in this order:
        - complement: result of ``Seq.complement()``.
        - complement_rna: result of ``Seq.complement_rna()``.
        - reverse_complement: result of ``Seq.reverse_complement()``.
        - reverse_complement_rna: result of ``Seq.reverse_complement_rna()``.
        - transcription: result of ``Seq.transcribe()`` (DNA to RNA).
        - translation: result of ``Seq.translate()`` called on the input
          sequence itself (amino acid sequence).
        - back_transcribe: result of ``Seq.back_transcribe()``.
    """
    seq = Seq(sequence)
    # Every transformation is derived from the same Seq object; none of them
    # feeds into another.
    return {
        "complement": str(seq.complement()),
        "complement_rna": str(seq.complement_rna()),
        "reverse_complement": str(seq.reverse_complement()),
        "reverse_complement_rna": str(seq.reverse_complement_rna()),
        "transcription": str(seq.transcribe()),
        "translation": str(seq.translate()),
        "back_transcribe": str(seq.back_transcribe()),
    }
def _gc_content_calculation(sequence: str) -> dict[str, float]:
    """
    Calculate the GC content of a given DNA sequence.

    The value is computed by ``Bio.SeqUtils.gc_fraction`` and returned wrapped
    in a dictionary (not as a bare float), so the return annotation is
    ``dict[str, float]``.

    :param sequence: DNA sequence (string) for which to calculate the GC content.
    :return: Dictionary with the single key "gc_content" holding the value
        returned by ``gc_fraction`` for the sequence.
        Example: {"gc_content": 0.5142857142857142} for
        "ATGCGTACGTAGCTAGCTAGCGTAGCTAGCTGACT" (18 G/C bases out of 35).
    """
    return {"gc_content": gc_fraction(sequence)}
class PyBio(Extension):
    """
    Caterpillar extension exposing Biopython-backed sequence helpers.

    On construction it sets its type to "rpcmethod", its method name to
    "analyze_sequence_init", and lists "analyze_sequence" and
    "gc_content_calculation" as exported methods.
    """

    def __init__(self):
        # Attributes describing this extension; how they are consumed is up to
        # the Extension base class / loader, which is not visible in this file.
        self.type = "rpcmethod"
        self.method = "analyze_sequence_init"
        self.exported_methods = [
            "analyze_sequence",
            "gc_content_calculation",
        ]

    def dispatch(self, type, id, params, conn):
        """
        Send a fixed greeting over the connection.

        :param type: Not used in this function.
        :param id: Not used in this function.
        :param params: Not used in this function.
        :param conn: Connection object; its ``send`` method is called with
                     the greeting bytes.
        """
        conn.send(b"Greeting! dispatch")

    def analyze_sequence(self, type, id, params, conn: socket):
        """
        Analyze the DNA sequence found under ``params["sequence"]``.

        Delegates to the module-level ``_analyze_sequence`` helper.

        :param type: Not used in this function.
        :param id: Not used in this function.
        :param params: Dictionary containing the DNA sequence with the key "sequence".
                       Example: {"sequence": "ATGCGTACGTAGCTAGCTAGCGTAGCTAGCTGACT"}
        :param conn: Not used in this function.
        :return: Dictionary with the keys back_transcribe, complement,
                 complement_rna, reverse_complement, reverse_complement_rna,
                 transcription and translation, each holding a string.
                 Example: {"back_transcribe": "ATGCGTACGTAGCTAGCTAGCGTAGCTAGCTGACT",
                           "complement": "TACGCATGCATCGATCGATCGCATCGATCGACTGA",
                           "complement_rna": "UACGCAUGCAUCGAUCGAUCGCAUCGAUCGACUGA",
                           "reverse_complement": "AGTCAGCTAGCTACGCTAGCTAGCTACGTACGCAT",
                           "reverse_complement_rna": "AGUCAGCUAGCUACGCUAGCUAGCUACGUACGCAU",
                           "transcription": "AUGCGUACGUAGCUAGCUAGCGUAGCUAGCUGACU",
                           "translation": "MRT*LASVAS*"}
        :raises KeyError: If ``params`` has no "sequence" key.
        """
        dna = params["sequence"]
        return _analyze_sequence(dna)

    def gc_content_calculation(self, type, id, params, conn: socket):
        """
        Compute the GC content of the DNA sequence found under ``params["sequence"]``.

        Delegates to the module-level ``_gc_content_calculation`` helper.

        :param type: Not used in this function.
        :param id: Not used in this function.
        :param params: Dictionary containing the DNA sequence with the key "sequence".
                       Example: {"sequence": "ATGCGTACGTAGCTAGCTAGCGTAGCTAGCTGACT"}
        :param conn: Not used in this function.
        :return: Dictionary containing the GC content as a float.
                 Example: {"gc_content": 0.5142857142857142}
        :raises KeyError: If ``params`` has no "sequence" key.
        """
        dna = params["sequence"]
        return _gc_content_calculation(dna)