-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprot.py
More file actions
98 lines (90 loc) · 2.74 KB
/
prot.py
File metadata and controls
98 lines (90 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import argparse
# Codon table: key is a nucleotide triplet (codon), value is the one-letter
# amino-acid code; '*' marks a stop codon.
# The table is written with T instead of U, so RNA input is converted to the
# T alphabet before lookup (see translate()).
# Credit for the table:
# https://gist.github.com/juanfal/09d7fb53bd367742127e17284b9c47bf#file-gistfile1-txt
codontab = {
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',  # Serine
    'TTC': 'F', 'TTT': 'F',                          # Phenylalanine
    'TTA': 'L', 'TTG': 'L',                          # Leucine
    'TAC': 'Y', 'TAT': 'Y',                          # Tyrosine
    'TAA': '*', 'TAG': '*',                          # Stop
    'TGC': 'C', 'TGT': 'C',                          # Cysteine
    'TGA': '*',                                      # Stop
    'TGG': 'W',                                      # Tryptophan
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',  # Leucine
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',  # Proline
    'CAC': 'H', 'CAT': 'H',                          # Histidine
    'CAA': 'Q', 'CAG': 'Q',                          # Glutamine
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',  # Arginine
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I',              # Isoleucine
    'ATG': 'M',                                      # Methionine
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',  # Threonine
    'AAC': 'N', 'AAT': 'N',                          # Asparagine
    'AAA': 'K', 'AAG': 'K',                          # Lysine
    'AGC': 'S', 'AGT': 'S',                          # Serine
    'AGA': 'R', 'AGG': 'R',                          # Arginine
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',  # Valine
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',  # Alanine
    'GAC': 'D', 'GAT': 'D',                          # Aspartic acid
    'GAA': 'E', 'GAG': 'E',                          # Glutamic acid
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',  # Glycine
}


def read_rna(path):
    """Return the contents of the file at *path* as one long RNA string.

    Every line is stripped of leading/trailing whitespace and the lines are
    concatenated in order, so a sequence wrapped over several lines is read
    as a single string.

    Args:
        path: Path of the RNA string file to read.

    Returns:
        The concatenated sequence.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as handle:
        return "".join(line.strip() for line in handle)


def translate(rna):
    """Translate an RNA (or DNA) string into a protein string.

    The sequence is read three nucleotides at a time starting at index 0.
    Each codon is looked up in ``codontab`` and its amino-acid letter is
    appended to the result. Translation ends at the first stop codon or when
    fewer than three nucleotides remain; leftover nucleotides are dismissed.

    Args:
        rna: Nucleotide string. ``U`` and ``T`` are interchangeable and
            letter case is ignored.

    Returns:
        The protein as a string of one-letter amino-acid codes, without the
        stop marker.

    Raises:
        KeyError: If a complete codon read before the first stop codon is
            not in ``codontab`` (e.g. it contains a character other than
            A, C, G, T or U).
    """
    # Normalise to the alphabet used by the table: upper-case, T instead of U.
    sequence = rna.upper().replace("U", "T")
    residues = []
    # Step through codon start offsets instead of repeatedly slicing off the
    # front of the string, which would copy the remainder on every iteration.
    # The upper bound len - 2 skips a trailing partial codon.
    for start in range(0, len(sequence) - 2, 3):
        amino_acid = codontab[sequence[start:start + 3]]
        if amino_acid == '*':
            break
        residues.append(amino_acid)
    return "".join(residues)


def main():
    """Parse the command line, translate the given RNA file, print the protein."""
    parser = argparse.ArgumentParser()
    parser.add_argument("rna_file", type=str, help="The path of the RNA string file to read.")
    args = parser.parse_args()
    print(translate(read_rna(args.rna_file)))


# Only run the command-line workflow when executed as a script, so the module
# can be imported (e.g. for testing) without parsing argv or touching files.
if __name__ == "__main__":
    main()