本文共 4847 字,大约阅读时间需要 16 分钟。
展开全部
您安好.
----------------------------
# File : warmup.py
def lines_starts_with(f, s):
    """Return a list of the lines of the given open file that start with the
    given str, with leading and trailing whitespace removed from each line.

    The prefix test is applied to the raw line (before stripping), so a line
    that begins with whitespace only matches if `s` begins with that
    whitespace too.

    Arguments:
    - `f`: the file (any iterable of lines works)
    - `s`: str at the beginning of the line

    Returns:
    - list of matching lines, stripped, in file order
    """
    return [line.strip() for line in f if line.startswith(s)]
def file_to_dictionary(f, s):
    """Build a dictionary from an open file whose lines each hold a unique
    string, then a delimiter, then more text.

    Each line is split at the first occurrence of the delimiter. The text
    before it becomes the key and the text after it becomes the value; both
    are stripped of leading and trailing whitespace and the delimiter itself
    is discarded. A line that does not contain the delimiter is stored with
    an empty-string value. Blank (whitespace-only) lines are skipped so that
    a trailing newline does not create a spurious '' entry.

    Arguments:
    - `f`: the file (any iterable of lines works)
    - `s`: the delimiter str

    Returns:
    - dict mapping each line's key to its value; a later line with the same
      key overwrites an earlier one
    """
    result = {}
    for line in f:
        if not line.strip():
            # Nothing but whitespace: no key to record.
            continue
        key, _, value = line.partition(s)
        result[key.strip()] = value.strip()
    return result
def merge_dictionaries(d1, d2):
    """Return a new dictionary that merges the two given dictionaries, with
    every value wrapped in a list.

    A key present in only one input maps to a one-element list. A key present
    in both maps to a two-element list holding the value from `d1` followed
    by the value from `d2`, even if the two values are equal.

    merge_dictionaries({ 1 : 'a', 2 : 9, -8 : 'w'}, {2 : 7, 'x' : 3, 1 : 'a'})
    should return {1 : ['a', 'a'], 2 : [9, 7], -8 : ['w'], 'x' : [3]}

    Arguments:
    - `d1`, `d2`: dicts to be merged (neither is modified)

    Returns:
    - dict mapping each key to a list of its values
    """
    result = {}
    # Visiting d1 before d2 keeps d1's value first in every shared list.
    for source in (d1, d2):
        for key in source:
            result.setdefault(key, []).append(source[key])
    return result
-----------------------------------------------------
# File fasta.py
from warmup import *
def count_sequences(f):
    """Return the number of FASTA sequences in the given open file. Each
    sequence begins with a single line that starts with >.

    The count is the number of lines beginning with '>', obtained through
    lines_starts_with, which iterates over the file.

    Arguments:
    - `f`: the file

    Returns:
    - int number of header lines found
    """
    headers = lines_starts_with(f, '>')
    return len(headers)
def get_sequence_list(f):
    """Return a nested list where each element is a 2-element list containing a
    FASTA header and a FASTA sequence from the given open file (both strs).

    A line starting with '>' opens a new record; its stripped text is the
    header. Every following non-header line is stripped and concatenated to
    form that record's sequence. One entry is produced per header line, even
    when the sequence is empty. Sequence text that appears before the first
    header is kept under an empty-string header. An empty file yields an
    empty list.

    Arguments:
    - `f`: the file (any iterable of lines works)

    Returns:
    - list of [header, sequence] lists in file order
    """
    result = []
    header = ''
    chunks = []
    have_record = False  # True once there is a header or data to emit
    for line in f:
        if line.startswith('>'):
            if have_record:
                result.append([header, ''.join(chunks)])
            header = line.strip()
            chunks = []
            have_record = True
        else:
            text = line.strip()
            if text:
                chunks.append(text)
                have_record = True
    if have_record:
        result.append([header, ''.join(chunks)])
    return result
def merge_files(f1, f2):
    """Return a nested list containing every unique FASTA header and sequence
    pair from the two open input files. Each element of the list to be returned
    is a 2-element list containing a FASTA header and a FASTA sequence.

    Pairs are read with get_sequence_list, first from `f1` and then from
    `f2`. The first occurrence of each (header, sequence) pair is kept and
    later duplicates are dropped, so the result preserves reading order.

    Arguments:
    - `f1`, `f2`: the files

    Returns:
    - list of unique [header, sequence] lists
    """
    seen = set()
    merged = []
    for header, sequence in get_sequence_list(f1) + get_sequence_list(f2):
        # Lists are unhashable, so membership is tracked with tuples.
        pair = (header, sequence)
        if pair not in seen:
            seen.add(pair)
            merged.append([header, sequence])
    return merged
def get_codons(s):
    """Return a list of strings containing the codons in the given DNA sequence.

    The sequence is cut into consecutive 3-character pieces starting at index
    0. If the length is not a multiple of 3, the trailing 1 or 2 characters
    are ignored.

    Arguments:
    - `s`: DNA sequence, expected to have a length divisible by 3

    Returns:
    - list of 3-character strs in sequence order
    """
    # Stop at the last full codon; range() and % behave the same on
    # Python 2 and Python 3, unlike xrange() and the / operator.
    usable = len(s) - len(s) % 3
    return [s[start:start + 3] for start in range(0, usable, 3)]
def get_amino_name_dict(f):
    """Return a dictionary constructed from the given open file, where the keys
    are amino acid codes and the values are amino acid names.

    Each line is split at its first space by file_to_dictionary: the text
    before the space is the code and the rest of the line is the name.

    Arguments:
    - `f`: the file, like amino_names.txt

    Returns:
    - dict mapping amino acid code to amino acid name
    """
    delimiter = ' '
    return file_to_dictionary(f, delimiter)
def get_codon_amino_dict(f):
    """Return a dictionary where the keys are codons and the values are amino
    acid codes from the given open file.

    Each line is split at its first ':' by file_to_dictionary: the text
    before the colon is the codon and the text after it is the amino acid
    code.

    Arguments:
    - `f`: the file, like codon_aminos.txt

    Returns:
    - dict mapping codon to amino acid code
    """
    delimiter = ':'
    return file_to_dictionary(f, delimiter)
def translate(s, d):
    """Return the given DNA sequence (the str parameter) translated into its
    amino acid codes, according to the given dictionary where the keys are
    codons and the values are amino acid codes. You may assume that the length
    of the string is divisible by 3.

    The sequence is split into codons with get_codons and each codon is
    looked up in `d`.

    Arguments:
    - `s`: given DNA sequence
    - `d`: given dictionary

    Returns:
    - list of amino acid codes, one per codon, in sequence order

    Raises:
    - KeyError if a codon of `s` is not a key of `d`
    """
    return [d[codon] for codon in get_codons(s)]
def codon_to_name(s, d1, d2):
    """Return the name of the amino acid for the given codon (the str
    parameter).

    The codon is first mapped to an amino acid code through `d1`, and that
    code is then mapped to a name through `d2`.

    Arguments:
    - `s`: given codon
    - `d1`: codons for keys and amino acid codes for values
    - `d2`: amino acid codes for keys and names for values

    Returns:
    - the amino acid name stored in `d2`

    Raises:
    - KeyError if `s` is not in `d1` or its code is not in `d2`
    """
    amino_code = d1[s]
    return d2[amino_code]
本回答由网友推荐
已赞过
已踩过
你对这个回答的评价是?
评论
收起
转载地址:http://mktnx.baihongyu.com/