"""Extract tab-separated sentence pairs for one language pair.

Reads a links file (``sentence_id<TAB>translation_id``) and a sentences
file (``id<TAB>language<TAB>text``), then writes one line per
source-language sentence and each of its target-language translations as
``source_text<TAB>target_text``.
"""
import csv
from collections import defaultdict


def read_links(path):
    """Return a dict mapping each sentence id to the list of ids linked to it.

    Ids are kept as strings, in the order they appear in the file. Rows
    with fewer than two fields (e.g. blank lines) are skipped.
    """
    links = defaultdict(list)
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, encoding='utf8', mode='r', newline='') as infile:
        # QUOTE_NONE: the file is a raw tab-separated dump, so a '"' is
        # ordinary text and must not start a quoted field.
        reader = csv.reader(infile, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in reader:
            if len(row) < 2:
                continue
            links[row[0]].append(row[1])
    # Hand back a plain dict so later lookups can never create keys.
    return dict(links)


def read_sentences(path):
    """Return the rows of the sentences file as lists of strings.

    Each returned row has at least three fields: id, language, text.
    Shorter rows are skipped instead of raising ``IndexError`` later.
    """
    with open(path, encoding='utf8', mode='r', newline='') as infile:
        # Without QUOTE_NONE, a sentence beginning with '"' would lose its
        # quotes, and an unbalanced quote would merge the following rows
        # into a single field.
        reader = csv.reader(infile, delimiter='\t', quoting=csv.QUOTE_NONE)
        return [row for row in reader if len(row) >= 3]


def build_pairs(sentences, links, source_lang='cmn', target_lang='jpn'):
    """Return the output lines ``source_text<TAB>target_text<NEWLINE>``.

    Args:
        sentences: rows as returned by ``read_sentences``.
        links: mapping as returned by ``read_links``.
        source_lang: substring the language field of a source row must
            contain.
        target_lang: substring the language field of a translation must
            contain.

    Source sentences are visited in file order and their translations in
    link order. If an id occurs more than once in ``sentences``, the last
    occurrence is the one used when it is looked up as a translation.
    """
    by_id = {row[0]: (row[1], row[2]) for row in sentences}
    pairs = []
    for row in sentences:
        sentence_id, language, text = row[0], row[1], row[2]
        # Substring test (not equality) is kept from the original script.
        if source_lang not in language:
            continue
        for linked_id in links.get(sentence_id, ()):
            target = by_id.get(linked_id)
            # A link may point at an id missing from the sentences file.
            if target is None:
                continue
            target_language, target_text = target
            if target_lang in target_language:
                pairs.append(text + '\t' + target_text + '\n')
    return pairs


def main(links_path='links.csv', sentences_path='sentences.csv',
         out_path='sentences_cmn_jpn.txt', source_lang='cmn',
         target_lang='jpn'):
    """Read both input files and write the sentence pairs to ``out_path``."""
    links = read_links(links_path)
    print('Finished reading in links dictionary.')

    sentences = read_sentences(sentences_path)
    print('Finished reading in sentence list.')

    pairs = build_pairs(sentences, links, source_lang, target_lang)
    print('Finished converting to sentences dictionary.')

    # Collect lines in a list and write them in one go; repeated string
    # concatenation can degrade to quadratic time on large inputs.
    with open(out_path, encoding='utf8', mode='w') as outfile:
        outfile.writelines(pairs)


if __name__ == '__main__':
    main()