"""Common functions and classes for phonagen tools"""
import json
import io
import sys
import csv
import random
import unicodedata


class Phonology:
    """A phonology: a set of phoneme entries and their transcriptions.

    Each entry is a dict with at least the keys 'id', 'description' and
    'phoneme', plus one key per additional transcription. The description may
    carry tags (e.g. '#onset', '#vowel', '#stressed') that drive the
    classification predicates of this class.
    """

    def __init__(self, id = '', description = '', mainTranscription = ''):
        self.id = id
        self.description = description
        self.transcriptions = []
        self.mainTranscription = mainTranscription
        self.entries = {}  # id -> entry

    def isValid(self):
        """Return True when the phonology has a non-empty id."""
        return self.id != ''

    def has(self, id):
        """Return True when `id` is the id of an entry of this phonology."""
        return id in self.entries

    def toJsonStruct(self):
        """Convert a Phonology to a Json structure"""
        return {
            'id': self.id,
            'description': self.description,
            'transcriptions': self.transcriptions,
            'main-transcription': self.mainTranscription,
            'entries': list(self.entries.values()),
        }

    def fromJsonStruct(self, struct):
        """Fill a Phonology from a Json structure"""
        self.id = struct['id']
        self.description = struct['description']
        self.transcriptions = struct['transcriptions']
        self.mainTranscription = struct['main-transcription']
        self.entries = {x['id']: x for x in struct['entries']}

    def fromCsv(self, file):
        """Fill a Phonology from a Csv file

        The first row is the header. Columns other than 'id' and 'description'
        are transcriptions; a 'phoneme' column and at least one other
        transcription are required. When not already set, the main
        transcription defaults to the first non-phoneme transcription and the
        id defaults to the main transcription.

        Raises Exception when the file is empty, when a required column is
        missing or when the main transcription is not a column of the file.
        """
        # The file holds IPA text: read it as UTF-8 (like the JSON files), and
        # let the csv module handle the line endings (newline='').
        with open(file, encoding='utf-8', newline='') as csvfile:
            fileReader = csv.reader(csvfile)
            # get csv header
            header = next(fileReader, None)
            if header is None:
                raise Exception('Empty csv file', file)
            # get the transcriptions (header items not id or description)
            self.transcriptions = [x for x in header if x not in ['id', 'description']]
            # Check: self.transcriptions should contain 'phoneme'
            if 'phoneme' not in self.transcriptions:
                raise Exception('phoneme column not found in ', file)
            # Check: self.transcriptions should have at least two items
            if len(self.transcriptions) < 2:
                raise Exception('No transcription found outside phoneme in file ', file, 'Did you named it id or description ?')
            # get the first header item which is not one of those: id, description, phoneme
            guessedMainTranscription = next(x for x in header if x not in ['id', 'description', 'phoneme'])
            # If main-transcription was not given on the command line, use the guess as main-transcription
            if self.mainTranscription == '':
                self.mainTranscription = guessedMainTranscription
            # Check: self.mainTranscription should be in self.transcriptions
            if self.mainTranscription not in self.transcriptions:
                raise Exception('main-transcription', self.mainTranscription, 'not in list of transcriptions')
            # If id was not given on the command line, use the mainTranscription as the id
            if self.id == '':
                self.id = self.mainTranscription
            # parse entries
            for row in fileReader:
                # Cells beyond the header width are ignored
                entry = dict(zip(header, row))
                # All absent elements are set to ''
                for column in header[len(row):]:
                    entry[column] = ''
                # if both phoneme and main-transcription are empty, skip the row
                if (entry['phoneme'] == '') and (entry[self.mainTranscription] == ''):
                    continue
                # if id is not provided, generate it
                if 'id' not in header:
                    entry['id'] = entry['phoneme'] + '-' + entry[self.mainTranscription]
                # if description is not provided, add an empty one
                if 'description' not in header:
                    entry['description'] = ''
                self.entries[entry['id']] = entry

    def formatWord(self, idList):
        """Return a table of transcription -> string corresponding to the same word"""
        # Resolve the ids first so that an unknown id always raises KeyError
        phonemes = [self.entries[x] for x in idList]
        return {t: ''.join(p[t] for p in phonemes) for t in self.transcriptions}

    def isStress(self, id):
        """Check if an id corresponds to the stress marker (tag #stress, or "'"/"ˈ" in the phoneme)"""
        entry = self.entries[id]
        description = entry['description']
        phoneme = entry['phoneme']
        # '#stress' is a prefix of '#stressed': the latter must be excluded explicitly
        isTagged = ('#stress' in description) and ('#stressed' not in description)
        return isTagged or ("'" in phoneme) or ("ˈ" in phoneme)

    def getStress(self):
        """Return the phoneme id of the stress phoneme

        Raises Exception when the phonology has no stress phoneme.
        """
        # search for #stress tag in description
        found = [x['id'] for x in self.entries.values()
                 if ('#stress' in x['description']) and ('#stressed' not in x['description'])]
        if len(found) == 0:
            # if not tagged, search for "'" (apostrophe, u+0027) or "ˈ" (primary stress, u+02C8) in phoneme transcription
            found = [x['id'] for x in self.entries.values()
                     if ("'" in x['phoneme']) or ("ˈ" in x['phoneme'])]
        if len(found) == 0:
            raise Exception('No stress phoneme in phonology', self.id)
        return found[0]

    def isSyllableBreak(self, id):
        """Check if an id corresponds to the syllable break (tag #syllable-break, or '.' in the phoneme)"""
        entry = self.entries[id]
        return ('#syllable-break' in entry['description']) or ("." in entry['phoneme'])

    def getSyllableBreak(self):
        """Return the phoneme id of the syllable break phoneme

        Raises Exception when the phonology has no syllable break phoneme.
        """
        # search for #syllable-break tag in description
        found = [x['id'] for x in self.entries.values() if '#syllable-break' in x['description']]
        if len(found) == 0:
            # if not tagged, search for '.' (full stop, u+002E) in phoneme transcription
            found = [x['id'] for x in self.entries.values() if '.' in x['phoneme']]
        if len(found) == 0:
            raise Exception('No syllable break phoneme in phonology', self.id)
        return found[0]

    vowels = "iyɨʉɯuɪʏʊɯeøɘɵɤoəɛœɜɞʌɔæɐaɶɒɑ"

    @staticmethod
    def isVowel(phoneme):
        """Check if the first character of a phonemic transcription (after NFD decomposition) is a vowel"""
        return (len(phoneme) > 0) and (unicodedata.normalize('NFD', phoneme)[0] in Phonology.vowels)

    consonants = "mɱnɳɲŋɴpbtdʈɖcɟkɡgqɢʡʔszʃʒʂʐɕʑɸβfvθðçʝxɣχʁħʕhɦʋɹɻjɰⱱɾɽʙrʀʜʢɬɮlɭʎʟɺʘǀǃǂǁɓɗᶑʄɠʛɧʍwɫɥ"

    @staticmethod
    def isConsonant(phoneme):
        """Check if the first character of a phonemic transcription (after NFD decomposition) is a consonant"""
        return (len(phoneme) > 0) and (unicodedata.normalize('NFD', phoneme)[0] in Phonology.consonants)

    def isOnset(self, id):
        """Check if an id corresponds to a phoneme that can be in an onset, either from description, or if not available, guessed from the phonemic transcription"""
        entry = self.entries[id]
        description = entry['description']
        result = (not self.isSyllableBreak(id)) and (not self.isStress(id)) and (('#onset' in description) or ('#consonant' in description))
        # Only guess from the transcription when no position/class tag is present
        if (not result) and ('#vowel' not in description) and ('#nucleus' not in description) and ('#coda' not in description):
            result = Phonology.isConsonant(entry['phoneme'])
        return result

    def isNucleus(self, id):
        """Check if an id corresponds to a phoneme that can be in a nucleus, either from description, or if not available, guessed from the phonemic transcription"""
        entry = self.entries[id]
        description = entry['description']
        result = (not self.isSyllableBreak(id)) and (not self.isStress(id)) and (('#nucleus' in description) or ('#vowel' in description))
        # Only guess from the transcription when no position/class tag is present
        if (not result) and ('#consonant' not in description) and ('#onset' not in description) and ('#coda' not in description):
            result = Phonology.isVowel(entry['phoneme'])
        return result

    def isCoda(self, id):
        """Check if an id corresponds to a phoneme that can be in a coda, either from description, or if not available, guessed from the phonemic transcription"""
        entry = self.entries[id]
        description = entry['description']
        result = (not self.isSyllableBreak(id)) and (not self.isStress(id)) and (('#coda' in description) or ('#consonant' in description))
        # Only guess from the transcription when no position/class tag is present
        if (not result) and ('#vowel' not in description) and ('#nucleus' not in description) and ('#onset' not in description):
            result = Phonology.isConsonant(entry['phoneme'])
        return result

    def isInSingleSyllables(self, id):
        """Check if an id corresponds to a phoneme that can be in a single syllable, from description"""
        description = self.entries[id]['description']
        # Allowed when tagged #single, #initial or #final, or when not restricted to #middle
        return ('#single' in description) or ('#initial' in description) or ('#final' in description) or ('#middle' not in description)

    def isInInitialSyllables(self, id):
        """Check if an id corresponds to a phoneme that can be in an initial syllable, from description"""
        description = self.entries[id]['description']
        if '#initial' in description:
            return True
        # Without any syllable position tag, every position is allowed
        return ('#single' not in description) and ('#middle' not in description) and ('#final' not in description)

    def isInMiddleSyllables(self, id):
        """Check if an id corresponds to a phoneme that can be in a middle syllable, from description"""
        description = self.entries[id]['description']
        if '#middle' in description:
            return True
        # Without any syllable position tag, every position is allowed
        return ('#single' not in description) and ('#initial' not in description) and ('#final' not in description)

    def isInFinalSyllables(self, id):
        """Check if an id corresponds to a phoneme that can be in a final syllable, from description"""
        description = self.entries[id]['description']
        if '#final' in description:
            return True
        # Without any syllable position tag, every position is allowed
        return ('#single' not in description) and ('#initial' not in description) and ('#middle' not in description)

    def isInStressedSyllables(self, id):
        """Check if an id corresponds to a phoneme that can be in a stressed syllable, from description"""
        description = self.entries[id]['description']
        return ('#stressed' in description) or ('#unstressed' not in description)

    def isInUnstressedSyllables(self, id):
        """Check if an id corresponds to a phoneme that can be in an unstressed syllable, from description"""
        description = self.entries[id]['description']
        return ('#unstressed' in description) or ('#stressed' not in description)

    def getPhonemesFromTags(self, tags):
        """Return a list of phoneme id verifying the tag list

        The stress and syllable break phonemes are never returned. Raises
        KeyError on an unknown tag, and Exception when the phonology has no
        stress or no syllable break phoneme.
        """
        if not self.entries:
            return []
        tagToPredicate = {
            '#onset': Phonology.isOnset,
            '#nucleus': Phonology.isNucleus,
            '#coda': Phonology.isCoda,
            '#single': Phonology.isInSingleSyllables,
            '#initial': Phonology.isInInitialSyllables,
            '#middle': Phonology.isInMiddleSyllables,
            '#final': Phonology.isInFinalSyllables,
            '#stressed': Phonology.isInStressedSyllables,
            '#unstressed': Phonology.isInUnstressedSyllables,
        }
        predicates = [tagToPredicate[t] for t in tags]
        # The separators do not depend on the entry: look them up once
        separators = (self.getStress(), self.getSyllableBreak())
        phonemeList = []
        for id in self.entries:
            # skip stress and syllable break
            if id in separators:
                continue
            if all(predicate(self, id) for predicate in predicates):
                phonemeList.append(id)
        return phonemeList

    def hasStressedVowels(self):
        """Check if at least one nucleus phoneme can be in a stressed syllable

        Returns False when every nucleus phoneme is restricted to unstressed syllables.
        """
        return any(self.isNucleus(id) and self.isInStressedSyllables(id) for id in self.entries)


class Distribution:
    """Discrete distribution: a mapping value -> number of occurences (used as a weight)"""

    def __init__(self):
        self.items = {}

    def addTo(self, value, occurences = 1):
        """Add `occurences` to the weight of `value` (created if absent)"""
        self.items[value] = self.items.get(value, 0) + occurences

    def pickFrom(self):
        """Return a random value, weighted by the occurences"""
        return random.choices(list(self.items.keys()), list(self.items.values()))[0]

    def toJsonStruct(self, itemRef = 'value', occurencesRef = 'occurences'):
        """Convert to a list of {itemRef: value, occurencesRef: occurences}"""
        return [{itemRef: x, occurencesRef: self.items[x]} for x in self.items]

    def fromJsonStruct(self, struct, itemRef = 'value', occurencesRef = 'occurences'):
        """Replace the content with the items of a structure as produced by toJsonStruct"""
        self.items = {item[itemRef]: item[occurencesRef] for item in struct}

    def isEmpty(self):
        """Return True when the distribution has no item"""
        return len(self.items) == 0


class Generator:
    """Parent class for all generators"""

    def __init__(self, id = '', description = '', phonology = ''):
        self.id = id
        self.description = description
        self.phonology = phonology  # id of the phonology the generated ids belong to
        self.isTyped = False  # set to True by the concrete generators

    def isValid(self):
        """Return True for a concrete generator with a non-empty id"""
        return (self.id != '') and self.isTyped

    def toJsonStruct(self):
        """Convert the common generator fields to a Json structure"""
        return {
            'id': self.id,
            'description': self.description,
            'phonology': self.phonology,
        }

    def fromJsonStruct(self, struct):
        """Fill the common generator fields from a Json structure"""
        self.id = struct['id']
        self.description = struct['description']
        self.phonology = struct['phonology']

    def generateWord(self):
        """Generate a word as a list of phoneme ids (not supported on the abstract generator)"""
        raise Exception('Word generation not supported on abstract generator')


class ChainGenerator(Generator):
    """Chains-based generator"""

    def __init__(self, order = 1, **kwargs):
        super().__init__(**kwargs)
        self.order = order
        self.chains = {}  # input (tuple of the previous ids) -> distribution of outputs
        self.isTyped = True

    def toJsonStruct(self):
        """Convert the generator to a Json structure of type 'chains'"""
        struct = super().toJsonStruct()
        struct.update({
            'type': 'chains',
            'order': self.order,
            'chains': [{'input': x,
                        'possible-outputs': self.chains[x].toJsonStruct(itemRef = 'value', occurencesRef = 'occurences')}
                       for x in self.chains],
        })
        return struct

    def fromJsonStruct(self, struct):
        """Fill the generator from a Json structure of type 'chains'"""
        super().fromJsonStruct(struct)
        self.order = struct['order']
        for chainStruct in struct['chains']:
            dist = Distribution()
            dist.fromJsonStruct(chainStruct['possible-outputs'], itemRef = 'value', occurencesRef = 'occurences')
            # Json lists are not hashable: the input is stored as a tuple
            self.chains[tuple(chainStruct['input'])] = dist

    def fromExamples(self, file, phonology):
        """Train a chain generator on an example file

        Each line of the file is a word, written as space-separated phoneme
        ids. When `phonology` is valid, every id must belong to it, otherwise
        an Exception is raised.
        """
        with open(file, encoding='utf-8', newline='') as exampleFile:
            fileReader = csv.reader(exampleFile, delimiter=' ', skipinitialspace=True)
            for row in fileReader:
                # Empty cells (e.g. from a trailing space) are not phonemes, and the
                # empty string is reserved for the end-of-word marker
                items = [item for item in row if item != '']
                if not items:
                    continue
                if phonology.isValid():
                    for item in items:
                        if not phonology.has(item):
                            raise Exception('In row', row, ':', item, 'is not an id in phonology', phonology.id)
                items.append('')  # Add terminator element (empty string)
                previous = ('',) * self.order  # Initial sequence (empty strings, length = self.order)
                for item in items:
                    if previous not in self.chains:
                        self.chains[previous] = Distribution()
                    self.chains[previous].addTo(item)
                    # Slide the window of the last self.order ids
                    previous = previous[1:] + (item,)

    def generateWord(self):
        """Generate a word as a list of phoneme ids, by walking the chains until the terminator"""
        outputIdList = []
        previous = ('',) * self.order  # Initial sequence (empty strings, length = self.order)
        while True:
            nextItem = self.chains[previous].pickFrom()
            if nextItem == '':
                break
            outputIdList.append(nextItem)
            previous = previous[1:] + (nextItem,)
        return outputIdList


class RuleGenerator(Generator):
    """Rules-based generator"""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.rules = {}  # rule id -> distribution of patterns (tuples of rule ids and phoneme ids)
        self.isTyped = True

    def toJsonStruct(self):
        """Convert the generator to a Json structure of type 'rules'"""
        struct = super().toJsonStruct()
        struct.update({
            'type': 'rules',
            'rules': [{'id': x,
                       'distribution': self.rules[x].toJsonStruct(itemRef = 'pattern', occurencesRef = 'occurences')}
                      for x in self.rules],
        })
        return struct

    def fromJsonStruct(self, struct):
        """Fill the generator from a Json structure of type 'rules'"""
        super().fromJsonStruct(struct)
        for ruleStruct in struct['rules']:
            dist = Distribution()
            # The pattern should be converted from a list to a tuple
            dist.fromJsonStruct([{'pattern': tuple(x['pattern']), 'occurences': x['occurences']} for x in ruleStruct['distribution']],
                                itemRef = 'pattern', occurencesRef = 'occurences')
            self.rules[ruleStruct['id']] = dist

    def generatePattern(self, pattern):
        """Expand a pattern recursively: rule ids are replaced by a random pattern of the rule, other items are kept"""
        output = []
        for x in pattern:
            if x in self.rules:
                output.extend(self.generatePattern(self.rules[x].pickFrom()))
            else:
                output.append(x)
        return output

    def generateWord(self):
        """Generate a word as a list of phoneme ids, starting from the 'word' rule"""
        return self.generatePattern(self.rules['word'].pickFrom())

    def processRowFromExample(self, row, stressId, syllableBreakId):
        """Add one example word (a list of ids) to the 'word' and syllable rules

        The 'word' rule and the 7 syllable rules must already exist. Rows with
        more than one stress, or without any phoneme, are skipped.
        """
        # Check the number of stress
        nbStress = row.count(stressId)
        if nbStress > 1:
            print("Too much stress in " + str(row) + ": skip the example")
            return
        # Build the syllable list
        syllables = []
        currentSyllable = []
        stressedSyllableIdx = -1
        syllableIdx = 0
        for x in row:
            # Empty cells are not phonemes
            if x == '':
                continue
            # Append to the current syllable if not a syllable separator
            if (x != stressId) and (x != syllableBreakId):
                currentSyllable.append(x)
            # In case of syllable separator, only add the syllable to the list if it is not empty
            elif len(currentSyllable) != 0:
                syllables.append(currentSyllable)
                currentSyllable = []
                syllableIdx = syllableIdx + 1
            # If current id is stress, remember the position of the stressed syllable
            if x == stressId:
                stressedSyllableIdx = syllableIdx
        # After the loop, the current syllable should be non-empty, add it to the list of syllables
        if len(currentSyllable) != 0:
            syllables.append(currentSyllable)
        # A row made only of separators is not a word
        if len(syllables) == 0:
            return
        # Single syllable case
        if len(syllables) == 1:
            if stressedSyllableIdx == 0:
                self.rules['word'].addTo(tuple([stressId, 'single']))
            else:
                self.rules['word'].addTo(tuple(['single']))
            self.rules['single'].addTo(tuple(syllables[0]))
            return
        # Other cases
        wordPattern = []
        lastIdx = len(syllables) - 1
        for idx, syllable in enumerate(syllables):
            separator = syllableBreakId
            if idx == 0:
                rule = 'initial'
            elif idx == lastIdx:
                rule = 'final'
            else:
                rule = 'middle'
            if idx == stressedSyllableIdx:
                rule = rule + '-stressed'
                separator = stressId
            # only add the syllable separator to the pattern if it's not the first syllable unless it's stressed
            if (separator == stressId) or (idx > 0):
                wordPattern.append(separator)
            # Add the rule to the pattern
            wordPattern.append(rule)
            # The syllable is added to the corresponding rule
            self.rules[rule].addTo(tuple(syllable))
        self.rules['word'].addTo(tuple(wordPattern))

    def splitSyllableRule(self, syllableRule, phonology):
        """Replace syllable rules with onset/nucleus/coda pattern"""
        newDist = Distribution()
        oldDist = self.rules[syllableRule]
        # Add onset/nucleus/coda rules
        onsetRule = syllableRule + '-onset'
        nucleusRule = syllableRule + '-nucleus'
        codaRule = syllableRule + '-coda'
        self.rules[onsetRule] = Distribution()
        self.rules[nucleusRule] = Distribution()
        self.rules[codaRule] = Distribution()
        # For each pattern, split into onset/nucleus/coda
        for pattern, occurences in oldDist.items.items():
            isOnset = True
            onset = []
            isNucleus = False
            nucleus = []
            coda = []
            for phoneme in pattern:
                # Check if there is a change of element: the onset ends at the first
                # nucleus phoneme, the nucleus ends at the first following coda phoneme
                if isOnset and phonology.isNucleus(phoneme):
                    isOnset = False
                    isNucleus = True
                elif isNucleus and phonology.isCoda(phoneme):
                    isNucleus = False
                # Add to the respective list
                if isOnset:
                    onset.append(phoneme)
                elif isNucleus:
                    nucleus.append(phoneme)
                else:
                    coda.append(phoneme)
            # Add to the specific distributions and determine the pattern in new distribution
            distPattern = []
            if len(onset) != 0:
                distPattern.append(onsetRule)
                self.rules[onsetRule].addTo(tuple(onset), occurences)
            if len(nucleus) != 0:
                distPattern.append(nucleusRule)
                self.rules[nucleusRule].addTo(tuple(nucleus), occurences)
            if len(coda) != 0:
                distPattern.append(codaRule)
                self.rules[codaRule].addTo(tuple(coda), occurences)
            # Add patterns to distributions
            newDist.addTo(tuple(distPattern), occurences)
        # Replace the old rules with the new rules
        self.rules[syllableRule] = newDist

    def cleanRules(self):
        """Remove the empty rules"""
        self.rules = {x: self.rules[x] for x in self.rules if not self.rules[x].isEmpty()}

    def fromExamples(self, file, phonology):
        """Train a rule generator on an example file

        Each line of the file is a word, written as space-separated phoneme
        ids of `phonology`, including its stress and syllable break ids.
        Raises Exception on an id that is not in the phonology.
        """
        stressId = phonology.getStress()
        syllableBreakId = phonology.getSyllableBreak()
        # Words are modelled as lists of syllables, with one of those being stressed (optionally)
        # The syllables are classed among: single (1 syllable words), initial (first syllable), final (last syllable), middle (other syllables)
        # Words are split among 7 rules (single, initial, initial-stressed, final, final-stressed, middle, middle-stressed)
        #
        # Add the 'word' rule, and syllable rules, initialized with an empty distribution
        self.rules['word'] = Distribution()
        syllableRules = ['single', 'initial', 'initial-stressed', 'final', 'final-stressed', 'middle', 'middle-stressed']
        for x in syllableRules:
            self.rules[x] = Distribution()
        # Step 1: open the file and find how words look like
        with open(file, encoding='utf-8', newline='') as exampleFile:
            fileReader = csv.reader(exampleFile, delimiter=' ', skipinitialspace=True)
            for row in fileReader:
                # Empty cells (e.g. from a trailing space) are not phonemes
                items = [item for item in row if item != '']
                if not items:
                    continue
                # Check the items in row
                for item in items:
                    if not phonology.has(item):
                        raise Exception('In row', row, ':', item, 'is not an id in phonology', phonology.id)
                # Process the row
                self.processRowFromExample(items, stressId, syllableBreakId)
        # Step 2: Check the syllable rules and split them into onset/nucleus/coda rules
        for x in syllableRules:
            self.splitSyllableRule(x, phonology)
        # Step 3: remove the empty rules
        self.cleanRules()

    @staticmethod
    def randomOccurences(mean, range):
        """Generate a random number in the range [mean-range, mean+range+1]"""
        # random.randint includes both bounds
        return random.randint(mean - range, mean + range + 1)

    @staticmethod
    def isStressPosition(position, numberSyllables, stressPosition):
        """Check if a given position is the position of the stress. The position goes from 1 to numberSyllables included."""
        isPosition = False
        if (stressPosition > 0) and (stressPosition <= numberSyllables):
            # Index from the beginning of the word
            isPosition = position == stressPosition
        elif (stressPosition < 0) and (abs(stressPosition) <= numberSyllables):
            # Index from the end of the word
            isPosition = position == (numberSyllables + 1 + stressPosition)
        elif (position == numberSyllables) and (stressPosition > numberSyllables):
            # Word too short for a positive index: stress the last syllable
            isPosition = True
        elif (position == 1) and (stressPosition < 0) and (abs(stressPosition) > numberSyllables):
            # Word too short for a negative index: stress the first syllable
            isPosition = True
        return isPosition

    def fromPhonology(self, phonology, minNumberSyllables = 1, maxNumberSyllables = 4, stressPosition = -2, distributionMean = 20, distributionRange = 5):
        """
        Generate a rule-based generator just from a phonology and some parameters.
        - minNumberSyllables must be strictly positive.
        - maxNumberSyllables must be greater than minNumberSyllables
        - stressPosition indicates on which syllable the stress occurs.
          Positive index count from the beginning to the end (with the first syllable being at index 1).
          Negative index count from the end to the beginning (with the last syllable being at index -1)
          Set this to zero if no stress should be generated.
        - distributionMean indicates the medium value for the occurences of a phoneme
        - distributionRange bounds the occurences of phonemes and syllables, which are drawn
          in [distributionMean - distributionRange, distributionMean + distributionRange + 1].
          It must be strictly lower than distributionMean so that no occurence is zero.

        Raises Exception on invalid parameters, or when the phonology has no
        stress or no syllable break phoneme.
        """
        # Reinitialize
        self.phonology = phonology.id
        self.rules = {}
        # Check the parameters
        if maxNumberSyllables < minNumberSyllables:
            raise Exception("Maximum number of syllables", maxNumberSyllables, "must be higher than the minimum number of syllables", minNumberSyllables)
        if maxNumberSyllables < abs(stressPosition):
            raise Exception("Can't set a stress at position", stressPosition, "with a maximum number of syllables", maxNumberSyllables)
        if distributionMean < 1:
            raise Exception("Distribution mean must be strictly positive. Given", distributionMean)
        # Equality is rejected too: it would allow occurences of zero
        if distributionMean <= distributionRange:
            raise Exception("Distribution mean", distributionMean, "must be strictly higher than distribution range", distributionRange)
        if distributionRange < 0:
            raise Exception("Distribution range must be positive or nul. Given", distributionRange)
        # Step 1: Generate the word rules based on the min and max number of syllables, and the presence of stress
        stressId = phonology.getStress()
        syllableBreakId = phonology.getSyllableBreak()
        isStressed = (stressPosition != 0) and phonology.hasStressedVowels()
        # Add the 'word' rule, initialized with an empty distribution
        self.rules['word'] = Distribution()
        # Add the syllable rules and word patterns
        syllableRules = []
        syllableRulesToTags = {}
        if minNumberSyllables == 1:
            syllableRules.append('single')
            syllableRulesToTags['single'] = ['#single']
            wordPattern = []
            if isStressed:
                syllableRulesToTags['single'].append('#stressed')
                wordPattern.append(stressId)
            wordPattern.append('single')
            self.rules['word'].addTo(tuple(wordPattern), RuleGenerator.randomOccurences(distributionMean, distributionRange))
        if maxNumberSyllables > 1:
            syllableRules = syllableRules + ['initial', 'middle', 'final']
            syllableRulesToTags.update({'initial': ['#initial'], 'middle': ['#middle'], 'final': ['#final']})
            if isStressed:
                syllableRules = syllableRules + ['initial-stressed', 'middle-stressed', 'final-stressed']
                syllableRulesToTags.update({'initial-stressed': ['#initial', '#stressed'],
                                            'middle-stressed': ['#middle', '#stressed'],
                                            'final-stressed': ['#final', '#stressed']})
                syllableRulesToTags.update({'initial': ['#initial', '#unstressed'],
                                            'middle': ['#middle', '#unstressed'],
                                            'final': ['#final', '#unstressed']})
            # One word pattern per number of syllables, honouring the minimum
            for nbSyllables in range(max(2, minNumberSyllables), maxNumberSyllables + 1):
                wordPattern = []
                for position in range(1, nbSyllables + 1):
                    isStressPosition = isStressed and RuleGenerator.isStressPosition(position, nbSyllables, stressPosition)
                    # add syllable separator
                    if isStressPosition:
                        wordPattern.append(stressId)
                    elif position > 1:
                        wordPattern.append(syllableBreakId)
                    # add syllable
                    if position == 1:
                        rule = 'initial'
                    elif position == nbSyllables:
                        rule = 'final'
                    else:
                        rule = 'middle'
                    if isStressPosition:
                        rule = rule + '-stressed'
                    wordPattern.append(rule)
                self.rules['word'].addTo(tuple(wordPattern), RuleGenerator.randomOccurences(distributionMean, distributionRange))
        # Step 2: Generate the syllable rules
        # Add the rules in the distributions
        phonemeRules = []
        phonemeRulesToTag = {}
        for syllable in syllableRules:
            self.rules[syllable] = Distribution()
            onset = syllable + '-onset'
            nucleus = syllable + '-nucleus'
            coda = syllable + '-coda'
            phonemeRules = phonemeRules + [onset, nucleus, coda]
            ruleTags = syllableRulesToTags[syllable]
            phonemeRulesToTag.update({onset: ruleTags + ['#onset'], nucleus: ruleTags + ['#nucleus'], coda: ruleTags + ['#coda']})
            # Fill the syllable rules
            # For the generated rules, initial and single syllables may not have onset
            if ('#initial' in ruleTags) or ('#single' in ruleTags):
                self.rules[syllable].addTo(tuple([nucleus]), RuleGenerator.randomOccurences(distributionMean, distributionRange))
                self.rules[syllable].addTo(tuple([nucleus, coda]), RuleGenerator.randomOccurences(distributionMean, distributionRange))
            self.rules[syllable].addTo(tuple([onset, nucleus]), RuleGenerator.randomOccurences(distributionMean, distributionRange))
            self.rules[syllable].addTo(tuple([onset, nucleus, coda]), RuleGenerator.randomOccurences(distributionMean, distributionRange))
        # Step 3: Generate the phoneme distributions for each phoneme rule
        for rule in phonemeRules:
            self.rules[rule] = Distribution()
            tags = phonemeRulesToTag[rule]
            phonemeList = phonology.getPhonemesFromTags(tags)
            for phoneme in phonemeList:
                self.rules[rule].addTo(tuple([phoneme]), RuleGenerator.randomOccurences(distributionMean, distributionRange))
        # Step 4: Clean the rules
        self.cleanRules()


# 'type' field of a generator Json structure -> class implementing it
generatorTypeToClass = {
    'chains': ChainGenerator,
    'rules': RuleGenerator
}


def makeGenerator(struct):
    """Function instanciating a generator from a JSON structure"""
    if struct['type'] in generatorTypeToClass:
        generator = generatorTypeToClass[struct['type']]()
    else:
        # Unknown type: abstract generator (it is not valid, see Generator.isValid)
        generator = Generator()
    generator.fromJsonStruct(struct)
    return generator


class PhonagenFile:
    """A phonagen file, with phonologies and generators"""

    def __init__(self):
        self.phonologies = {}  # id -> Phonology
        self.generators = {}  # id -> Generator

    def addPhonology(self, phonology):
        """Add (or replace) a phonology; invalid phonologies are ignored"""
        if phonology.isValid():
            self.phonologies[phonology.id] = phonology

    def addGenerator(self, generator):
        """Add (or replace) a generator; invalid generators are ignored"""
        if generator.isValid():
            self.generators[generator.id] = generator

    def getPhonology(self, id):
        """Return the phonology with the given id (KeyError if absent)"""
        return self.phonologies[id]

    def getGenerator(self, id):
        """Return the generator with the given id (KeyError if absent)"""
        return self.generators[id]

    def load(self, file):
        """Load from a JSON file"""
        with open(file, 'r', encoding='utf-8') as inputFile:
            jsonStruct = json.load(inputFile)
        # Load phonologies
        for struct in jsonStruct['phonologies']:
            phonology = Phonology()
            phonology.fromJsonStruct(struct)
            self.addPhonology(phonology)
        # Load generators
        for struct in jsonStruct['generators']:
            self.addGenerator(makeGenerator(struct))

    def writeTo(self, file = ''):
        """Output to a JSON file (or stdout)"""
        outputStruct = {
            'phonologies': [x.toJsonStruct() for x in self.phonologies.values()],
            'generators': [x.toJsonStruct() for x in self.generators.values()],
        }
        if file == '':
            json.dump(outputStruct, sys.stdout, ensure_ascii=False)
        else:
            with open(file, 'w', encoding='utf-8') as outputFile:
                json.dump(outputStruct, outputFile, ensure_ascii=False)

    def mergeFrom(self, otherFile):
        """Add all phonologies and generators from the other file into this one."""
        for phonology in otherFile.phonologies.values():
            self.addPhonology(phonology)
        for generator in otherFile.generators.values():
            self.addGenerator(generator)

    def generateWord(self, generator = ''):
        """Generate a word with the given generator id (a random one if empty)

        Returns a table of transcription -> string, see Phonology.formatWord.
        """
        gen = generator
        if gen == '':
            gen = random.choice(list(self.generators))
        idList = self.generators[gen].generateWord()
        phonology = self.phonologies[self.generators[gen].phonology]
        return phonology.formatWord(idList)