#! /usr/bin/env python3
import argparse
import phonagen
import random


class Stress:
    """Phonology entry standing for the stress mark."""

    def __init__(self):
        # Transcription name -> spelling of this entry in that transcription
        self.transcriptions = {}

    def __str__(self):
        """Return the phoneme representation of the stress mark (U+02C8)."""
        return "\u02C8"

    def getDescription(self):
        """Return the description tag of this entry."""
        return "#stress"


class SyllableBreak:
    """Phonology entry standing for the syllable break."""

    def __init__(self):
        # Transcription name -> spelling of this entry in that transcription
        self.transcriptions = {}

    def __str__(self):
        """Return the phoneme representation of the syllable break."""
        return "."

    def getDescription(self):
        """Return the description tag of this entry."""
        return "#syllable-break"


###
# Vowels representation and generation
class Vowel:
    """Vowel located by a (height, backness) cell of a simplified grid."""

    # Simplified vowel model: one row per height, one column per backness
    matrixPhoneme = [
        ["i", "y", "ɨ", "ɯ", "u"],  # close
        ["e", "ø", "ə", "ɤ", "o"],  # mid close
        ["ɛ", "œ", "ɐ", "ʌ", "ɔ"],  # mid open
        ["æ", "ɶ", "a", "ɑ", "ɒ"],  # open
    ]
    # Vowel height (row index in matrixPhoneme)
    close = 0
    midClose = 1
    midOpen = 2
    open = 3
    # Vowel backness (+ roundness) (column index in matrixPhoneme)
    frontUnrounded = 0
    frontRounded = 1
    central = 2
    backUnrounded = 3
    backRounded = 4

    def __init__(self, height = midClose, backness = central):
        """Build a short, oral, unstressed vowel at the given grid cell.

        The defaults (mid close, central) select the schwa cell.
        """
        self.height = height
        self.backness = backness
        self.isNasal = False
        self.isLong = False
        self.isStressed = False
        # Transcription name -> spelling of this vowel in that transcription
        self.transcriptions = {}

    def __str__(self):
        """Return the IPA representation: base symbol, then nasal and length marks.

        Stress is not part of the returned string.
        """
        pieces = [Vowel.matrixPhoneme[self.height][self.backness]]
        if self.isNasal:
            pieces.append("\u0303")  # Combining tilde
        if self.isLong:
            pieces.append("ː")
        return "".join(pieces)

    def clone(self):
        """Return a new Vowel with the same cell and flags.

        The transcriptions of the original are not copied.
        """
        twin = Vowel(self.height, self.backness)
        twin.isNasal, twin.isLong, twin.isStressed = self.isNasal, self.isLong, self.isStressed
        return twin

    def getDescription(self):
        """Return the description tags: '#vowel' plus the stress tag."""
        stressTag = " #stressed" if self.isStressed else " #unstressed"
        return "#vowel" + stressTag


# Common vowels
Vowel.A = Vowel(Vowel.open, Vowel.central)
Vowel.E = Vowel(Vowel.midClose, Vowel.frontUnrounded)
Vowel.I = Vowel(Vowel.close, Vowel.frontUnrounded)
Vowel.O = Vowel(Vowel.midClose, Vowel.backRounded)
Vowel.U = Vowel(Vowel.close, Vowel.backRounded)
Vowel.Schwa = Vowel(Vowel.midClose, Vowel.central)
# Less common vowels
Vowel.openO = Vowel(Vowel.midOpen, Vowel.backRounded)
Vowel.openE = Vowel(Vowel.midOpen, Vowel.frontUnrounded)
Vowel.Y = Vowel(Vowel.close, Vowel.frontRounded)
Vowel.W = Vowel(Vowel.close, Vowel.backUnrounded)
Vowel.OE = Vowel(Vowel.midClose, Vowel.frontRounded)
Vowel.openOE = Vowel(Vowel.midOpen, Vowel.frontRounded)
Vowel.AE = Vowel(Vowel.open, Vowel.frontUnrounded)
Vowel.AO = Vowel(Vowel.open, Vowel.backRounded)

# Distributions of vowel features
# NOTE(review): the second argument of addTo looks like a relative weight --
# confirm against phonagen.Distribution.
# Stress
stressDistribution = phonagen.Distribution()
stressDistribution.addTo(True, 4)
stressDistribution.addTo(False, 6)
# Long vowels
longVowelDistribution = phonagen.Distribution()
longVowelDistribution.addTo(True, 2)
longVowelDistribution.addTo(False, 8)
# Nasal vowels
nasalVowelDistribution = phonagen.Distribution()
nasalVowelDistribution.addTo(True, 2)
nasalVowelDistribution.addTo(False, 8)


# Base vowels
def pickBoolean():
    """Return True or False, chosen with random.choice."""
    return random.choice([True, False])


def _fourCornerVowels():
    """Return front-unrounded and back-rounded vowels at two heights, two rows apart."""
    # Choose whether contrast is between close and midOpen or midClose and open
    upperHeight = random.choice([Vowel.close, Vowel.midClose])
    lowerHeight = upperHeight + 2
    return (Vowel(upperHeight, Vowel.frontUnrounded),
            Vowel(upperHeight, Vowel.backRounded),
            Vowel(lowerHeight, Vowel.frontUnrounded),
            Vowel(lowerHeight, Vowel.backRounded),)


# Generative functions
def twoVowelSet():
    """Return a tuple of two base vowels."""
    if pickBoolean():
        # Open/Close contrast
        return (Vowel.A, Vowel.Schwa,)
    # Front/Back contrast
    return (Vowel.E, Vowel.O,)


def threeVowelSet():
    """Return a tuple of three base vowels."""
    # Extreme of the vowel triangle
    return (Vowel.A, Vowel.I, Vowel.U,)


def fourVowelSet():
    """Return a tuple of four base vowels."""
    if pickBoolean():
        # Extreme + central
        return (Vowel.A, Vowel.I, Vowel.U, Vowel.Schwa,)
    return _fourCornerVowels()


def fiveVowelSet():
    """Return a tuple of five base vowels."""
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U,)
    return (Vowel.Schwa,) + _fourCornerVowels()


def sixVowelSet():
    """Return a tuple of six base vowels."""
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.Schwa,)
    return (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO,)


def sevenVowelSet():
    """Return a tuple of seven base vowels."""
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.openE, Vowel.openO,)
    return (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO, Vowel.Schwa,)


def eightVowelSet():
    """Return a tuple of eight base vowels, out of three possible layouts."""
    layout = random.randrange(3)
    if layout == 0:
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U,
                Vowel.openE, Vowel.openO, Vowel.Schwa)
    if layout == 1:
        # The three draws below are kept in this order on purpose
        centralColumn = random.choice([Vowel.central, Vowel.backUnrounded])
        lowHeight = random.choice([Vowel.midOpen, Vowel.open])
        highHeight = random.choice([Vowel.midClose, Vowel.close])
        return (Vowel.I, Vowel.E, Vowel.Y, Vowel.openOE, Vowel.U, Vowel.O,
                Vowel(highHeight, centralColumn), Vowel(lowHeight, centralColumn),)
    return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
            Vowel.U, Vowel.O, Vowel.openO, Vowel.AO,)


def nineVowelSet():
    """Return a tuple of nine base vowels."""
    if pickBoolean():
        return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
                Vowel.U, Vowel.O, Vowel.openO, Vowel.AO, Vowel.Schwa,)
    return (Vowel.I, Vowel.E, Vowel.openE,
            Vowel(Vowel.close, Vowel.central), Vowel.Schwa, Vowel.A,
            Vowel.U, Vowel.O, Vowel.openO,)


def tenVowelSet():
    """Return a tuple of ten base vowels."""
    if pickBoolean():
        return (Vowel.I, Vowel.E, Vowel.openE,
                Vowel.Y, Vowel.OE, Vowel.openOE,
                Vowel.A, Vowel.U, Vowel.O, Vowel.openO,)
    return (Vowel.I, Vowel.Y, Vowel.Schwa, Vowel.W, Vowel.U,
            Vowel.E, Vowel.openOE, Vowel.A,
            Vowel(Vowel.midOpen, Vowel.backUnrounded), Vowel.O,)


def elevenVowelSet():
    """Return a tuple of eleven base vowels."""
    if pickBoolean():
        return (Vowel.I, Vowel.E, Vowel.openE,
                Vowel.Y, Vowel.OE, Vowel.openOE,
                Vowel.A, Vowel.Schwa, Vowel.U, Vowel.O, Vowel.openO,)
    return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
            Vowel.Y, Vowel.OE, Vowel.openOE,
            Vowel.U, Vowel.O, Vowel.openO, Vowel.AO,)


# Distribution
baseVowelDistribution = phonagen.Distribution()
baseVowelDistribution.addTo(twoVowelSet, 2)
baseVowelDistribution.addTo(threeVowelSet, 6)
baseVowelDistribution.addTo(fourVowelSet, 8)
baseVowelDistribution.addTo(fiveVowelSet, 10)
baseVowelDistribution.addTo(sixVowelSet, 8)
baseVowelDistribution.addTo(sevenVowelSet, 8)
baseVowelDistribution.addTo(eightVowelSet, 6)
baseVowelDistribution.addTo(nineVowelSet, 4)
baseVowelDistribution.addTo(tenVowelSet, 2)
baseVowelDistribution.addTo(elevenVowelSet, 2)


def _appendWithNasalVariant(target, vowel, addNasal):
    """Append vowel to target, followed by a nasal clone of it when addNasal is set."""
    target.append(vowel)
    if addNasal:
        nasal = vowel.clone()
        nasal.isNasal = True
        target.append(nasal)


def generateVowelSet():
    """Generate a set of vowels for a phonology.

    Picks whether stress, length and nasality are phonemic, picks a set of
    base vowels, then expands every base vowel into its variants.

    Returns:
        A list of Vowel objects. For each base vowel, in order: the plain
        vowel, its long and/or stressed variants (each followed by its nasal
        counterpart when nasality is phonemic), then the plain nasal vowel.
        Every element is a fresh object, so callers may fill in
        ``transcriptions`` without touching the shared ``Vowel.A``,
        ``Vowel.E``, ... constants.
    """
    # Choose some language features on the vowel set
    isStressPhonemic = stressDistribution.pickFrom()
    isLongVowelPhonemic = longVowelDistribution.pickFrom()
    isNasalVowelPhonemic = nasalVowelDistribution.pickFrom()
    # Generate a set of base vowels: the distribution holds generator
    # functions, hence the second call
    baseVowelSet = baseVowelDistribution.pickFrom()()
    # Is stress on long vowel ? If so, length and stress always go together
    # (pickBoolean is only drawn when both features are phonemic)
    isLongStressed = bool(isStressPhonemic and isLongVowelPhonemic and pickBoolean())
    #
    result = []
    for base in baseVowelSet:
        # Clone the base vowel: vowel sets may contain the module-level shared
        # constants, and callers mutate the transcriptions of what we return
        result.append(base.clone())
        if isLongStressed:
            longStressed = base.clone()
            longStressed.isStressed = True
            longStressed.isLong = True
            _appendWithNasalVariant(result, longStressed, isNasalVowelPhonemic)
        else:
            if isLongVowelPhonemic:
                longVowel = base.clone()
                longVowel.isLong = True
                _appendWithNasalVariant(result, longVowel, isNasalVowelPhonemic)
            if isStressPhonemic:
                stressed = base.clone()
                stressed.isStressed = True
                _appendWithNasalVariant(result, stressed, isNasalVowelPhonemic)
        if isNasalVowelPhonemic:
            plainNasal = base.clone()
            plainNasal.isNasal = True
            result.append(plainNasal)
    return result


###
# Consonants representation and generation
class Consonant:
    """Consonant located by a (manner, place) cell of a simplified grid."""

    # Simplified model: one row per manner, one column per place
    # '%' means impossible articulation
    matrixPhonemes = [
        ["m", "ɱ", "n", "ɳ", "ɲ", "ŋ", "ɴ", "%"],  # nasal
        ["p", "p̪", "t", "ʈ", "c", "k", "q", "ʔ"],  # stop voiceless
        ["b", "b̪", "d", "ɖ", "ɟ", "ɡ", "ɢ", "%"],  # stop voiced
        ["ɓ", "ɓ̪", "ɗ", "ᶑ", "ʄ", "ɠ", "ʛ", "%"],  # stop implosive
        ["pf", "tθ", "ts", "ʈʂ", "tʃ", "kx", "qχ", "ʔh"],  # affricate voiceless
        ["bv", "dð", "dz", "ɖʐ", "dʒ", "ɡɣ", "ɢʁ", "ʡʕ" ],  # affricate voiced
        ["f", "θ", "s", "ʂ", "ʃ", "x", "χ", "h"],  # fricative voiceless
        ["v", "ð", "z", "ʐ", "ʒ", "ɣ", "ʁ", "ɦ"],  # fricative voiced
        ["β", "ʋ", "ɹ", "ɻ", "j", "w", "ʁ", "ʕ"],  # approximant
        ["ⱱ", "ⱱ", "ɾ", "ɽ", "%", "%", "ɢ̆", "ʡ̮"],  # tap/flap
        ["ʙ", "ʙ̪", "r", "ɽr", "%", "%", "ʀ", "ʢ"],  # trill
        ["%", "l", "l", "ɭ", "ʎ", "ʟ", "ʟ̠", "%"],  # lateral
        ["ʘ", "ǀ", "ǃ", "ǁ", "ǂ", "ʞ", "%", "%"],  # click
    ]
    # left>right: place of articulation (column index in matrixPhonemes):
    labial = 0
    dental = 1
    alveolar = 2
    retroflex = 3
    palatal = 4
    velar = 5
    uvular = 6
    glottal = 7
    # top>bottom: manner (row index in matrixPhonemes)
    nasal = 0
    stopVoiceless = 1
    stopVoiced = 2
    implosive = 3
    affricateVoiceless = 4
    affricateVoiced = 5
    fricativeVoiceless = 6
    fricativeVoiced = 7
    approximant = 8
    tapFlap = 9
    trill = 10
    lateral = 11
    click = 12

    def __init__(self, manner = stopVoiceless, place = alveolar):
        """Build a consonant at the given cell with no secondary feature set.

        The defaults (voiceless stop, alveolar) select the "t" cell.
        """
        # Primary features
        self.place = place
        self.manner = manner
        # Secondary feature
        # Phonation
        self.isEjective = False
        self.isAspirated = False  # or murmured, for voiced
        self.isGlotalized = False
        # Secondary articulation
        self.isLabialized = False
        self.isPalatalized = False
        self.isVelarized = False
        self.isPharyngealized = False
        # Transcription name -> spelling of this consonant in that transcription
        self.transcriptions = {}

    def __str__(self):
        """Return the IPA representation: base symbol followed by the feature marks."""
        result = Consonant.matrixPhonemes[self.manner][self.place]
        # Marks are appended in this fixed order
        marks = (
            (self.isEjective, "ʼ"),
            (self.isAspirated, "ʰ"),
            (self.isGlotalized, "ˀ"),
            (self.isLabialized, "ʷ"),
            (self.isPalatalized, "ʲ"),
            (self.isVelarized, "ˠ"),
            (self.isPharyngealized, "ˤ"),
        )
        for isSet, mark in marks:
            if isSet:
                result = result + mark
        return result

    def clone(self):
        """Return a new Consonant with the same cell and features.

        The transcriptions of the original are not copied.
        """
        result = Consonant(self.manner, self.place)
        result.isEjective = self.isEjective
        result.isAspirated = self.isAspirated
        result.isGlotalized = self.isGlotalized
        result.isLabialized = self.isLabialized
        result.isPalatalized = self.isPalatalized
        result.isVelarized = self.isVelarized
        result.isPharyngealized = self.isPharyngealized
        return result

    def getDescription(self):
        """Return the description tag of this entry."""
        return "#consonant"

    # Declared static so that it works both as Consonant.isPossible(m, p)
    # and on an instance; it takes no self.
    @staticmethod
    def isPossible(manner, place):
        """Return True unless the (manner, place) cell is the '%' placeholder."""
        return '%' != Consonant.matrixPhonemes[manner][place]


# NOTE(review): the second argument of addTo looks like a relative weight --
# confirm against phonagen.Distribution.
# Has retroflex consonants ?
retroflexDistribution = phonagen.Distribution()
retroflexDistribution.addTo(True, 5)
retroflexDistribution.addTo(False, 15)
# Has glottal consonants ?
glottalDistribution = phonagen.Distribution()
glottalDistribution.addTo(True, 2)
glottalDistribution.addTo(False, 18)
# Has uvular consonants ?
uvularDistribution = phonagen.Distribution()
uvularDistribution.addTo(True, 2)
uvularDistribution.addTo(False, 18)
# Has dental consonants ?
dentalDistribution = phonagen.Distribution()
dentalDistribution.addTo(True, 1)
dentalDistribution.addTo(False, 19)
# Are the affricates distinguished from stops ?
affricateDistribution = phonagen.Distribution()
affricateDistribution.addTo(True, 2)
affricateDistribution.addTo(False, 18)
# Are voiced distinguished from unvoiced ?
voicedDistribution = phonagen.Distribution()
voicedDistribution.addTo(True, 15)
voicedDistribution.addTo(False, 5)
# Has click ?
clickDistribution = phonagen.Distribution()
clickDistribution.addTo(True, 1)
clickDistribution.addTo(False, 69)
# Rhotic realisation: False means no rhotic, otherwise the manner of the rhotic
rhoticRealisationDistribution = phonagen.Distribution()
rhoticRealisationDistribution.addTo(False, 10)
rhoticRealisationDistribution.addTo(Consonant.tapFlap, 30)
rhoticRealisationDistribution.addTo(Consonant.trill, 30)
rhoticRealisationDistribution.addTo(Consonant.approximant, 40)
rhoticRealisationDistribution.addTo(Consonant.fricativeVoiced, 20)
# Is aspiration phonemic ?
aspirationDistribution = phonagen.Distribution()
aspirationDistribution.addTo(True, 6)
aspirationDistribution.addTo(False, 14)
# TODO: other stuff ?
def generateConsonantSet():
    """Generate a set of consonants for a phonology.

    Picks the places and manners of articulation in use, then builds one
    consonant per possible (manner, place) cell, with a few random gaps and
    modifications, plus laterals and a rhotic.

    Returns:
        A list of Consonant objects.
    """
    # Places features
    hasRetroflex = retroflexDistribution.pickFrom()
    hasGlottal = glottalDistribution.pickFrom()
    hasUvular = uvularDistribution.pickFrom()
    hasDental = dentalDistribution.pickFrom()
    # Places of articulation
    # Minimal set
    places = [Consonant.labial, Consonant.alveolar, Consonant.palatal, Consonant.velar]
    # Add the other positions
    if hasRetroflex:
        places.append(Consonant.retroflex)
    if hasDental:
        places.append(Consonant.dental)
    if hasGlottal:
        places.append(Consonant.glottal)
    if hasUvular:
        places.append(Consonant.uvular)
    # Nominal place is alveolar: this place will get all the possible manners
    # Other places will be more limited
    nominalPlace = Consonant.alveolar
    # Manner features
    hasVoiced = voicedDistribution.pickFrom()
    hasSeparateAffricates = affricateDistribution.pickFrom()
    hasClick = clickDistribution.pickFrom()
    rhoticRealisation = rhoticRealisationDistribution.pickFrom()
    hasAspirated = aspirationDistribution.pickFrom()
    # Minimal set of manners
    manners = [Consonant.nasal, Consonant.stopVoiceless,
               Consonant.fricativeVoiceless, Consonant.approximant]
    if hasSeparateAffricates:
        manners.append(Consonant.affricateVoiceless)
    if hasVoiced:
        manners = manners + [Consonant.stopVoiced, Consonant.fricativeVoiced]
        if hasSeparateAffricates:
            manners.append(Consonant.affricateVoiced)
    if hasClick:
        manners.append(Consonant.click)
    # The rhotic sits on the nominal place, except the voiced fricative
    # realisation which is uvular
    if rhoticRealisation == Consonant.fricativeVoiced:
        rhoticPlace = Consonant.uvular
    else:
        rhoticPlace = nominalPlace
    # Generate the set of consonants
    result = []
    rhoticAdded = False
    for pl in places:
        for mn in manners:
            # there is a small chance that a phoneme not on the nominal place will be skipped
            if Consonant.isPossible(mn, pl) and ((pl == nominalPlace) or (random.randrange(8) != 0)):
                cons = Consonant(mn, pl)
                # there may be some modifications on the manner or place depending on how contrastive are the consonants
                if (not hasSeparateAffricates) and (pl == Consonant.palatal) and (random.randrange(10) < 8):
                    if mn == Consonant.stopVoiceless:
                        cons.manner = Consonant.affricateVoiceless
                    elif mn == Consonant.stopVoiced:
                        cons.manner = Consonant.affricateVoiced
                # TODO : other common modifications
                result.append(cons)
                # Rhotic added ? The flag is only ever raised, never reset:
                # a later place must not undo it. The truthiness guard keeps
                # "no rhotic" (False) from matching the nasal manner (0 == False).
                if rhoticRealisation and (mn == rhoticRealisation) and (pl == rhoticPlace):
                    rhoticAdded = True
                # Aspirated consonants
                if hasAspirated and (Consonant.stopVoiceless <= mn <= Consonant.fricativeVoiced):
                    asp = cons.clone()
                    asp.isAspirated = True
                    result.append(asp)
        # lateral: the random draws come first so the same numbers are consumed
        # whether or not the articulation exists
        wantsLateral = ((pl == nominalPlace) and (random.randrange(6) != 0)) or (random.randrange(20) == 0)
        # Skip places where the lateral is the '%' placeholder (labial, glottal)
        if wantsLateral and Consonant.isPossible(Consonant.lateral, pl):
            result.append(Consonant(Consonant.lateral, pl))
    # rhotic: add it explicitly if the loop above did not produce it
    if rhoticRealisation and (not rhoticAdded):
        result.append(Consonant(rhoticRealisation, rhoticPlace))
    #
    return result


###
# Transcriptions
def addSimpleLatinTranscription(transcriptions, phonemeList):
    """Add the 'simple-latin' transcription to every phoneme of phonemeList.

    Args:
        transcriptions: list of transcription names; 'simple-latin' is
            appended to it.
        phonemeList: phoneme objects; each gets a 'simple-latin' key in its
            ``transcriptions`` dict. Objects that are neither Vowel nor
            Consonant get an empty string.
    """
    transcriptions.append('simple-latin')
    # Same layout as Vowel.matrixPhoneme
    vowelTranslationMatrix = [
        ["i", "ú", "ï", "í", "u"],  # close
        ["e", "ê", "ë", "o", "o"],  # mid close
        ["é", "ê", "ä", "ó", "ó"],  # mid open
        ["á", "a", "a", "a", "â"],  # open
    ]
    # Same layout as Consonant.matrixPhonemes
    consonantTranslationMatrix = [
        ["m", "ḿ", "n", "ň", "ñ", "ǹ", "ń", "ń"],  # nasal
        ["p", "ṕ", "t", "ť", "c", "k", "q", "q"],  # stop voiceless
        ["b", "ṕ", "d", "ď", "j", "g", "ǵ", "ǵ"],  # stop voiced
        ["b'", "b'", "d'", "ď'", "j'", "g'", "ǵ'", "ǵ'"],  # stop implosive
        ["pf", "tŝ", "ts", "tš", "tś", "kx", "qẍ", "qh"],  # affricate voiceless
        ["bv", "dẑ", "dz", "dž", "dź", "ǵĝ", "ǵr", "ǵh" ],  # affricate voiced
        ["f", "ŝ", "s", "š", "ś", "x", "ẍ", "h"],  # fricative voiceless
        ["v", "ẑ", "z", "ž", "ź", "ĝ", "r", "h"],  # fricative voiced
        ["v", "v", "r", "ř", "y", "w", "r", "h"],  # approximant
        ["ṽ", "ṽ", "r", "ř", "r", "gy", "gr", "hg"],  # tap/flap
        ["br", "br", "rr", "řr", "ry", "ŕr", "ŕr", "hŕ"],  # trill
        ["l", "l", "l", "ľ", "ly", "ĺl", "ĺl", "ĺl"],  # lateral
        ["p*", "ṕ*", "t*", "ť*", "c*", "k*", "q*", "q*"],  # click
    ]
    # One nasal marker is picked for the whole phonology
    nasalSign = random.choice(["\u0328", "\u0330", "n"])  # combining ogonek, combining tilde below, n
    for ph in phonemeList:
        tr = ""
        if isinstance(ph, Vowel):
            tr = vowelTranslationMatrix[ph.height][ph.backness]
            if ph.isLong:
                tr = tr + tr  # Double
            if ph.isNasal:
                tr = tr + nasalSign
        elif isinstance(ph, Consonant):
            tr = consonantTranslationMatrix[ph.manner][ph.place]
            if ph.isAspirated:
                tr = tr + "h"
        ph.transcriptions.update({'simple-latin': tr})


def makePhonology(id, description):
    """Build a phonagen.Phonology filled with randomly generated phonemes.

    Args:
        id: id given to the phonology.
        description: description given to the phonology.

    Returns:
        The phonology, with its transcriptions, main transcription and
        entries set.
    """
    phonology = phonagen.Phonology(id = id, description = description)
    # Define phonemes, from their IPA notation
    phonemeList = []
    # Step 0: stress, syllable break
    phonemeList.append(Stress())
    phonemeList.append(SyllableBreak())
    # Step 1: Vowels
    phonemeList = phonemeList + generateVowelSet()
    # Step 2: consonants
    phonemeList = phonemeList + generateConsonantSet()
    # Step 3: Transcriptions, and decide the main
    transcriptions = ['phoneme']
    addSimpleLatinTranscription(transcriptions, phonemeList)
    # set main transcription
    phonology.transcriptions = transcriptions
    # TODO: change this
    phonology.mainTranscription = 'simple-latin'
    # Step 4: translate phoneme into phonology entries
    for ph in phonemeList:
        # str(ph) does not show stress, so stressed vowels get a prefix to
        # keep their entry id distinct from the unstressed one
        entryId = str(ph)
        if isinstance(ph, Vowel) and ph.isStressed:
            entryId = "'" + entryId
        entry = {'id': entryId, 'description': ph.getDescription(), 'phoneme': str(ph)}
        entry.update(ph.transcriptions)
        phonology.entries.update({entry['id']: entry})
    return phonology


def parseArgs():
    """Parse the command line and return the argparse namespace.

    Options: --id (required), --description (default ''), --output (default '').
    """
    # Define argument parser
    parser = argparse.ArgumentParser(description='Make a new phonology.')
    parser.add_argument('--id', metavar='id', help='id of the phonology', required = True)
    parser.add_argument('--description', metavar='description',
                        help='description of the phonology; empty if not provided', default='')
    parser.add_argument('--output', metavar='output-file',
                        help='Output file for the generator. The file is printed to standard output if not given.',
                        default='')
    # Parse arguments
    return parser.parse_args()


# Main
if __name__ == '__main__':
    args = parseArgs()
    phonology = makePhonology(args.id, args.description)
    outputFile = phonagen.PhonagenFile()
    outputFile.addPhonology(phonology)
    outputFile.writeTo(args.output)