#! /usr/bin/env python3
# Provenance: py-phonagen/phonology-maker.py as added by commit
# cc7973df2b8e56def36c3abf27a5eea9a7d998c7 ("Add a phonology maker"),
# authored by Feufochmar on Fri, 22 Jun 2018 23:58:20 +0200.
import argparse
import phonagen
import random


class Stress:
    """Stress representation"""

    def __init__(self):
        # Maps a transcription id to the text used for this mark.
        self.transcriptions = {}

    def __str__(self):
        return "\u02C8"

    def getDescription(self):
        return "#stress"


class SyllableBreak:
    """Syllable break representation"""

    def __init__(self):
        # Maps a transcription id to the text used for this mark.
        self.transcriptions = {}

    def __str__(self):
        return "."

    def getDescription(self):
        return "#syllable-break"


###
# Vowels representation and generation
class Vowel:
    """Vowel representation

    A vowel is a cell of `matrixPhoneme` (row = height, column = backness)
    plus three boolean features: nasal, long and stressed.
    """

    # Simplified vowel model
    matrixPhoneme = [
        ["i", "y", "ɨ", "ɯ", "u"],  # close
        ["e", "ø", "ə", "ɤ", "o"],  # mid close
        ["ɛ", "œ", "ɐ", "ʌ", "ɔ"],  # mid open
        ["æ", "ɶ", "a", "ɑ", "ɒ"],  # open
    ]
    # Vowel height (row index in matrixPhoneme)
    close = 0
    midClose = 1
    midOpen = 2
    open = 3
    # Vowel backness (+ roundness) (column index in matrixPhoneme)
    frontUnrounded = 0
    frontRounded = 1
    central = 2
    backUnrounded = 3
    backRounded = 4

    def __init__(self, height=midClose, backness=central):
        """Constructor: the defaults select the mid-close central vowel."""
        self.height = height
        self.backness = backness
        self.isNasal = False
        self.isLong = False
        self.isStressed = False
        self.transcriptions = {}

    def __str__(self):
        """To String operator: Get the phoneme representation in IPA"""
        result = Vowel.matrixPhoneme[self.height][self.backness]
        if self.isNasal:
            result = result + "\u0303"  # Combining tilde
        if self.isLong:
            result = result + "ː"
        return result

    def clone(self):
        """Clone the vowel (features only; transcriptions start empty)"""
        result = Vowel(self.height, self.backness)
        result.isNasal = self.isNasal
        result.isLong = self.isLong
        result.isStressed = self.isStressed
        return result

    def getDescription(self):
        """Return the tags of the vowel: '#vowel' plus its stress tag."""
        if self.isStressed:
            return "#vowel" + " #stressed"
        return "#vowel" + " #unstressed"


# Common vowels
Vowel.A = Vowel(Vowel.open, Vowel.central)
Vowel.E = Vowel(Vowel.midClose, Vowel.frontUnrounded)
Vowel.I = Vowel(Vowel.close, Vowel.frontUnrounded)
Vowel.O = Vowel(Vowel.midClose, Vowel.backRounded)
Vowel.U = Vowel(Vowel.close, Vowel.backRounded)
Vowel.Schwa = Vowel(Vowel.midClose, Vowel.central)
# Less common vowels
Vowel.openO = Vowel(Vowel.midOpen, Vowel.backRounded)
Vowel.openE = Vowel(Vowel.midOpen, Vowel.frontUnrounded)
Vowel.Y = Vowel(Vowel.close, Vowel.frontRounded)
Vowel.W = Vowel(Vowel.close, Vowel.backUnrounded)
Vowel.OE = Vowel(Vowel.midClose, Vowel.frontRounded)
Vowel.openOE = Vowel(Vowel.midOpen, Vowel.frontRounded)
Vowel.AE = Vowel(Vowel.open, Vowel.frontUnrounded)
Vowel.AO = Vowel(Vowel.open, Vowel.backRounded)

# Distributions of vowel features
# (second argument of addTo is presumably a relative weight; see phonagen)
# Stress
stressDistribution = phonagen.Distribution()
stressDistribution.addTo(True, 4)
stressDistribution.addTo(False, 6)
# Long vowels
longVowelDistribution = phonagen.Distribution()
longVowelDistribution.addTo(True, 2)
longVowelDistribution.addTo(False, 8)
# Nasal vowels
nasalVowelDistribution = phonagen.Distribution()
nasalVowelDistribution.addTo(True, 2)
nasalVowelDistribution.addTo(False, 8)


# Base vowels
def pickBoolean():
    """Return True or False with equal probability."""
    return random.choice([True, False])


# Generative functions
def twoVowelSet():
    """Return a two-vowel inventory."""
    if pickBoolean():
        return (Vowel.A, Vowel.Schwa,)  # Open/Close contrast
    else:
        return (Vowel.E, Vowel.O,)  # Front/Back contrast


def threeVowelSet():
    """Return a three-vowel inventory."""
    return (Vowel.A, Vowel.I, Vowel.U,)  # Extreme of the vowel triangle


def fourVowelSet():
    """Return a four-vowel inventory."""
    if pickBoolean():
        return (Vowel.A, Vowel.I, Vowel.U, Vowel.Schwa,)  # Extreme + central
    else:
        # Choose whether contrast is between close and midOpen or midClose and open
        heightClose = random.choice([Vowel.close, Vowel.midClose])
        return (Vowel(heightClose, Vowel.frontUnrounded),
                Vowel(heightClose, Vowel.backRounded),
                Vowel(heightClose + 2, Vowel.frontUnrounded),
                Vowel(heightClose + 2, Vowel.backRounded),)


def fiveVowelSet():
    """Return a five-vowel inventory."""
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U,)
    else:
        # Choose whether contrast is between close and midOpen or midClose and open
        heightClose = random.choice([Vowel.close, Vowel.midClose])
        return (Vowel.Schwa,
                Vowel(heightClose, Vowel.frontUnrounded),
                Vowel(heightClose, Vowel.backRounded),
                Vowel(heightClose + 2, Vowel.frontUnrounded),
                Vowel(heightClose + 2, Vowel.backRounded),)


def sixVowelSet():
    """Return a six-vowel inventory."""
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.Schwa,)
    else:
        return (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO,)


def sevenVowelSet():
    """Return a seven-vowel inventory."""
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.openE, Vowel.openO,)
    else:
        return (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO, Vowel.Schwa,)


def eightVowelSet():
    """Return an eight-vowel inventory (three possible layouts)."""
    rnd = random.randrange(3)
    if rnd == 0:
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.openE, Vowel.openO, Vowel.Schwa)
    elif rnd == 1:
        # Locals are deliberately not named `open`/`close`: `open` is a builtin.
        # The three picks are made in the same order as before.
        centralColumn = random.choice([Vowel.central, Vowel.backUnrounded])
        openHeight = random.choice([Vowel.midOpen, Vowel.open])
        closeHeight = random.choice([Vowel.midClose, Vowel.close])
        return (Vowel.I, Vowel.E, Vowel.Y, Vowel.openOE,
                Vowel.U, Vowel.O,
                Vowel(closeHeight, centralColumn), Vowel(openHeight, centralColumn),)
    else:
        return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
                Vowel.U, Vowel.O, Vowel.openO, Vowel.AO,)


def nineVowelSet():
    """Return a nine-vowel inventory."""
    if pickBoolean():
        return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
                Vowel.U, Vowel.O, Vowel.openO, Vowel.AO,
                Vowel.Schwa,)
    else:
        return (Vowel.I, Vowel.E, Vowel.openE,
                Vowel(Vowel.close, Vowel.central), Vowel.Schwa, Vowel.A,
                Vowel.U, Vowel.O, Vowel.openO,)


def tenVowelSet():
    """Return a ten-vowel inventory."""
    if pickBoolean():
        return (Vowel.I, Vowel.E, Vowel.openE,
                Vowel.Y, Vowel.OE, Vowel.openOE,
                Vowel.A,
                Vowel.U, Vowel.O, Vowel.openO,)
    else:
        return (Vowel.I, Vowel.Y, Vowel.Schwa, Vowel.W, Vowel.U,
                Vowel.E, Vowel.openOE, Vowel.A, Vowel(Vowel.midOpen, Vowel.backUnrounded), Vowel.O,)


def elevenVowelSet():
    """Return an eleven-vowel inventory."""
    if pickBoolean():
        return (Vowel.I, Vowel.E, Vowel.openE,
                Vowel.Y, Vowel.OE, Vowel.openOE,
                Vowel.A, Vowel.Schwa,
                Vowel.U, Vowel.O, Vowel.openO,)
    else:
        return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
                Vowel.Y, Vowel.OE, Vowel.openOE,
                Vowel.U, Vowel.O, Vowel.openO, Vowel.AO,)


# Distribution
baseVowelDistribution = phonagen.Distribution()
baseVowelDistribution.addTo(twoVowelSet, 2)
baseVowelDistribution.addTo(threeVowelSet, 6)
baseVowelDistribution.addTo(fourVowelSet, 8)
baseVowelDistribution.addTo(fiveVowelSet, 10)
baseVowelDistribution.addTo(sixVowelSet, 8)
baseVowelDistribution.addTo(sevenVowelSet, 8)
baseVowelDistribution.addTo(eightVowelSet, 6)
baseVowelDistribution.addTo(nineVowelSet, 4)
baseVowelDistribution.addTo(tenVowelSet, 2)
baseVowelDistribution.addTo(elevenVowelSet, 2)


def _appendWithNasalVariant(result, vowel, withNasal):
    """Append `vowel` to `result`, followed by a nasal clone when `withNasal`."""
    result.append(vowel)
    if withNasal:
        nasal = vowel.clone()
        nasal.isNasal = True
        result.append(nasal)


def generateVowelSet():
    """Generate a set of vowels for a phonology

    Returns a list of Vowel objects: for every base vowel, the plain vowel
    followed by its long / stressed / nasal variants, depending on which
    features were picked for the language.
    """
    # Choose some language features on the vowel set
    isStressPhonemic = stressDistribution.pickFrom()
    isLongVowelPhonemic = longVowelDistribution.pickFrom()
    isNasalVowelPhonemic = nasalVowelDistribution.pickFrom()
    # Generate a set of base vowels (the distribution holds generator functions)
    baseVowelSet = baseVowelDistribution.pickFrom()()
    # Is stress on long vowel ?
    isLongStressed = False
    if isStressPhonemic and isLongVowelPhonemic and pickBoolean():
        isLongStressed = True
    #
    result = []
    for v in baseVowelSet:
        # Clone the base vowel: several base vowels are shared class-level
        # objects (Vowel.A, ...) and the returned phonemes get their
        # `transcriptions` filled in later; the shared objects must stay clean.
        result.append(v.clone())
        if isLongStressed:
            # Length and stress go together: a single long stressed variant
            vls = v.clone()
            vls.isStressed = True
            vls.isLong = True
            _appendWithNasalVariant(result, vls, isNasalVowelPhonemic)
        else:
            if isLongVowelPhonemic:
                vl = v.clone()
                vl.isLong = True
                _appendWithNasalVariant(result, vl, isNasalVowelPhonemic)
            if isStressPhonemic:
                vs = v.clone()
                vs.isStressed = True
                _appendWithNasalVariant(result, vs, isNasalVowelPhonemic)
        if isNasalVowelPhonemic:
            vn = v.clone()
            vn.isNasal = True
            result.append(vn)
    return result


###
# Consonants representation and generation
class Consonant:
    """Consonant representation

    A consonant is a cell of `matrixPhonemes` (row = manner, column = place)
    plus boolean phonation / secondary-articulation features.
    """

    # Simplified model
    matrixPhonemes = [
        ["m", "ɱ", "n", "ɳ", "ɲ", "ŋ", "ɴ", "ɴ"],  # nasal
        ["p", "p̪", "t", "ʈ", "c", "k", "q", "ʔ"],  # stop voiceless
        ["b", "b̪", "d", "ɖ", "ɟ", "ɡ", "ɢ", "ɢ"],  # stop voiced
        ["ɓ", "ɓ̪", "ɗ", "ᶑ", "ʄ", "ɠ", "ʛ", "ʛ"],  # stop implosive
        ["pf", "tθ", "ts", "ʈʂ", "tʃ", "kx", "qχ", "ʔh"],  # affricate voiceless
        ["bv", "dð", "dz", "ɖʐ", "dʒ", "ɡɣ", "ɢʁ", "ʡʕ"],  # affricate voiced
        ["f", "θ", "s", "ʂ", "ʃ", "x", "χ", "h"],  # fricative voiceless
        ["v", "ð", "z", "ʐ", "ʒ", "ɣ", "ʁ", "ɦ"],  # fricative voiced
        ["β", "ʋ", "ɹ", "ɻ", "j", "w", "ʁ", "ʕ"],  # approximant
        ["ⱱ", "ⱱ", "ɾ", "ɽ", "ɾ", "ɢ̆", "ɢ̆", "ʡ̮"],  # tap/flap
        ["ʙ", "ʙ̪", "r", "ɽr", "r", "ʀ", "ʀ", "ʢ"],  # trill
        ["l", "l", "l", "ɭ", "ʎ", "ʟ", "ʟ̠", "ʟ̠"],  # lateral
        ["ʘ", "ǀ", "ǃ", "ǁ", "ǂ", "ʞ", "ʞ", "ʞ"],  # click
    ]

    # left>right: place of articulation (column index in matrixPhonemes)
    labial = 0
    dental = 1
    alveolar = 2
    retroflex = 3
    palatal = 4
    velar = 5
    uvular = 6
    glottal = 7
    # top>bottom: manner (row index in matrixPhonemes)
    nasal = 0
    stopVoiceless = 1
    stopVoiced = 2
    implosive = 3
    affricateVoiceless = 4
    affricateVoiced = 5
    fricativeVoiceless = 6
    fricativeVoiced = 7
    approximant = 8
    tapFlap = 9
    trill = 10
    lateral = 11
    click = 12

    def __init__(self, manner=stopVoiceless, place=alveolar):
        """Constructor: the defaults select the voiceless alveolar stop."""
        # Primary features
        self.place = place
        self.manner = manner
        # Secondary feature
        # Phonation
        self.isEjective = False
        self.isAspirated = False  # or murmured, for voiced
        self.isGlotalized = False
        # Secondary articulation
        self.isLabialized = False
        self.isPalatalized = False
        self.isVelarized = False
        self.isPharyngealized = False
        #
        self.transcriptions = {}

    def __str__(self):
        """To String operator: Get the phoneme representation in IPA"""
        result = Consonant.matrixPhonemes[self.manner][self.place]
        # Modifier letters are appended in this fixed order.
        modifiers = (
            (self.isEjective, "ʼ"),
            (self.isAspirated, "ʰ"),
            (self.isGlotalized, "ˀ"),
            (self.isLabialized, "ʷ"),
            (self.isPalatalized, "ʲ"),
            (self.isVelarized, "ˠ"),
            (self.isPharyngealized, "ˤ"),
        )
        for isSet, mark in modifiers:
            if isSet:
                result = result + mark
        return result

    def clone(self):
        """Clone the consonant (features only; transcriptions start empty)"""
        result = Consonant(self.manner, self.place)
        result.isEjective = self.isEjective
        result.isAspirated = self.isAspirated
        result.isGlotalized = self.isGlotalized
        result.isLabialized = self.isLabialized
        result.isPalatalized = self.isPalatalized
        result.isVelarized = self.isVelarized
        result.isPharyngealized = self.isPharyngealized
        return result

    def getDescription(self):
        return "#consonant"


# Has retroflex consonants ?
retroflexDistribution = phonagen.Distribution()
retroflexDistribution.addTo(True, 5)
retroflexDistribution.addTo(False, 15)
# Has glottal consonants ?
def _weightedDistribution(*weightedValues):
    """Build a phonagen.Distribution from (value, weight) pairs, added in order."""
    distribution = phonagen.Distribution()
    for value, weight in weightedValues:
        distribution.addTo(value, weight)
    return distribution


# Glottal consonants present ?
glottalDistribution = _weightedDistribution((True, 2), (False, 18))
# Uvular consonants present ?
uvularDistribution = _weightedDistribution((True, 2), (False, 18))
# Dental consonants present ?
dentalDistribution = _weightedDistribution((True, 1), (False, 19))

# Affricates kept distinct from stops ?
affricateDistribution = _weightedDistribution((True, 2), (False, 18))
# Voiced consonants kept distinct from unvoiced ones ?
voicedDistribution = _weightedDistribution((True, 15), (False, 5))
# Clicks present ?
clickDistribution = _weightedDistribution((True, 1), (False, 29))

# Rhotic realisation: False stands for "no rhotic", otherwise a Consonant manner
rhoticRealisationDistribution = _weightedDistribution(
    (False, 10),
    (Consonant.tapFlap, 30),
    (Consonant.trill, 30),
    (Consonant.approximant, 40),
    (Consonant.fricativeVoiced, 20),
)

# Aspiration phonemic ?
aspirationDistribution = _weightedDistribution((True, 6), (False, 14))

# TODO: other stuff ?


def generateConsonantSet():
    """Generate a set of consonants for a phonology

    Returns a list of Consonant objects. Every selected manner is tried at
    every selected place; the alveolar place always gets all of them, other
    places randomly lose some. Aspirated variants, laterals and a rhotic are
    added according to the picked language features.
    """
    # Places features
    hasRetroflex = retroflexDistribution.pickFrom()
    hasGlottal = glottalDistribution.pickFrom()
    hasUvular = uvularDistribution.pickFrom()
    hasDental = dentalDistribution.pickFrom()

    # Places of articulation
    # Minimal set
    places = [Consonant.labial, Consonant.alveolar, Consonant.palatal, Consonant.velar]
    # Add the other positions
    if hasRetroflex:
        places.append(Consonant.retroflex)
    if hasDental:
        places.append(Consonant.dental)
    if hasGlottal:
        places.append(Consonant.glottal)
    if hasUvular:
        places.append(Consonant.uvular)

    # Nominal place is alveolar: this place will get all the possible manners
    # Other places will be more limited
    nominalPlace = Consonant.alveolar

    # Manner features
    hasVoiced = voicedDistribution.pickFrom()
    hasSeparateAffricates = affricateDistribution.pickFrom()
    hasClick = clickDistribution.pickFrom()
    rhoticRealisation = rhoticRealisationDistribution.pickFrom()
    hasAspirated = aspirationDistribution.pickFrom()

    # `rhoticRealisation` is either False (no rhotic) or a manner index.
    # Keep an explicit flag: `Consonant.nasal == False` is true in Python
    # (0 == False), so comparing a manner to the raw pick is not enough.
    hasRhotic = bool(rhoticRealisation)
    # A voiced fricative rhotic is uvular, every other rhotic is alveolar.
    if rhoticRealisation == Consonant.fricativeVoiced:
        rhoticPlace = Consonant.uvular
    else:
        rhoticPlace = nominalPlace

    # Minimal set of manners
    manners = [Consonant.nasal, Consonant.stopVoiceless, Consonant.fricativeVoiceless, Consonant.approximant]
    if hasSeparateAffricates:
        manners.append(Consonant.affricateVoiceless)
    if hasVoiced:
        manners = manners + [Consonant.stopVoiced, Consonant.fricativeVoiced]
        if hasSeparateAffricates:
            manners.append(Consonant.affricateVoiced)
    if hasClick:
        manners.append(Consonant.click)

    # Generate the set of consonants
    result = []
    rhoticAdded = False
    for pl in places:
        for mn in manners:
            # there is a small chance that a phoneme not on the nominal place will be skipped
            if (pl != nominalPlace) and (random.randrange(8) == 0):
                continue
            cons = Consonant(mn, pl)
            # there may be some modifications on the manner or place depending on how contrastive are the consonants
            if (not hasSeparateAffricates) and (pl == Consonant.palatal) and (random.randrange(10) < 8):
                # Palatal stops are usually realised as affricates
                if mn == Consonant.stopVoiceless:
                    cons.manner = Consonant.affricateVoiceless
                elif mn == Consonant.stopVoiced:
                    cons.manner = Consonant.affricateVoiced
            # TODO : other common modifications
            result.append(cons)
            # Rhotic added ? The flag is sticky: once the rhotic has been
            # generated, the same manner at a later place must not reset it
            # (that used to append a duplicate rhotic after the loops).
            if hasRhotic and (mn == rhoticRealisation) and (pl == rhoticPlace):
                rhoticAdded = True
            # Aspirated consonants
            if hasAspirated and (Consonant.stopVoiceless <= mn <= Consonant.fricativeVoiced):
                asp = cons.clone()
                asp.isAspirated = True
                result.append(asp)
        # lateral
        # NOTE(review): the original indentation was lost; this step is assumed
        # to run once per place, after all manners of the place - confirm.
        if ((pl == nominalPlace) and (random.randrange(6) != 0)) or (random.randrange(20) == 0):
            result.append(Consonant(Consonant.lateral, pl))

    # rhotic: make sure the language has one if a realisation was picked
    if hasRhotic and (not rhoticAdded):
        result.append(Consonant(rhoticRealisation, rhoticPlace))
    #
    return result


###
# Transcriptions
def addSimpleLatinTranscription(transcriptions, phonemeList):
    """Add the 'simple-latin' transcription to every phoneme of a list.

    transcriptions: list of transcription ids; 'simple-latin' is appended.
    phonemeList: phoneme objects; each one gets a 'simple-latin' entry in its
        `transcriptions` dict. Objects that are neither Vowel nor Consonant
        get an empty string.
    """
    transcriptions.append('simple-latin')
    # Same layout as Vowel.matrixPhoneme
    vowelTranslationMatrix = [
        ["i", "ú", "ï", "í", "u"],  # close
        ["e", "ê", "ë", "o", "o"],  # mid close
        ["é", "ê", "ä", "ó", "ó"],  # mid open
        ["á", "a", "a", "a", "â"],  # open
    ]
    # Same layout as Consonant.matrixPhonemes
    # NOTE(review): the dental voiced stop is "ṕ", same as the voiceless one -
    # possibly a typo; left unchanged.
    consonantTranslationMatrix = [
        ["m", "ḿ", "n", "ň", "ñ", "ǹ", "ń", "ń"],  # nasal
        ["p", "ṕ", "t", "ť", "c", "k", "q", "q"],  # stop voiceless
        ["b", "ṕ", "d", "ď", "j", "ɡ", "ǵ", "ǵ"],  # stop voiced
        ["b'", "b'", "d'", "ď'", "j'", "g'", "ǵ'", "ǵ'"],  # stop implosive
        ["pf", "tŝ", "ts", "tš", "tś", "kx", "qẍ", "qh"],  # affricate voiceless
        ["bv", "dẑ", "dz", "dž", "dź", "ǵĝ", "ǵr", "ǵh"],  # affricate voiced
        ["f", "ŝ", "s", "š", "ś", "x", "ẍ", "h"],  # fricative voiceless
        ["v", "ẑ", "z", "ž", "ź", "ĝ", "r", "h"],  # fricative voiced
        ["v", "v", "r", "ř", "y", "w", "r", "h"],  # approximant
        ["ṽ", "ṽ", "r", "ř", "r", "gy", "gr", "hg"],  # tap/flap
        ["br", "br", "rr", "řr", "ry", "ŕr", "ŕr", "hŕ"],  # trill
        ["l", "l", "l", "ľ", "ly", "ĺl", "ĺl", "ĺl"],  # lateral
        ["ʘ", "ǀ", "ǃ", "ǁ", "ǂ", "ʞ", "ʞ", "ʞ"],  # click
    ]
    # One nasal mark for the whole phonology
    nasalSign = random.choice(["\u0328", "\u0330", "n"])  # combining ogonek, combining tilde below, n
    for ph in phonemeList:
        tr = ""
        if isinstance(ph, Vowel):
            tr = vowelTranslationMatrix[ph.height][ph.backness]
            if ph.isNasal:
                tr = tr + nasalSign
            if ph.isLong:
                # Double. NOTE(review): the nasal sign is doubled too
                # (e.g. "anan" when the sign is "n") - confirm this is intended.
                tr = tr + tr
        if isinstance(ph, Consonant):
            tr = consonantTranslationMatrix[ph.manner][ph.place]
            if ph.isAspirated:
                tr = tr + "h"
        ph.transcriptions.update({'simple-latin': tr})


def makePhonology(id, description):
    """Build a phonagen.Phonology filled with a randomly generated inventory.

    id: id given to the phonology.
    description: description given to the phonology.
    Returns the phonology, with 'phoneme' and 'simple-latin' transcriptions
    and one entry per generated phoneme (entries sharing an id overwrite
    each other).
    """
    phonology = phonagen.Phonology(id = id, description = description)
    # Define phonemes, from their IPA notation
    # Step 0: stress, syllable break
    phonemeList = [Stress(), SyllableBreak()]
    # Step 1: Vowels
    phonemeList = phonemeList + generateVowelSet()
    # Step 2: consonants
    phonemeList = phonemeList + generateConsonantSet()
    # Step 3: Transcriptions, and decide the main
    transcriptions = ['phoneme']
    addSimpleLatinTranscription(transcriptions, phonemeList)
    # set main transcription
    phonology.transcriptions = transcriptions
    # TODO: change this
    phonology.mainTranscription = 'simple-latin'
    # Step 4: translate phoneme into phonology entries
    for ph in phonemeList:
        # Use a dedicated local instead of rebinding the `id` parameter
        entryId = str(ph)
        if isinstance(ph, Vowel) and ph.isStressed:
            # Stressed vowels have the same IPA string as unstressed ones:
            # the prefix keeps their ids distinct.
            entryId = "'" + entryId
        entry = {'id': entryId, 'description': ph.getDescription(), 'phoneme': str(ph)}
        entry.update(ph.transcriptions)
        phonology.entries.update({entry['id']: entry})
    return phonology


def parseArgs(argv=None):
    """Parse the command line of the phonology maker.

    argv: list of argument strings; when None (the default), argparse reads
        the process command line.
    Returns the argparse namespace with `id`, `description` and `output`.
    """
    # Define argument parser
    parser = argparse.ArgumentParser(description='Make a new phonology.')
    parser.add_argument('--id', metavar='id', help='id of the phonology', required = True)
    parser.add_argument('--description', metavar='description', help='description of the phonology; empty if not provided', default='')
    parser.add_argument('--output', metavar='output-file', help='Output file for the generator. The file is printed to standard output if not given.', default='')
    # Parse arguments
    return parser.parse_args(argv)


def main():
    """Generate one phonology and write it to the requested output."""
    args = parseArgs()
    phonology = makePhonology(args.id, args.description)
    outputFile = phonagen.PhonagenFile()
    outputFile.addPhonology(phonology)
    outputFile.writeTo(args.output)


# Main
if __name__ == '__main__':
    main()