Add a phonology maker

This commit is contained in:
Feufochmar 2018-06-22 23:58:20 +02:00
parent dd0a756a6b
commit cc7973df2b
1 changed files with 589 additions and 0 deletions

589
py-phonagen/phonology-maker.py Executable file
View File

@ -0,0 +1,589 @@
#! /usr/bin/env python3
import argparse
import phonagen
import random
class Stress:
    """Pseudo-phoneme standing for the stress mark of a word."""

    def __init__(self):
        """Create the marker with an empty transcription table."""
        # Maps a transcription id to the text used for this marker.
        self.transcriptions = dict()

    def getDescription(self):
        """Return the description tag stored in the phonology entry."""
        return "#stress"

    def __str__(self):
        """Return the IPA primary stress sign."""
        return "\u02C8"
class SyllableBreak:
    """Pseudo-phoneme standing for the boundary between two syllables."""

    def __init__(self):
        """Create the marker with an empty transcription table."""
        # Maps a transcription id to the text used for this marker.
        self.transcriptions = dict()

    def getDescription(self):
        """Return the description tag stored in the phonology entry."""
        return "#syllable-break"

    def __str__(self):
        """Return the IPA syllable break sign."""
        return "."
###
# Vowels representation and generation
class Vowel:
    """Vowel phoneme located on a simplified height x backness grid.

    A vowel is identified by a row (height) and a column (backness and
    roundness) of matrixPhoneme, plus three boolean features: nasal, long
    and stressed.
    """
    # Simplified vowel model: one row per height, one column per backness
    matrixPhoneme = [
        ["i", "y", "ɨ", "ɯ", "u"],  # close
        ["e", "ø", "ə", "ɤ", "o"],  # mid close
        ["ɛ", "œ", "ɐ", "ʌ", "ɔ"],  # mid open
        ["æ", "ɶ", "a", "ɑ", "ɒ"],  # open
    ]
    # Row indices: vowel height
    close = 0
    midClose = 1
    midOpen = 2
    open = 3
    # Column indices: vowel backness (+ roundness)
    frontUnrounded = 0
    frontRounded = 1
    central = 2
    backUnrounded = 3
    backRounded = 4

    def __init__(self, height = midClose, backness = central):
        """Build an oral, short, unstressed vowel.

        height and backness are indices into matrixPhoneme; the defaults
        select the mid-close central vowel.
        """
        self.height = height
        self.backness = backness
        self.isNasal = False
        self.isLong = False
        self.isStressed = False
        # Maps a transcription id to the text used for this vowel.
        self.transcriptions = {}

    def __str__(self):
        """Return the IPA notation: base symbol, then nasal and length marks."""
        pieces = [Vowel.matrixPhoneme[self.height][self.backness]]
        if self.isNasal:
            pieces.append("\u0303")  # Combining tilde
        if self.isLong:
            pieces.append("ː")
        return "".join(pieces)

    def clone(self):
        """Return a new vowel with the same position and features.

        The transcription table is not copied: the clone starts with an
        empty one.
        """
        twin = Vowel(self.height, self.backness)
        for feature in ("isNasal", "isLong", "isStressed"):
            setattr(twin, feature, getattr(self, feature))
        return twin

    def getDescription(self):
        """Return the description tags: '#vowel' plus the stress tag."""
        stressTag = "#stressed" if self.isStressed else "#unstressed"
        return "#vowel " + stressTag


# Named vowels, attached to the class once it exists.
# Common vowels
Vowel.A = Vowel(Vowel.open, Vowel.central)
Vowel.E = Vowel(Vowel.midClose, Vowel.frontUnrounded)
Vowel.I = Vowel(Vowel.close, Vowel.frontUnrounded)
Vowel.O = Vowel(Vowel.midClose, Vowel.backRounded)
Vowel.U = Vowel(Vowel.close, Vowel.backRounded)
Vowel.Schwa = Vowel(Vowel.midClose, Vowel.central)
# Less common vowels
Vowel.openO = Vowel(Vowel.midOpen, Vowel.backRounded)
Vowel.openE = Vowel(Vowel.midOpen, Vowel.frontUnrounded)
Vowel.Y = Vowel(Vowel.close, Vowel.frontRounded)
Vowel.W = Vowel(Vowel.close, Vowel.backUnrounded)
Vowel.OE = Vowel(Vowel.midClose, Vowel.frontRounded)
Vowel.openOE = Vowel(Vowel.midOpen, Vowel.frontRounded)
Vowel.AE = Vowel(Vowel.open, Vowel.frontUnrounded)
Vowel.AO = Vowel(Vowel.open, Vowel.backRounded)
# Distributions of vowel features
# Each distribution answers one yes/no question about the generated language.
# The second argument of addTo is presumably a relative weight (see
# phonagen.Distribution).
# Stress: is stress phonemic ?
stressDistribution = phonagen.Distribution()
for _value, _weight in ((True, 4), (False, 6)):
    stressDistribution.addTo(_value, _weight)
# Long vowels: is vowel length phonemic ?
longVowelDistribution = phonagen.Distribution()
for _value, _weight in ((True, 2), (False, 8)):
    longVowelDistribution.addTo(_value, _weight)
# Nasal vowels: is vowel nasality phonemic ?
nasalVowelDistribution = phonagen.Distribution()
for _value, _weight in ((True, 2), (False, 8)):
    nasalVowelDistribution.addTo(_value, _weight)
# Base vowels
def pickBoolean():
    """Return True or False, each with the same probability."""
    outcomes = (True, False)
    return random.choice(outcomes)
# Generative functions
def twoVowelSet():
    """Return a two-vowel inventory, picking one of two contrasts at random."""
    openCloseContrast = (Vowel.A, Vowel.Schwa,)
    frontBackContrast = (Vowel.E, Vowel.O,)
    return openCloseContrast if pickBoolean() else frontBackContrast
def threeVowelSet():
    """Return the three-vowel inventory: the extremes of the vowel triangle."""
    triangleCorners = (Vowel.A, Vowel.I, Vowel.U,)
    return triangleCorners
def fourVowelSet():
    """Return a four-vowel inventory.

    Either the triangle extremes plus schwa, or a 2x2 grid of front
    unrounded / back rounded vowels at two heights that are two rows apart.
    """
    if pickBoolean():
        # Extremes + central
        return (Vowel.A, Vowel.I, Vowel.U, Vowel.Schwa,)
    # Choose whether the contrast is close/mid-open or mid-close/open
    upperHeight = random.choice([Vowel.close, Vowel.midClose])
    lowerHeight = upperHeight + 2
    return tuple(
        Vowel(height, backness)
        for height in (upperHeight, lowerHeight)
        for backness in (Vowel.frontUnrounded, Vowel.backRounded)
    )
def fiveVowelSet():
    """Return a five-vowel inventory.

    Either the five common vowels, or schwa plus a 2x2 grid of front
    unrounded / back rounded vowels at two heights that are two rows apart.
    """
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U,)
    # Choose whether the contrast is close/mid-open or mid-close/open
    upperHeight = random.choice([Vowel.close, Vowel.midClose])
    lowerHeight = upperHeight + 2
    grid = tuple(
        Vowel(height, backness)
        for height in (upperHeight, lowerHeight)
        for backness in (Vowel.frontUnrounded, Vowel.backRounded)
    )
    return (Vowel.Schwa,) + grid
def sixVowelSet():
    """Return one of two six-vowel inventories, picked at random."""
    withSchwa = (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.Schwa,)
    threeHeights = (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO,)
    return withSchwa if pickBoolean() else threeHeights
def sevenVowelSet():
    """Return one of two seven-vowel inventories, picked at random."""
    withOpenMids = (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.openE, Vowel.openO,)
    threeHeightsAndSchwa = (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO, Vowel.Schwa,)
    return withOpenMids if pickBoolean() else threeHeightsAndSchwa
def eightVowelSet():
    """Return one of three eight-vowel inventories, picked uniformly at random."""
    variant = random.randrange(3)
    if variant == 0:
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.openE, Vowel.openO, Vowel.Schwa)
    if variant == 2:
        return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
                Vowel.U, Vowel.O, Vowel.openO, Vowel.AO,)
    # variant == 1: two extra vowels share a randomly chosen column,
    # one at a low height and one at a high height.
    column = random.choice([Vowel.central, Vowel.backUnrounded])
    lowHeight = random.choice([Vowel.midOpen, Vowel.open])
    highHeight = random.choice([Vowel.midClose, Vowel.close])
    return (Vowel.I, Vowel.E, Vowel.Y, Vowel.openOE,
            Vowel.U, Vowel.O, Vowel(highHeight, column), Vowel(lowHeight, column),)
def nineVowelSet():
    """Return one of two nine-vowel inventories, picked at random."""
    if not pickBoolean():
        # Three heights on the front, central and back columns
        closeCentral = Vowel(Vowel.close, Vowel.central)
        return (Vowel.I, Vowel.E, Vowel.openE,
                closeCentral, Vowel.Schwa, Vowel.A,
                Vowel.U, Vowel.O, Vowel.openO,)
    # Four heights on the front and back columns, plus schwa
    return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
            Vowel.U, Vowel.O, Vowel.openO, Vowel.AO,
            Vowel.Schwa,)
def tenVowelSet():
    """Return one of two ten-vowel inventories, picked at random."""
    if not pickBoolean():
        midOpenBackUnrounded = Vowel(Vowel.midOpen, Vowel.backUnrounded)
        return (Vowel.I, Vowel.Y, Vowel.Schwa, Vowel.W, Vowel.U,
                Vowel.E, Vowel.openOE, Vowel.A, midOpenBackUnrounded, Vowel.O,)
    frontUnrounded = (Vowel.I, Vowel.E, Vowel.openE)
    frontRounded = (Vowel.Y, Vowel.OE, Vowel.openOE)
    backRounded = (Vowel.U, Vowel.O, Vowel.openO)
    return frontUnrounded + frontRounded + (Vowel.A,) + backRounded
def elevenVowelSet():
    """Return one of two eleven-vowel inventories, picked at random."""
    frontRounded = (Vowel.Y, Vowel.OE, Vowel.openOE)
    if pickBoolean():
        return ((Vowel.I, Vowel.E, Vowel.openE)
                + frontRounded
                + (Vowel.A, Vowel.Schwa)
                + (Vowel.U, Vowel.O, Vowel.openO))
    return ((Vowel.I, Vowel.E, Vowel.openE, Vowel.AE)
            + frontRounded
            + (Vowel.U, Vowel.O, Vowel.openO, Vowel.AO))
# Distribution of the base vowel set generators.
# The entries are the generator functions themselves, so the picked value
# has to be called to get the vowels. The second argument of addTo is
# presumably a relative weight (see phonagen.Distribution).
baseVowelDistribution = phonagen.Distribution()
for _makeVowelSet, _weight in (
    (twoVowelSet, 2),
    (threeVowelSet, 6),
    (fourVowelSet, 8),
    (fiveVowelSet, 10),
    (sixVowelSet, 8),
    (sevenVowelSet, 8),
    (eightVowelSet, 6),
    (nineVowelSet, 4),
    (tenVowelSet, 2),
    (elevenVowelSet, 2),
):
    baseVowelDistribution.addTo(_makeVowelSet, _weight)
def _deriveVowel(vowel, **features):
    """Return a clone of vowel with the given boolean attributes overridden."""
    derived = vowel.clone()
    for name, value in features.items():
        setattr(derived, name, value)
    return derived


def generateVowelSet():
    """Generate a set of vowels for a phonology.

    Draws whether stress, length and nasality are phonemic, draws a base
    vowel inventory, then expands every base vowel with the variants those
    features call for.

    Returns a list of Vowel objects. For each base vowel the order is:
    the plain vowel, each long and/or stressed variant immediately followed
    by its nasal counterpart (when nasality is phonemic), and finally the
    plain nasal vowel.

    Every returned vowel is a fresh object: the shared constants such as
    Vowel.A are cloned, so filling in `transcriptions` later does not
    modify the class-level vowels.
    """
    # Choose some language features on the vowel set
    isStressPhonemic = stressDistribution.pickFrom()
    isLongVowelPhonemic = longVowelDistribution.pickFrom()
    isNasalVowelPhonemic = nasalVowelDistribution.pickFrom()
    # Generate a set of base vowels (the distribution holds generator functions)
    baseVowelSet = baseVowelDistribution.pickFrom()()
    # Is stress on long vowel ? If so, length and stress come as one variant
    # instead of two independent ones.
    isLongStressed = bool(isStressPhonemic and isLongVowelPhonemic and pickBoolean())
    #
    result = []
    for v in baseVowelSet:
        # Clone so that the shared Vowel.X constants are never handed out
        result.append(v.clone())
        # Oral variants derived from the base vowel
        variants = []
        if isLongStressed:
            variants.append(_deriveVowel(v, isStressed = True, isLong = True))
        else:
            if isLongVowelPhonemic:
                variants.append(_deriveVowel(v, isLong = True))
            if isStressPhonemic:
                variants.append(_deriveVowel(v, isStressed = True))
        # Each variant is followed by its nasal counterpart
        for variant in variants:
            result.append(variant)
            if isNasalVowelPhonemic:
                result.append(_deriveVowel(variant, isNasal = True))
        # Plain nasal vowel
        if isNasalVowelPhonemic:
            result.append(_deriveVowel(v, isNasal = True))
    return result
###
# Consonants representation and generation
class Consonant:
    """Consonant phoneme located on a simplified manner x place grid.

    A consonant is identified by a row (manner) and a column (place of
    articulation) of matrixPhonemes, plus boolean secondary features for
    phonation and secondary articulation. Some cells of the grid hold an
    empty string.
    """
    # Simplified model: one row per manner, one column per place
    matrixPhonemes = [
        ["m", "ɱ", "n", "ɳ", "ɲ", "ŋ", "ɴ", "ɴ"], # nasal
        ["p", "", "t", "ʈ", "c", "k", "q", "ʔ"], # stop voiceless
        ["b", "", "d", "ɖ", "ɟ", "ɡ", "ɢ", "ɢ"], # stop voiced
        ["ɓ", "ɓ̪", "ɗ", "", "ʄ", "ɠ", "ʛ", "ʛ"], # stop implosive
        ["pf", "", "ts", "ʈʂ", "", "kx", "", "ʔh"], # affricate voiceless
        ["bv", "", "dz", "ɖʐ", "", "ɡɣ", "ɢʁ", "ʡʕ" ], # affricate voiced
        ["f", "θ", "s", "ʂ", "ʃ", "x", "χ", "h"], # fricative voiceless
        ["v", "ð", "z", "ʐ", "ʒ", "ɣ", "ʁ", "ɦ"], # fricative voiced
        ["β", "ʋ", "ɹ", "ɻ", "j", "w", "ʁ", "ʕ"], # approximant
        ["", "", "ɾ", "ɽ", "ɾ", "ɢ̆", "ɢ̆", "ʡ̮"], # tap/flap
        ["ʙ", "ʙ̪", "r", "ɽr", "r", "ʀ", "ʀ", "ʢ"], # trill
        ["l", "l", "l", "ɭ", "ʎ", "ʟ", "ʟ̠", "ʟ̠"], # lateral
        ["ʘ", "ǀ", "ǃ", "ǁ", "ǂ", "ʞ", "ʞ", "ʞ"], # click
    ]
    # Column indices (left to right): place of articulation
    labial = 0
    dental = 1
    alveolar = 2
    retroflex = 3
    palatal = 4
    velar = 5
    uvular = 6
    glottal = 7
    # Row indices (top to bottom): manner
    nasal = 0
    stopVoiceless = 1
    stopVoiced = 2
    implosive = 3
    affricateVoiceless = 4
    affricateVoiced = 5
    fricativeVoiceless = 6
    fricativeVoiced = 7
    approximant = 8
    tapFlap = 9
    trill = 10
    lateral = 11
    click = 12

    def __init__(self, manner = stopVoiceless, place = alveolar):
        """Build a consonant with no secondary feature.

        manner and place are indices into matrixPhonemes; the defaults
        select the voiceless alveolar stop.
        """
        # Primary features
        self.place = place
        self.manner = manner
        # Secondary features: phonation
        self.isEjective = False
        self.isAspirated = False # or murmured, for voiced
        self.isGlotalized = False
        # Secondary features: secondary articulation
        self.isLabialized = False
        self.isPalatalized = False
        self.isVelarized = False
        self.isPharyngealized = False
        # Maps a transcription id to the text used for this consonant.
        self.transcriptions = {}

    def __str__(self):
        """Return the IPA notation: base symbol followed by the feature marks."""
        # The order of this table is the order of the marks in the output
        marks = (
            (self.isEjective, "ʼ"),
            (self.isAspirated, "ʰ"),
            (self.isGlotalized, "ˀ"),
            (self.isLabialized, "ʷ"),
            (self.isPalatalized, "ʲ"),
            (self.isVelarized, "ˠ"),
            (self.isPharyngealized, "ˤ"),
        )
        base = Consonant.matrixPhonemes[self.manner][self.place]
        return base + "".join(mark for enabled, mark in marks if enabled)

    def clone(self):
        """Return a new consonant with the same position and features.

        The transcription table is not copied: the clone starts with an
        empty one.
        """
        twin = Consonant(self.manner, self.place)
        for feature in ("isEjective", "isAspirated", "isGlotalized",
                        "isLabialized", "isPalatalized", "isVelarized",
                        "isPharyngealized"):
            setattr(twin, feature, getattr(self, feature))
        return twin

    def getDescription(self):
        """Return the description tag stored in the phonology entry."""
        return "#consonant"
# Distributions of consonant features.
# The second argument of addTo is presumably a relative weight (see
# phonagen.Distribution).
# Has retroflex consonants ?
retroflexDistribution = phonagen.Distribution()
for _value, _weight in ((True, 5), (False, 15)):
    retroflexDistribution.addTo(_value, _weight)
# Has glottal consonants ?
glottalDistribution = phonagen.Distribution()
for _value, _weight in ((True, 2), (False, 18)):
    glottalDistribution.addTo(_value, _weight)
# Has uvular consonants ?
uvularDistribution = phonagen.Distribution()
for _value, _weight in ((True, 2), (False, 18)):
    uvularDistribution.addTo(_value, _weight)
# Has dental consonants ?
dentalDistribution = phonagen.Distribution()
for _value, _weight in ((True, 1), (False, 19)):
    dentalDistribution.addTo(_value, _weight)
# Are the affricates distinguished from stops ?
affricateDistribution = phonagen.Distribution()
for _value, _weight in ((True, 2), (False, 18)):
    affricateDistribution.addTo(_value, _weight)
# Are voiced distinguished from unvoiced ?
voicedDistribution = phonagen.Distribution()
for _value, _weight in ((True, 15), (False, 5)):
    voicedDistribution.addTo(_value, _weight)
# Has click ?
clickDistribution = phonagen.Distribution()
for _value, _weight in ((True, 1), (False, 29)):
    clickDistribution.addTo(_value, _weight)
# Rhotic realisation: False (no rhotic) or the manner of the rhotic consonant
rhoticRealisationDistribution = phonagen.Distribution()
for _value, _weight in (
    (False, 10),
    (Consonant.tapFlap, 30),
    (Consonant.trill, 30),
    (Consonant.approximant, 40),
    (Consonant.fricativeVoiced, 20),
):
    rhoticRealisationDistribution.addTo(_value, _weight)
# Is aspiration phonemic ?
aspirationDistribution = phonagen.Distribution()
for _value, _weight in ((True, 6), (False, 14)):
    aspirationDistribution.addTo(_value, _weight)
# TODO: other stuff ?
def generateConsonantSet():
    """Generate a set of consonants for a phonology.

    Draws which places of articulation and which manners the language
    uses, then builds one consonant per (place, manner) pair. Pairs outside
    the nominal (alveolar) place have a 1-in-8 chance of being left out.
    Aspirated counterparts, laterals and a rhotic are added according to
    the drawn features.

    Returns a list of Consonant objects. Every consonant in the list has a
    non-empty base symbol in Consonant.matrixPhonemes: pairs whose cell is
    an empty string are not emitted, since they would produce a phoneme
    with an empty notation.
    """
    # Places features
    hasRetroflex = retroflexDistribution.pickFrom()
    hasGlottal = glottalDistribution.pickFrom()
    hasUvular = uvularDistribution.pickFrom()
    hasDental = dentalDistribution.pickFrom()
    # Places of articulation
    # Minimal set
    places = [Consonant.labial, Consonant.alveolar, Consonant.palatal, Consonant.velar]
    # Add the other positions
    if hasRetroflex:
        places.append(Consonant.retroflex)
    if hasDental:
        places.append(Consonant.dental)
    if hasGlottal:
        places.append(Consonant.glottal)
    if hasUvular:
        places.append(Consonant.uvular)
    # Nominal place is alveolar: this place will get all the possible manners
    # Other places will be more limited
    nominalPlace = Consonant.alveolar
    # Manner features
    hasVoiced = voicedDistribution.pickFrom()
    hasSeparateAffricates = affricateDistribution.pickFrom()
    hasClick = clickDistribution.pickFrom()
    rhoticRealisation = rhoticRealisationDistribution.pickFrom()
    hasAspirated = aspirationDistribution.pickFrom()
    # Minimal set of manners
    manners = [Consonant.nasal, Consonant.stopVoiceless, Consonant.fricativeVoiceless, Consonant.approximant]
    if hasSeparateAffricates:
        manners.append(Consonant.affricateVoiceless)
    if hasVoiced:
        manners = manners + [Consonant.stopVoiced, Consonant.fricativeVoiced]
        if hasSeparateAffricates:
            manners.append(Consonant.affricateVoiced)
    if hasClick:
        manners.append(Consonant.click)
    # Rhotic: False means no rhotic. The explicit identity test matters
    # because the nasal manner is 0 and 0 == False in Python.
    hasRhotic = rhoticRealisation is not False
    # A voiced fricative rhotic is uvular; any other rhotic sits on the nominal place
    if rhoticRealisation == Consonant.fricativeVoiced:
        rhoticPlace = Consonant.uvular
    else:
        rhoticPlace = nominalPlace
    # Affricate replacing a stop when affricates are not a separate series
    stopToAffricate = {
        Consonant.stopVoiceless: Consonant.affricateVoiceless,
        Consonant.stopVoiced: Consonant.affricateVoiced,
    }
    # Generate the set of consonants
    result = []
    rhoticAdded = False
    for pl in places:
        for mn in manners:
            # there is a small chance that a phoneme not on the nominal place will be skipped
            if (pl != nominalPlace) and (random.randrange(8) == 0):
                continue
            # there may be some modifications on the manner or place depending on how contrastive are the consonants
            manner = mn
            if (not hasSeparateAffricates) and (pl == Consonant.palatal) and (random.randrange(10) < 8):
                affricate = stopToAffricate.get(mn)
                # Only substitute when the affricate has a symbol on this place,
                # otherwise keep the stop
                if (affricate is not None) and Consonant.matrixPhonemes[affricate][pl]:
                    manner = affricate
            # TODO : other common modifications
            # No symbol for this manner on this place: nothing to emit
            if not Consonant.matrixPhonemes[manner][pl]:
                continue
            cons = Consonant(manner, pl)
            result.append(cons)
            # Rhotic added ? Once found it stays found: later places must
            # not reset the flag, or the rhotic would be appended twice.
            if hasRhotic and (mn == rhoticRealisation) and (pl == rhoticPlace):
                rhoticAdded = True
            # Aspirated consonants
            if hasAspirated and (Consonant.stopVoiceless <= mn <= Consonant.fricativeVoiced):
                asp = cons.clone()
                asp.isAspirated = True
                result.append(asp)
        # lateral: likely on the nominal place, rare elsewhere
        if ((pl == nominalPlace) and (random.randrange(6) != 0)) or (random.randrange(20) == 0):
            result.append(Consonant(Consonant.lateral, pl))
    # rhotic: add it when the loop above did not produce it
    if hasRhotic and (not rhoticAdded):
        result.append(Consonant(rhoticRealisation, rhoticPlace))
    #
    return result
###
# Transcriptions
def addSimpleLatinTranscription(transcriptions, phonemeList):
    """Add the 'simple-latin' transcription to every phoneme of a list.

    transcriptions: list of transcription ids; 'simple-latin' is appended
        to it in place.
    phonemeList: phonemes to transcribe. Each one gets a 'simple-latin'
        entry in its `transcriptions` dict. Phonemes that are neither a
        Vowel nor a Consonant get an empty string.

    Vowels and consonants are looked up in a table indexed like
    Vowel.matrixPhoneme / Consonant.matrixPhonemes. When the consonant
    table has no latin spelling for a cell, the IPA symbol is used instead
    (the click row already does this), so that a phoneme which has an IPA
    symbol never ends up with an empty spelling.
    """
    transcriptions.append('simple-latin')
    vowelTranslationMatrix = [
        ["i", "ú", "ï", "í", "u"], # close
        ["e", "ê", "ë", "o", "o"], # mid close
        ["é", "ê", "ä", "ó", "ó"], # mid open
        ["á", "a", "a", "a", "â"], # open
    ]
    consonantTranslationMatrix = [
        ["m", "ḿ", "n", "ň", "ñ", "ǹ", "ń", "ń"], # nasal
        ["p", "", "t", "ť", "c", "k", "q", "q"], # stop voiceless
        ["b", "", "d", "ď", "j", "g", "ǵ", "ǵ"], # stop voiced (plain latin "g", not the IPA letter)
        ["b'", "b'", "d'", "ď'", "j'", "g'", "ǵ'", "ǵ'"], # stop implosive
        ["pf", "", "ts", "", "", "kx", "qẍ", "qh"], # affricate voiceless
        ["bv", "dẑ", "dz", "", "", "ǵĝ", "ǵr", "ǵh" ], # affricate voiced
        ["f", "ŝ", "s", "š", "ś", "x", "", "h"], # fricative voiceless
        ["v", "", "z", "ž", "ź", "ĝ", "r", "h"], # fricative voiced
        ["v", "v", "r", "ř", "y", "w", "r", "h"], # approximant
        ["", "", "r", "ř", "r", "gy", "gr", "hg"], # tap/flap
        ["br", "br", "rr", "řr", "ry", "ŕr", "ŕr", ""], # trill
        ["l", "l", "l", "ľ", "ly", "ĺl", "ĺl", "ĺl"], # lateral
        ["ʘ", "ǀ", "ǃ", "ǁ", "ǂ", "ʞ", "ʞ", "ʞ"], # click
    ]
    # One nasal mark for the whole phonology
    nasalSign = random.choice(["\u0328", "\u0330", "n"]) # combining ogonek, combining tilde below, n
    for ph in phonemeList:
        tr = ""
        if isinstance(ph, Vowel):
            tr = vowelTranslationMatrix[ph.height][ph.backness]
            if ph.isNasal:
                tr = tr + nasalSign
            if ph.isLong:
                # NOTE(review): the nasal mark is doubled too, so with "n"
                # a long nasal vowel reads e.g. "anan" - confirm this is wanted.
                tr = tr + tr # Double
        elif isinstance(ph, Consonant):
            tr = consonantTranslationMatrix[ph.manner][ph.place]
            if not tr:
                # No latin spelling in the table: fall back to the IPA symbol
                tr = Consonant.matrixPhonemes[ph.manner][ph.place]
            if ph.isAspirated:
                tr = tr + "h"
        ph.transcriptions.update({'simple-latin': tr})
def makePhonology(id, description):
    """Build a randomly generated phonology.

    id, description: passed to the phonagen.Phonology constructor.

    The phoneme list is made of the stress and syllable break markers, a
    generated vowel set and a generated consonant set. Each phoneme becomes
    one entry of the phonology, keyed by its IPA notation (prefixed with an
    apostrophe for stressed vowels).

    Returns the phonagen.Phonology object.
    """
    phonology = phonagen.Phonology(id = id, description = description)
    # Step 0: stress, syllable break
    phonemeList = [Stress(), SyllableBreak()]
    # Step 1: vowels
    phonemeList.extend(generateVowelSet())
    # Step 2: consonants
    phonemeList.extend(generateConsonantSet())
    # Step 3: transcriptions; 'phoneme' is the IPA notation itself
    transcriptions = ['phoneme']
    addSimpleLatinTranscription(transcriptions, phonemeList)
    phonology.transcriptions = transcriptions
    # TODO: change this
    phonology.mainTranscription = 'simple-latin'
    # Step 4: translate the phonemes into phonology entries
    for phoneme in phonemeList:
        ipa = str(phoneme)
        # The apostrophe keeps a stressed vowel apart from its unstressed twin
        hasStress = isinstance(phoneme, Vowel) and phoneme.isStressed
        entry = {
            'id': ("'" + ipa) if hasStress else ipa,
            'description': phoneme.getDescription(),
            'phoneme': ipa,
        }
        entry.update(phoneme.transcriptions)
        phonology.entries.update({entry['id']: entry})
    return phonology
def parseArgs():
    """Parse the command line and return the argparse namespace.

    The namespace has three attributes: id (required), description and
    output (both default to an empty string).
    """
    parser = argparse.ArgumentParser(description='Make a new phonology.')
    # One (flag, settings) pair per command line option
    options = (
        ('--id', {
            'metavar': 'id',
            'help': 'id of the phonology',
            'required': True,
        }),
        ('--description', {
            'metavar': 'description',
            'help': 'description of the phonology; empty if not provided',
            'default': '',
        }),
        ('--output', {
            'metavar': 'output-file',
            'help': 'Output file for the generator. The file is printed to standard output if not given.',
            'default': '',
        }),
    )
    for flag, settings in options:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
# Main
def main():
    """Generate one phonology from the command line arguments and write it out."""
    args = parseArgs()
    phonology = makePhonology(args.id, args.description)
    outputFile = phonagen.PhonagenFile()
    outputFile.addPhonology(phonology)
    # An empty output name is the default when --output is not given
    outputFile.writeTo(args.output)


if __name__ == '__main__':
    main()