phonagen/py-phonagen/phonology-maker.py

594 lines
20 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#! /usr/bin/env python3
import argparse
import phonagen
import random
class Stress:
    """Pseudo-phoneme standing for the primary stress mark."""

    def __init__(self):
        # Renderings of this marker per transcription, keyed by transcription name.
        self.transcriptions = dict()

    def __str__(self):
        """Return the IPA primary stress mark (U+02C8)."""
        return "\u02C8"

    def getDescription(self):
        """Return the tag string used as the description of this entry."""
        return "#stress"
class SyllableBreak:
    """Pseudo-phoneme standing for a syllable boundary."""

    def __init__(self):
        # Renderings of this marker per transcription, keyed by transcription name.
        self.transcriptions = dict()

    def __str__(self):
        """Return the IPA syllable break mark (a full stop)."""
        return "."

    def getDescription(self):
        """Return the tag string used as the description of this entry."""
        return "#syllable-break"
###
# Vowels representation and generation
class Vowel:
    """Vowel phoneme described by a height, a backness and a few modifiers."""

    # Simplified vowel chart: one row per height, one column per
    # backness/roundness combination.
    matrixPhoneme = [
        ["i", "y", "ɨ", "ɯ", "u"],  # close
        ["e", "ø", "ə", "ɤ", "o"],  # mid close
        ["ɛ", "œ", "ɐ", "ʌ", "ɔ"],  # mid open
        ["æ", "ɶ", "a", "ɑ", "ɒ"],  # open
    ]

    # Row indices of the chart (vowel height)
    close = 0
    midClose = 1
    midOpen = 2
    open = 3

    # Column indices of the chart (backness + roundness)
    frontUnrounded = 0
    frontRounded = 1
    central = 2
    backUnrounded = 3
    backRounded = 4

    def __init__(self, height = midClose, backness = central):
        """Create a plain vowel (not nasal, not long, not stressed).

        With no argument, the vowel is the mid close central one.
        """
        self.height = height
        self.backness = backness
        self.isNasal = False
        self.isLong = False
        self.isStressed = False
        # Renderings of this vowel per transcription, keyed by transcription name.
        self.transcriptions = {}

    def __str__(self):
        """Return the IPA notation: chart symbol, then nasal mark, then length mark."""
        symbol = Vowel.matrixPhoneme[self.height][self.backness]
        nasalMark = "\u0303" if self.isNasal else ""  # combining tilde
        lengthMark = "ː" if self.isLong else ""
        return symbol + nasalMark + lengthMark

    def clone(self):
        """Return a new vowel with the same height, backness and modifiers.

        The transcriptions are not copied: the copy starts with none.
        """
        copy = Vowel(self.height, self.backness)
        copy.isNasal = self.isNasal
        copy.isLong = self.isLong
        copy.isStressed = self.isStressed
        return copy

    def getDescription(self):
        """Return the tag string of the vowel, which tells if it is stressed."""
        stressTag = " #stressed" if self.isStressed else " #unstressed"
        return "#vowel" + stressTag
# Named vowels, attached to the Vowel class so that the generative
# functions can refer to them as Vowel.A, Vowel.E, ...
# Common vowels
Vowel.A = Vowel(height=Vowel.open, backness=Vowel.central)
Vowel.E = Vowel(height=Vowel.midClose, backness=Vowel.frontUnrounded)
Vowel.I = Vowel(height=Vowel.close, backness=Vowel.frontUnrounded)
Vowel.O = Vowel(height=Vowel.midClose, backness=Vowel.backRounded)
Vowel.U = Vowel(height=Vowel.close, backness=Vowel.backRounded)
Vowel.Schwa = Vowel(height=Vowel.midClose, backness=Vowel.central)
# Less common vowels
Vowel.openO = Vowel(height=Vowel.midOpen, backness=Vowel.backRounded)
Vowel.openE = Vowel(height=Vowel.midOpen, backness=Vowel.frontUnrounded)
Vowel.Y = Vowel(height=Vowel.close, backness=Vowel.frontRounded)
Vowel.W = Vowel(height=Vowel.close, backness=Vowel.backUnrounded)
Vowel.OE = Vowel(height=Vowel.midClose, backness=Vowel.frontRounded)
Vowel.openOE = Vowel(height=Vowel.midOpen, backness=Vowel.frontRounded)
Vowel.AE = Vowel(height=Vowel.open, backness=Vowel.frontUnrounded)
Vowel.AO = Vowel(height=Vowel.open, backness=Vowel.backRounded)
# Distributions of vowel features
# generateVowelSet() draws one outcome from each of these with pickFrom().
# The second argument of addTo() is presumably a relative weight for the
# outcome -- confirm against phonagen.Distribution.
# Stress: is stress phonemic ? (4 for True, 6 for False)
stressDistribution = phonagen.Distribution()
stressDistribution.addTo(True, 4)
stressDistribution.addTo(False, 6)
# Long vowels: is vowel length phonemic ? (2 for True, 8 for False)
longVowelDistribution = phonagen.Distribution()
longVowelDistribution.addTo(True, 2)
longVowelDistribution.addTo(False, 8)
# Nasal vowels: is vowel nasality phonemic ? (2 for True, 8 for False)
nasalVowelDistribution = phonagen.Distribution()
nasalVowelDistribution.addTo(True, 2)
nasalVowelDistribution.addTo(False, 8)
# Base vowels
def pickBoolean():
    """Return True or False, each with the same probability."""
    outcomes = (True, False)
    return random.choice(outcomes)
# Generative functions
def twoVowelSet():
    """Return a set of two vowels, built on one of two contrasts picked at random."""
    openCloseContrast = (Vowel.A, Vowel.Schwa)
    frontBackContrast = (Vowel.E, Vowel.O)
    return openCloseContrast if pickBoolean() else frontBackContrast
def threeVowelSet():
    """Return the set of three vowels at the extremes of the vowel triangle."""
    triangleCorners = [Vowel.A, Vowel.I, Vowel.U]
    return tuple(triangleCorners)
def fourVowelSet():
    """Return a set of four vowels, following one of two layouts picked at random."""
    if pickBoolean():
        # Extremes of the vowel triangle + a central vowel
        return (Vowel.A, Vowel.I, Vowel.U, Vowel.Schwa)
    # Front/back pairs at two heights, two rows apart in the chart: choose
    # whether the contrast is close/mid open or mid close/open.
    upperHeight = random.choice([Vowel.close, Vowel.midClose])
    lowerHeight = upperHeight + 2
    return tuple(
        Vowel(height, column)
        for height in (upperHeight, lowerHeight)
        for column in (Vowel.frontUnrounded, Vowel.backRounded)
    )
def fiveVowelSet():
    """Return a set of five vowels, following one of two layouts picked at random."""
    if pickBoolean():
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U)
    # Schwa + front/back pairs at two heights, two rows apart in the chart:
    # choose whether the contrast is close/mid open or mid close/open.
    upperHeight = random.choice([Vowel.close, Vowel.midClose])
    lowerHeight = upperHeight + 2
    pairs = tuple(
        Vowel(height, column)
        for height in (upperHeight, lowerHeight)
        for column in (Vowel.frontUnrounded, Vowel.backRounded)
    )
    return (Vowel.Schwa,) + pairs
def sixVowelSet():
    """Return a set of six vowels, following one of two layouts picked at random."""
    fiveVowelsAndSchwa = pickBoolean()
    if not fiveVowelsAndSchwa:
        # Front/back pairs at three heights
        return (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO)
    return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.Schwa)
def sevenVowelSet():
    """Return a set of seven vowels, following one of two layouts picked at random."""
    fiveVowelsAndOpenMids = pickBoolean()
    if not fiveVowelsAndOpenMids:
        # Front/back pairs at three heights + schwa
        return (Vowel.I, Vowel.U, Vowel.E, Vowel.O, Vowel.AE, Vowel.AO, Vowel.Schwa)
    return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U, Vowel.openE, Vowel.openO)
def eightVowelSet():
    """Return a set of eight vowels, following one of three layouts picked at random."""
    layout = random.randrange(3)
    if layout == 0:
        return (Vowel.A, Vowel.E, Vowel.I, Vowel.O, Vowel.U,
                Vowel.openE, Vowel.openO, Vowel.Schwa)
    if layout == 1:
        # Six fixed vowels + two vowels sharing a randomly chosen column
        # (central or back unrounded), one high and one low.
        sharedColumn = random.choice([Vowel.central, Vowel.backUnrounded])
        lowHeight = random.choice([Vowel.midOpen, Vowel.open])
        highHeight = random.choice([Vowel.midClose, Vowel.close])
        return (Vowel.I, Vowel.E, Vowel.Y, Vowel.openOE,
                Vowel.U, Vowel.O,
                Vowel(highHeight, sharedColumn), Vowel(lowHeight, sharedColumn))
    # Front unrounded and back rounded vowels at the four heights
    return (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE,
            Vowel.U, Vowel.O, Vowel.openO, Vowel.AO)
def nineVowelSet():
    """Return a set of nine vowels, following one of two layouts picked at random."""
    if pickBoolean():
        # Front unrounded and back rounded vowels at the four heights + schwa
        frontVowels = (Vowel.I, Vowel.E, Vowel.openE, Vowel.AE)
        backVowels = (Vowel.U, Vowel.O, Vowel.openO, Vowel.AO)
        return frontVowels + backVowels + (Vowel.Schwa,)
    # Three columns (front, central, back) of three vowels
    frontVowels = (Vowel.I, Vowel.E, Vowel.openE)
    centralVowels = (Vowel(Vowel.close, Vowel.central), Vowel.Schwa, Vowel.A)
    backVowels = (Vowel.U, Vowel.O, Vowel.openO)
    return frontVowels + centralVowels + backVowels
def tenVowelSet():
    """Return a set of ten vowels, following one of two layouts picked at random."""
    if pickBoolean():
        frontUnroundedVowels = (Vowel.I, Vowel.E, Vowel.openE)
        frontRoundedVowels = (Vowel.Y, Vowel.OE, Vowel.openOE)
        backVowels = (Vowel.U, Vowel.O, Vowel.openO)
        return frontUnroundedVowels + frontRoundedVowels + (Vowel.A,) + backVowels
    # Two rows of five vowels, one per column of the chart
    upperRow = (Vowel.I, Vowel.Y, Vowel.Schwa, Vowel.W, Vowel.U)
    lowerRow = (Vowel.E, Vowel.openOE, Vowel.A,
                Vowel(Vowel.midOpen, Vowel.backUnrounded), Vowel.O)
    return upperRow + lowerRow
def elevenVowelSet():
    """Return a set of eleven vowels, following one of two layouts picked at random."""
    frontRoundedVowels = (Vowel.Y, Vowel.OE, Vowel.openOE)
    if pickBoolean():
        # Three series of three vowels + two central vowels
        return ((Vowel.I, Vowel.E, Vowel.openE)
                + frontRoundedVowels
                + (Vowel.A, Vowel.Schwa)
                + (Vowel.U, Vowel.O, Vowel.openO))
    # Front unrounded and back rounded series of four vowels + front rounded series
    return ((Vowel.I, Vowel.E, Vowel.openE, Vowel.AE)
            + frontRoundedVowels
            + (Vowel.U, Vowel.O, Vowel.openO, Vowel.AO))
# Distribution
# Outcomes are the generative functions above, not vowel sets: generateVowelSet()
# picks one of them and then calls it to get the base vowels.
# The second argument of addTo() is presumably a relative weight for the
# outcome -- confirm against phonagen.Distribution.
baseVowelDistribution = phonagen.Distribution()
baseVowelDistribution.addTo(twoVowelSet, 2)
baseVowelDistribution.addTo(threeVowelSet, 6)
baseVowelDistribution.addTo(fourVowelSet, 8)
baseVowelDistribution.addTo(fiveVowelSet, 10)
baseVowelDistribution.addTo(sixVowelSet, 8)
baseVowelDistribution.addTo(sevenVowelSet, 8)
baseVowelDistribution.addTo(eightVowelSet, 6)
baseVowelDistribution.addTo(nineVowelSet, 4)
baseVowelDistribution.addTo(tenVowelSet, 2)
baseVowelDistribution.addTo(elevenVowelSet, 2)
def generateVowelSet():
    """Generate a set of vowels for a phonology.

    Three features are drawn first (is stress phonemic, is length phonemic,
    is nasality phonemic), then a set of base vowels. For each base vowel,
    the result contains, in this order:
      - the plain vowel;
      - the long stressed variant, when stress falls on long vowels;
      - otherwise the long variant (length phonemic) and the stressed
        variant (stress phonemic);
      - the nasal variant (nasality phonemic).
    When nasality is phonemic, each long/stressed variant is also followed
    by its nasal counterpart.

    Returns a list of Vowel objects. Every element is a fresh object: the
    base vowels are cloned, because the generative functions hand out the
    shared Vowel.A, Vowel.E, ... constants and later steps write into the
    'transcriptions' dictionary of each returned vowel.
    """
    # Choose some language features on the vowel set
    isStressPhonemic = stressDistribution.pickFrom()
    isLongVowelPhonemic = longVowelDistribution.pickFrom()
    isNasalVowelPhonemic = nasalVowelDistribution.pickFrom()
    # Generate a set of base vowels: the outcome is a function, call it
    baseVowelSet = baseVowelDistribution.pickFrom()()
    # Is stress on long vowels ? Only meaningful when both features are
    # phonemic; the random pick is only made in that case.
    isLongStressed = False
    if isStressPhonemic and isLongVowelPhonemic and pickBoolean():
        isLongStressed = True

    def makeVariant(base, isLong = False, isStressed = False):
        # Copy of the base vowel with the requested modifiers switched on
        variant = base.clone()
        if isLong:
            variant.isLong = True
        if isStressed:
            variant.isStressed = True
        return variant

    def withNasalCounterpart(vowel):
        # The vowel, followed by its nasal counterpart if nasality is phonemic
        if not isNasalVowelPhonemic:
            return [vowel]
        nasal = vowel.clone()
        nasal.isNasal = True
        return [vowel, nasal]

    result = []
    for v in baseVowelSet:
        # Never hand out the shared constants themselves
        result.append(v.clone())
        if isLongStressed:
            result.extend(withNasalCounterpart(makeVariant(v, isLong = True, isStressed = True)))
        else:
            if isLongVowelPhonemic:
                result.extend(withNasalCounterpart(makeVariant(v, isLong = True)))
            if isStressPhonemic:
                result.extend(withNasalCounterpart(makeVariant(v, isStressed = True)))
        if isNasalVowelPhonemic:
            plainNasal = v.clone()
            plainNasal.isNasal = True
            result.append(plainNasal)
    return result
###
# Consonants representation and generation
class Consonant:
    """Consonant phoneme described by a manner, a place and secondary features."""

    # Simplified consonant chart: one row per manner, one column per place.
    # '%' means impossible articulation; an empty string is a cell without
    # any symbol. Neither can be used as a phoneme (see isPossible).
    matrixPhonemes = [
        ["m", "ɱ", "n", "ɳ", "ɲ", "ŋ", "ɴ", "%"],  # nasal
        ["p", "", "t", "ʈ", "c", "k", "q", "ʔ"],  # stop voiceless
        ["b", "", "d", "ɖ", "ɟ", "ɡ", "ɢ", "%"],  # stop voiced
        ["ɓ", "ɓ̪", "ɗ", "", "ʄ", "ɠ", "ʛ", "%"],  # stop implosive
        ["pf", "", "ts", "ʈʂ", "", "kx", "", "ʔh"],  # affricate voiceless
        ["bv", "", "dz", "ɖʐ", "", "ɡɣ", "ɢʁ", "ʡʕ"],  # affricate voiced
        ["f", "θ", "s", "ʂ", "ʃ", "x", "χ", "h"],  # fricative voiceless
        ["v", "ð", "z", "ʐ", "ʒ", "ɣ", "ʁ", "ɦ"],  # fricative voiced
        ["β", "ʋ", "ɹ", "ɻ", "j", "w", "ʁ", "ʕ"],  # approximant
        ["", "", "ɾ", "ɽ", "%", "%", "ɢ̆", "ʡ̮"],  # tap/flap
        ["ʙ", "ʙ̪", "r", "ɽr", "%", "%", "ʀ", "ʢ"],  # trill
        ["%", "l", "l", "ɭ", "ʎ", "ʟ", "ʟ̠", "%"],  # lateral
        ["ʘ", "ǀ", "ǃ", "ǁ", "ǂ", "ʞ", "%", "%"],  # click
    ]

    # left>right: place of articulation (column indices of the chart)
    labial = 0
    dental = 1
    alveolar = 2
    retroflex = 3
    palatal = 4
    velar = 5
    uvular = 6
    glottal = 7

    # top>bottom: manner (row indices of the chart)
    nasal = 0
    stopVoiceless = 1
    stopVoiced = 2
    implosive = 3
    affricateVoiceless = 4
    affricateVoiced = 5
    fricativeVoiceless = 6
    fricativeVoiced = 7
    approximant = 8
    tapFlap = 9
    trill = 10
    lateral = 11
    click = 12

    def __init__(self, manner = stopVoiceless, place = alveolar):
        """Create a consonant without any secondary feature.

        With no argument, the consonant is the voiceless alveolar stop.
        """
        # Primary features
        self.place = place
        self.manner = manner
        # Secondary features
        # Phonation
        self.isEjective = False
        self.isAspirated = False  # or murmured, for voiced
        self.isGlotalized = False
        # Secondary articulation
        self.isLabialized = False
        self.isPalatalized = False
        self.isVelarized = False
        self.isPharyngealized = False
        # Renderings of this consonant per transcription, keyed by transcription name.
        self.transcriptions = {}

    def __str__(self):
        """Return the IPA notation: chart symbol followed by the feature marks."""
        # The order of the pairs is the order of the marks in the result
        marks = (
            (self.isEjective, "ʼ"),
            (self.isAspirated, "ʰ"),
            (self.isGlotalized, "ˀ"),
            (self.isLabialized, "ʷ"),
            (self.isPalatalized, "ʲ"),
            (self.isVelarized, "ˠ"),
            (self.isPharyngealized, "ˤ"),
        )
        base = Consonant.matrixPhonemes[self.manner][self.place]
        return base + "".join(mark for enabled, mark in marks if enabled)

    def clone(self):
        """Return a new consonant with the same manner, place and features.

        The transcriptions are not copied: the copy starts with none.
        """
        result = Consonant(self.manner, self.place)
        result.isEjective = self.isEjective
        result.isAspirated = self.isAspirated
        result.isGlotalized = self.isGlotalized
        result.isLabialized = self.isLabialized
        result.isPalatalized = self.isPalatalized
        result.isVelarized = self.isVelarized
        result.isPharyngealized = self.isPharyngealized
        return result

    def getDescription(self):
        """Return the tag string used as the description of this entry."""
        return "#consonant"

    @staticmethod
    def isPossible(manner, place):
        """Tell whether the chart has a symbol for this manner and place.

        Cells marked '%' (impossible articulation) are rejected, and so are
        empty cells: a consonant built from them would have an empty IPA
        notation.
        """
        return Consonant.matrixPhonemes[manner][place] not in ('%', '')
# Distributions of consonant features
# generateConsonantSet() draws one outcome from each of these with pickFrom().
# The second argument of addTo() is presumably a relative weight for the
# outcome -- confirm against phonagen.Distribution.
# Has retroflex consonants ?
retroflexDistribution = phonagen.Distribution()
retroflexDistribution.addTo(True, 5)
retroflexDistribution.addTo(False, 15)
# Has glottal consonants ?
glottalDistribution = phonagen.Distribution()
glottalDistribution.addTo(True, 2)
glottalDistribution.addTo(False, 18)
# Has uvular consonants ?
uvularDistribution = phonagen.Distribution()
uvularDistribution.addTo(True, 2)
uvularDistribution.addTo(False, 18)
# Has dental consonants ?
dentalDistribution = phonagen.Distribution()
dentalDistribution.addTo(True, 1)
dentalDistribution.addTo(False, 19)
# Are the affricates distinguished from stops ?
affricateDistribution = phonagen.Distribution()
affricateDistribution.addTo(True, 2)
affricateDistribution.addTo(False, 18)
# Are voiced distinguished from unvoiced ?
voicedDistribution = phonagen.Distribution()
voicedDistribution.addTo(True, 15)
voicedDistribution.addTo(False, 5)
# Has click ?
clickDistribution = phonagen.Distribution()
clickDistribution.addTo(True, 1)
clickDistribution.addTo(False, 69)
# Rhotic realisation
# The outcome is either False (generateConsonantSet() then adds no rhotic of
# its own) or the manner of the rhotic, as a Consonant manner constant.
rhoticRealisationDistribution = phonagen.Distribution()
rhoticRealisationDistribution.addTo(False, 10)
rhoticRealisationDistribution.addTo(Consonant.tapFlap, 30)
rhoticRealisationDistribution.addTo(Consonant.trill, 30)
rhoticRealisationDistribution.addTo(Consonant.approximant, 40)
rhoticRealisationDistribution.addTo(Consonant.fricativeVoiced, 20)
# Is aspiration phonemic ?
aspirationDistribution = phonagen.Distribution()
aspirationDistribution.addTo(True, 6)
aspirationDistribution.addTo(False, 14)
# TODO: other stuff ?
def generateConsonantSet():
    """Generate a set of consonants for a phonology.

    Features of the language are drawn first (extra places of articulation,
    voicing contrast, separate affricates, clicks, rhotic realisation,
    aspiration). Then, for each place and each manner retained, a consonant
    is added if the chart has a symbol for it; outside of the nominal place
    (alveolar), each consonant has a 1 in 8 chance of being skipped. Each
    place may also get a lateral, and a rhotic is added at the end if the
    chosen realisation was not produced by the main loop.

    Returns a list of Consonant objects. No element has an empty or '%'
    chart symbol.
    """
    def hasSymbol(manner, place):
        # '%' marks an impossible articulation and '' a cell without symbol:
        # neither can be used as a phoneme.
        return Consonant.matrixPhonemes[manner][place] not in ('%', '')

    # Places features
    hasRetroflex = retroflexDistribution.pickFrom()
    hasGlottal = glottalDistribution.pickFrom()
    hasUvular = uvularDistribution.pickFrom()
    hasDental = dentalDistribution.pickFrom()
    # Places of articulation
    # Minimal set
    places = [Consonant.labial, Consonant.alveolar, Consonant.palatal, Consonant.velar]
    # Add the other positions
    if hasRetroflex:
        places.append(Consonant.retroflex)
    if hasDental:
        places.append(Consonant.dental)
    if hasGlottal:
        places.append(Consonant.glottal)
    if hasUvular:
        places.append(Consonant.uvular)
    # Nominal place is alveolar: this place will get all the possible manners
    # Other places will be more limited
    nominalPlace = Consonant.alveolar
    # Manner features
    hasVoiced = voicedDistribution.pickFrom()
    hasSeparateAffricates = affricateDistribution.pickFrom()
    hasClick = clickDistribution.pickFrom()
    rhoticRealisation = rhoticRealisationDistribution.pickFrom()
    hasAspirated = aspirationDistribution.pickFrom()
    # Minimal set of manners
    manners = [Consonant.nasal, Consonant.stopVoiceless, Consonant.fricativeVoiceless, Consonant.approximant]
    if hasSeparateAffricates:
        manners.append(Consonant.affricateVoiceless)
    if hasVoiced:
        manners = manners + [Consonant.stopVoiced, Consonant.fricativeVoiced]
        if hasSeparateAffricates:
            manners.append(Consonant.affricateVoiced)
    if hasClick:
        manners.append(Consonant.click)
    # Manner taken by a palatal stop when affricates are not distinguished from stops
    palatalAffricateOf = {
        Consonant.stopVoiceless: Consonant.affricateVoiceless,
        Consonant.stopVoiced: Consonant.affricateVoiced,
    }
    # Generate the set of consonants
    result = []
    rhoticAdded = False
    for pl in places:
        for mn in manners:
            if not hasSymbol(mn, pl):
                continue
            # there is a small chance that a phoneme not on the nominal place will be skipped
            if (pl != nominalPlace) and (random.randrange(8) == 0):
                continue
            cons = Consonant(mn, pl)
            # there may be some modifications on the manner or place depending on how contrastive are the consonants
            if (not hasSeparateAffricates) and (pl == Consonant.palatal) and (random.randrange(10) < 8):
                affricate = palatalAffricateOf.get(mn)
                # Only switch to the affricate if the chart has a symbol for
                # it, otherwise the consonant would lose its notation.
                if (affricate is not None) and hasSymbol(affricate, pl):
                    cons.manner = affricate
            # TODO : other common modifications
            result.append(cons)
            # Rhotic added ? A fricative rhotic is uvular, the others are on
            # the nominal place. Once added, the rhotic stays added: the flag
            # must not be reset by the same manner on a later place.
            # The test on rhoticRealisation itself excludes the 'no rhotic'
            # outcome (False), which compares equal to the nasal manner (0).
            if rhoticRealisation and (mn == rhoticRealisation):
                if mn == Consonant.fricativeVoiced:
                    isRhoticHere = (pl == Consonant.uvular)
                else:
                    isRhoticHere = (pl == nominalPlace)
                rhoticAdded = rhoticAdded or isRhoticHere
            # Aspirated consonants
            if hasAspirated and (Consonant.stopVoiceless <= mn <= Consonant.fricativeVoiced):
                asp = cons.clone()
                asp.isAspirated = True
                result.append(asp)
        # lateral: likely on the nominal place, rare elsewhere, and only
        # where the chart has a lateral symbol
        if ((pl == nominalPlace) and (random.randrange(6) != 0)) or (random.randrange(20) == 0):
            if hasSymbol(Consonant.lateral, pl):
                result.append(Consonant(Consonant.lateral, pl))
    # rhotic: add it if the language has one and the loop did not produce it
    if rhoticRealisation and (not rhoticAdded):
        if rhoticRealisation != Consonant.fricativeVoiced:
            rhoticPlace = nominalPlace
        else:
            rhoticPlace = Consonant.uvular
        result.append(Consonant(rhoticRealisation, rhoticPlace))
    #
    return result
###
# Transcriptions
def addSimpleLatinTranscription(transcriptions, phonemeList):
    """Add the 'simple-latin' transcription to every phoneme of the list.

    transcriptions: list of transcription names; 'simple-latin' is appended.
    phonemeList: phonemes to transcribe; the 'transcriptions' dictionary of
    each one gets a 'simple-latin' entry.

    Vowels and consonants are looked up in tables laid out like
    Vowel.matrixPhoneme and Consonant.matrixPhonemes. Long vowels are
    doubled, nasal vowels get a nasal sign (picked at random once per call)
    and aspirated consonants get an 'h'. A consonant whose table cell is
    empty falls back to its IPA chart symbol, so that it does not vanish
    from the transcription. The other phonemes (stress, syllable break) get
    an empty transcription.
    """
    transcriptions.append('simple-latin')
    vowelTranslationMatrix = [
        ["i", "ú", "ï", "í", "u"],  # close
        ["e", "ê", "ë", "o", "o"],  # mid close
        ["é", "ê", "ä", "ó", "ó"],  # mid open
        ["á", "a", "a", "a", "â"],  # open
    ]
    consonantTranslationMatrix = [
        ["m", "ḿ", "n", "ň", "ñ", "ǹ", "ń", "ń"],  # nasal
        ["p", "", "t", "ť", "c", "k", "q", "q"],  # stop voiceless
        ["b", "", "d", "ď", "j", "g", "ǵ", "ǵ"],  # stop voiced
        ["b'", "b'", "d'", "ď'", "j'", "g'", "ǵ'", "ǵ'"],  # stop implosive
        ["pf", "", "ts", "", "", "kx", "qẍ", "qh"],  # affricate voiceless
        ["bv", "dẑ", "dz", "", "", "ǵĝ", "ǵr", "ǵh"],  # affricate voiced
        ["f", "ŝ", "s", "š", "ś", "x", "", "h"],  # fricative voiceless
        ["v", "", "z", "ž", "ź", "ĝ", "r", "h"],  # fricative voiced
        ["v", "v", "r", "ř", "y", "w", "r", "h"],  # approximant
        ["", "", "r", "ř", "r", "gy", "gr", "hg"],  # tap/flap
        ["br", "br", "rr", "řr", "ry", "ŕr", "ŕr", ""],  # trill
        ["l", "l", "l", "ľ", "ly", "ĺl", "ĺl", "ĺl"],  # lateral
        ["p*", "ṕ*", "t*", "ť*", "c*", "k*", "q*", "q*"],  # click
    ]
    nasalSign = random.choice(["\u0328", "\u0330", "n"])  # combining ogonek, combining tilde below, n
    for ph in phonemeList:
        tr = ""
        if isinstance(ph, Vowel):
            tr = vowelTranslationMatrix[ph.height][ph.backness]
            if ph.isLong:
                tr = tr + tr  # Double
            if ph.isNasal:
                tr = tr + nasalSign
        elif isinstance(ph, Consonant):
            tr = consonantTranslationMatrix[ph.manner][ph.place]
            if not tr:
                # No latin spelling in the table: use the IPA base symbol
                # rather than an empty transcription
                tr = Consonant.matrixPhonemes[ph.manner][ph.place]
            if ph.isAspirated:
                tr = tr + "h"
        ph.transcriptions.update({'simple-latin': tr})
def makePhonology(id, description):
    """Build a phonagen.Phonology holding a randomly generated phoneme set.

    id: id given to the phonology.
    description: description given to the phonology.

    The phonemes are, in this order: the stress mark, the syllable break,
    the generated vowels and the generated consonants. Each one becomes an
    entry of the phonology with an 'id', a 'description', a 'phoneme' (IPA
    notation) and one value per transcription. 'simple-latin' is set as the
    main transcription.
    """
    phonology = phonagen.Phonology(id = id, description = description)
    # Steps 0 to 2: stress and syllable break, then vowels, then consonants
    markers = [Stress(), SyllableBreak()]
    phonemeList = markers + generateVowelSet() + generateConsonantSet()
    # Step 3: transcriptions; 'phoneme' is the IPA notation itself
    transcriptions = ['phoneme']
    addSimpleLatinTranscription(transcriptions, phonemeList)
    phonology.transcriptions = transcriptions
    # TODO: change this
    phonology.mainTranscription = 'simple-latin'
    # Step 4: translate the phonemes into phonology entries
    for ph in phonemeList:
        ipa = str(ph)
        # A stressed vowel has the same notation as the unstressed one: the
        # apostrophe keeps their ids distinct
        isStressedVowel = isinstance(ph, Vowel) and ph.isStressed
        entryId = ("'" + ipa) if isStressedVowel else ipa
        entry = {'id': entryId, 'description': ph.getDescription(), 'phoneme': ipa}
        for name, value in ph.transcriptions.items():
            entry.update({name: value})
        phonology.entries.update({entry['id']: entry})
    return phonology
def parseArgs():
    """Parse the command line and return the resulting argparse namespace.

    Options: --id (required), --description and --output (both default to
    an empty string).
    """
    options = (
        ('--id', dict(metavar='id', help='id of the phonology', required = True)),
        ('--description', dict(metavar='description', help='description of the phonology; empty if not provided', default='')),
        ('--output', dict(metavar='output-file', help='Output file for the generator. The file is printed to standard output if not given.', default='')),
    )
    parser = argparse.ArgumentParser(description='Make a new phonology.')
    for flag, settings in options:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
# Main
if __name__ == '__main__':
    # Script entry point: generate one phonology from the command-line
    # options and write a phonagen file containing it to the requested output.
    cliArgs = parseArgs()
    generated = makePhonology(cliArgs.id, cliArgs.description)
    phonagenFile = phonagen.PhonagenFile()
    phonagenFile.addPhonology(generated)
    phonagenFile.writeTo(cliArgs.output)