Initial commit

This commit is contained in:
Feufochmar 2018-06-03 19:02:19 +02:00
commit 340e459517
5 changed files with 624 additions and 0 deletions

20
README.md Normal file
View File

@ -0,0 +1,20 @@
## Phonagen
Phonemic word generation tools.
Phonagen provides several tools to make word generators based on the pronunciation, transcriptions, and transliterations of phonemes.
The tools are built around a JSON representation of phonemes and word generators.
### Web interface
The `web` directory contains a sample web interface to generate words from the JSON description included in the `web/data.json` file.
The implementation of generators is located in the script `web/phonagen.js`.
To use it on any webpage:
- include the script on your page (`<script src='phonagen.js'></script>` in the headers)
- add a div (or another block element) with the `phonagen` id
- call the `phonagen.load()` function with the JSON file to use as an argument (either in the `onload` handler of the body, or in a script tag placed after the `phonagen` block, e.g. `<script>phonagen.load('data.json')</script>`)
TODO:
- Description of JSON
- Modify the Racket to use JSON inputs
- Editor

201
web/data.json Normal file
View File

@ -0,0 +1,201 @@
{
"phonologies": [
{ "id": "sample-greek",
"description": "A sample for testing purpose.",
"transcriptions": ["phoneme", "latin", "greek", "runic"],
"main-transcription": "greek",
"entries": [
{ "id": "'", "description": "Stress", "phoneme": "'", "latin": "", "greek": "", "runic": "" },
{ "id": ".", "description": "Syllable break", "phoneme": ".", "latin": "", "greek": "", "runic": "" },
{ "id": "a", "description": "α /a/", "phoneme": "a", "latin": "a", "greek": "α", "runic": "ᚫ" },
{ "id": "o", "description": "ο /o/", "phoneme": "o", "latin": "o", "greek": "ο", "runic": "ᚩ" },
{ "id": "oo", "description": "ω /oː/", "phoneme": "oː", "latin": "ô", "greek": "ω", "runic": "ᚩ" },
{ "id": "k", "description": "κ /k/", "phoneme": "k", "latin": "k", "greek": "κ", "runic": "ᚴ" },
{ "id": "t", "description": "τ /t/", "phoneme": "t", "latin": "t", "greek": "τ", "runic": "ᛏ" }
]
},
{ "id": "sample-runic",
"description": "Another sample for testing purpose.",
"transcriptions": ["phoneme", "latin", "greek", "runic"],
"main-transcription": "runic",
"entries": [
{ "id": "'", "description": "Stress", "phoneme": "'", "latin": "", "greek": "", "runic": "" },
{ "id": ".", "description": "Syllable break", "phoneme": ".", "latin": "", "greek": "", "runic": "" },
{ "id": "a", "description": "ᚫ /a/", "phoneme": "a", "latin": "a", "greek": "α", "runic": "ᚫ" },
{ "id": "o", "description": "ᚩ /o/", "phoneme": "o", "latin": "o", "greek": "ω", "runic": "ᚩ" },
{ "id": "k", "description": "ᚴ /k/", "phoneme": "k", "latin": "k", "greek": "κ", "runic": "ᚴ" },
{ "id": "t", "description": "ᛏ /t/", "phoneme": "t", "latin": "t", "greek": "τ", "runic": "ᛏ" }
]
}
],
"generators": [
{ "id": "sample-rules",
"description": "Rule-based generator for testing purposes.",
"phonology": "sample-greek",
"type": "rules",
"rules": [
{ "id": "C",
"distribution": [
{ "pattern": ["k"], "occurences": 10 },
{ "pattern": ["t"], "occurences": 10 },
{ "pattern": ["k", "t"], "occurences": 5 }
]
},
{ "id": "V",
"distribution": [
{ "pattern": ["a"], "occurences": 8 },
{ "pattern": ["o"], "occurences": 8 },
{ "pattern": ["oo"], "occurences": 2 },
{ "pattern": ["a", "o"], "occurences": 1 },
{ "pattern": ["a", "oo"], "occurences": 1 }
]
},
{ "id": "Syl",
"distribution": [
{ "pattern": ["C", "V"], "occurences": 1 }
]
},
{ "id": "FinalSyl",
"distribution": [
{ "pattern": ["Syl"], "occurences": 3 },
{ "pattern": ["Syl", "C"], "occurences": 7 }
]
},
{ "id": "word",
"distribution": [
{ "pattern": ["'", "FinalSyl"], "occurences": 5 },
{ "pattern": ["'", "Syl", ".", "FinalSyl"], "occurences": 40 },
{ "pattern": ["a", "'", "Syl", ".", "FinalSyl"], "occurences": 10 }
]
}
]
},
{ "id": "sample-chains",
"description": "Chain-based generator for testing purpose.",
"phonology": "sample-runic",
"type": "chains",
"order": 2,
"chains": [
{ "input": ["", ""],
"possible-outputs": [
{ "value": "'", "occurences": 45 },
{ "value": "a", "occurences": 10 }
]
},
{ "input": ["", "'"],
"possible-outputs": [
{ "value": "k", "occurences": 1 },
{ "value": "t", "occurences": 1 }
]
},
{ "input": ["", "a"],
"possible-outputs": [
{ "value": "'", "occurences": 1 }
]
},
{ "input": ["a", "'"],
"possible-outputs": [
{ "value": "k", "occurences": 1 },
{ "value": "t", "occurences": 1 }
]
},
{ "input": ["'", "k"],
"possible-outputs": [
{ "value": "t", "occurences": 1 },
{ "value": "a", "occurences": 4 },
{ "value": "o", "occurences": 5 }
]
},
{ "input": ["'", "t"],
"possible-outputs": [
{ "value": "a", "occurences": 1 },
{ "value": "o", "occurences": 1 }
]
},
{ "input": ["k", "t"],
"possible-outputs": [
{ "value": "a", "occurences": 1 },
{ "value": "o", "occurences": 1 }
]
},
{ "input": ["k", "a"],
"possible-outputs": [
{ "value": "", "occurences": 1 },
{ "value": "k", "occurences": 2 },
{ "value": "t", "occurences": 2 },
{ "value": ".", "occurences": 5 }
]
},
{ "input": ["k", "o"],
"possible-outputs": [
{ "value": "", "occurences": 1 },
{ "value": "k", "occurences": 2 },
{ "value": "t", "occurences": 2 },
{ "value": ".", "occurences": 5 }
]
},
{ "input": ["t", "a"],
"possible-outputs": [
{ "value": "", "occurences": 1 },
{ "value": "k", "occurences": 2 },
{ "value": "t", "occurences": 2 },
{ "value": ".", "occurences": 5 }
]
},
{ "input": ["t", "o"],
"possible-outputs": [
{ "value": "", "occurences": 1 },
{ "value": "k", "occurences": 2 },
{ "value": "t", "occurences": 2 },
{ "value": ".", "occurences": 5 }
]
},
{ "input": ["o", "k"],
"possible-outputs": [
{ "value": "", "occurences": 1 }
]
},
{ "input": ["o", "t"],
"possible-outputs": [
{ "value": "", "occurences": 1 }
]
},
{ "input": ["a", "k"],
"possible-outputs": [
{ "value": "", "occurences": 1 }
]
},
{ "input": ["a", "t"],
"possible-outputs": [
{ "value": "", "occurences": 1 }
]
},
{ "input": ["o", "."],
"possible-outputs": [
{ "value": "k", "occurences": 1 },
{ "value": "t", "occurences": 1 }
]
},
{ "input": ["a", "."],
"possible-outputs": [
{ "value": "k", "occurences": 1 },
{ "value": "t", "occurences": 1 }
]
},
{ "input": [".", "k"],
"possible-outputs": [
{ "value": "t", "occurences": 1 },
{ "value": "a", "occurences": 4 },
{ "value": "o", "occurences": 5 }
]
},
{ "input": [".", "t"],
"possible-outputs": [
{ "value": "a", "occurences": 1 },
{ "value": "o", "occurences": 1 }
]
}
]
}
]
}

19
web/index.html Normal file
View File

@ -0,0 +1,19 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <title>Phonagen-Web</title>
    <meta charset="UTF-8" />
    <link href="phonagen.css" rel="stylesheet" type="text/css" media="all" />
    <!-- Defines the global `phonagen` object used by the inline script at the end of the body -->
    <script src="phonagen.js"></script>
  </head>
  <body>
    <header>
      <h1>Phonagen</h1>
    </header>
    <article>
      <!-- Container whose contents are written by the phonagen script once the model is loaded -->
      <div id="phonagen"></div>
    </article>
    <footer>Sample example of Phonagen-Web, made by Feufochmar.</footer>
    <!-- Must come after the #phonagen div, and inside <body>: content placed after </body> is not valid HTML -->
    <script>phonagen.load('data.json')</script>
  </body>
</html>

148
web/phonagen.css Normal file
View File

@ -0,0 +1,148 @@
/* Page-wide defaults: side margins, and one dark colour shared by text and borders */
body {
  color: hsla(230, 10%, 15%, 1.0);
  border-color: hsla(230, 10%, 15%, 1.0);
  margin-left: 1.5%;
  margin-right: 1.5%;
}
/* All six heading levels use the same tight spacing */
h1,
h2,
h3,
h4,
h5,
h6 {
  margin: 1px;
  padding: 1px;
}
/* Horizontal rules are drawn as a thin solid line */
hr {
  border-width: thin;
  border-style: solid;
}
/* Shared "card" look of the four page regions: tinted background, thin rounded border */
header,
footer,
article,
nav {
  background-color: hsla(230, 10%, 90%, 0.60);
  border-style: solid;
  border-width: thin;
  border-radius: 5px;
  margin: 2px;
}
/* Header and footer: centred text, medium padding */
header,
footer {
  padding: 5px;
  text-align: center;
}
/* Main content area gets the most breathing room */
article {
  padding: 10px;
}
/* Navigation bar is the most compact region */
nav {
  padding: 3px;
}
/* Small left indents for sections and paragraphs */
section {
  margin-left: 5px;
}
p {
  margin-left: 7px;
}
/* Utility classes */
.nav-item {
  display: inline-block;
  padding-right: 10px;
  padding-left: 10px;
}
.author-date {
  font-style: italic;
  text-align: right;
}
.author {
  font-variant: small-caps;
}
.banner {
  opacity: 1.0;
  text-align: center;
}
/* Links: never underlined; the colour depends on the link state.
   The :link, :visited, :hover order is kept so that hover wins over the other two. */
a:link,
a:visited {
  text-decoration: none;
}
a:link {
  color: hsla(230, 90%, 35%, 1.0);
}
a:visited {
  color: hsla(280, 90%, 35%, 1.0);
}
/* Hovered links turn pale and get a dark halo built from five stacked shadows */
a:hover {
  color: hsla(230, 90%, 90%, 1.0);
  text-shadow: 1px 0px 2px hsla(230, 90%, 15%, 1.0), -1px 0px 2px hsla(230, 90%, 15%, 1.0), 0px 1px 2px hsla(230, 90%, 15%, 1.0), 0px -1px 2px hsla(230, 90%, 15%, 1.0), 0px 0px 0.2em hsla(230, 90%, 25%, 1.0);
}
/* Tables and their cells all use the same thin, solid, collapsed borders */
table,
th,
td {
  border-style: solid;
  border-width: thin;
  border-collapse: collapse;
}
/* Header and data cells: padded and centred */
th,
td {
  padding: 5px;
  text-align: center;
}
/* Header cells additionally get an opaque tinted background */
th {
  background-color: hsla(230, 10%, 90%, 1.0);
}

236
web/phonagen.js Normal file
View File

@ -0,0 +1,236 @@
// Phonagen-web
// Browser-based generator for phonagen outputs
// Main object: the single namespace that every constructor, helper and the loaded model is attached to
var phonagen = {}
// Discrete distribution
// A Distribution is a weighted bag: every distinct item is stored once together with
// the number of times it occurs, and random picks are proportional to those counts.
phonagen.Distribution = function () {
  this.total = 0 // Sum of the occurrence counts of all items
  this.items = new Map() // item -> occurrence count of that item
}
// Record 'occur' more occurrences of 'elem' (the item is created when it is new)
phonagen.Distribution.prototype.addTo = function (elem, occur) {
  const count = this.items.has(elem) ? occur + this.items.get(elem) : occur
  this.total += occur
  this.items.set(elem, count)
}
// Draw one item at random, weighted by occurrence count.
// Items are walked in insertion order while summing their counts; the first item whose
// running sum exceeds the random draw is returned. If no running sum exceeds the draw,
// the last item visited is returned (undefined when the distribution is empty).
phonagen.Distribution.prototype.pickFrom = function () {
  const draw = Math.floor(Math.random() * this.total)
  let runningSum = 0
  let lastSeen = undefined
  for (const [item, count] of this.items.entries()) {
    runningSum += count
    lastSeen = item
    if (runningSum > draw) {
      return item
    }
  }
  return lastSeen
}
// Chain generator
// Builds an array of elements step by step: the last 'order' elements produced
// (the current chain) select the distribution from which the next element is drawn.
phonagen.ChainGenerator = function (order) {
  this.order = order // Number of previous elements that form the lookup key
  this.nextItems = new Map() // stringified chain -> Distribution of next elements
}
// Fill the lookup table from a description: a list of objects, each holding an
// 'input' chain and its 'possible-outputs' (value / occurences pairs)
phonagen.ChainGenerator.prototype.initializeFromDescription = function (description) {
  for (const chain of description) {
    const outputs = new phonagen.Distribution()
    for (const candidate of chain['possible-outputs']) {
      outputs.addTo(candidate.value, candidate.occurences)
    }
    // Arrays cannot be compared by value as Map keys, so the chain is stored under
    // its string form; generate() looks it up with the same conversion.
    const key = chain.input.toString()
    this.nextItems.set(key, outputs)
  }
}
// Generate an array of elements
// Starts from a chain of 'order' empty strings and keeps drawing the next element
// until an empty string is drawn, which marks the end of the output.
phonagen.ChainGenerator.prototype.generate = function () {
  const table = this.nextItems
  const extend = (chain, produced) => {
    const drawn = table.get(chain.toString()).pickFrom()
    if (drawn === "") {
      return produced
    }
    // Slide the chain forward by one element
    chain.shift()
    chain.push(drawn)
    produced.push(drawn)
    return extend(chain, produced)
  }
  const start = []
  while (start.length < this.order) {
    start.push("")
  }
  return extend(start, [])
}
// Rule generator
// Builds an array of elements by rewriting: every rule identifier maps to a
// distribution of patterns, and a pattern is a list of rule identifiers and/or
// plain elements. Expansion starts from the rule named 'word'.
phonagen.RuleGenerator = function () {
  this.rules = new Map() // rule id -> Distribution of patterns
}
// Fill the rules table from a description: a list of objects, each holding a rule
// 'id' and its 'distribution' (pattern / occurences pairs)
phonagen.RuleGenerator.prototype.initializeFromDescription = function (description) {
  for (const rule of description) {
    const patterns = new phonagen.Distribution()
    for (const choice of rule.distribution) {
      patterns.addTo(choice.pattern, choice.occurences)
    }
    this.rules.set(rule.id, patterns)
  }
}
// Generate an array of elements
// Picks a pattern for 'word', then walks it left to right and depth first: a token that
// names a rule is replaced by the expansion of a freshly picked pattern of that rule,
// any other token is emitted unchanged.
phonagen.RuleGenerator.prototype.generate = function () {
  const rules = this.rules
  const produced = []
  const expand = (pattern) => {
    for (const token of pattern) {
      if (rules.has(token)) {
        expand(rules.get(token).pickFrom())
      } else {
        produced.push(token)
      }
    }
  }
  expand(rules.get('word').pickFrom())
  return produced
}
// Model of the phonagen generator
// Starts empty; phonagen.loadModel stores two Maps on it, 'phonologies' and 'generators', each keyed by id
phonagen.model = {}
// Generate a word with the generator currently chosen in the 'generator-selector'
// drop-down, and show the result in the 'phonagen-output' element
phonagen.generateWord = function () {
  const { options, selectedIndex } = document.getElementById('generator-selector')
  const chosen = phonagen.model.generators.get(options[selectedIndex].value)
  const target = document.getElementById('phonagen-output')
  // The generator returns HTML, so it is written as markup rather than as plain text
  target.innerHTML = chosen.generate()
}
// Transform a list of phoneme ids into a formatted HTML string
// - phonemes: iterable of phoneme ids, as produced by a generator
// - phonology: parsed phonology; 'transcriptions' lists the writing names, 'entries' maps a
//   phoneme id to an object holding one string per writing, 'mainTranscription' names the
//   writing shown first
// Returns a <p> element (as a string) with the word in the main transcription, then its
// pronunciation (the 'phoneme' writing), then one line for every other writing.
// Ids without an entry in the phonology, and entries lacking a given writing, contribute
// nothing to the word instead of raising an error.
// NOTE: the transcription values are inserted as-is; they are not HTML-escaped.
phonagen.formatWord = function (phonemes, phonology) {
  // Construct the different transcriptions
  const wordTranscriptions = new Map()
  for (const writing of phonology.transcriptions) {
    let word = ""
    for (const id of phonemes) {
      const entry = phonology.entries.get(id)
      if (typeof entry === "undefined") {
        // Unknown phoneme id (e.g. a misspelled id in a pattern): skip it
        continue
      }
      const letter = entry[writing]
      if (typeof letter !== "undefined") {
        word += letter
      }
    }
    wordTranscriptions.set(writing, word)
  }
  // Construct what should be displayed
  let text = "<p>"
  text += "<strong>" + wordTranscriptions.get(phonology.mainTranscription) + "</strong><br/>"
  text += "Pronounciation: /" + wordTranscriptions.get("phoneme") + "/<br/>"
  for (const [writing, value] of wordTranscriptions) {
    // The main transcription and the pronunciation have already been written above
    if ((writing !== "phoneme") && (writing !== phonology.mainTranscription)) {
      text += "Translitteration (" + writing + "): " + value + "<br/>"
    }
  }
  text += "</p>"
  return text
}
// Make a rule generator
// Builds a RuleGenerator from the raw 'rules' description once, and returns a function
// that generates a new word and formats it with the given phonology on every call
phonagen.makeRuleBasedGenerator = function (rules, phonology) {
  const engine = new phonagen.RuleGenerator()
  engine.initializeFromDescription(rules)
  return () => {
    const ids = engine.generate()
    return phonagen.formatWord(ids, phonology)
  }
}
// Make a chain generator
// Builds a ChainGenerator of the given order from the raw 'chains' description once, and
// returns a function that generates a new word and formats it with the given phonology
// on every call
phonagen.makeChainBasedGenerator = function (order, chains, phonology) {
  const engine = new phonagen.ChainGenerator(order)
  engine.initializeFromDescription(chains)
  return () => {
    const ids = engine.generate()
    return phonagen.formatWord(ids, phonology)
  }
}
// Parse a raw phonology
// Copies the description, the list of transcriptions and the main transcription name
// ('main-transcription' in the raw data), and indexes the raw entries by their id
phonagen.parsePhonology = function (phon) {
  const entriesById = new Map()
  for (const entry of phon.entries) {
    entriesById.set(entry.id, entry)
  }
  return {
    description: phon.description,
    transcriptions: phon.transcriptions,
    mainTranscription: phon['main-transcription'],
    entries: entriesById,
  }
}
// Parse a raw generator description
// - gen: raw generator ('description', 'phonology' id, 'type', plus 'rules' for the "rules"
//   type or 'order' and 'chains' for the "chains" type)
// - phonologies: Map of phonology id -> parsed phonology
// Returns an object holding the description, the resolved phonology and a 'generate'
// function returning the HTML to display.
// A description that cannot be used (unknown phonology id or unsupported type) does not
// throw: its 'generate' function returns an error message instead.
phonagen.parseGenerator = function (gen, phonologies) {
  const generator = {}
  generator.description = gen.description
  generator.phonology = phonologies.get(gen.phonology)
  if (typeof generator.phonology === "undefined") {
    // Without a phonology the generated ids cannot be formatted; report it at
    // generation time rather than failing there with an obscure TypeError
    generator.generate = function () { return "Error: unknown phonology: " + gen.phonology }
    return generator
  }
  // Type
  if (gen.type === "rules") {
    generator.generate = phonagen.makeRuleBasedGenerator(gen.rules, generator.phonology)
  } else if (gen.type === "chains") {
    generator.generate = phonagen.makeChainBasedGenerator(gen.order, gen.chains, generator.phonology)
  } else {
    generator.generate = function () { return "Error: unsupported generator type: " + gen.type }
  }
  return generator
}
// Draw the phonagen div
// Fills the element with id 'phonagen' with: a drop-down listing every generator of the
// model (option value = generator id, option label = generator description), a
// "Generate" button calling phonagen.generateWord(), and the 'phonagen-output' element.
// - model: object whose 'generators' property is a Map of generator id -> generator
phonagen.drawDiv = function (model) {
  // Ids and descriptions come from the JSON file and are plain text, so the characters
  // that are special in HTML are escaped before being inserted into the markup
  const escapeHtml = function (value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;')
  }
  let contents = ''
  contents += '<select id="generator-selector">'
  for (const [id, gen] of model.generators) {
    contents += '<option value="' + escapeHtml(id) + '">' + escapeHtml(gen.description) + '</option>'
  }
  contents += '</select>'
  contents += '<button onClick="phonagen.generateWord()">Generate</button>'
  contents += '<br/>'
  contents += '<div id="phonagen-output"><br/></div>'
  document.getElementById('phonagen').innerHTML = contents
}
// Load the model
// Parses the raw model (the decoded JSON description) into phonagen.model, then draws
// the interface. Phonologies are parsed first because each generator is resolved
// against the parsed phonologies.
phonagen.loadModel = function (model) {
  // Phonologies, indexed by id
  const phonologies = new Map()
  phonagen.model.phonologies = phonologies
  for (const rawPhonology of model.phonologies) {
    phonologies.set(rawPhonology.id, phonagen.parsePhonology(rawPhonology))
  }
  // Generators, indexed by id
  const generators = new Map()
  phonagen.model.generators = generators
  for (const rawGenerator of model.generators) {
    generators.set(rawGenerator.id, phonagen.parseGenerator(rawGenerator, phonologies))
  }
  // Draw the phonagen <div>
  phonagen.drawDiv(phonagen.model)
}
// Loading function for Phonagen-htmljs
// Fetches the JSON description at 'jsonFile', decodes it and hands it to phonagen.loadModel.
// - jsonFile: URL of the JSON file describing the phonologies and generators
// Returns a promise settled once loading has finished. A failure (network error, HTTP
// error status, invalid JSON, or an error while loading the model) is logged to the
// console instead of being left as an unhandled rejection.
phonagen.load = function (jsonFile) {
  return fetch(jsonFile)
    .then(function (response) {
      // fetch() only rejects on network failures; HTTP errors such as 404 must be checked
      if (!response.ok) {
        throw new Error('HTTP status ' + response.status)
      }
      return response.json()
    })
    .then(phonagen.loadModel)
    .catch(function (error) {
      console.error('phonagen: unable to load the model from ' + jsonFile, error)
    })
}
// Export info
//module.exports = phonagen