commit 340e4595175e5c22656a0277736530fdee7e4d19 Author: Feufochmar Date: Sun Jun 3 19:02:19 2018 +0200 Initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..f8c436e --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +## Phonagen +Phonemic word generation tools. +Phonagen provides several tools to make word generators based on the pronunciation, transcriptions, and transliterations of phonemes. + +The tools are built around a JSON representation of phonemes and word generators. + +### Web interface +The `web` directory contains a sample web interface to generate words from the JSON description included in the `web/data.json` file. +The implementation of generators is located in the script `web/phonagen.js`. +To use it on any webpage: + - include the script on your page (`<script src="phonagen.js"></script>` in the headers) + - add a div (or another block element) with the `phonagen` id + - call the `phonagen.load()` function with the JSON file to use as an argument (either in the `onload` method of the body, or in a script tag placed after the `phonagen` block, e.g. `<script>phonagen.load("data.json")</script>`) + + + +TODO: +- Description of JSON +- Modify the Racket to use JSON inputs +- Editor diff --git a/web/data.json b/web/data.json new file mode 100644 index 0000000..eb78756 --- /dev/null +++ b/web/data.json @@ -0,0 +1,201 @@ +{ + "phonologies": [ + { "id": "sample-greek", + "description": "A sample for testing purpose.", + "transcriptions": ["phoneme", "latin", "greek", "runic"], + "main-transcription": "greek", + "entries": [ + { "id": "'", "description": "Stress", "phoneme": "'", "latin": "", "greek": "", "runic": "" }, + { "id": ".", "description": "Syllable break", "phoneme": ".", "latin": "", "greek": "", "runic": "" }, + { "id": "a", "description": "α /a/", "phoneme": "a", "latin": "a", "greek": "α", "runic": "ᚫ" }, + { "id": "o", "description": "ο /o/", "phoneme": "o", "latin": "o", "greek": "ο", "runic": "ᚩ" }, + { "id": "oo", "description": "ω /oː/", "phoneme": "oː", "latin": "ô", "greek": "ω", 
"runic": "ᚩ" }, + { "id": "k", "description": "κ /k/", "phoneme": "k", "latin": "k", "greek": "κ", "runic": "ᚴ" }, + { "id": "t", "description": "τ /t/", "phoneme": "t", "latin": "t", "greek": "τ", "runic": "ᛏ" } + ] + }, + { "id": "sample-runic", + "description": "Another sample for testing purpose.", + "transcriptions": ["phoneme", "latin", "greek", "runic"], + "main-transcription": "runic", + "entries": [ + { "id": "'", "description": "Stress", "phoneme": "'", "latin": "", "greek": "", "runic": "" }, + { "id": ".", "description": "Syllable break", "phoneme": ".", "latin": "", "greek": "", "runic": "" }, + { "id": "a", "description": "ᚫ /a/", "phoneme": "a", "latin": "a", "greek": "α", "runic": "ᚫ" }, + { "id": "o", "description": "ᚩ /o/", "phoneme": "o", "latin": "o", "greek": "ω", "runic": "ᚩ" }, + { "id": "k", "description": "ᚴ /k/", "phoneme": "k", "latin": "k", "greek": "κ", "runic": "ᚴ" }, + { "id": "t", "description": "ᛏ /t/", "phoneme": "t", "latin": "t", "greek": "τ", "runic": "ᛏ" } + ] + } + ], + "generators": [ + { "id": "sample-rules", + "description": "Rule-based generator for testing purposes.", + "phonology": "sample-greek", + "type": "rules", + "rules": [ + { "id": "C", + "distribution": [ + { "pattern": ["k"], "occurences": 10 }, + { "pattern": ["t"], "occurences": 10 }, + { "pattern": ["k", "t"], "occurences": 5 } + ] + }, + { "id": "V", + "distribution": [ + { "pattern": ["a"], "occurences": 8 }, + { "pattern": ["o"], "occurences": 8 }, + { "pattern": ["oo"], "occurences": 2 }, + { "pattern": ["a", "o"], "occurences": 1 }, + { "pattern": ["a", "oo"], "occurences": 1 } + ] + }, + { "id": "Syl", + "distribution": [ + { "pattern": ["C", "V"], "occurences": 1 } + ] + }, + { "id": "FinalSyl", + "distribution": [ + { "pattern": ["Syl"], "occurences": 3 }, + { "pattern": ["Syl", "C"], "occurences": 7 } + ] + }, + { "id": "word", + "distribution": [ + { "pattern": ["'", "FinalSyl"], "occurences": 5 }, + { "pattern": ["'", "Syl", ".", "FinalSyl"], 
"occurences": 40 }, + { "pattern": ["a", "'", "Syl", ".", "FinalSyl"], "occurences": 10 } + ] + } + ] + }, + { "id": "sample-chains", + "description": "Chain-based generator for testing purpose.", + "phonology": "sample-runic", + "type": "chains", + "order": 2, + "chains": [ + { "input": ["", ""], + "possible-outputs": [ + { "value": "'", "occurences": 45 }, + { "value": "a", "occurences": 10 } + ] + }, + { "input": ["", "'"], + "possible-outputs": [ + { "value": "k", "occurences": 1 }, + { "value": "t", "occurences": 1 } + ] + }, + { "input": ["", "a"], + "possible-outputs": [ + { "value": "'", "occurences": 1 } + ] + }, + { "input": ["a", "'"], + "possible-outputs": [ + { "value": "k", "occurences": 1 }, + { "value": "t", "occurences": 1 } + ] + }, + { "input": ["'", "k"], + "possible-outputs": [ + { "value": "t", "occurences": 1 }, + { "value": "a", "occurences": 4 }, + { "value": "o", "occurences": 5 } + ] + }, + { "input": ["'", "t"], + "possible-outputs": [ + { "value": "a", "occurences": 1 }, + { "value": "o", "occurences": 1 } + ] + }, + { "input": ["k", "t"], + "possible-outputs": [ + { "value": "a", "occurences": 1 }, + { "value": "o", "occurences": 1 } + ] + }, + { "input": ["k", "a"], + "possible-outputs": [ + { "value": "", "occurences": 1 }, + { "value": "k", "occurences": 2 }, + { "value": "t", "occurences": 2 }, + { "value": ".", "occurences": 5 } + ] + }, + { "input": ["k", "o"], + "possible-outputs": [ + { "value": "", "occurences": 1 }, + { "value": "k", "occurences": 2 }, + { "value": "t", "occurences": 2 }, + { "value": ".", "occurences": 5 } + ] + }, + { "input": ["t", "a"], + "possible-outputs": [ + { "value": "", "occurences": 1 }, + { "value": "k", "occurences": 2 }, + { "value": "t", "occurences": 2 }, + { "value": ".", "occurences": 5 } + ] + }, + { "input": ["t", "o"], + "possible-outputs": [ + { "value": "", "occurences": 1 }, + { "value": "k", "occurences": 2 }, + { "value": "t", "occurences": 2 }, + { "value": ".", "occurences": 5 } + 
] + }, + { "input": ["o", "k"], + "possible-outputs": [ + { "value": "", "occurences": 1 } + ] + }, + { "input": ["o", "t"], + "possible-outputs": [ + { "value": "", "occurences": 1 } + ] + }, + { "input": ["a", "k"], + "possible-outputs": [ + { "value": "", "occurences": 1 } + ] + }, + { "input": ["a", "t"], + "possible-outputs": [ + { "value": "", "occurences": 1 } + ] + }, + { "input": ["o", "."], + "possible-outputs": [ + { "value": "k", "occurences": 1 }, + { "value": "t", "occurences": 1 } + ] + }, + { "input": ["a", "."], + "possible-outputs": [ + { "value": "k", "occurences": 1 }, + { "value": "t", "occurences": 1 } + ] + }, + { "input": [".", "k"], + "possible-outputs": [ + { "value": "t", "occurences": 1 }, + { "value": "a", "occurences": 4 }, + { "value": "o", "occurences": 5 } + ] + }, + { "input": [".", "t"], + "possible-outputs": [ + { "value": "a", "occurences": 1 }, + { "value": "o", "occurences": 1 } + ] + } + ] + } + ] +} diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..293d535 --- /dev/null +++ b/web/index.html @@ -0,0 +1,19 @@ + + + + Phonagen-Web + + + + + +
+

Phonagen

+
+
+
+
+ + + + diff --git a/web/phonagen.css b/web/phonagen.css new file mode 100644 index 0000000..7524a3e --- /dev/null +++ b/web/phonagen.css @@ -0,0 +1,148 @@ +body { + margin-left: 1.5%; + margin-right: 1.5%; + + color: hsla(230, 10%, 15%, 1.0); + border-color: hsla(230, 10%, 15%, 1.0); +} + +h1 { + margin: 1px; + padding: 1px; +} + +h2 { + margin: 1px; + padding: 1px; +} + +h3 { + margin: 1px; + padding: 1px; +} + +h4 { + margin: 1px; + padding: 1px; +} + +h5 { + margin: 1px; + padding: 1px; +} + +h6 { + margin: 1px; + padding: 1px; +} + +hr { + border-style: solid; + border-width: thin; +} + +header { + background-color: hsla(230, 10%, 90%, 0.60); + + border-style: solid; + border-width: thin; + + border-radius: 5px; + margin: 2px; + padding: 5px; + text-align: center; +} + +footer { + background-color: hsla(230, 10%, 90%, 0.60); + + border-style: solid; + border-width: thin; + + border-radius: 5px; + margin: 2px; + padding: 5px; + text-align: center; +} + +article { + background-color: hsla(230, 10%, 90%, 0.60); + border-style: solid; + border-width: thin; + border-radius: 5px; + margin: 2px; + padding: 10px; +} + +nav { + background-color: hsla(230, 10%, 90%, 0.60); + border-style: solid; + border-width: thin; + border-radius: 5px; + margin: 2px; + padding: 3px; +} + +section { + margin-left: 5px; +} + +p { + margin-left: 7px; +} + +.nav-item { + display: inline-block; + padding-left: 10px; + padding-right: 10px; +} + +.author-date { + text-align: right; + font-style: italic; +} + +.author { + font-variant: small-caps; +} + +.banner { + text-align: center; + opacity: 1.0; +} + +a:link { + color: hsla(230, 90%, 35%, 1.0); + text-decoration: none; +} +a:visited { + color: hsla(280, 90%, 35%, 1.0); + text-decoration: none; +} +a:hover { + color: hsla(230, 90%, 90%, 1.0); + text-shadow: 1px 0px 2px hsla(230, 90%, 15%, 1.0), -1px 0px 2px hsla(230, 90%, 15%, 1.0), 0px 1px 2px hsla(230, 90%, 15%, 1.0), 0px -1px 2px hsla(230, 90%, 15%, 1.0), 0px 0px 0.2em hsla(230, 90%, 
25%, 1.0) +} + +table { + border-style: solid; + border-width: thin; + border-collapse: collapse; +} + +th { + background-color: hsla(230, 10%, 90%, 1.0); + border-style: solid; + border-width: thin; + border-collapse: collapse; + padding: 5px; + text-align: center; +} + +td { + border-style: solid; + border-width: thin; + border-collapse: collapse; + padding: 5px; + text-align: center; +} diff --git a/web/phonagen.js b/web/phonagen.js new file mode 100644 index 0000000..0894a6a --- /dev/null +++ b/web/phonagen.js @@ -0,0 +1,236 @@ +// Phonagen-web +// Browser-based generator for phonagen outputs + +// Main object: namespace holding every phonagen function and the loaded model +var phonagen = {} + +// Discrete distribution +// The distribution object defines a discrete distribution of items weighted by their number of occurrences +phonagen.Distribution = function () { + this.total = 0 // Total number of elements in the distribution + this.items = new Map() // Map of item -> number of elements +} +// Add 'occur' occurrences of 'elem' to the distribution; occurrences accumulate when 'elem' is already present +phonagen.Distribution.prototype.addTo = function (elem, occur) { + this.total += occur + if (this.items.has(elem)) { + this.items.set(elem, occur + this.items.get(elem)) + } else { + this.items.set(elem, occur) + } +} +// Pick a random element from the distribution, weighted by its number of occurrences; returns undefined when the distribution is empty +phonagen.Distribution.prototype.pickFrom = function () { + var idx = Math.floor(Math.random() * this.total) + var acc = 0 + var pick = undefined + for (var [elem, occur] of this.items) + { + acc += occur + pick = elem + if (acc > idx) { + break + } + } + return pick +} + +// Chain generator +// This generator makes an array of elements from a table of current chain -> next element; 'order' is the number of elements in the current chain +phonagen.ChainGenerator = function (order) { + this.order = order + this.nextItems = new Map() +} +// Populate the table of next elements from a description: an array of objects with an 'input' array and a 'possible-outputs' array of { value, occurences } +phonagen.ChainGenerator.prototype.initializeFromDescription = function (description) { + for (var elem of description) { + var dist = new phonagen.Distribution() + for (var next of elem['possible-outputs']) { + 
dist.addTo(next.value, next.occurences) + } + this.nextItems.set(elem.input.toString(), dist) // note: toString() when setting and getting to be able to compare the arrays + } +} +// Generate an array of elements: start from a chain of 'order' empty strings and append picked elements until the empty string is picked (throws if the current chain has no entry in the table) +phonagen.ChainGenerator.prototype.generate = function () { + var next = function (cg, current, output) { + var nextElem = cg.nextItems.get(current.toString()).pickFrom() + if (nextElem === "") { + return output + } else { + current.shift() + current.push(nextElem) + output.push(nextElem) + return next(cg, current, output) + } + } + var current = new Array() + for (var i = 0; i < this.order; ++i) { + current.push("") + } + return next(this, current, new Array()) +} + +// Rule generator +// This generator makes an array of elements from rules describing how to generate the array +// Each rule is associated to a distribution of patterns indicating how to replace a rule identifier +phonagen.RuleGenerator = function () { + this.rules = new Map() +} +// Populate the rules table from a description: an array of objects with an 'id' and a 'distribution' array of { pattern, occurences } +phonagen.RuleGenerator.prototype.initializeFromDescription = function (description) { + for (var rule of description) { + var dist = new phonagen.Distribution() + for (var pat of rule.distribution) { + dist.addTo(pat.pattern, pat.occurences) + } + this.rules.set(rule.id, dist) + } +} +// Generate an array of elements by expanding a pattern picked from the 'word' rule; identifiers that do not name a rule are kept as-is +phonagen.RuleGenerator.prototype.generate = function () { + var replacePattern = function (rules, input) { + var output = new Array() + for (var i of input) { + if (rules.has(i)) { + output = output.concat(replacePattern(rules, rules.get(i).pickFrom())) + } else { + output.push(i) + } + } + return output + } + return replacePattern(this.rules, this.rules.get('word').pickFrom()) +} + +// Model of the phonagen generator (phonologies and generators maps, filled by phonagen.loadModel) +phonagen.model = {} + +// Generate a word with the generator selected in the 'generator-selector' element and display it in the 'phonagen-output' element +phonagen.generateWord = function () { + var selector = document.getElementById('generator-selector') + var generatorId = selector.options[selector.selectedIndex].value + var 
generator = phonagen.model.generators.get(generatorId) + document.getElementById('phonagen-output').innerHTML = generator.generate() +} + +// Transform a list of phoneme ids to a formatted string containing the word in each transcription of the phonology +phonagen.formatWord = function (phonemes, phonology) { + // Construct the different transcriptions + var wordTranscriptions = new Map() + for (var writing of phonology.transcriptions) { + var word = "" + for (var id of phonemes) { + var letter = phonology.entries.get(id)[writing] + if (typeof letter !== "undefined") { + word += letter + } + } + wordTranscriptions.set(writing, word) + } + // Construct what should be displayed + var text = "

" + text += "" + wordTranscriptions.get(phonology.mainTranscription) + "
" + text += "Pronounciation: /" + wordTranscriptions.get("phoneme") +"/
" + for (var [writing, value] of wordTranscriptions) { + if ( (writing !== "phoneme") + && (writing !== phonology.mainTranscription) ) { + text += "Translitteration (" + writing + "): " + value + "
" + } + } + text += "

" + return text +} + +// Make a rule-based generator: returns a function generating a formatted word from the rules and the phonology +phonagen.makeRuleBasedGenerator = function (rules, phonology) { + var ruleGenerator = new phonagen.RuleGenerator() + ruleGenerator.initializeFromDescription(rules) + return function () { + var phonemes = ruleGenerator.generate() + return phonagen.formatWord(phonemes, phonology) + } +} + +// Make a chain-based generator: returns a function generating a formatted word from the chains of the given order and the phonology +phonagen.makeChainBasedGenerator = function (order, chains, phonology) { + var chainGenerator = new phonagen.ChainGenerator(order) + chainGenerator.initializeFromDescription(chains) + return function () { + var phonemes = chainGenerator.generate() + return phonagen.formatWord(phonemes, phonology) + } +} + +// Parse a raw phonology: the entries are indexed by id in a Map +phonagen.parsePhonology = function (phon) { + var phonology = {} + phonology.description = phon.description + phonology.transcriptions = phon.transcriptions + phonology.mainTranscription = phon['main-transcription'] + phonology.entries = new Map() + for (var entry of phon.entries) { + phonology.entries.set(entry.id, entry) + } + return phonology +} + +// Parse a raw generator description: resolve its phonology and build its generate() function from its type ("rules" or "chains") +phonagen.parseGenerator = function (gen, phonologies) { + var generator = {} + generator.description = gen.description + generator.phonology = phonologies.get(gen.phonology) + // Type + if (gen.type === "rules") { + generator.generate = phonagen.makeRuleBasedGenerator(gen.rules, generator.phonology) + } else if (gen.type === "chains") { + generator.generate = phonagen.makeChainBasedGenerator(gen.order, gen.chains, generator.phonology) + } else { + generator.generate = function () { return "Error: unsupported generator type: " + gen.type } + } + return generator +} + +// Draw the phonagen div: replace the contents of the element with the 'phonagen' id +phonagen.drawDiv = function (model) { + var contents = '' + contents += '' + contents += '' + contents += '
' + contents += '

' + document.getElementById('phonagen').innerHTML = contents +} + +// Load the model: parse the raw phonologies and generators into phonagen.model, then draw the interface +phonagen.loadModel = function (model) { + // Parse the raw model + // Phonologies + phonagen.model.phonologies = new Map() + for (let phon of model.phonologies) { + phonagen.model.phonologies.set(phon.id, phonagen.parsePhonology(phon)) + } + // Generators + phonagen.model.generators = new Map() + for (let gen of model.generators) { + phonagen.model.generators.set(gen.id, phonagen.parseGenerator(gen, phonagen.model.phonologies)) + } + // Draw the phonagen
+ phonagen.drawDiv(phonagen.model) +} + + +// Loading function for Phonagen-htmljs: fetch the JSON file and load the parsed model (fetch and parsing failures are not handled) +phonagen.load = function (jsonFile) { + fetch(jsonFile) + .then(function(response) { + return response.json() + }) + .then(phonagen.loadModel) +} + +// Export info +//module.exports = phonagen