gener-communes/name-generator.rkt

229 lines
7.5 KiB
Racket

#lang racket
(require
json
"markov.rkt"
"distribution.rkt")
; Structure for each region
(struct region
(id ; id of region
name ; name of region
departements ; table of departements (id->dep)
))
; Constructor for a region
(define (make-region id name)
(region id name (make-hash)))
; Structure for each departement
(struct departement
(id ; id of departement
name ; name of departement
patterns ; distribution of patterns
word-masculine ; distribution of masculine words
word-feminine ; distribution of feminine words
word-plural ; distribution of plural words
word-masculine-plural ; distribution of masculine plural words
word-feminine-plural ; distribution of feminine plural words
person-name-masculine ; distributino of masculine person name
person-name-feminine ; distribution of feminine person name
locality-name ; markov chain for locality names
area-name ; markov chain for area names
river-name ; markov chain for river names
))
; Constructor for a departement
(define (make-departement id name)
(departement
id
name
(make-distribution)
(make-distribution)
(make-distribution)
(make-distribution)
(make-distribution)
(make-distribution)
(make-distribution)
(make-distribution)
(make-markov 3)
(make-markov 3)
(make-markov 3)))
; Generate names
; Replace symbols with a generated element
(define (replace-with sym dep)
(cond
((symbol? sym)
(case sym
(($WordMasculine$) (distribution-pick-from (departement-word-masculine dep)))
(($WordFeminine$) (distribution-pick-from (departement-word-feminine dep)))
(($WordPlural$) (distribution-pick-from (departement-word-plural dep)))
(($WordFemininePlural$) (distribution-pick-from (departement-word-feminine-plural dep)))
(($WordMasculinePlural$) (distribution-pick-from (departement-word-masculine-plural dep)))
(($PersonNameMasculine$) (distribution-pick-from (departement-person-name-masculine dep)))
(($PersonNameFeminine$) (distribution-pick-from (departement-person-name-feminine dep)))
(($LocalityName$) (list->string (markov-generate (departement-locality-name dep))))
(($AreaName$) (list->string (markov-generate (departement-area-name dep))))
(($RiverName$) (list->string (markov-generate (departement-river-name dep))))
(else sym)))
((list? sym)
(map (lambda (x) (replace-with x dep)) sym))
(#t sym)))
; Indicate if a string starts with a vowel
; Accent taken into account
(define (start-with-vowel? str)
(member
(car
(string->list
(string-normalize-nfd str)))
(list #\A #\E #\I #\O #\U #\Y)))
; Transform the tree structure of a pattern back to a string
(define (tree->string pattern)
(cond
( (list? pattern)
(case (car pattern)
((el sans la les las los Saint San Sainte Santa)
(string-append
(symbol->string (car pattern))
"-"
(tree->string (cadr pattern))))
((le)
; elision possible: much check the initial of the following word
(let ((following (tree->string (cadr pattern))))
(string-append
(if (start-with-vowel? following)
"l'"
"le-")
following)))
((Santo)
; elision possible: much check the initial of the following
(let ((following (tree->string (cadr pattern))))
(string-append
(if (start-with-vowel? following)
"Sant'"
"Santo-")
following)))
((et sous lès lez près à en di sur)
(string-append
(tree->string (cadr pattern))
"-"
(symbol->string (car pattern))
"-"
(tree->string (caddr pattern))))
((de)
; ellision possible: must check the initial of the following word
(let ((following (tree->string (caddr pattern))))
(string-append
(tree->string (cadr pattern))
"-"
(if (start-with-vowel? following)
"d'"
"de-")
following)))
(else
(string-join
(map tree->string pattern)
"-"))))
( (string? pattern)
pattern)
(#t
(error "Unexpected element: " pattern))))
; Apply ellision rules
(define (apply-ellisions str)
(define res str)
(set! res (string-replace res "-à-les-" "-aux-"))
(set! res (string-replace res "-de-los-" "-dels-")) ; Catalan
(set! res (string-replace res "-de-el-" "-del-")) ; Catalan
(set! res (string-replace res "-de-les-" "-des-"))
(set! res (string-replace res "-de-le-" "-du-"))
(set! res (string-replace res "-à-le-" "-au-"))
; Start of word
(cond
((string-prefix? res "la-")
(string-replace res "la-" "La " #:all? #f))
((string-prefix? res "le-")
(string-replace res "le-" "Le " #:all? #f))
((string-prefix? res "les-")
(string-replace res "les-" "Les " #:all? #f))
((string-prefix? res "l'")
(string-replace res "l'" "L'" #:all? #f))
((string-prefix? res "los-")
(string-replace res "los-" "Los " #:all? #f))
(#t res)))
; Generate a new name from a departement
(define (departement-generate dep)
(apply-ellisions
(tree->string
(replace-with
(distribution-pick-from (departement-patterns dep))
dep))))
; Generate a new name from a region
(define (region-generate reg)
(departement-generate (car (shuffle (hash-values (region-departements reg))))))
; Generate a new name
(define (generate gen)
(region-generate (car (shuffle (hash-values gen)))))
(define (pattern-translator x)
(cond
((string? x) (string->symbol x))
((list? x) (map pattern-translator x))
(#t x)))
(define (markov-translator x)
(cond
((string? x) (car (string->list x))) ; single-character strings
(#t x)))
; Convert a json-compatible structure to a departement
(define (jsexpr->departement js)
(departement
(hash-ref js 'id)
(hash-ref js 'name)
(jsexpr->distribution (hash-ref js 'patterns) #:translator pattern-translator)
(jsexpr->distribution (hash-ref js 'word-masculine))
(jsexpr->distribution (hash-ref js 'word-feminine))
(jsexpr->distribution (hash-ref js 'word-plural))
(jsexpr->distribution (hash-ref js 'word-masculine-plural))
(jsexpr->distribution (hash-ref js 'word-feminine-plural))
(jsexpr->distribution (hash-ref js 'person-name-masculine))
(jsexpr->distribution (hash-ref js 'person-name-feminine))
(jsexpr->markov (hash-ref js 'locality-name) #:translator markov-translator)
(jsexpr->markov (hash-ref js 'area-name) #:translator markov-translator)
(jsexpr->markov (hash-ref js 'river-name) #:translator markov-translator)))
; Convert a json-compatible structure to a region
(define (jsexpr->region js)
(define reg (make-region (hash-ref js 'id) (hash-ref js 'name)))
(for-each
(lambda (dep)
(hash-set! (region-departements reg) (departement-id dep) dep))
(map jsexpr->departement (hash-ref js 'departements)))
reg)
; Convert a json-compatible structure to a generator
(define (jsexpr->generator js)
(define gen (make-hash))
(for-each
(lambda (reg)
(hash-set! gen (region-id reg) reg))
(map jsexpr->region js))
gen)
; Read the file containing all the data
(define *input* (open-input-file "./web/gener-communes.json"))
; Process the input and train
(define *generator* (jsexpr->generator (read-json *input*)))
(for-each
displayln
(build-list 100 (lambda (_) (generate *generator*))))