229 lines
7.5 KiB
Racket
229 lines
7.5 KiB
Racket
#lang racket
|
|
|
|
(require
|
|
json
|
|
"markov.rkt"
|
|
"distribution.rkt")
|
|
|
|
; Structure for each region
|
|
(struct region
|
|
(id ; id of region
|
|
name ; name of region
|
|
departements ; table of departements (id->dep)
|
|
))
|
|
|
|
; Constructor for a region
|
|
(define (make-region id name)
|
|
(region id name (make-hash)))
|
|
|
|
; Structure for each departement
|
|
(struct departement
|
|
(id ; id of departement
|
|
name ; name of departement
|
|
patterns ; distribution of patterns
|
|
word-masculine ; distribution of masculine words
|
|
word-feminine ; distribution of feminine words
|
|
word-plural ; distribution of plural words
|
|
word-masculine-plural ; distribution of masculine plural words
|
|
word-feminine-plural ; distribution of feminine plural words
|
|
person-name-masculine ; distributino of masculine person name
|
|
person-name-feminine ; distribution of feminine person name
|
|
locality-name ; markov chain for locality names
|
|
area-name ; markov chain for area names
|
|
river-name ; markov chain for river names
|
|
))
|
|
|
|
; Constructor for a departement
|
|
(define (make-departement id name)
|
|
(departement
|
|
id
|
|
name
|
|
(make-distribution)
|
|
(make-distribution)
|
|
(make-distribution)
|
|
(make-distribution)
|
|
(make-distribution)
|
|
(make-distribution)
|
|
(make-distribution)
|
|
(make-distribution)
|
|
(make-markov 3)
|
|
(make-markov 3)
|
|
(make-markov 3)))
|
|
|
|
; Generate names
|
|
; Replace symbols with a generated element
|
|
(define (replace-with sym dep)
|
|
(cond
|
|
((symbol? sym)
|
|
(case sym
|
|
(($WordMasculine$) (distribution-pick-from (departement-word-masculine dep)))
|
|
(($WordFeminine$) (distribution-pick-from (departement-word-feminine dep)))
|
|
(($WordPlural$) (distribution-pick-from (departement-word-plural dep)))
|
|
(($WordFemininePlural$) (distribution-pick-from (departement-word-feminine-plural dep)))
|
|
(($WordMasculinePlural$) (distribution-pick-from (departement-word-masculine-plural dep)))
|
|
(($PersonNameMasculine$) (distribution-pick-from (departement-person-name-masculine dep)))
|
|
(($PersonNameFeminine$) (distribution-pick-from (departement-person-name-feminine dep)))
|
|
(($LocalityName$) (list->string (markov-generate (departement-locality-name dep))))
|
|
(($AreaName$) (list->string (markov-generate (departement-area-name dep))))
|
|
(($RiverName$) (list->string (markov-generate (departement-river-name dep))))
|
|
(else sym)))
|
|
((list? sym)
|
|
(map (lambda (x) (replace-with x dep)) sym))
|
|
(#t sym)))
|
|
|
|
; Indicate if a string starts with a vowel
|
|
; Accent taken into account
|
|
(define (start-with-vowel? str)
|
|
(member
|
|
(car
|
|
(string->list
|
|
(string-normalize-nfd str)))
|
|
(list #\A #\E #\I #\O #\U #\Y)))
|
|
|
|
; Transform the tree structure of a pattern back to a string
|
|
(define (tree->string pattern)
|
|
(cond
|
|
( (list? pattern)
|
|
(case (car pattern)
|
|
((el sans la les las los Saint San Sainte Santa)
|
|
(string-append
|
|
(symbol->string (car pattern))
|
|
"-"
|
|
(tree->string (cadr pattern))))
|
|
((le)
|
|
; elision possible: much check the initial of the following word
|
|
(let ((following (tree->string (cadr pattern))))
|
|
(string-append
|
|
(if (start-with-vowel? following)
|
|
"l'"
|
|
"le-")
|
|
following)))
|
|
((Santo)
|
|
; elision possible: much check the initial of the following
|
|
(let ((following (tree->string (cadr pattern))))
|
|
(string-append
|
|
(if (start-with-vowel? following)
|
|
"Sant'"
|
|
"Santo-")
|
|
following)))
|
|
((et sous lès lez près à en di sur)
|
|
(string-append
|
|
(tree->string (cadr pattern))
|
|
"-"
|
|
(symbol->string (car pattern))
|
|
"-"
|
|
(tree->string (caddr pattern))))
|
|
((de)
|
|
; ellision possible: must check the initial of the following word
|
|
(let ((following (tree->string (caddr pattern))))
|
|
(string-append
|
|
(tree->string (cadr pattern))
|
|
"-"
|
|
(if (start-with-vowel? following)
|
|
"d'"
|
|
"de-")
|
|
following)))
|
|
(else
|
|
(string-join
|
|
(map tree->string pattern)
|
|
"-"))))
|
|
( (string? pattern)
|
|
pattern)
|
|
(#t
|
|
(error "Unexpected element: " pattern))))
|
|
|
|
; Apply ellision rules
|
|
(define (apply-ellisions str)
|
|
(define res str)
|
|
(set! res (string-replace res "-à-les-" "-aux-"))
|
|
(set! res (string-replace res "-de-los-" "-dels-")) ; Catalan
|
|
(set! res (string-replace res "-de-el-" "-del-")) ; Catalan
|
|
(set! res (string-replace res "-de-les-" "-des-"))
|
|
(set! res (string-replace res "-de-le-" "-du-"))
|
|
(set! res (string-replace res "-à-le-" "-au-"))
|
|
; Start of word
|
|
(cond
|
|
((string-prefix? res "la-")
|
|
(string-replace res "la-" "La " #:all? #f))
|
|
((string-prefix? res "le-")
|
|
(string-replace res "le-" "Le " #:all? #f))
|
|
((string-prefix? res "les-")
|
|
(string-replace res "les-" "Les " #:all? #f))
|
|
((string-prefix? res "l'")
|
|
(string-replace res "l'" "L'" #:all? #f))
|
|
((string-prefix? res "los-")
|
|
(string-replace res "los-" "Los " #:all? #f))
|
|
(#t res)))
|
|
|
|
; Generate a new name from a departement
|
|
(define (departement-generate dep)
|
|
(apply-ellisions
|
|
(tree->string
|
|
(replace-with
|
|
(distribution-pick-from (departement-patterns dep))
|
|
dep))))
|
|
|
|
; Generate a new name from a region
|
|
(define (region-generate reg)
|
|
(departement-generate (car (shuffle (hash-values (region-departements reg))))))
|
|
|
|
; Generate a new name
|
|
(define (generate gen)
|
|
(region-generate (car (shuffle (hash-values gen)))))
|
|
|
|
(define (pattern-translator x)
|
|
(cond
|
|
((string? x) (string->symbol x))
|
|
((list? x) (map pattern-translator x))
|
|
(#t x)))
|
|
|
|
(define (markov-translator x)
|
|
(cond
|
|
((string? x) (car (string->list x))) ; single-character strings
|
|
(#t x)))
|
|
|
|
; Convert a json-compatible structure to a departement
|
|
(define (jsexpr->departement js)
|
|
(departement
|
|
(hash-ref js 'id)
|
|
(hash-ref js 'name)
|
|
(jsexpr->distribution (hash-ref js 'patterns) #:translator pattern-translator)
|
|
(jsexpr->distribution (hash-ref js 'word-masculine))
|
|
(jsexpr->distribution (hash-ref js 'word-feminine))
|
|
(jsexpr->distribution (hash-ref js 'word-plural))
|
|
(jsexpr->distribution (hash-ref js 'word-masculine-plural))
|
|
(jsexpr->distribution (hash-ref js 'word-feminine-plural))
|
|
(jsexpr->distribution (hash-ref js 'person-name-masculine))
|
|
(jsexpr->distribution (hash-ref js 'person-name-feminine))
|
|
(jsexpr->markov (hash-ref js 'locality-name) #:translator markov-translator)
|
|
(jsexpr->markov (hash-ref js 'area-name) #:translator markov-translator)
|
|
(jsexpr->markov (hash-ref js 'river-name) #:translator markov-translator)))
|
|
|
|
; Convert a json-compatible structure to a region
|
|
(define (jsexpr->region js)
|
|
(define reg (make-region (hash-ref js 'id) (hash-ref js 'name)))
|
|
(for-each
|
|
(lambda (dep)
|
|
(hash-set! (region-departements reg) (departement-id dep) dep))
|
|
(map jsexpr->departement (hash-ref js 'departements)))
|
|
reg)
|
|
|
|
; Convert a json-compatible structure to a generator
|
|
(define (jsexpr->generator js)
|
|
(define gen (make-hash))
|
|
(for-each
|
|
(lambda (reg)
|
|
(hash-set! gen (region-id reg) reg))
|
|
(map jsexpr->region js))
|
|
gen)
|
|
|
|
; Read the file containing all the data
|
|
(define *input* (open-input-file "./web/gener-communes.json"))
|
|
|
|
; Process the input and train
|
|
(define *generator* (jsexpr->generator (read-json *input*)))
|
|
|
|
(for-each
|
|
displayln
|
|
(build-list 100 (lambda (_) (generate *generator*))))
|