(svn r1929) Feature: [namegen] Support for dynamic generation of the Czech town names.

The static names are still used in 1/4 of cases. I think the tables for
dynamic generation must look pretty spectacular. :-) New stems are still
needed and there can be occasional glitches, please let me know.

I guess that this method of dynamic generation could be used for at least
Slovak town names, too. And possibly other Slavic languages?
This commit is contained in:
pasky
2005-03-05 21:00:13 +00:00
parent 4325f13f74
commit d0f9cc27a3
2 changed files with 376 additions and 1 deletions

View File

@@ -1659,6 +1659,228 @@ static const char *name_czech_real[] = {
"Znojmo"
};
/* The advanced hyperintelligent Czech town names generator! */
/* Grammatical gender/number classes used by the Czech name tables.
 * The 'S' names are the singular forms and the 'P' names the plural ones.
 * The first six values (CZG_SMASC..CZG_PNEUT) are the concrete genders and
 * serve as the first index of name_czech_patmod[6][3]. */
enum CzechGender {
	CZG_SMASC = 0,
	CZG_SFEM  = 1,
	CZG_SNEUT = 2,
	CZG_PMASC = 3,
	CZG_PFEM  = 4,
	CZG_PNEUT = 5,
	/* Only meaningful on substantive stems: the gender is not fixed by the
	 * stem, the chosen ending decides it. */
	CZG_FREE  = 6,
	/* Same as CZG_FREE, except that CZG_SNEUT is not allowed. */
	CZG_NFREE = 7
};
/* Declension pattern of an adjective; used as the second index of
 * name_czech_patmod[6][3] to pick the character that replaces the last
 * character of the adjective. */
enum CzechPattern {
	CZP_JARNI = 0,
	CZP_MLADY = 1,
	CZP_PRIVL = 2
};
/* [CzechGender][CzechPattern] - the last character of the adjective is
 * replaced by this character.
 *
 * Rows are the six concrete genders CZG_SMASC..CZG_PNEUT; columns are
 * CZP_JARNI, CZP_MLADY and CZP_PRIVL, in that order.
 *
 * The accented letters are spelled as hex escapes so that the table does not
 * depend on the encoding this source file happens to be stored in:
 *   \xED = i-acute, \xFD = y-acute, \xE1 = a-acute, \xE9 = e-acute
 * (these code points are identical in ISO-8859-1 and ISO-8859-2).
 * NOTE(review): assumes the town name strings use a single-byte Latin
 * charset - confirm against the code that renders the names. */
// XXX: [CZG_SMASC][CZP_PRIVL] needs special handling: -ovX -> -uv.
static const char name_czech_patmod[6][3] = {
	/* CZG_SMASC */ { '\xED', '\xFD', 'X' },
	/* CZG_SFEM  */ { '\xED', '\xE1', 'a' },
	/* CZG_SNEUT */ { '\xED', '\xE9', 'o' },
	/* CZG_PMASC */ { '\xED', '\xE9', 'y' },
	/* CZG_PFEM  */ { '\xED', '\xE9', 'y' },
	/* CZG_PNEUT */ { '\xED', '\xE1', 'a' }
};
/* Bit flags that let a substantive restrict which adjectives/endings it may
 * be combined with. A combination is acceptable when at least one of these
 * flags is satisfied. */
enum CzechAllow {
	CZA_SHORT  = 1 << 0,
	CZA_MIDDLE = 1 << 1,
	CZA_LONG   = 1 << 2,
	CZA_ALL    = ~0 /* every bit set */
};
/* Bit flags describing what kind of partner a name part requires. Unlike
 * CzechAllow, all of these flags must be satisfied (checked in the
 * stem->others direction). */
enum CzechChoose {
	CZC_NORMAL    = 1 << 0,
	CZC_COLOR     = 1 << 1,
	CZC_POSTFIX   = 1 << 2, /* Matched if postfix was inserted. */
	CZC_NOPOSTFIX = 1 << 3, /* Matched if no postfix was inserted. */
	CZC_ANY       = ~0      /* every bit set */
};
/* One substantive (noun) building block: a full noun, a stem or an ending.
 * The tables of this type are filled with positional initializers, so the
 * field order must not change. */
struct CzechNameSubst {
/* Gender class; CZG_FREE / CZG_NFREE are used by stems whose gender is
 * chosen by the ending. */
enum CzechGender gender;
/* CzechAllow flag mask (at least one flag must be satisfied). */
enum CzechAllow allow;
/* CzechChoose flag mask (all flags must be satisfied). */
enum CzechChoose choose;
/* Text of this name part. */
const char *name;
};
/* One adjective that can be put in front of a substantive. The adjective
 * table is filled with positional initializers, so the field order must not
 * change. */
struct CzechNameAdj {
/* Declension pattern; selects the column of name_czech_patmod. */
enum CzechPattern pattern;
/* CzechChoose flag mask. */
enum CzechChoose choose;
/* Adjective text; its last character is the one replaced according to
 * name_czech_patmod. */
const char *name;
};
// Adjectives. Some items which should be more common are listed twice.
// The last character of each name is replaced according to
// name_czech_patmod[gender][pattern], so the stored final letter is only a
// placeholder.
// NOTE(review): the non-ASCII characters in these literals look
// encoding-damaged in this copy of the file - verify against the repository.
static const struct CzechNameAdj name_czech_adj[] = {
{ CZP_JARNI, CZC_ANY, "Horn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Horn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Doln<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Doln<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Predn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Zadn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Kosteln<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Havran<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "R<EFBFBD>cn<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Velk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Velk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Mal<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Mal<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Vysok<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Cesk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Moravsk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Slov<EFBFBD>ck<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Uhersk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Star<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Star<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Nov<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Nov<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Mlad<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Kr<EFBFBD>lovsk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Kamenn<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Cihlov<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Divn<EFBFBD>" },
// Colour adjectives: tagged CZC_COLOR instead of CZC_ANY.
{ CZP_MLADY, CZC_COLOR, "Cerven<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Cerven<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Zelen<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Zlut<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Siv<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Sed<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "B<EFBFBD>l<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Modr<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Ruzov<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Cern<EFBFBD>" },
{ CZP_PRIVL, CZC_ANY, "Kr<EFBFBD>lova" },
{ CZP_PRIVL, CZC_ANY, "Janova" },
{ CZP_PRIVL, CZC_ANY, "Karlova" },
{ CZP_PRIVL, CZC_ANY, "Jir<EFBFBD>kova" },
{ CZP_PRIVL, CZC_ANY, "Petrova" },
// NOTE(review): the only CZP_PRIVL entry stored with a final 'o' instead of
// 'a'; presumably harmless because the last character gets replaced - confirm.
{ CZP_PRIVL, CZC_ANY, "Sudovo" },
};
// Complete substantives with a fixed gender. For the purposes of choose/allow
// matching an entry of this table is considered a stem.
// "Brod" and "Lhota" are listed twice.
// NOTE(review): the non-ASCII characters in these literals look
// encoding-damaged in this copy of the file - verify against the repository.
static const struct CzechNameSubst name_czech_subst_full[] = {
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Sedlec" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL, "<EFBFBD>val" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hora" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hlava" },
{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Zd<EFBFBD>r" },
{ CZG_PMASC, CZA_ALL, CZC_NORMAL, "<EFBFBD>valy" },
{ CZG_PFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Luka" },
{ CZG_PNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
};
// Substantive stems, to be completed by an optional postfix
// (name_czech_subst_postfix) and an ending (name_czech_subst_ending).
// Stems marked CZG_FREE / CZG_NFREE take their gender from the ending
// (CZG_NFREE disallows CZG_SNEUT); the allow mask restricts which endings fit.
// NOTE(review): the non-ASCII characters in these literals look
// encoding-damaged in this copy of the file - verify against the repository.
// TODO: More stems needed. --pasky
static const struct CzechNameSubst name_czech_subst_stem[] = {
{ CZG_SMASC, CZA_MIDDLE, CZC_NORMAL | CZC_COLOR, "Kostel" },
{ CZG_SMASC, CZA_MIDDLE, CZC_NORMAL | CZC_COLOR, "Kl<EFBFBD>ster" },
{ CZG_SMASC, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Lhot" },
{ CZG_SFEM, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Lhot" },
{ CZG_SFEM, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Hur" },
{ CZG_FREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Sedl" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hrad" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Pras" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Baz" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Tes" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Uz" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Br" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Vod" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Jan" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Prach" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Kunr" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Strak" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Vit" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Vys" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Zat" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Zer" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Stred" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Harv" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Pruh" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Tach" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "P<EFBFBD>sn" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jin" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jes" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jar" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Sok" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Hod" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Net" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Praz" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Nerat" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Kral" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL | CZC_NOPOSTFIX, "Pan" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Odstred" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Mrat" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hlav" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE, CZC_NORMAL, "Mer" },
};
/* Postfixes that may optionally be inserted between a stem and its ending.
 * Grouped by vowel (a, o, e), each followed by v, n, t. */
static const char *name_czech_subst_postfix[] = {
	"av",
	"an",
	"at",
	"ov",
	"on",
	"ot",
	"ev",
	"en",
	"et",
};
// Endings appended to a stem (after the optional postfix). The gender of the
// ending decides the gender for CZG_FREE / CZG_NFREE stems.
// CZC_POSTFIX / CZC_NOPOSTFIX entries only match when a postfix was / was not
// inserted.
// This array must have both neuter genders (CZG_PNEUT, CZG_SNEUT) at the end!
// NOTE(review): the non-ASCII characters in these literals look
// encoding-damaged in this copy of the file - verify against the repository.
static const struct CzechNameSubst name_czech_subst_ending[] = {
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "ec" },
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "<EFBFBD>n" },
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ov" },
{ CZG_SMASC, CZA_SHORT | CZA_LONG, CZC_ANY, "kov" },
{ CZG_SMASC, CZA_LONG, CZC_POSTFIX, "<EFBFBD>n" },
{ CZG_SMASC, CZA_LONG, CZC_POSTFIX, "n<EFBFBD>k" },
{ CZG_SFEM, CZA_SHORT, CZC_ANY, "ka" },
{ CZG_SFEM, CZA_MIDDLE, CZC_ANY, "inka" },
{ CZG_SFEM, CZA_MIDDLE, CZC_NOPOSTFIX, "na" },
{ CZG_SFEM, CZA_MIDDLE, CZC_ANY, "n<EFBFBD>" },
{ CZG_SFEM, CZA_LONG, CZC_ANY, "ava" },
{ CZG_PMASC, CZA_LONG, CZC_ANY, "<EFBFBD>ky" },
{ CZG_PMASC, CZA_LONG, CZC_ANY, "upy" },
{ CZG_PFEM, CZA_LONG, CZC_ANY, "avy" },
{ CZG_PFEM, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ice" },
// Neuter endings: must stay last.
{ CZG_PNEUT, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "na" },
{ CZG_SNEUT, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "no" },
{ CZG_SNEUT, CZA_LONG, CZC_ANY, "iste" },
};
// Geographic suffix phrases of the form "nad X" / "pod X".
// NOTE(review): presumably appended after the generated town name - the code
// using this table is not visible here, confirm.
// NOTE(review): the non-ASCII characters in these literals look
// encoding-damaged in this copy of the file - verify against the repository.
static const char *name_czech_suffix[] = {
"nad Cydlinou",
"nad Dyj<79>",
"nad Jihlavou",
"nad Labem",
"nad Lesy",
"nad Moravou",
"nad Nisou",
"nad Odrou",
"nad Ostravic<69>",
"nad S<>zavou",
"nad Vltavou",
"pod Pradedem",
"pod Radhostem",
"pod R<>pem",
"pod Snezkou",
"pod Spic<69>kem",
"pod Sedlem",
};
static const char *name_romanian_real[]= {
"Adjud",
"Alba Iulia",