Combine CC-CEDICT "surname" entries with their regular entry counterparts

For example:
王 Wang2 surname Wang
王 wang2 king / monarch

to
王 wang2 surname Wang / king / monarch
This commit is contained in:
Marvin Elsen 2024-10-11 20:29:32 +02:00
parent 816bdd8f51
commit f159d3f404
Signed by: marvinelsen
GPG Key ID: 820672408CC318C2

View File

@ -64,7 +64,14 @@ fun main() {
connection.prepareStatement(
"INSERT OR IGNORE INTO entry(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, cedict_definitions, cross_straits_definitions, moe_definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?,?,?)"
)
for (entry in cedictEntries) {
cedictEntries.groupBy {
"${it.traditional}${it.pinyinSyllables.joinToString(
""
) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).lowercase() }}"
}.entries.forEach { entries ->
val entry = entries.value.first()
val definitions = entries.value.flatMap { it.definitions }
try {
insertStatement.setString(1, entry.traditional)
insertStatement.setString(2, entry.simplified)
@ -72,13 +79,17 @@ fun main() {
3,
entry.pinyinSyllables.joinToString(
separator = " "
) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) }
) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS).lowercase() }
)
insertStatement.setString(
4,
entry.pinyinSyllables.joinToString(
separator = " "
) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).replace("u:", "ü").replace("v", "ü") }
) {
it.format(
TransliterationSystem.PINYIN_WITH_TONE_NUMBERS
).lowercase().replace("u:", "ü").replace("v", "ü")
}
)
insertStatement.setString(
5,
@ -106,7 +117,7 @@ fun main() {
8,
Json.encodeToString(
ListSerializer(ListSerializer(String.serializer())),
entry.definitions.map { it.glosses }
definitions.map { it.glosses }
)
)
insertStatement.setString(