Combine CC-CEDICT "surname" entries with their regular entry counterparts

For example:
王 Wang2 surname Wang
王 wang2 king / monarch

to
王 wang2 surname Wang / king / monarch
This commit is contained in:
Marvin Elsen 2024-10-11 20:29:32 +02:00
parent 816bdd8f51
commit f159d3f404
Signed by: marvinelsen
GPG Key ID: 820672408CC318C2

View File

@ -64,7 +64,14 @@ fun main() {
connection.prepareStatement( connection.prepareStatement(
"INSERT OR IGNORE INTO entry(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, cedict_definitions, cross_straits_definitions, moe_definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?,?,?)" "INSERT OR IGNORE INTO entry(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, cedict_definitions, cross_straits_definitions, moe_definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?,?,?)"
) )
for (entry in cedictEntries) { cedictEntries.groupBy {
"${it.traditional}${it.pinyinSyllables.joinToString(
""
) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).lowercase() }}"
}.entries.forEach { entries ->
val entry = entries.value.first()
val definitions = entries.value.flatMap { it.definitions }
try { try {
insertStatement.setString(1, entry.traditional) insertStatement.setString(1, entry.traditional)
insertStatement.setString(2, entry.simplified) insertStatement.setString(2, entry.simplified)
@ -72,13 +79,17 @@ fun main() {
3, 3,
entry.pinyinSyllables.joinToString( entry.pinyinSyllables.joinToString(
separator = " " separator = " "
) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) } ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS).lowercase() }
) )
insertStatement.setString( insertStatement.setString(
4, 4,
entry.pinyinSyllables.joinToString( entry.pinyinSyllables.joinToString(
separator = " " separator = " "
) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).replace("u:", "ü").replace("v", "ü") } ) {
it.format(
TransliterationSystem.PINYIN_WITH_TONE_NUMBERS
).lowercase().replace("u:", "ü").replace("v", "ü")
}
) )
insertStatement.setString( insertStatement.setString(
5, 5,
@ -106,7 +117,7 @@ fun main() {
8, 8,
Json.encodeToString( Json.encodeToString(
ListSerializer(ListSerializer(String.serializer())), ListSerializer(ListSerializer(String.serializer())),
entry.definitions.map { it.glosses } definitions.map { it.glosses }
) )
) )
insertStatement.setString( insertStatement.setString(