diff --git a/src/main/kotlin/com/marvinelsen/willow/database/CreateDatabase.kt b/src/main/kotlin/com/marvinelsen/willow/database/CreateDatabase.kt index e349828..96f3fd1 100644 --- a/src/main/kotlin/com/marvinelsen/willow/database/CreateDatabase.kt +++ b/src/main/kotlin/com/marvinelsen/willow/database/CreateDatabase.kt @@ -64,7 +64,14 @@ fun main() { connection.prepareStatement( "INSERT OR IGNORE INTO entry(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, cedict_definitions, cross_straits_definitions, moe_definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?,?,?)" ) - for (entry in cedictEntries) { + cedictEntries.groupBy { + "${it.traditional}${it.pinyinSyllables.joinToString( + "" + ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).lowercase() }}" + }.entries.forEach { entries -> + val entry = entries.value.first() + val definitions = entries.value.flatMap { it.definitions } + try { insertStatement.setString(1, entry.traditional) insertStatement.setString(2, entry.simplified) @@ -72,13 +79,17 @@ fun main() { 3, entry.pinyinSyllables.joinToString( separator = " " - ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) } + ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS).lowercase() } ) insertStatement.setString( 4, entry.pinyinSyllables.joinToString( separator = " " - ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).replace("u:", "ü").replace("v", "ü") } + ) { + it.format( + TransliterationSystem.PINYIN_WITH_TONE_NUMBERS + ).lowercase().replace("u:", "ü").replace("v", "ü") + } ) insertStatement.setString( 5, @@ -106,7 +117,7 @@ fun main() { 8, Json.encodeToString( ListSerializer(ListSerializer(String.serializer())), - entry.definitions.map { it.glosses } + definitions.map { it.glosses } ) ) insertStatement.setString(