diff --git a/src/main/kotlin/com/marvinelsen/willow/cedict/CreateDatabase.kt b/src/main/kotlin/com/marvinelsen/willow/cedict/CreateDatabase.kt index 5b61c09..d4c3c1a 100644 --- a/src/main/kotlin/com/marvinelsen/willow/cedict/CreateDatabase.kt +++ b/src/main/kotlin/com/marvinelsen/willow/cedict/CreateDatabase.kt @@ -27,6 +27,8 @@ fun main() { pinyin_with_tone_marks TEXT NOT NULL, pinyin_with_tone_numbers TEXT NOT NULL, zhuyin TEXT NOT NULL, + searchable_pinyin TEXT NOT NULL, + searchable_pinyin_with_tone_numbers TEXT NOT NULL, definitions JSON NOT NULL, character_count INTEGER NOT NULL, CONSTRAINT character_count_gte CHECK(character_count > 0) @@ -36,6 +38,10 @@ fun main() { statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_traditional ON cedict (traditional)") statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_simplified ON cedict (simplified)") statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_character_count ON cedict (character_count)") + statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_searchable_pinyin ON cedict (searchable_pinyin)") + statement.executeUpdate( + "CREATE INDEX IF NOT EXISTS idx_cedict_searchable_pinyin_with_tone_numbers ON cedict (searchable_pinyin_with_tone_numbers)" + ) val cedictParser = CedictParser.instance val cedictEntries = @@ -45,7 +51,7 @@ fun main() { val insertStatement = connection.prepareStatement( - "INSERT OR IGNORE INTO cedict(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions, character_count) VALUES(?,?,?,?,?,?,?)" + "INSERT OR IGNORE INTO cedict(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?)" ) for (entry in cedictEntries) { try { @@ -71,12 +77,31 @@ fun main() { ) insertStatement.setString( 6, + entry.pinyinSyllables.joinToString( + separator = "" + ) { + it + 
.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) + .lowercase() + .filterNot { ch -> ch in '0'..'9' } /* strip the tone digits; a plain char filter avoids compiling a Regex for every syllable */ + } + ) + insertStatement.setString( + 7, + entry.pinyinSyllables.joinToString( + separator = "" + ) { + it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).lowercase() + } + ) + insertStatement.setString( + 8, Json.encodeToString( ListSerializer(ListSerializer(String.serializer())), entry.definitions.map { it.glosses } ) ) - insertStatement.setInt(7, entry.traditional.length) + insertStatement.setInt(9, entry.traditional.length) } catch (_: Exception) { // no-op } diff --git a/src/main/kotlin/com/marvinelsen/willow/domain/SqliteDictionary.kt b/src/main/kotlin/com/marvinelsen/willow/domain/SqliteDictionary.kt index 9e74dd8..ae7a90b 100644 --- a/src/main/kotlin/com/marvinelsen/willow/domain/SqliteDictionary.kt +++ b/src/main/kotlin/com/marvinelsen/willow/domain/SqliteDictionary.kt @@ -6,6 +6,7 @@ import java.sql.PreparedStatement import java.sql.ResultSet class SqliteDictionary(private val connection: Connection) : Dictionary { + private val whitespaceRegex = """\s+""".toRegex() private val searchSimplifiedPreparedStatement: PreparedStatement by lazy { connection.prepareStatement( @@ -29,6 +30,18 @@ class SqliteDictionary(private val connection: Connection) : Dictionary { ) } + private val searchPinyinPreparedStatement: PreparedStatement by lazy { + connection.prepareStatement( + """ + SELECT traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions + FROM cedict + WHERE searchable_pinyin GLOB ? + OR searchable_pinyin_with_tone_numbers GLOB ? 
+ ORDER BY character_count ASC + """.trimIndent() + ) + } + private val findWordsContaining: PreparedStatement by lazy { connection.prepareStatement( """ @@ -41,7 +54,7 @@ class SqliteDictionary(private val connection: Connection) : Dictionary { } override fun search(query: String, searchMode: SearchMode) = when (searchMode) { - SearchMode.PINYIN -> TODO() + SearchMode.PINYIN -> searchPinyin(query) SearchMode.SIMPLIFIED -> searchSimplified(query) SearchMode.TRADITIONAL -> searchTraditional(query) SearchMode.ENGLISH -> TODO() @@ -71,6 +84,17 @@ class SqliteDictionary(private val connection: Connection) : Dictionary { return resultSet.toListOfDictionaryEntries() } + private fun searchPinyin(query: String): List { + val sanitizedQuery = query.lowercase().replace(whitespaceRegex, "") /* same shape as the stored search keys: lowercase, syllables joined without whitespace */ + + searchPinyinPreparedStatement.setString(1, "$sanitizedQuery*") /* trailing "*" makes the GLOB a prefix match */ + searchPinyinPreparedStatement.setString(2, "$sanitizedQuery*") /* NOTE(review): "*", "?" and "[" typed by the user also act as GLOB wildcards - confirm that is intended */ + + val resultSet: ResultSet = searchPinyinPreparedStatement.executeQuery() + + return resultSet.toListOfDictionaryEntries() + } + private fun searchTraditional(query: String): List { searchTraditionalPreparedStatement.setString(1, "$query*")