Implement Pinyin search
This commit is contained in:
parent
3a159fba76
commit
a65c1db941
@ -27,6 +27,8 @@ fun main() {
|
||||
pinyin_with_tone_marks TEXT NOT NULL,
|
||||
pinyin_with_tone_numbers TEXT NOT NULL,
|
||||
zhuyin TEXT NOT NULL,
|
||||
searchable_pinyin TEXT NOT NULL,
|
||||
searchable_pinyin_with_tone_numbers TEXT NOT NULL,
|
||||
definitions JSON NOT NULL,
|
||||
character_count INTEGER NOT NULL,
|
||||
CONSTRAINT character_count_gte CHECK(character_count > 0)
|
||||
@ -36,6 +38,10 @@ fun main() {
|
||||
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_traditional ON cedict (traditional)")
|
||||
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_simplified ON cedict (simplified)")
|
||||
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_character_count ON cedict (character_count)")
|
||||
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_searchable_pinyin ON cedict (searchable_pinyin)")
|
||||
statement.executeUpdate(
|
||||
"CREATE INDEX IF NOT EXISTS idx_cedict_searchable_pinyin_with_tone_numbers ON cedict (searchable_pinyin_with_tone_numbers)"
|
||||
)
|
||||
|
||||
val cedictParser = CedictParser.instance
|
||||
val cedictEntries =
|
||||
@ -45,7 +51,7 @@ fun main() {
|
||||
|
||||
val insertStatement =
|
||||
connection.prepareStatement(
|
||||
"INSERT OR IGNORE INTO cedict(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions, character_count) VALUES(?,?,?,?,?,?,?)"
|
||||
"INSERT OR IGNORE INTO cedict(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?)"
|
||||
)
|
||||
for (entry in cedictEntries) {
|
||||
try {
|
||||
@ -71,12 +77,31 @@ fun main() {
|
||||
)
|
||||
insertStatement.setString(
|
||||
6,
|
||||
entry.pinyinSyllables.joinToString(
|
||||
separator = ""
|
||||
) {
|
||||
it
|
||||
.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
|
||||
.lowercase()
|
||||
.replace("""\d""".toRegex(), "")
|
||||
}
|
||||
)
|
||||
insertStatement.setString(
|
||||
7,
|
||||
entry.pinyinSyllables.joinToString(
|
||||
separator = ""
|
||||
) {
|
||||
it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).lowercase()
|
||||
}
|
||||
)
|
||||
insertStatement.setString(
|
||||
8,
|
||||
Json.encodeToString(
|
||||
ListSerializer(ListSerializer(String.serializer())),
|
||||
entry.definitions.map { it.glosses }
|
||||
)
|
||||
)
|
||||
insertStatement.setInt(7, entry.traditional.length)
|
||||
insertStatement.setInt(9, entry.traditional.length)
|
||||
} catch (_: Exception) {
|
||||
// no-op
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ import java.sql.PreparedStatement
|
||||
import java.sql.ResultSet
|
||||
|
||||
class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
private val whitespaceRegex = """\s+""".toRegex()
|
||||
|
||||
private val searchSimplifiedPreparedStatement: PreparedStatement by lazy {
|
||||
connection.prepareStatement(
|
||||
@ -29,6 +30,18 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Lazily prepared statement that looks up `cedict` rows by Pinyin.
 *
 * Takes two GLOB patterns: parameter 1 is matched against `searchable_pinyin`,
 * parameter 2 against `searchable_pinyin_with_tone_numbers`. A row matching either
 * pattern is returned, and results are ordered by ascending `character_count`.
 */
private val searchPinyinPreparedStatement: PreparedStatement by lazy {
    val pinyinSearchSql =
        """
        SELECT traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions
        FROM cedict
        WHERE searchable_pinyin GLOB ?
        OR searchable_pinyin_with_tone_numbers GLOB ?
        ORDER BY character_count ASC
        """.trimIndent()

    connection.prepareStatement(pinyinSearchSql)
}
|
||||
|
||||
private val findWordsContaining: PreparedStatement by lazy {
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
@ -41,7 +54,7 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
}
|
||||
|
||||
override fun search(query: String, searchMode: SearchMode) = when (searchMode) {
|
||||
SearchMode.PINYIN -> TODO()
|
||||
SearchMode.PINYIN -> searchPinyin(query)
|
||||
SearchMode.SIMPLIFIED -> searchSimplified(query)
|
||||
SearchMode.TRADITIONAL -> searchTraditional(query)
|
||||
SearchMode.ENGLISH -> TODO()
|
||||
@ -71,6 +84,17 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
return resultSet.toListOfDictionaryEntries()
|
||||
}
|
||||
|
||||
/**
 * Finds dictionary entries whose Pinyin starts with [query].
 *
 * The query is lower-cased and stripped of all whitespace, then used as a prefix
 * pattern against both searchable Pinyin columns (with and without tone numbers).
 *
 * @param query user-supplied Pinyin, with or without tone numbers.
 * @return the entries produced by the Pinyin search statement for that prefix.
 */
private fun searchPinyin(query: String): List<DictionaryEntry> {
    val sanitizedQuery = query.lowercase().replace(whitespaceRegex, "")

    // GLOB has no ESCAPE clause: a metacharacter is matched literally only when it is
    // wrapped in a one-character class. Without this, a user typing `*`, `?` or `[`
    // would inject wildcards (or an unterminated class) into the pattern.
    val prefixPattern = buildString {
        for (char in sanitizedQuery) {
            when (char) {
                '*', '?', '[' -> append('[').append(char).append(']')
                else -> append(char)
            }
        }
        // The trailing wildcard turns the escaped query into a prefix match.
        append('*')
    }

    searchPinyinPreparedStatement.setString(1, prefixPattern)
    searchPinyinPreparedStatement.setString(2, prefixPattern)

    // Close the cursor as soon as the rows have been converted, rather than leaving it
    // open until the statement is next executed.
    // NOTE(review): assumes toListOfDictionaryEntries() reads all rows eagerly — confirm.
    return searchPinyinPreparedStatement.executeQuery().use { resultSet ->
        resultSet.toListOfDictionaryEntries()
    }
}
|
||||
|
||||
private fun searchTraditional(query: String): List<DictionaryEntry> {
|
||||
searchTraditionalPreparedStatement.setString(1, "$query*")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user