Implement Pinyin search
This commit is contained in:
parent
3a159fba76
commit
a65c1db941
@ -27,6 +27,8 @@ fun main() {
|
|||||||
pinyin_with_tone_marks TEXT NOT NULL,
|
pinyin_with_tone_marks TEXT NOT NULL,
|
||||||
pinyin_with_tone_numbers TEXT NOT NULL,
|
pinyin_with_tone_numbers TEXT NOT NULL,
|
||||||
zhuyin TEXT NOT NULL,
|
zhuyin TEXT NOT NULL,
|
||||||
|
searchable_pinyin TEXT NOT NULL,
|
||||||
|
searchable_pinyin_with_tone_numbers TEXT NOT NULL,
|
||||||
definitions JSON NOT NULL,
|
definitions JSON NOT NULL,
|
||||||
character_count INTEGER NOT NULL,
|
character_count INTEGER NOT NULL,
|
||||||
CONSTRAINT character_count_gte CHECK(character_count > 0)
|
CONSTRAINT character_count_gte CHECK(character_count > 0)
|
||||||
@ -36,6 +38,10 @@ fun main() {
|
|||||||
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_traditional ON cedict (traditional)")
|
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_traditional ON cedict (traditional)")
|
||||||
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_simplified ON cedict (simplified)")
|
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_simplified ON cedict (simplified)")
|
||||||
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_character_count ON cedict (character_count)")
|
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_character_count ON cedict (character_count)")
|
||||||
|
statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_cedict_searchable_pinyin ON cedict (searchable_pinyin)")
|
||||||
|
statement.executeUpdate(
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_cedict_searchable_pinyin_with_tone_numbers ON cedict (searchable_pinyin_with_tone_numbers)"
|
||||||
|
)
|
||||||
|
|
||||||
val cedictParser = CedictParser.instance
|
val cedictParser = CedictParser.instance
|
||||||
val cedictEntries =
|
val cedictEntries =
|
||||||
@ -45,7 +51,7 @@ fun main() {
|
|||||||
|
|
||||||
val insertStatement =
|
val insertStatement =
|
||||||
connection.prepareStatement(
|
connection.prepareStatement(
|
||||||
"INSERT OR IGNORE INTO cedict(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions, character_count) VALUES(?,?,?,?,?,?,?)"
|
"INSERT OR IGNORE INTO cedict(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?)"
|
||||||
)
|
)
|
||||||
for (entry in cedictEntries) {
|
for (entry in cedictEntries) {
|
||||||
try {
|
try {
|
||||||
@ -71,12 +77,31 @@ fun main() {
|
|||||||
)
|
)
|
||||||
insertStatement.setString(
|
insertStatement.setString(
|
||||||
6,
|
6,
|
||||||
|
entry.pinyinSyllables.joinToString(
|
||||||
|
separator = ""
|
||||||
|
) {
|
||||||
|
it
|
||||||
|
.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
|
||||||
|
.lowercase()
|
||||||
|
.replace("""\d""".toRegex(), "")
|
||||||
|
}
|
||||||
|
)
|
||||||
|
insertStatement.setString(
|
||||||
|
7,
|
||||||
|
entry.pinyinSyllables.joinToString(
|
||||||
|
separator = ""
|
||||||
|
) {
|
||||||
|
it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).lowercase()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
insertStatement.setString(
|
||||||
|
8,
|
||||||
Json.encodeToString(
|
Json.encodeToString(
|
||||||
ListSerializer(ListSerializer(String.serializer())),
|
ListSerializer(ListSerializer(String.serializer())),
|
||||||
entry.definitions.map { it.glosses }
|
entry.definitions.map { it.glosses }
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
insertStatement.setInt(7, entry.traditional.length)
|
insertStatement.setInt(9, entry.traditional.length)
|
||||||
} catch (_: Exception) {
|
} catch (_: Exception) {
|
||||||
// no-op
|
// no-op
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@ import java.sql.PreparedStatement
|
|||||||
import java.sql.ResultSet
|
import java.sql.ResultSet
|
||||||
|
|
||||||
class SqliteDictionary(private val connection: Connection) : Dictionary {
|
class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||||
|
private val whitespaceRegex = """\s+""".toRegex()
|
||||||
|
|
||||||
private val searchSimplifiedPreparedStatement: PreparedStatement by lazy {
|
private val searchSimplifiedPreparedStatement: PreparedStatement by lazy {
|
||||||
connection.prepareStatement(
|
connection.prepareStatement(
|
||||||
@ -29,6 +30,18 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Prepared statement used for Pinyin search, created on first access.
 *
 * Selects the entry columns from `cedict` for rows where either `searchable_pinyin` or
 * `searchable_pinyin_with_tone_numbers` matches a GLOB pattern. The statement has two
 * placeholders (one per column) and orders rows by ascending `character_count`.
 */
private val searchPinyinPreparedStatement: PreparedStatement by lazy {
    val sql =
        """
        SELECT traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions
        FROM cedict
        WHERE searchable_pinyin GLOB ?
        OR searchable_pinyin_with_tone_numbers GLOB ?
        ORDER BY character_count ASC
        """.trimIndent()

    connection.prepareStatement(sql)
}
|
||||||
|
|
||||||
private val findWordsContaining: PreparedStatement by lazy {
|
private val findWordsContaining: PreparedStatement by lazy {
|
||||||
connection.prepareStatement(
|
connection.prepareStatement(
|
||||||
"""
|
"""
|
||||||
@ -41,7 +54,7 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
|||||||
}
|
}
|
||||||
|
|
||||||
override fun search(query: String, searchMode: SearchMode) = when (searchMode) {
|
override fun search(query: String, searchMode: SearchMode) = when (searchMode) {
|
||||||
SearchMode.PINYIN -> TODO()
|
SearchMode.PINYIN -> searchPinyin(query)
|
||||||
SearchMode.SIMPLIFIED -> searchSimplified(query)
|
SearchMode.SIMPLIFIED -> searchSimplified(query)
|
||||||
SearchMode.TRADITIONAL -> searchTraditional(query)
|
SearchMode.TRADITIONAL -> searchTraditional(query)
|
||||||
SearchMode.ENGLISH -> TODO()
|
SearchMode.ENGLISH -> TODO()
|
||||||
@ -71,6 +84,17 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
|||||||
return resultSet.toListOfDictionaryEntries()
|
return resultSet.toListOfDictionaryEntries()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Finds dictionary entries whose searchable Pinyin starts with [query].
 *
 * The query is lower-cased and stripped of all whitespace, then turned into a GLOB prefix
 * pattern that is bound to both placeholders of [searchPinyinPreparedStatement].
 *
 * GLOB metacharacters contained in the query (`*`, `?`, `[`) are matched literally instead of
 * being interpreted as wildcards, so arbitrary user input cannot alter the shape of the pattern.
 *
 * Note: a blank query yields the pattern `*`, which matches every row.
 *
 * @param query user-supplied Pinyin text; case and whitespace are ignored
 * @return the rows returned by the statement, converted via `toListOfDictionaryEntries`
 */
private fun searchPinyin(query: String): List<DictionaryEntry> {
    val sanitizedQuery = query.lowercase().replace(whitespaceRegex, "")

    // SQLite GLOB has no ESCAPE clause; a metacharacter is made literal by wrapping it in a
    // single-character bracket class, e.g. `*` becomes `[*]`.
    val prefixPattern = buildString(sanitizedQuery.length + 1) {
        for (char in sanitizedQuery) {
            when (char) {
                '*', '?', '[' -> append('[').append(char).append(']')
                else -> append(char)
            }
        }
        // The only intentional wildcard: match anything after the typed prefix.
        append('*')
    }

    searchPinyinPreparedStatement.setString(1, prefixPattern)
    searchPinyinPreparedStatement.setString(2, prefixPattern)

    val resultSet: ResultSet = searchPinyinPreparedStatement.executeQuery()

    return resultSet.toListOfDictionaryEntries()
}
|
||||||
|
|
||||||
private fun searchTraditional(query: String): List<DictionaryEntry> {
|
private fun searchTraditional(query: String): List<DictionaryEntry> {
|
||||||
searchTraditionalPreparedStatement.setString(1, "$query*")
|
searchTraditionalPreparedStatement.setString(1, "$query*")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user