Implement segmented phrase search mode
This commit is contained in:
parent
387e10dd50
commit
62c57d1e85
@ -5,4 +5,5 @@ interface Dictionary {
|
||||
fun findWordsContaining(entry: DictionaryEntry): List<DictionaryEntry>
|
||||
fun findSentencesContaining(entry: DictionaryEntry): List<DictionaryEntry>
|
||||
fun findCharacters(entry: DictionaryEntry): List<DictionaryEntry>
|
||||
/**
 * Splits [phrase] into segments and returns the dictionary entries that match those segments.
 *
 * @param phrase the text to segment and look up.
 * @return the matching entries; how they are ordered is up to the implementation.
 */
fun searchSegments(phrase: String): List<DictionaryEntry>
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
package com.marvinelsen.willow.domain
|
||||
|
||||
/**
 * The kinds of dictionary search a query can be run as.
 *
 * [SEGMENTS] is the mode added for segmented phrase search.
 */
enum class SearchMode {
|
||||
PINYIN, SIMPLIFIED, TRADITIONAL, ENGLISH
|
||||
PINYIN, SIMPLIFIED, TRADITIONAL, ENGLISH, SEGMENTS
|
||||
}
|
||||
|
@ -1,5 +1,13 @@
|
||||
package com.marvinelsen.willow.domain
|
||||
|
||||
import com.github.houbb.segment.bs.SegmentBs
|
||||
import com.github.houbb.segment.data.phrase.core.data.SegmentPhraseDatas
|
||||
import com.github.houbb.segment.data.pos.core.data.SegmentPosDatas
|
||||
import com.github.houbb.segment.support.format.impl.SegmentFormats
|
||||
import com.github.houbb.segment.support.segment.impl.Segments
|
||||
import com.github.houbb.segment.support.segment.mode.impl.SegmentModes
|
||||
import com.github.houbb.segment.support.segment.result.impl.SegmentResultHandlers
|
||||
import com.github.houbb.segment.support.tagging.pos.tag.impl.SegmentPosTaggings
|
||||
import kotlinx.serialization.json.Json
|
||||
import java.sql.Connection
|
||||
import java.sql.PreparedStatement
|
||||
@ -8,6 +16,14 @@ import java.sql.ResultSet
|
||||
class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
private val whitespaceRegex = """\s+""".toRegex()
|
||||
|
||||
// Segmenter used by searchSegments() to split a phrase into individual segments.
// It is built once per SqliteDictionary instance via the library's builder.
// NOTE(review): SegmentPhraseDatas.define() / SegmentPosDatas.define() presumably load
// user-supplied dictionary resources (e.g. segment_phrase_dict_define.txt) — confirm
// against the segment library documentation.
// NOTE(review): SegmentFormats.chineseSimple() looks like it normalises input to
// simplified Chinese before segmenting — confirm.
private val segmentBs = SegmentBs.newInstance()
|
||||
.segment(Segments.defaults())
|
||||
.segmentData(SegmentPhraseDatas.define())
|
||||
.segmentMode(SegmentModes.dict())
|
||||
.segmentFormat(SegmentFormats.chineseSimple())
|
||||
.posTagging(SegmentPosTaggings.simple())
|
||||
.posData(SegmentPosDatas.define())
|
||||
|
||||
private val searchSimplifiedPreparedStatement: PreparedStatement by lazy {
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
@ -42,6 +58,14 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * SQL template for the segmented phrase search.
 *
 * The single `?` after `VALUES` is NOT a JDBC bind parameter of this template: the
 * searchSegments() function replaces it textually with a comma-separated list of
 * `(id, segment)` rows before the statement is run. Those rows form the `cte` table,
 * which is joined against `cedict` on either the traditional or the simplified column,
 * and the result is ordered by the row id.
 */
private val searchSegments = """
|
||||
WITH cte(id, segment) AS (VALUES ?)
|
||||
SELECT cedict.traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions
|
||||
FROM cedict INNER JOIN cte
|
||||
ON cte.segment = cedict.traditional OR cte.segment = cedict.simplified
|
||||
ORDER BY cte.id
|
||||
""".trimIndent()
|
||||
|
||||
private val findWordsContaining: PreparedStatement by lazy {
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
@ -63,6 +87,7 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
SearchMode.PINYIN -> searchPinyin(query)
|
||||
SearchMode.SIMPLIFIED -> searchSimplified(query)
|
||||
SearchMode.TRADITIONAL -> searchTraditional(query)
|
||||
SearchMode.SEGMENTS -> searchSegments(query)
|
||||
SearchMode.ENGLISH -> TODO()
|
||||
}
|
||||
|
||||
@ -91,6 +116,20 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||
return resultSet.toListOfDictionaryEntries()
|
||||
}
|
||||
|
||||
/**
 * Splits [phrase] into segments and looks each segment up in the `cedict` table.
 *
 * The segments are passed to SQLite as bound parameters rather than being spliced into
 * the SQL text, so quotes or other SQL syntax inside [phrase] cannot alter the query.
 *
 * @param phrase the text to segment and look up.
 * @return the entries whose traditional or simplified form equals one of the segments,
 *   ordered by the position of the matching segment; empty when segmentation yields
 *   no segments.
 */
override fun searchSegments(phrase: String): List<DictionaryEntry> {
    val segments = segmentBs.segment(phrase, SegmentResultHandlers.word())

    // With no rows the template would render "VALUES " followed by nothing, which SQLite
    // rejects as a syntax error, so answer directly instead.
    if (segments.isEmpty()) return emptyList()

    // One "(?, ?)" row per segment: first slot is the segment's position, second its text.
    val placeholderRows = segments.joinToString(",") { "(?, ?)" }
    val query = searchSegments.replace("?", placeholderRows)

    // The statement text depends on the number of segments, so it is prepared per call
    // and closed afterwards instead of being cached.
    // NOTE(review): closing here assumes toListOfDictionaryEntries() reads the ResultSet
    // eagerly into the returned List — confirm.
    return connection.prepareStatement(query).use { statement ->
        segments.forEachIndexed { index, segment ->
            // JDBC parameter indices are 1-based; each row consumes two of them.
            statement.setInt(2 * index + 1, index)
            statement.setString(2 * index + 2, segment)
        }
        statement.executeQuery().use { resultSet -> resultSet.toListOfDictionaryEntries() }
    }
}
|
||||
|
||||
private fun searchSimplified(query: String): List<DictionaryEntry> {
|
||||
searchSimplifiedPreparedStatement.setString(1, "$query*")
|
||||
|
||||
|
@ -30,6 +30,12 @@
|
||||
<SearchMode fx:value="PINYIN"/>
|
||||
</userData>
|
||||
</RadioButton>
|
||||
<RadioButton mnemonicParsing="false" text="%search.mode.phrase"
|
||||
toggleGroup="$searchModeToggleGroup">
|
||||
<userData>
|
||||
<SearchMode fx:value="SEGMENTS"/>
|
||||
</userData>
|
||||
</RadioButton>
|
||||
<RadioButton mnemonicParsing="false" text="%search.mode.english"
|
||||
toggleGroup="$searchModeToggleGroup"
|
||||
disable="true">
|
||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Copy Pronunciation
|
||||
menubar.help=_Help
|
||||
menubar.help.about=_About…
|
||||
list.no_entries_found=No matching entries found
|
||||
search.mode.phrase=Phrase
|
||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Kopiere Aussprache
|
||||
menubar.help=_Hilfe
|
||||
menubar.help.about=_Über…
|
||||
list.no_entries_found=No matching entries found
|
||||
search.mode.phrase=Phrase
|
||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Copy Pronunciation
|
||||
menubar.help=_Help
|
||||
menubar.help.about=_About…
|
||||
list.no_entries_found=No matching entries found
|
||||
search.mode.phrase=Phrase
|
||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=複製 Aussprache
|
||||
menubar.help=_說明
|
||||
menubar.help.about=_關於 Willow…
|
||||
list.no_entries_found=No matching entries found
|
||||
search.mode.phrase=Phrase
|
||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=複製 Aussprache
|
||||
menubar.help=_說明
|
||||
menubar.help.about=_關於 Willow…
|
||||
list.no_entries_found=No matching entries found
|
||||
search.mode.phrase=Phrase
|
||||
|
119393
src/main/resources/segment_phrase_dict_define.txt
Normal file
119393
src/main/resources/segment_phrase_dict_define.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user