Implement segmented phrase search mode
This commit is contained in:
parent
387e10dd50
commit
62c57d1e85
@ -5,4 +5,5 @@ interface Dictionary {
|
|||||||
fun findWordsContaining(entry: DictionaryEntry): List<DictionaryEntry>
|
fun findWordsContaining(entry: DictionaryEntry): List<DictionaryEntry>
|
||||||
fun findSentencesContaining(entry: DictionaryEntry): List<DictionaryEntry>
|
fun findSentencesContaining(entry: DictionaryEntry): List<DictionaryEntry>
|
||||||
fun findCharacters(entry: DictionaryEntry): List<DictionaryEntry>
|
fun findCharacters(entry: DictionaryEntry): List<DictionaryEntry>
|
||||||
|
fun searchSegments(phrase: String): List<DictionaryEntry>
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
package com.marvinelsen.willow.domain
|
package com.marvinelsen.willow.domain
|
||||||
|
|
||||||
enum class SearchMode {
|
enum class SearchMode {
|
||||||
PINYIN, SIMPLIFIED, TRADITIONAL, ENGLISH
|
PINYIN, SIMPLIFIED, TRADITIONAL, ENGLISH, SEGMENTS
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,13 @@
|
|||||||
package com.marvinelsen.willow.domain
|
package com.marvinelsen.willow.domain
|
||||||
|
|
||||||
|
import com.github.houbb.segment.bs.SegmentBs
|
||||||
|
import com.github.houbb.segment.data.phrase.core.data.SegmentPhraseDatas
|
||||||
|
import com.github.houbb.segment.data.pos.core.data.SegmentPosDatas
|
||||||
|
import com.github.houbb.segment.support.format.impl.SegmentFormats
|
||||||
|
import com.github.houbb.segment.support.segment.impl.Segments
|
||||||
|
import com.github.houbb.segment.support.segment.mode.impl.SegmentModes
|
||||||
|
import com.github.houbb.segment.support.segment.result.impl.SegmentResultHandlers
|
||||||
|
import com.github.houbb.segment.support.tagging.pos.tag.impl.SegmentPosTaggings
|
||||||
import kotlinx.serialization.json.Json
|
import kotlinx.serialization.json.Json
|
||||||
import java.sql.Connection
|
import java.sql.Connection
|
||||||
import java.sql.PreparedStatement
|
import java.sql.PreparedStatement
|
||||||
@ -8,6 +16,14 @@ import java.sql.ResultSet
|
|||||||
class SqliteDictionary(private val connection: Connection) : Dictionary {
|
class SqliteDictionary(private val connection: Connection) : Dictionary {
|
||||||
private val whitespaceRegex = """\s+""".toRegex()
|
private val whitespaceRegex = """\s+""".toRegex()
|
||||||
|
|
||||||
|
/**
 * Word segmenter used by searchSegments() to split a phrase into segments.
 *
 * NOTE(review): the exact effect of each builder option is defined by the houbb segment
 * library, not by this file. Presumably `SegmentFormats.chineseSimple()` normalizes input to
 * simplified Chinese and the `define()` data sources load project-supplied dictionary data;
 * confirm against the library documentation.
 */
private val segmentBs = SegmentBs.newInstance()
|
||||||
|
.segment(Segments.defaults())
|
||||||
|
.segmentData(SegmentPhraseDatas.define())
|
||||||
|
.segmentMode(SegmentModes.dict())
|
||||||
|
.segmentFormat(SegmentFormats.chineseSimple())
|
||||||
|
.posTagging(SegmentPosTaggings.simple())
|
||||||
|
.posData(SegmentPosDatas.define())
|
||||||
|
|
||||||
private val searchSimplifiedPreparedStatement: PreparedStatement by lazy {
|
private val searchSimplifiedPreparedStatement: PreparedStatement by lazy {
|
||||||
connection.prepareStatement(
|
connection.prepareStatement(
|
||||||
"""
|
"""
|
||||||
@ -42,6 +58,14 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * SQL template for the segmented phrase lookup.
 *
 * The single `?` is a textual placeholder, not a bound parameter: searchSegments() replaces it
 * with a comma-separated list of `(id, segment)` rows before the query is run. Each row is
 * joined against `cedict` where the segment equals either the traditional or the simplified
 * form, and the result is ordered by the row id.
 */
private val searchSegments = """
|
||||||
|
WITH cte(id, segment) AS (VALUES ?)
|
||||||
|
SELECT cedict.traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions
|
||||||
|
FROM cedict INNER JOIN cte
|
||||||
|
ON cte.segment = cedict.traditional OR cte.segment = cedict.simplified
|
||||||
|
ORDER BY cte.id
|
||||||
|
""".trimIndent()
|
||||||
|
|
||||||
private val findWordsContaining: PreparedStatement by lazy {
|
private val findWordsContaining: PreparedStatement by lazy {
|
||||||
connection.prepareStatement(
|
connection.prepareStatement(
|
||||||
"""
|
"""
|
||||||
@ -63,6 +87,7 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
|||||||
SearchMode.PINYIN -> searchPinyin(query)
|
SearchMode.PINYIN -> searchPinyin(query)
|
||||||
SearchMode.SIMPLIFIED -> searchSimplified(query)
|
SearchMode.SIMPLIFIED -> searchSimplified(query)
|
||||||
SearchMode.TRADITIONAL -> searchTraditional(query)
|
SearchMode.TRADITIONAL -> searchTraditional(query)
|
||||||
|
SearchMode.SEGMENTS -> searchSegments(query)
|
||||||
SearchMode.ENGLISH -> TODO()
|
SearchMode.ENGLISH -> TODO()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,6 +116,20 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
|
|||||||
return resultSet.toListOfDictionaryEntries()
|
return resultSet.toListOfDictionaryEntries()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Splits [phrase] into word segments and looks up every segment in the dictionary.
 *
 * Segments are bound as statement parameters instead of being spliced into the SQL text, so a
 * phrase containing a quote character can neither break the query nor inject SQL.
 *
 * @param phrase the text to segment and look up
 * @return the entries matching any segment, ordered by the segment's position in the
 *         segmentation result; empty when segmentation yields no segments
 */
override fun searchSegments(phrase: String): List<DictionaryEntry> {
    val segments = segmentBs.segment(phrase, SegmentResultHandlers.word())

    // An empty VALUES list is a SQL syntax error, so there is nothing to query.
    if (segments.isEmpty()) return emptyList()

    // One "(?, ?)" row per segment: (position in the segment list, segment text).
    val rowPlaceholders = segments.joinToString(",") { "(?, ?)" }
    val query = searchSegments.replace("?", rowPlaceholders)

    // The statement is built per call (its shape depends on the segment count), so close it
    // when done. The result set is converted inside the block, before the statement closes.
    return connection.prepareStatement(query).use { statement ->
        segments.forEachIndexed { index, segment ->
            statement.setInt(2 * index + 1, index)
            statement.setString(2 * index + 2, segment)
        }
        statement.executeQuery().toListOfDictionaryEntries()
    }
}
|
||||||
|
|
||||||
private fun searchSimplified(query: String): List<DictionaryEntry> {
|
private fun searchSimplified(query: String): List<DictionaryEntry> {
|
||||||
searchSimplifiedPreparedStatement.setString(1, "$query*")
|
searchSimplifiedPreparedStatement.setString(1, "$query*")
|
||||||
|
|
||||||
|
@ -30,6 +30,12 @@
|
|||||||
<SearchMode fx:value="PINYIN"/>
|
<SearchMode fx:value="PINYIN"/>
|
||||||
</userData>
|
</userData>
|
||||||
</RadioButton>
|
</RadioButton>
|
||||||
|
<RadioButton mnemonicParsing="false" text="%search.mode.phrase"
|
||||||
|
toggleGroup="$searchModeToggleGroup">
|
||||||
|
<userData>
|
||||||
|
<SearchMode fx:value="SEGMENTS"/>
|
||||||
|
</userData>
|
||||||
|
</RadioButton>
|
||||||
<RadioButton mnemonicParsing="false" text="%search.mode.english"
|
<RadioButton mnemonicParsing="false" text="%search.mode.english"
|
||||||
toggleGroup="$searchModeToggleGroup"
|
toggleGroup="$searchModeToggleGroup"
|
||||||
disable="true">
|
disable="true">
|
||||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Copy Pronunciation
|
|||||||
menubar.help=_Help
|
menubar.help=_Help
|
||||||
menubar.help.about=_About…
|
menubar.help.about=_About…
|
||||||
list.no_entries_found=No matching entries found
|
list.no_entries_found=No matching entries found
|
||||||
|
search.mode.phrase=Phrase
|
||||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Kopiere Aussprache
|
|||||||
menubar.help=_Hilfe
|
menubar.help=_Hilfe
|
||||||
menubar.help.about=_Über…
|
menubar.help.about=_Über…
|
||||||
list.no_entries_found=No matching entries found
|
list.no_entries_found=No matching entries found
|
||||||
|
search.mode.phrase=Phrase
|
||||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Copy Pronunciation
|
|||||||
menubar.help=_Help
|
menubar.help=_Help
|
||||||
menubar.help.about=_About…
|
menubar.help.about=_About…
|
||||||
list.no_entries_found=No matching entries found
|
list.no_entries_found=No matching entries found
|
||||||
|
search.mode.phrase=Phrase
|
||||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=複製 Aussprache
|
|||||||
menubar.help=_說明
|
menubar.help=_說明
|
||||||
menubar.help.about=_關於 Willow…
|
menubar.help.about=_關於 Willow…
|
||||||
list.no_entries_found=No matching entries found
|
list.no_entries_found=No matching entries found
|
||||||
|
search.mode.phrase=Phrase
|
||||||
|
@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=複製 Aussprache
|
|||||||
menubar.help=_說明
|
menubar.help=_說明
|
||||||
menubar.help.about=_關於 Willow…
|
menubar.help.about=_關於 Willow…
|
||||||
list.no_entries_found=No matching entries found
|
list.no_entries_found=No matching entries found
|
||||||
|
search.mode.phrase=Phrase
|
||||||
|
119393
src/main/resources/segment_phrase_dict_define.txt
Normal file
119393
src/main/resources/segment_phrase_dict_define.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user