Implement segmented phrase search mode

This commit is contained in:
Marvin Elsen 2024-09-28 19:38:00 +02:00
parent 387e10dd50
commit 62c57d1e85
Signed by: marvinelsen
GPG Key ID: 820672408CC318C2
10 changed files with 119445 additions and 1 deletions

View File

@ -5,4 +5,5 @@ interface Dictionary {
fun findWordsContaining(entry: DictionaryEntry): List<DictionaryEntry>
fun findSentencesContaining(entry: DictionaryEntry): List<DictionaryEntry>
fun findCharacters(entry: DictionaryEntry): List<DictionaryEntry>
fun searchSegments(phrase: String): List<DictionaryEntry>
}

View File

@ -1,5 +1,5 @@
package com.marvinelsen.willow.domain
enum class SearchMode {
PINYIN, SIMPLIFIED, TRADITIONAL, ENGLISH
PINYIN, SIMPLIFIED, TRADITIONAL, ENGLISH, SEGMENTS
}

View File

@ -1,5 +1,13 @@
package com.marvinelsen.willow.domain
import com.github.houbb.segment.bs.SegmentBs
import com.github.houbb.segment.data.phrase.core.data.SegmentPhraseDatas
import com.github.houbb.segment.data.pos.core.data.SegmentPosDatas
import com.github.houbb.segment.support.format.impl.SegmentFormats
import com.github.houbb.segment.support.segment.impl.Segments
import com.github.houbb.segment.support.segment.mode.impl.SegmentModes
import com.github.houbb.segment.support.segment.result.impl.SegmentResultHandlers
import com.github.houbb.segment.support.tagging.pos.tag.impl.SegmentPosTaggings
import kotlinx.serialization.json.Json
import java.sql.Connection
import java.sql.PreparedStatement
@ -8,6 +16,14 @@ import java.sql.ResultSet
class SqliteDictionary(private val connection: Connection) : Dictionary {
private val whitespaceRegex = """\s+""".toRegex()
// Word segmenter used by searchSegments(phrase) to split a phrase into individual segments.
// It is configured once per SqliteDictionary instance and reused for every phrase search.
// NOTE(review): SegmentModes.dict() presumably restricts output to dictionary words, and
// SegmentFormats.chineseSimple() presumably normalises input to simplified Chinese — confirm
// against the segment library documentation.
private val segmentBs = SegmentBs.newInstance()
.segment(Segments.defaults())
.segmentData(SegmentPhraseDatas.define())
.segmentMode(SegmentModes.dict())
.segmentFormat(SegmentFormats.chineseSimple())
.posTagging(SegmentPosTaggings.simple())
.posData(SegmentPosDatas.define())
private val searchSimplifiedPreparedStatement: PreparedStatement by lazy {
connection.prepareStatement(
"""
@ -42,6 +58,14 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
)
}
// SQL template for the segmented phrase search.
//
// The `?` after VALUES is not bound as a single JDBC parameter: searchSegments(phrase) textually
// replaces it with one row per segment before the query is run. Each row feeds the CTE columns
// (id, segment), where id is the segment's position in the phrase. Entries are matched when a
// segment equals either the traditional or the simplified headword, and ORDER BY cte.id returns
// them in the order the segments occur in the phrase.
private val searchSegments = """
WITH cte(id, segment) AS (VALUES ?)
SELECT cedict.traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, definitions
FROM cedict INNER JOIN cte
ON cte.segment = cedict.traditional OR cte.segment = cedict.simplified
ORDER BY cte.id
""".trimIndent()
private val findWordsContaining: PreparedStatement by lazy {
connection.prepareStatement(
"""
@ -63,6 +87,7 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
SearchMode.PINYIN -> searchPinyin(query)
SearchMode.SIMPLIFIED -> searchSimplified(query)
SearchMode.TRADITIONAL -> searchTraditional(query)
SearchMode.SEGMENTS -> searchSegments(query)
SearchMode.ENGLISH -> TODO()
}
@ -91,6 +116,20 @@ class SqliteDictionary(private val connection: Connection) : Dictionary {
return resultSet.toListOfDictionaryEntries()
}
/**
 * Splits [phrase] into segments and looks up every segment in the `cedict` table.
 *
 * Segments are matched against both the traditional and the simplified headword, and the
 * results are ordered by the position of the matching segment within [phrase].
 *
 * @param phrase the text to segment and look up
 * @return the matching dictionary entries, or an empty list when [phrase] yields no segments
 */
override fun searchSegments(phrase: String): List<DictionaryEntry> {
    val segments = segmentBs.segment(phrase, SegmentResultHandlers.word())

    // `VALUES` followed by no rows is not valid SQL, so there is nothing to query for.
    if (segments.isEmpty()) return emptyList()

    // Expand the template's single `?` into one "(?, ?)" row per segment. The actual values are
    // bound below instead of being spliced into the SQL text, so a segment containing a quote
    // (or any other SQL syntax) can neither break nor alter the query.
    val rowPlaceholders = segments.joinToString(",") { "(?, ?)" }
    val query = searchSegments.replace("?", rowPlaceholders)

    // The number of rows varies per phrase, so the statement cannot be cached like the other
    // searches; `use` makes sure it is closed again after each call.
    // NOTE(review): assumes toListOfDictionaryEntries() reads all rows eagerly, i.e. the returned
    // list does not depend on the statement staying open — confirm.
    return connection.prepareStatement(query).use { statement ->
        segments.forEachIndexed { index, segment ->
            // JDBC parameters are 1-based; each row contributes (id, segment).
            statement.setInt(2 * index + 1, index)
            statement.setString(2 * index + 2, segment)
        }
        statement.executeQuery().toListOfDictionaryEntries()
    }
}
private fun searchSimplified(query: String): List<DictionaryEntry> {
searchSimplifiedPreparedStatement.setString(1, "$query*")

View File

@ -30,6 +30,12 @@
<SearchMode fx:value="PINYIN"/>
</userData>
</RadioButton>
<RadioButton mnemonicParsing="false" text="%search.mode.phrase"
toggleGroup="$searchModeToggleGroup">
<userData>
<SearchMode fx:value="SEGMENTS"/>
</userData>
</RadioButton>
<RadioButton mnemonicParsing="false" text="%search.mode.english"
toggleGroup="$searchModeToggleGroup"
disable="true">

View File

@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Copy Pronunciation
menubar.help=_Help
menubar.help.about=_About…
list.no_entries_found=No matching entries found
search.mode.phrase=Phrase

View File

@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Kopiere Aussprache
menubar.help=_Hilfe
menubar.help.about=_Über…
list.no_entries_found=No matching entries found
search.mode.phrase=Phrase

View File

@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=Copy Pronunciation
menubar.help=_Help
menubar.help.about=_About…
list.no_entries_found=No matching entries found
search.mode.phrase=Phrase

View File

@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=複製 Aussprache
menubar.help=_說明
menubar.help.about=_關於 Willow…
list.no_entries_found=No matching entries found
search.mode.phrase=Phrase

View File

@ -17,3 +17,4 @@ menubar.edit.copy.pronunciation=複製 Aussprache
menubar.help=_說明
menubar.help.about=_關於 Willow…
list.no_entries_found=No matching entries found
search.mode.phrase=Phrase

File diff suppressed because it is too large Load Diff