Use Chinese transliteration library
All checks were successful
Pull Request / build (pull_request) Successful in 2m10s

This commit is contained in:
Marvin Elsen 2024-09-20 13:46:01 +02:00
parent f1282d74a1
commit f27ed7006d
Signed by: marvinelsen
GPG Key ID: 820672408CC318C2
6 changed files with 15 additions and 61 deletions

View File

@ -9,11 +9,16 @@ version = "1.0-SNAPSHOT"
repositories {
mavenCentral()
// Extra Maven repository served from a Gitea package registry (per the URL below).
// NOTE(review): presumably this is where chinese-transliteration is published — confirm.
maven {
url = uri("https://gitea.marvinelsen.com/api/packages/marvinelsen/maven")
}
}
dependencies {
detektPlugins(libs.detekt.formatting)
// SNAPSHOT dependency flagged as a changing module, so Gradle re-checks the repository
// for a newer build of the same version instead of treating the cached artifact as final.
implementation("com.marvinelsen:chinese-transliteration:1.1-SNAPSHOT") { isChanging = true }
testImplementation(libs.kotest.core)
testImplementation(libs.kotest.assertions)
}

View File

@ -1,8 +1,10 @@
package com.marvinelsen.cedict.api
import com.marvinelsen.chinese.transliteration.Syllable
data class CedictEntry(
val traditional: String,
val simplified: String,
val pinyinSyllables: List<PinyinSyllable>,
val pinyinSyllables: List<Syllable>,
val definitions: List<CedictDefinition>,
)

View File

@ -1,27 +0,0 @@
package com.marvinelsen.cedict.api
/**
 * A pinyin syllable together with its [Tone].
 *
 * [toString] yields the syllable text immediately followed by the tone's string form.
 */
data class PinyinSyllable(
    val syllable: String,
    val tone: Tone,
) {
    override fun toString() = "$syllable$tone"

    companion object {
        /**
         * Builds a [PinyinSyllable] from text that may end in a tone digit.
         *
         * If the last character is a digit, it is stripped from the syllable text and
         * converted with [Tone.fromDigit]; otherwise the whole input becomes the syllable
         * and the tone is [Tone.NONE].
         *
         * @param pinyinWithNumbers the syllable text, optionally suffixed with a tone digit
         * @throws IllegalArgumentException if [pinyinWithNumbers] is blank
         */
        fun fromString(pinyinWithNumbers: String): PinyinSyllable {
            require(pinyinWithNumbers.isNotBlank()) { "Argument pinyinWithNumbers must not be blank." }

            val trailing = pinyinWithNumbers.last()
            if (!trailing.isDigit()) {
                return PinyinSyllable(syllable = pinyinWithNumbers, tone = Tone.NONE)
            }

            // Everything before the trailing digit is the syllable itself.
            val bareSyllable = pinyinWithNumbers.dropLast(1)
            return PinyinSyllable(syllable = bareSyllable, tone = Tone.fromDigit(trailing))
        }
    }
}

View File

@ -1,25 +0,0 @@
package com.marvinelsen.cedict.api
/**
 * Tone of a pinyin syllable.
 *
 * Each constant carries the text it renders as: [NONE] renders as the empty string,
 * the remaining constants as the digits "1" through "5".
 * The spelling of [FORTH] is kept as-is because it is part of the public API.
 */
enum class Tone(private val digits: String) {
    NONE(""),
    FIRST("1"),
    SECOND("2"),
    THIRD("3"),
    FORTH("4"),
    FIFTH("5");

    /** Returns the tone digit as text, or an empty string for [NONE]. */
    override fun toString() = digits

    companion object {
        /**
         * Maps a tone digit character ('1'..'5') to its [Tone].
         *
         * @throws IllegalStateException if [digit] is not one of '1'..'5'
         */
        fun fromDigit(digit: Char): Tone {
            val wanted = digit.toString()
            // NONE holds an empty string, so it can never match a one-character lookup.
            return values().firstOrNull { it.digits == wanted }
                ?: error("Digit $digit is not a valid tone")
        }
    }
}

View File

@ -3,7 +3,7 @@ package com.marvinelsen.cedict.internal
import com.marvinelsen.cedict.api.CedictDefinition
import com.marvinelsen.cedict.api.CedictEntry
import com.marvinelsen.cedict.api.CedictParser
import com.marvinelsen.cedict.api.PinyinSyllable
import com.marvinelsen.chinese.transliteration.Syllable
import java.io.InputStream
internal class CedictParserImpl : CedictParser {
@ -42,10 +42,9 @@ internal class CedictParserImpl : CedictParser {
}
private fun toPinyinSyllables(pinyinWithNumbers: String) = pinyinWithNumbers
.lowercase()
.replace("u:", "ü")
.split(" ")
.map { PinyinSyllable.fromString(it) }
.filter { Syllable.isValidPinyinWithToneNumberSyllable(it) }
.map { Syllable.fromPinyinWithToneNumber(it) }
private fun toCedictDefinitions(definitions: String) = definitions
.split(DEFINITION_SEPARATOR)

View File

@ -1,8 +1,8 @@
package com.marvinelsen.cedict.internal
import com.marvinelsen.cedict.api.CedictDefinition
import com.marvinelsen.cedict.api.PinyinSyllable
import com.marvinelsen.cedict.api.Tone
import com.marvinelsen.chinese.transliteration.Syllable
import com.marvinelsen.chinese.transliteration.Tone
import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe
import java.util.zip.GZIPInputStream
@ -16,8 +16,8 @@ class CedictParserImplTest : ShouldSpec({
cedictEntry.traditional shouldBe "皮實"
cedictEntry.simplified shouldBe "皮实"
cedictEntry.pinyinSyllables shouldBe listOf(
PinyinSyllable("pi", Tone.SECOND),
PinyinSyllable("shi", Tone.FIFTH)
Syllable("pi", Tone.SECOND),
Syllable("shi", Tone.FIFTH)
)
cedictEntry.definitions shouldBe listOf(
CedictDefinition(listOf("(of things) durable")),