Use Chinese transliteration library
All checks were successful
Pull Request / build (pull_request) Successful in 2m10s
All checks were successful
Pull Request / build (pull_request) Successful in 2m10s
This commit is contained in:
parent
f1282d74a1
commit
f27ed7006d
@ -9,11 +9,16 @@ version = "1.0-SNAPSHOT"
|
|||||||
|
|
||||||
repositories {
|
repositories {
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
|
maven {
|
||||||
|
url = uri("https://gitea.marvinelsen.com/api/packages/marvinelsen/maven")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
detektPlugins(libs.detekt.formatting)
|
detektPlugins(libs.detekt.formatting)
|
||||||
|
|
||||||
|
implementation("com.marvinelsen:chinese-transliteration:1.1-SNAPSHOT") { isChanging = true }
|
||||||
|
|
||||||
testImplementation(libs.kotest.core)
|
testImplementation(libs.kotest.core)
|
||||||
testImplementation(libs.kotest.assertions)
|
testImplementation(libs.kotest.assertions)
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
package com.marvinelsen.cedict.api
|
package com.marvinelsen.cedict.api
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.transliteration.Syllable
|
||||||
|
|
||||||
data class CedictEntry(
|
data class CedictEntry(
|
||||||
val traditional: String,
|
val traditional: String,
|
||||||
val simplified: String,
|
val simplified: String,
|
||||||
val pinyinSyllables: List<PinyinSyllable>,
|
val pinyinSyllables: List<Syllable>,
|
||||||
val definitions: List<CedictDefinition>,
|
val definitions: List<CedictDefinition>,
|
||||||
)
|
)
|
||||||
|
@ -1,27 +0,0 @@
|
|||||||
package com.marvinelsen.cedict.api
|
|
||||||
|
|
||||||
data class PinyinSyllable(
|
|
||||||
val syllable: String,
|
|
||||||
val tone: Tone,
|
|
||||||
) {
|
|
||||||
companion object {
|
|
||||||
fun fromString(pinyinWithNumbers: String): PinyinSyllable {
|
|
||||||
require(pinyinWithNumbers.isNotBlank()) { "Argument pinyinWithNumbers must not be blank." }
|
|
||||||
|
|
||||||
val lastCharacter = pinyinWithNumbers.last()
|
|
||||||
return if (lastCharacter.isDigit()) {
|
|
||||||
PinyinSyllable(
|
|
||||||
syllable = pinyinWithNumbers.substring(0, pinyinWithNumbers.lastIndex),
|
|
||||||
tone = Tone.fromDigit(lastCharacter)
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
PinyinSyllable(
|
|
||||||
syllable = pinyinWithNumbers,
|
|
||||||
tone = Tone.NONE
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun toString() = syllable + tone.toString()
|
|
||||||
}
|
|
@ -1,25 +0,0 @@
|
|||||||
package com.marvinelsen.cedict.api
|
|
||||||
|
|
||||||
enum class Tone {
|
|
||||||
NONE, FIRST, SECOND, THIRD, FORTH, FIFTH;
|
|
||||||
|
|
||||||
companion object {
|
|
||||||
fun fromDigit(digit: Char) = when (digit) {
|
|
||||||
'1' -> FIRST
|
|
||||||
'2' -> SECOND
|
|
||||||
'3' -> THIRD
|
|
||||||
'4' -> FORTH
|
|
||||||
'5' -> FIFTH
|
|
||||||
else -> error("Digit $digit is not a valid tone")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun toString() = when (this) {
|
|
||||||
NONE -> ""
|
|
||||||
FIRST -> "1"
|
|
||||||
SECOND -> "2"
|
|
||||||
THIRD -> "3"
|
|
||||||
FORTH -> "4"
|
|
||||||
FIFTH -> "5"
|
|
||||||
}
|
|
||||||
}
|
|
@ -3,7 +3,7 @@ package com.marvinelsen.cedict.internal
|
|||||||
import com.marvinelsen.cedict.api.CedictDefinition
|
import com.marvinelsen.cedict.api.CedictDefinition
|
||||||
import com.marvinelsen.cedict.api.CedictEntry
|
import com.marvinelsen.cedict.api.CedictEntry
|
||||||
import com.marvinelsen.cedict.api.CedictParser
|
import com.marvinelsen.cedict.api.CedictParser
|
||||||
import com.marvinelsen.cedict.api.PinyinSyllable
|
import com.marvinelsen.chinese.transliteration.Syllable
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
internal class CedictParserImpl : CedictParser {
|
internal class CedictParserImpl : CedictParser {
|
||||||
@ -42,10 +42,9 @@ internal class CedictParserImpl : CedictParser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun toPinyinSyllables(pinyinWithNumbers: String) = pinyinWithNumbers
|
private fun toPinyinSyllables(pinyinWithNumbers: String) = pinyinWithNumbers
|
||||||
.lowercase()
|
|
||||||
.replace("u:", "ü")
|
|
||||||
.split(" ")
|
.split(" ")
|
||||||
.map { PinyinSyllable.fromString(it) }
|
.filter { Syllable.isValidPinyinWithToneNumberSyllable(it) }
|
||||||
|
.map { Syllable.fromPinyinWithToneNumber(it) }
|
||||||
|
|
||||||
private fun toCedictDefinitions(definitions: String) = definitions
|
private fun toCedictDefinitions(definitions: String) = definitions
|
||||||
.split(DEFINITION_SEPARATOR)
|
.split(DEFINITION_SEPARATOR)
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
package com.marvinelsen.cedict.internal
|
package com.marvinelsen.cedict.internal
|
||||||
|
|
||||||
import com.marvinelsen.cedict.api.CedictDefinition
|
import com.marvinelsen.cedict.api.CedictDefinition
|
||||||
import com.marvinelsen.cedict.api.PinyinSyllable
|
import com.marvinelsen.chinese.transliteration.Syllable
|
||||||
import com.marvinelsen.cedict.api.Tone
|
import com.marvinelsen.chinese.transliteration.Tone
|
||||||
import io.kotest.core.spec.style.ShouldSpec
|
import io.kotest.core.spec.style.ShouldSpec
|
||||||
import io.kotest.matchers.shouldBe
|
import io.kotest.matchers.shouldBe
|
||||||
import java.util.zip.GZIPInputStream
|
import java.util.zip.GZIPInputStream
|
||||||
@ -16,8 +16,8 @@ class CedictParserImplTest : ShouldSpec({
|
|||||||
cedictEntry.traditional shouldBe "皮實"
|
cedictEntry.traditional shouldBe "皮實"
|
||||||
cedictEntry.simplified shouldBe "皮实"
|
cedictEntry.simplified shouldBe "皮实"
|
||||||
cedictEntry.pinyinSyllables shouldBe listOf(
|
cedictEntry.pinyinSyllables shouldBe listOf(
|
||||||
PinyinSyllable("pi", Tone.SECOND),
|
Syllable("pi", Tone.SECOND),
|
||||||
PinyinSyllable("shi", Tone.FIFTH)
|
Syllable("shi", Tone.FIFTH)
|
||||||
)
|
)
|
||||||
cedictEntry.definitions shouldBe listOf(
|
cedictEntry.definitions shouldBe listOf(
|
||||||
CedictDefinition(listOf("(of things) durable")),
|
CedictDefinition(listOf("(of things) durable")),
|
||||||
|
Loading…
Reference in New Issue
Block a user