refactor: refactor everything
This commit is contained in:
parent
5ec3638454
commit
1eb9fb1d56
@ -31,7 +31,7 @@ publishing {
|
||||
publications {
|
||||
create<MavenPublication>("maven") {
|
||||
groupId = project.group as String
|
||||
artifactId = "chinese-transliteration"
|
||||
artifactId = "chinese-phonetics"
|
||||
version = project.version as String
|
||||
|
||||
from(components["java"])
|
||||
|
@ -1 +1 @@
|
||||
rootProject.name = "chinese-transliteration"
|
||||
rootProject.name = "chinese-phonetics"
|
@ -0,0 +1,76 @@
|
||||
package com.marvinelsen.chinese.phonetics
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinMarkSyllableFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinNumberSyllableFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.SyllableFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.ZhuyinSyllableFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.PinyinNumberSyllableParser
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.SyllableParser
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.ZhuyinSyllableParser
|
||||
|
||||
/**
 * Public entry point for reading tones from, and converting between, Zhuyin and numbered Pinyin.
 *
 * Functions that take a sequence split it on runs of whitespace (regex `\s+`) after trimming the
 * input, so leading, trailing and repeated separators are ignored.
 */
object ChinesePhonetics {
    // Compiled once; previously a new Regex was built on every call.
    private val whitespaceRegex = Regex("\\s+")

    /** Returns the tone of one numbered-Pinyin syllable (e.g. `"yao4"`), or `null` if it cannot be parsed. */
    fun getToneFromNumberedPinyin(numberedPinyin: String): Tone? =
        PinyinNumberSyllableParser.parseOrNull(numberedPinyin)?.tone

    /** Returns the tone of one Zhuyin syllable, or `null` if it cannot be parsed. */
    fun getToneFromZhuyin(zhuyin: String): Tone? =
        ZhuyinSyllableParser.parseOrNull(zhuyin)?.tone

    /**
     * Returns one entry per whitespace-separated syllable of [numberedPinyinSequence];
     * an entry is `null` where the syllable could not be parsed.
     */
    fun getTonesFromNumberedPinyinSequence(numberedPinyinSequence: String): List<Tone?> =
        splitSyllables(numberedPinyinSequence).map { getToneFromNumberedPinyin(it) }

    /**
     * Returns one entry per whitespace-separated syllable of [zhuyinSequence];
     * an entry is `null` where the syllable could not be parsed.
     */
    fun getTonesFromZhuyinSequence(zhuyinSequence: String): List<Tone?> =
        splitSyllables(zhuyinSequence).map { getToneFromZhuyin(it) }

    /**
     * Converts a Zhuyin sequence to Pinyin with tone numbers.
     *
     * @param strict when `true`, an unparsable syllable throws `InvalidSyllableInputException`;
     *   when `false`, it is copied to the output unchanged.
     */
    fun zhuyinToPinyinWithNumbers(zhuyin: String, strict: Boolean = true): String = convertSyllableSequence(
        input = zhuyin,
        parser = ZhuyinSyllableParser,
        formatter = PinyinNumberSyllableFormatter,
        strict = strict,
    )

    /**
     * Converts a Zhuyin sequence to Pinyin with tone marks.
     *
     * @param strict when `true`, an unparsable syllable throws `InvalidSyllableInputException`;
     *   when `false`, it is copied to the output unchanged.
     */
    fun zhuyinToPinyinWithToneMarks(zhuyin: String, strict: Boolean = true): String = convertSyllableSequence(
        input = zhuyin,
        parser = ZhuyinSyllableParser,
        formatter = PinyinMarkSyllableFormatter,
        strict = strict,
    )

    /**
     * Converts a numbered-Pinyin sequence to Zhuyin.
     *
     * @param strict when `true`, an unparsable syllable throws `InvalidSyllableInputException`;
     *   when `false`, it is copied to the output unchanged.
     */
    fun pinyinWithNumbersToZhuyin(pinyinWithNumbers: String, strict: Boolean = true): String =
        convertSyllableSequence(
            input = pinyinWithNumbers,
            parser = PinyinNumberSyllableParser,
            formatter = ZhuyinSyllableFormatter,
            strict = strict,
        )

    /**
     * Converts a numbered-Pinyin sequence to Pinyin with tone marks.
     *
     * @param strict when `true`, an unparsable syllable throws `InvalidSyllableInputException`;
     *   when `false`, it is copied to the output unchanged.
     */
    fun pinyinWithNumbersToToneMarks(pinyinWithNumbers: String, strict: Boolean = true): String =
        convertSyllableSequence(
            input = pinyinWithNumbers,
            parser = PinyinNumberSyllableParser,
            formatter = PinyinMarkSyllableFormatter,
            strict = strict,
        )

    /** Splits [input] into its non-empty, whitespace-separated parts. */
    private fun splitSyllables(input: String): List<String> =
        input.trim().split(whitespaceRegex).filter { it.isNotEmpty() }

    /**
     * Parses every part of [input] with [parser] and renders it with [formatter], joining the
     * results with a single space. An input without any syllables yields an empty string.
     */
    private fun convertSyllableSequence(
        input: String,
        parser: SyllableParser,
        formatter: SyllableFormatter,
        strict: Boolean,
    ): String = splitSyllables(input).joinToString(" ") { part ->
        if (strict) {
            formatter.format(parser.parse(part))
        } else {
            // Lenient mode: keep whatever could not be parsed as-is.
            parser.parseOrNull(part)?.let { formatter.format(it) } ?: part
        }
    }
}
|
38
src/main/kotlin/com/marvinelsen/chinese/phonetics/Tone.kt
Normal file
38
src/main/kotlin/com/marvinelsen/chinese/phonetics/Tone.kt
Normal file
@ -0,0 +1,38 @@
|
||||
package com.marvinelsen.chinese.phonetics
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.PinyinNumberToneFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.PinyinToneFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.ZhuyinToneFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.DigitToneParser
|
||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.IntToneParser
|
||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.PinyinToneParser
|
||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.ZhuyinToneParser
|
||||
|
||||
/**
 * Tone of a syllable.
 *
 * The companion offers a throwing and a `null`-returning factory for each supported input form;
 * the throwing variants raise `InvalidToneInputException`.
 */
enum class Tone {
    FIRST, SECOND, THIRD, FOURTH, FIFTH;

    companion object {
        /** Converts a tone number (`1`..`5`) to a [Tone], throwing on any other value. */
        fun fromInt(toneNumber: Int): Tone = IntToneParser.parse(toneNumber)

        /** Converts a tone number (`1`..`5`) to a [Tone], or returns `null` for any other value. */
        fun fromIntOrNull(toneNumber: Int): Tone? = IntToneParser.parseOrNull(toneNumber)

        /** Converts a tone digit (`'1'`..`'5'`) to a [Tone], throwing on any other character. */
        fun fromDigit(digit: Char): Tone = DigitToneParser.parse(digit)

        /** Converts a tone digit (`'1'`..`'5'`) to a [Tone], or returns `null` for any other character. */
        fun fromDigitOrNull(digit: Char): Tone? = DigitToneParser.parseOrNull(digit)

        /** Converts a combining Pinyin tone diacritic to a [Tone], throwing if it is not recognised. */
        fun fromPinyinTone(pinyinTone: Char): Tone = PinyinToneParser.parse(pinyinTone)

        /** Converts a combining Pinyin tone diacritic to a [Tone], or returns `null` if it is not recognised. */
        fun fromPinyinToneOrNull(pinyinTone: Char): Tone? = PinyinToneParser.parseOrNull(pinyinTone)

        /** Converts a Zhuyin tone mark to a [Tone], throwing if it is not recognised. */
        fun fromZhuyinTone(zhuyinTone: Char): Tone = ZhuyinToneParser.parse(zhuyinTone)

        /** Converts a Zhuyin tone mark to a [Tone], or returns `null` if it is not recognised. */
        fun fromZhuyinToneOrNull(zhuyinTone: Char): Tone? = ZhuyinToneParser.parseOrNull(zhuyinTone)
    }
}
|
||||
|
||||
/** Returns the tone number of this tone: `1` for [Tone.FIRST] up to `5` for [Tone.FIFTH]. */
@Suppress("MagicNumber")
fun Tone.toInt(): Int {
    return when (this) {
        Tone.FIRST -> 1
        Tone.SECOND -> 2
        Tone.THIRD -> 3
        Tone.FOURTH -> 4
        Tone.FIFTH -> 5
    }
}
|
||||
|
||||
/** Returns this tone as the digit string used in numbered Pinyin (`"1"` for [Tone.FIRST] … `"5"` for [Tone.FIFTH]). */
fun Tone.toPinyinNumber() = PinyinNumberToneFormatter.format(this)
|
||||
/** Returns the combining diacritic that marks this tone in Pinyin; [Tone.FIFTH] yields an empty string. */
fun Tone.toPinyinTone() = PinyinToneFormatter.format(this)
|
||||
/** Returns the Zhuyin tone mark for this tone; [Tone.FIRST] yields an empty string. */
fun Tone.toZhuyinTone() = ZhuyinToneFormatter.format(this)
|
@ -0,0 +1,5 @@
|
||||
package com.marvinelsen.chinese.phonetics
|
||||
|
||||
/** Constants related to Zhuyin text. */
object Zhuyin {
    /** String placed between Zhuyin syllables. */
    const val SEPARATOR: String = " "
}
|
@ -0,0 +1,3 @@
|
||||
package com.marvinelsen.chinese.phonetics.exceptions
|
||||
|
||||
/** Thrown when a string cannot be parsed as a syllable; a subtype of [IllegalArgumentException]. */
class InvalidSyllableInputException(message: String) : IllegalArgumentException(message)
|
@ -0,0 +1,3 @@
|
||||
package com.marvinelsen.chinese.phonetics.exceptions
|
||||
|
||||
/** Thrown when a value cannot be parsed as a tone; a subtype of [IllegalArgumentException]. */
class InvalidToneInputException(message: String) : IllegalArgumentException(message)
|
@ -0,0 +1,8 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/**
 * A parsed syllable, independent of the notation it was read from.
 *
 * @property basePinyin the Pinyin spelling without any tone information.
 * @property tone the tone of the syllable.
 */
internal data class Syllable(val basePinyin: String, val tone: Tone)
|
@ -0,0 +1,36 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable
|
||||
|
||||
import java.io.IOException
|
||||
|
||||
/**
 * Holds the Pinyin/Zhuyin transcription tables read from a bundled TSV resource.
 * Intended for internal use by parsers and formatters.
 *
 * Both tables are built once, when this object is initialised (i.e. on first access to it).
 * Each non-blank resource line is expected to contain a Pinyin syllable and its Zhuyin spelling
 * separated by a tab.
 */
internal object TranscriptionDataRepository {
    private const val TRANSCRIPTION_RESOURCE_PATH = "/pinyin_zhuyin_transcriptions.tsv"
    private const val COLUMN_SEPARATOR = '\t'
    private const val REQUIRED_COLUMNS = 2

    /** Tone-less Pinyin syllable -> Zhuyin spelling. */
    val pinyinToZhuyin: Map<String, String> = loadTranscriptions()

    /** Inverse of [pinyinToZhuyin]; if several Pinyin spellings share a Zhuyin spelling, the last one wins. */
    val zhuyinToPinyin: Map<String, String> = pinyinToZhuyin.entries.associate { it.value to it.key }

    /** Returns `true` if [zhuyin] (without tone mark) is a key of [zhuyinToPinyin]. */
    fun isValidZhuyin(zhuyin: String): Boolean = zhuyin in zhuyinToPinyin

    /** Returns `true` if [pinyin] (without tone) is a key of [pinyinToZhuyin]. */
    fun isValidPinyin(pinyin: String): Boolean = pinyin in pinyinToZhuyin

    /** Lower-cases [pinyin] and rewrites the ASCII spellings `v` and `u:` to `ü`. */
    fun normalize(pinyin: String): String = pinyin.lowercase()
        .replace("v", "ü")
        .replace("u:", "ü")

    /**
     * Reads the transcription resource into a map.
     *
     * @throws IllegalStateException if the resource is missing or a non-blank line is malformed.
     * @throws IOException if reading the resource fails.
     */
    private fun loadTranscriptions(): Map<String, String> {
        val inputStream = this::class.java.getResourceAsStream(TRANSCRIPTION_RESOURCE_PATH)
            ?: error("Cannot find transcription resource: $TRANSCRIPTION_RESOURCE_PATH")

        return try {
            inputStream.bufferedReader().useLines { lines ->
                lines
                    // A trailing or stray empty line must not abort loading.
                    .filter { it.isNotBlank() }
                    .associate { parseTranscriptionLine(it) }
            }
        } catch (e: IOException) {
            throw IOException("Failed to load transcription data from $TRANSCRIPTION_RESOURCE_PATH", e)
        }
    }

    /** Splits one TSV line into a (Pinyin, Zhuyin) pair, failing with a descriptive message if a column is missing. */
    private fun parseTranscriptionLine(line: String): Pair<String, String> {
        val columns = line.split(COLUMN_SEPARATOR)
        check(columns.size >= REQUIRED_COLUMNS) {
            "Malformed line '$line' in transcription resource $TRANSCRIPTION_RESOURCE_PATH"
        }
        return columns[0].trim() to columns[1].trim()
    }
}
|
@ -0,0 +1,38 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
import com.marvinelsen.chinese.phonetics.toPinyinTone
|
||||
|
||||
/**
 * Renders a [Syllable] as Pinyin with a combining tone mark.
 *
 * [Tone.FIFTH] syllables are returned unmarked. For every other tone the combining diacritic is
 * inserted directly after the letter that carries the mark.
 */
internal data object PinyinMarkSyllableFormatter : SyllableFormatter {
    /**
     * Returns the index of the letter in [pinyin] that carries the tone mark.
     *
     * Priority is `a`, `o`, `e`; then `i` (or the `u` of `iu`); then `u`; then `ü`.
     * Syllables without any vowel (syllabic nasals such as `m`, `n`, `ng`) carry the mark on their
     * first `n` or `m` instead of failing.
     *
     * @throws IllegalStateException if [pinyin] has neither a vowel nor a nasal to mark.
     */
    private fun findVowelIndexForToneMark(pinyin: String): Int {
        val indexOfI = pinyin.lastIndexOf('i')
        return when {
            'a' in pinyin -> pinyin.lastIndexOf('a')
            'o' in pinyin -> pinyin.lastIndexOf('o')
            'e' in pinyin -> pinyin.lastIndexOf('e')
            // In "iu" the mark sits on the u; otherwise (including "ui") it sits on the i.
            indexOfI >= 0 ->
                if (pinyin.getOrNull(indexOfI + 1) == 'u') pinyin.lastIndexOf('u') else indexOfI

            'u' in pinyin -> pinyin.lastIndexOf('u')
            'ü' in pinyin -> pinyin.lastIndexOf('ü')
            else ->
                pinyin.indexOfFirst { it == 'n' || it == 'm' }.takeIf { it >= 0 }
                    ?: error("No vowel found in Pinyin syllable '$pinyin'")
        }
    }

    /**
     * Formats [syllable] with its tone mark.
     *
     * @throws IllegalStateException if the syllable needs a mark but has no letter that can carry it.
     */
    override fun format(syllable: Syllable): String {
        if (syllable.tone == Tone.FIFTH) return syllable.basePinyin

        val markedLetterIndex = findVowelIndexForToneMark(syllable.basePinyin)

        // The diacritic is a combining character, so it goes right after its base letter.
        return StringBuilder(syllable.basePinyin)
            .insert(markedLetterIndex + 1, syllable.tone.toPinyinTone())
            .toString()
    }
}
|
@ -0,0 +1,8 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
import com.marvinelsen.chinese.phonetics.toPinyinNumber
|
||||
|
||||
/** Renders a [Syllable] as numbered Pinyin: the base spelling followed by the tone digit. */
internal data object PinyinNumberSyllableFormatter : SyllableFormatter {
    override fun format(syllable: Syllable): String {
        val toneDigit = syllable.tone.toPinyinNumber()
        return "${syllable.basePinyin}$toneDigit"
    }
}
|
@ -0,0 +1,7 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
|
||||
/** Strategy that renders a [Syllable] in one particular notation. */
internal sealed interface SyllableFormatter {
    /** Returns the textual form of [syllable] in this formatter's notation. */
    fun format(syllable: Syllable): String
}
|
@ -0,0 +1,18 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
||||
import com.marvinelsen.chinese.phonetics.toZhuyinTone
|
||||
|
||||
/**
 * Renders a [Syllable] as Zhuyin.
 *
 * The tone mark of [Tone.FIFTH] is placed before the syllable; all other tone marks follow it.
 */
internal data object ZhuyinSyllableFormatter : SyllableFormatter {
    /**
     * Formats [syllable] as Zhuyin.
     *
     * @throws IllegalStateException if the syllable's base Pinyin is not in the transcription table.
     */
    override fun format(syllable: Syllable): String {
        // Fail with a message naming the offending syllable instead of a bare NullPointerException.
        val zhuyinBase = TranscriptionDataRepository.pinyinToZhuyin[syllable.basePinyin]
            ?: error("'${syllable.basePinyin}' is not a valid Pinyin syllable")
        val zhuyinToneMark = syllable.tone.toZhuyinTone()

        return when (syllable.tone) {
            Tone.FIFTH -> zhuyinToneMark + zhuyinBase
            Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FOURTH -> zhuyinBase + zhuyinToneMark
        }
    }
}
|
@ -0,0 +1,25 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
||||
|
||||
/**
 * Parses numbered Pinyin such as `"yao4"`: a Pinyin syllable followed by one tone digit.
 *
 * The spelling is normalised through [TranscriptionDataRepository.normalize] before it is looked up.
 */
internal data object PinyinNumberSyllableParser : SyllableParser {
    @Suppress("ReturnCount", "MagicNumber")
    override fun parseOrNull(input: String): Syllable? {
        // The last character must be a digit that maps to a tone.
        val trailingDigit = input.lastOrNull()?.takeIf { it.isDigit() } ?: return null
        val parsedTone = Tone.fromDigitOrNull(trailingDigit) ?: return null

        // Everything before the digit is the candidate spelling.
        val candidate = TranscriptionDataRepository.normalize(input.dropLast(1))

        return if (TranscriptionDataRepository.isValidPinyin(candidate)) {
            Syllable(basePinyin = candidate, tone = parsedTone)
        } else {
            null
        }
    }
}
|
@ -0,0 +1,10 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.exceptions.InvalidSyllableInputException
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
|
||||
/** Strategy that reads a [Syllable] from one particular notation. */
internal sealed interface SyllableParser {
    /** Parses [input], returning `null` when it is not a valid syllable in this notation. */
    fun parseOrNull(input: String): Syllable?

    /**
     * Parses [input], failing loudly when it is not a valid syllable in this notation.
     *
     * @throws InvalidSyllableInputException if [parseOrNull] returns `null` for [input].
     */
    fun parse(input: String): Syllable {
        return parseOrNull(input)
            ?: throw InvalidSyllableInputException("Invalid input for syllable parsing: '$input'")
    }
}
|
@ -0,0 +1,20 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
||||
|
||||
/**
 * Parses a single Zhuyin syllable.
 *
 * A syllable may carry at most one tone mark, as its first or last character; a syllable without
 * a mark is read as [Tone.FIRST]. Input with several marks, or with a mark in the middle, is
 * rejected rather than guessed at.
 */
internal data object ZhuyinSyllableParser : SyllableParser {
    private val zhuyinToneMarkRegex = """[ˊˇˋ˙¯]""".toRegex()

    @Suppress("ReturnCount")
    override fun parseOrNull(input: String): Syllable? {
        val zhuyinWithoutToneMark = input.replace(zhuyinToneMarkRegex, "")

        // Every tone mark is a single char, so the length difference is the number of marks removed.
        val toneMarkCount = input.length - zhuyinWithoutToneMark.length
        if (toneMarkCount > 1) return null

        val basePinyin = TranscriptionDataRepository.zhuyinToPinyin[zhuyinWithoutToneMark] ?: return null

        val tone = if (toneMarkCount == 0) {
            Tone.FIRST
        } else {
            // Exactly one mark: it must be the last or the first character.
            Tone.fromZhuyinToneOrNull(input.last())
                ?: Tone.fromZhuyinToneOrNull(input.first())
                ?: return null
        }

        return Syllable(basePinyin = basePinyin, tone = tone)
    }
}
|
@ -0,0 +1,13 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/** Renders a [Tone] as the digit string appended to numbered Pinyin. */
internal data object PinyinNumberToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String {
        return when (tone) {
            Tone.FIRST -> "1"
            Tone.SECOND -> "2"
            Tone.THIRD -> "3"
            Tone.FOURTH -> "4"
            Tone.FIFTH -> "5"
        }
    }
}
|
@ -0,0 +1,13 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/** Renders a [Tone] as the combining diacritic used in Pinyin with tone marks. */
internal data object PinyinToneFormatter : ToneFormatter {
    private const val COMBINING_MACRON = "\u0304"
    private const val COMBINING_ACUTE_ACCENT = "\u0301"
    private const val COMBINING_CARON = "\u030C"
    private const val COMBINING_GRAVE_ACCENT = "\u0300"
    private const val NO_MARK = ""

    override fun format(tone: Tone): String = when (tone) {
        Tone.FIRST -> COMBINING_MACRON
        Tone.SECOND -> COMBINING_ACUTE_ACCENT
        Tone.THIRD -> COMBINING_CARON
        Tone.FOURTH -> COMBINING_GRAVE_ACCENT
        Tone.FIFTH -> NO_MARK
    }
}
|
@ -0,0 +1,7 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/** Strategy that renders a [Tone] in one particular notation. */
internal sealed interface ToneFormatter {
    /** Returns the textual form of [tone] in this formatter's notation. */
    fun format(tone: Tone): String
}
|
@ -0,0 +1,13 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/** Renders a [Tone] as its Zhuyin tone mark; [Tone.FIRST] is rendered as an empty string. */
internal data object ZhuyinToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String {
        return when (tone) {
            Tone.FIRST -> ""
            Tone.SECOND -> "ˊ"
            Tone.THIRD -> "ˇ"
            Tone.FOURTH -> "ˋ"
            Tone.FIFTH -> "˙"
        }
    }
}
|
@ -0,0 +1,14 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/** Reads a [Tone] from a tone digit character (`'1'`..`'5'`). */
internal data object DigitToneParser : ToneParser<Char> {
    private val tonesByDigit: Map<Char, Tone> = mapOf(
        '1' to Tone.FIRST,
        '2' to Tone.SECOND,
        '3' to Tone.THIRD,
        '4' to Tone.FOURTH,
        '5' to Tone.FIFTH,
    )

    /** Returns the tone for [input], or `null` if it is not one of the five tone digits. */
    override fun parseOrNull(input: Char): Tone? = tonesByDigit[input]
}
|
@ -0,0 +1,15 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/** Reads a [Tone] from a tone number (`1`..`5`). */
@Suppress("MagicNumber")
internal data object IntToneParser : ToneParser<Int> {
    private val tonesByNumber: Map<Int, Tone> = mapOf(
        1 to Tone.FIRST,
        2 to Tone.SECOND,
        3 to Tone.THIRD,
        4 to Tone.FOURTH,
        5 to Tone.FIFTH,
    )

    /** Returns the tone for [input], or `null` if it is outside `1..5`. */
    override fun parseOrNull(input: Int): Tone? = tonesByNumber[input]
}
|
@ -0,0 +1,13 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/**
 * Reads a [Tone] from a combining Pinyin tone diacritic.
 *
 * Only the four diacritics are recognised; no character maps to [Tone.FIFTH].
 */
internal data object PinyinToneParser : ToneParser<Char> {
    private val tonesByDiacritic: Map<Char, Tone> = mapOf(
        '\u0304' to Tone.FIRST, // combining macron
        '\u0301' to Tone.SECOND, // combining acute accent
        '\u030C' to Tone.THIRD, // combining caron
        '\u0300' to Tone.FOURTH, // combining grave accent
    )

    /** Returns the tone for [input], or `null` if it is not one of the four tone diacritics. */
    override fun parseOrNull(input: Char): Tone? = tonesByDiacritic[input]
}
|
@ -0,0 +1,10 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
import com.marvinelsen.chinese.phonetics.exceptions.InvalidToneInputException
|
||||
|
||||
/** Strategy that reads a [Tone] from a value of type [T]. */
internal sealed interface ToneParser<T> {
    /** Parses [input], returning `null` when it does not denote a tone. */
    fun parseOrNull(input: T): Tone?

    /**
     * Parses [input], failing loudly when it does not denote a tone.
     *
     * @throws InvalidToneInputException if [parseOrNull] returns `null` for [input].
     */
    fun parse(input: T): Tone {
        return parseOrNull(input)
            ?: throw InvalidToneInputException("Invalid input for tone parsing: '$input'")
    }
}
|
@ -0,0 +1,14 @@
|
||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.Tone
|
||||
|
||||
/** Reads a [Tone] from a Zhuyin tone mark character. */
internal data object ZhuyinToneParser : ToneParser<Char> {
    private val tonesByMark: Map<Char, Tone> = mapOf(
        '¯' to Tone.FIRST,
        'ˊ' to Tone.SECOND,
        'ˇ' to Tone.THIRD,
        'ˋ' to Tone.FOURTH,
        '˙' to Tone.FIFTH,
    )

    /** Returns the tone for [input], or `null` if it is not a recognised Zhuyin tone mark. */
    override fun parseOrNull(input: Char): Tone? = tonesByMark[input]
}
|
@ -1,119 +0,0 @@
|
||||
package com.marvinelsen.chinese.transliteration.api
|
||||
|
||||
import java.io.InputStream
|
||||
|
||||
@Suppress("MagicNumber", "MaximumLineLength", "MaxLineLength")
|
||||
data class PinyinSyllable(
|
||||
val pinyinSyllableWithoutTone: String,
|
||||
val tone: Tone,
|
||||
) {
|
||||
companion object {
|
||||
private val pinyinToZhuyin = parseTranscriptions(
|
||||
this::class.java.getResourceAsStream("/pinyin_zhuyin_transcriptions.tsv")!!
|
||||
)
|
||||
private val zhuyinToPinyin = pinyinToZhuyin.entries.associate { it.value to it.key }
|
||||
private val zhuyinToneMarkRegex = """[ˊˇˋ˙]""".toRegex()
|
||||
|
||||
fun isValidPinyinWithToneNumberSyllable(pinyinSyllable: String) =
|
||||
pinyinSyllable.last().isDigit() && pinyinSyllable.last().digitToInt() in 1..5 && pinyinSyllable
|
||||
.substring(0, pinyinSyllable.lastIndex)
|
||||
.lowercase() in pinyinToZhuyin
|
||||
|
||||
fun fromPinyinWithToneNumber(pinyinWithToneNumber: String): PinyinSyllable {
|
||||
val pinyinWithoutNumber = pinyinWithToneNumber.substring(0, pinyinWithToneNumber.lastIndex)
|
||||
val lastCharacter = pinyinWithToneNumber.last()
|
||||
|
||||
require(lastCharacter.isDigit()) {
|
||||
"'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the last character to be a digit, but was '${pinyinWithToneNumber.last()}'"
|
||||
}
|
||||
require(lastCharacter.digitToInt() in 1..5) {
|
||||
"'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the tone number 'n' to be in range 1 <= n <= 5, but was '${pinyinWithToneNumber.last()}'"
|
||||
}
|
||||
require(
|
||||
pinyinWithoutNumber.lowercase() in pinyinToZhuyin
|
||||
) { "'$pinyinWithoutNumber' is not a valid Pinyin syllable." }
|
||||
|
||||
return PinyinSyllable(
|
||||
pinyinSyllableWithoutTone = pinyinWithoutNumber,
|
||||
tone = Tone.fromDigit(lastCharacter)
|
||||
)
|
||||
}
|
||||
|
||||
fun fromZhuyin(zhuyin: String): PinyinSyllable {
|
||||
val zhuyinWithoutToneMark = zhuyin.replace(zhuyinToneMarkRegex, "")
|
||||
|
||||
require(zhuyinWithoutToneMark in zhuyinToPinyin) { "'$zhuyin' is not a valid Zhuyin syllable." }
|
||||
|
||||
return PinyinSyllable(
|
||||
zhuyinToPinyin[zhuyinWithoutToneMark]!!,
|
||||
Tone.fromZhuyinToneMarkOrNull(zhuyin.last()) ?: Tone.fromZhuyinToneMarkOrNull(zhuyin.first())
|
||||
?: Tone.FIRST
|
||||
)
|
||||
}
|
||||
|
||||
private fun parseTranscriptions(inputStream: InputStream) =
|
||||
inputStream.bufferedReader().useLines { lines ->
|
||||
lines.map { it.split('\t') }
|
||||
.associate { it[0] to it[1] }
|
||||
}
|
||||
}
|
||||
|
||||
fun format(transliterationSystem: TransliterationSystem) = when (transliterationSystem) {
|
||||
TransliterationSystem.ZHUYIN -> formatToZhuyin()
|
||||
TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
|
||||
TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
|
||||
}
|
||||
|
||||
private fun formatToZhuyin(): String {
|
||||
val zhuyinSyllable = pinyinToZhuyin[pinyinSyllableWithoutTone.lowercase()]
|
||||
?: error("$pinyinSyllableWithoutTone is not a valid Pinyin syllable")
|
||||
val zhuyinToneMark = tone.format(TransliterationSystem.ZHUYIN)
|
||||
|
||||
return when (tone) {
|
||||
Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FORTH -> zhuyinSyllable + zhuyinToneMark
|
||||
Tone.FIFTH -> zhuyinToneMark + zhuyinSyllable
|
||||
}
|
||||
}
|
||||
|
||||
private fun formatToPinyinWithToneNumbers(): String {
|
||||
check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
|
||||
"'$pinyinSyllableWithoutTone is not a valid Pinyin syllable."
|
||||
}
|
||||
|
||||
return pinyinSyllableWithoutTone + tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
|
||||
}
|
||||
|
||||
private fun formatToPinyinWithToneMarks(): String {
|
||||
check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
|
||||
"'$pinyinSyllableWithoutTone is not a valid Pinyin syllable."
|
||||
}
|
||||
|
||||
if (pinyinSyllableWithoutTone.lowercase() == "r" && tone == Tone.FIFTH) {
|
||||
return pinyinSyllableWithoutTone
|
||||
}
|
||||
|
||||
val sanitizedPinyinSyllableWithoutTone = pinyinSyllableWithoutTone.replace("v", "ü").replace("u:", "ü")
|
||||
|
||||
val characterToIndex = sanitizedPinyinSyllableWithoutTone.lowercase().withIndex().associate { it.value to it.index }
|
||||
val vowelIndex = when {
|
||||
'a' in characterToIndex -> characterToIndex['a']!!
|
||||
'o' in characterToIndex -> characterToIndex['o']!!
|
||||
'e' in characterToIndex -> characterToIndex['e']!!
|
||||
'i' in characterToIndex ->
|
||||
if (sanitizedPinyinSyllableWithoutTone.elementAtOrNull(characterToIndex['i']!! + 1) == 'u') {
|
||||
characterToIndex['u']!!
|
||||
} else {
|
||||
characterToIndex['i']!!
|
||||
}
|
||||
|
||||
'u' in characterToIndex -> characterToIndex['u']!!
|
||||
'ü' in characterToIndex -> characterToIndex['ü']!!
|
||||
else -> error("No vowel found in Pinyin syllable '$sanitizedPinyinSyllableWithoutTone'")
|
||||
}
|
||||
|
||||
return buildString {
|
||||
append(sanitizedPinyinSyllableWithoutTone)
|
||||
insert(vowelIndex + 1, tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS))
|
||||
}
|
||||
}
|
||||
}
|
@ -1,81 +0,0 @@
|
||||
package com.marvinelsen.chinese.transliteration.api
|
||||
|
||||
@Suppress("MagicNumber")
|
||||
enum class Tone {
|
||||
FIRST, SECOND, THIRD, FORTH, FIFTH;
|
||||
|
||||
companion object {
|
||||
fun fromInt(number: Int) =
|
||||
fromIntOrNull(number) ?: throw IllegalArgumentException("Number $number is not a valid tone")
|
||||
|
||||
fun fromIntOrNull(number: Int) = when (number) {
|
||||
1 -> FIRST
|
||||
2 -> SECOND
|
||||
3 -> THIRD
|
||||
4 -> FORTH
|
||||
5 -> FIFTH
|
||||
else -> null
|
||||
}
|
||||
|
||||
fun fromDigit(digit: Char) =
|
||||
fromDigitOrNull(digit) ?: throw IllegalArgumentException("Digit $digit is not a valid tone")
|
||||
|
||||
fun fromDigitOrNull(digit: Char) = when (digit) {
|
||||
'1' -> FIRST
|
||||
'2' -> SECOND
|
||||
'3' -> THIRD
|
||||
'4' -> FORTH
|
||||
'5' -> FIFTH
|
||||
else -> null
|
||||
}
|
||||
|
||||
fun fromZhuyinToneMark(zhuyinToneMark: Char) = fromZhuyinToneMarkOrNull(zhuyinToneMark)
|
||||
?: throw IllegalArgumentException("Invalid zhuyin tone mark '$zhuyinToneMark'")
|
||||
|
||||
fun fromZhuyinToneMarkOrNull(zhuyinToneMark: Char) = when (zhuyinToneMark) {
|
||||
'ˊ' -> SECOND
|
||||
'ˇ' -> THIRD
|
||||
'ˋ' -> FORTH
|
||||
'˙' -> FIFTH
|
||||
else -> null
|
||||
}
|
||||
}
|
||||
|
||||
fun toInt() = when (this) {
|
||||
FIRST -> 1
|
||||
SECOND -> 2
|
||||
THIRD -> 3
|
||||
FORTH -> 4
|
||||
FIFTH -> 5
|
||||
}
|
||||
|
||||
fun format(transliterationSystem: TransliterationSystem) = when (transliterationSystem) {
|
||||
TransliterationSystem.ZHUYIN -> formatToZhuyin()
|
||||
TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
|
||||
TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
|
||||
}
|
||||
|
||||
private fun formatToPinyinWithToneNumbers() = when (this) {
|
||||
FIRST -> "1"
|
||||
SECOND -> "2"
|
||||
THIRD -> "3"
|
||||
FORTH -> "4"
|
||||
FIFTH -> "5"
|
||||
}
|
||||
|
||||
private fun formatToPinyinWithToneMarks() = when (this) {
|
||||
FIRST -> "\u0304"
|
||||
SECOND -> "\u0301"
|
||||
THIRD -> "\u030C"
|
||||
FORTH -> "\u0300"
|
||||
FIFTH -> ""
|
||||
}
|
||||
|
||||
private fun formatToZhuyin() = when (this) {
|
||||
FIRST -> ""
|
||||
SECOND -> "ˊ"
|
||||
THIRD -> "ˇ"
|
||||
FORTH -> "ˋ"
|
||||
FIFTH -> "˙"
|
||||
}
|
||||
}
|
@ -1,5 +0,0 @@
|
||||
package com.marvinelsen.chinese.transliteration.api
|
||||
|
||||
enum class TransliterationSystem {
|
||||
ZHUYIN, PINYIN_WITH_TONE_NUMBERS, PINYIN_WITH_TONE_MARKS
|
||||
}
|
@ -1,5 +0,0 @@
|
||||
package com.marvinelsen.chinese.transliteration.api
|
||||
|
||||
object Zhuyin {
|
||||
const val SEPARATOR = " "
|
||||
}
|
@ -195,17 +195,9 @@ lu ㄌㄨ
|
||||
luan ㄌㄨㄢ
|
||||
lun ㄌㄨㄣ
|
||||
luo ㄌㄨㄛ
|
||||
lu: ㄌㄩ
|
||||
lv ㄌㄩ
|
||||
lü ㄌㄩ
|
||||
lu:e ㄌㄩㄝ
|
||||
lve ㄌㄩㄝ
|
||||
lüe ㄌㄩㄝ
|
||||
lu:n ㄌㄩㄣ
|
||||
lvn ㄌㄩㄣ
|
||||
lün ㄌㄩㄣ
|
||||
lu:an ㄌㄩㄢ
|
||||
lvan ㄌㄩㄢ
|
||||
lüan ㄌㄩㄢ
|
||||
m ㄇ
|
||||
ma ㄇㄚ
|
||||
@ -251,11 +243,7 @@ nu ㄋㄨ
|
||||
nuan ㄋㄨㄢ
|
||||
nun ㄋㄨㄣ
|
||||
nuo ㄋㄨㄛ
|
||||
nu: ㄋㄩ
|
||||
nv ㄋㄩ
|
||||
nü ㄋㄩ
|
||||
nu:e ㄋㄩㄝ
|
||||
nve ㄋㄩㄝ
|
||||
nüe ㄋㄩㄝ
|
||||
o ㄛ
|
||||
ou ㄡ
|
||||
|
|
@ -0,0 +1,113 @@
|
||||
package com.marvinelsen.chinese.phonetics
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinMarkSyllableFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinNumberSyllableFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.ZhuyinSyllableFormatter
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.PinyinNumberSyllableParser
|
||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.ZhuyinSyllableParser
|
||||
import io.kotest.core.spec.style.ShouldSpec
|
||||
import io.kotest.datatest.withData
|
||||
import io.kotest.matchers.shouldBe
|
||||
|
||||
/**
 * Data-driven tests for the syllable parsers and formatters.
 *
 * Each `withData` row pairs an input with its expected result; `nameFn` builds the displayed test
 * name from the input (`first`) and the expectation (`second`).
 */
class ChinesePhoneticsTest : ShouldSpec({
    context("from pinyin with tone numbers") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            "sheng1" to Syllable("sheng", Tone.FIRST),
            "zhi2" to Syllable("zhi", Tone.SECOND),
            "ka3" to Syllable("ka", Tone.THIRD),
            "yao4" to Syllable("yao", Tone.FOURTH),
            "me5" to Syllable("me", Tone.FIFTH),
            "Me5" to Syllable("me", Tone.FIFTH),
            "nv3" to Syllable("nü", Tone.THIRD),
            "nü3" to Syllable("nü", Tone.THIRD),
            "nu:3" to Syllable("nü", Tone.THIRD),
            "r5" to Syllable("r", Tone.FIFTH),
            "R5" to Syllable("r", Tone.FIFTH),
            "er2" to Syllable("er", Tone.SECOND),
            "Er2" to Syllable("er", Tone.SECOND),
        ) { (pinyinWithNumber, expectedSyllable) ->
            PinyinNumberSyllableParser.parse(pinyinWithNumber) shouldBe expectedSyllable
        }
    }
    context("from zhuyin") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            "ㄕㄥ" to Syllable("sheng", Tone.FIRST),
            "ㄓˊ" to Syllable("zhi", Tone.SECOND),
            "ㄎㄚˇ" to Syllable("ka", Tone.THIRD),
            "ㄧㄠˋ" to Syllable("yao", Tone.FOURTH),
            "ㄇㄜ˙" to Syllable("me", Tone.FIFTH),
            "˙ㄇㄜ" to Syllable("me", Tone.FIFTH),
            "ㄋㄩˇ" to Syllable("nü", Tone.THIRD),
        ) { (zhuyin, expectedSyllable) ->
            ZhuyinSyllableParser.parse(zhuyin) shouldBe expectedSyllable
        }
    }

    // TODO: no cases yet.
    context("from invalid pinyin with tone numbers") {
    }

    // TODO: no cases yet.
    context("from invalid zhuyin") {
    }

    context("format to zhuyin") {
        withData(
            // Name the case after the input syllable only, not the whole (input, expected) pair.
            nameFn = { "${it.first} -> '${it.second}'" },
            Syllable("sheng", Tone.FIRST) to "ㄕㄥ",
            Syllable("zhi", Tone.SECOND) to "ㄓˊ",
            Syllable("ka", Tone.THIRD) to "ㄎㄚˇ",
            Syllable("yao", Tone.FOURTH) to "ㄧㄠˋ",
            Syllable("me", Tone.FIFTH) to "˙ㄇㄜ",
            Syllable("nü", Tone.THIRD) to "ㄋㄩˇ",
            Syllable("r", Tone.FIFTH) to "˙ㄦ",
            Syllable("er", Tone.SECOND) to "ㄦˊ",
        ) { (syllable, expectedZhuyin) ->
            ZhuyinSyllableFormatter.format(syllable) shouldBe expectedZhuyin
        }
    }

    context("format to pinyin with tone numbers") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Syllable("sheng", Tone.FIRST) to "sheng1",
            Syllable("zhi", Tone.SECOND) to "zhi2",
            Syllable("ka", Tone.THIRD) to "ka3",
            Syllable("yao", Tone.FOURTH) to "yao4",
            Syllable("me", Tone.FIFTH) to "me5",
            Syllable("nü", Tone.THIRD) to "nü3",
            Syllable("r", Tone.FIFTH) to "r5",
            Syllable("er", Tone.SECOND) to "er2",
        ) { (syllable, expectedPinyinWithToneNumbers) ->
            PinyinNumberSyllableFormatter.format(syllable) shouldBe expectedPinyinWithToneNumbers
        }
    }

    context("format to pinyin with tone marks") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Syllable("sheng", Tone.FIRST) to "shēng",
            Syllable("zhi", Tone.SECOND) to "zhí",
            Syllable("ka", Tone.THIRD) to "kǎ",
            Syllable("yao", Tone.FOURTH) to "yào",
            Syllable("me", Tone.FIFTH) to "me",
            Syllable("zhui", Tone.FIRST) to "zhuī",
            Syllable("liu", Tone.FIRST) to "liū",
            Syllable("nü", Tone.THIRD) to "nǚ",
            Syllable("r", Tone.FIFTH) to "r",
            Syllable("er", Tone.SECOND) to "ér",
        ) { (syllable, expectedPinyinWithToneMarks) ->
            PinyinMarkSyllableFormatter.format(syllable) shouldBe expectedPinyinWithToneMarks
        }
    }

    // TODO: no cases yet.
    context("format to zhuyin with invalid pinyin syllable") {
    }

    // TODO: no cases yet.
    context("format to pinyin with tone diacritics with invalid pinyin syllable") {
    }

    // TODO: no cases yet.
    context("format to pinyin with tone numbers with invalid pinyin syllable") {
    }
})
|
@ -1,5 +1,6 @@
|
||||
package com.marvinelsen.chinese.transliteration.api
|
||||
package com.marvinelsen.chinese.phonetics
|
||||
|
||||
import com.marvinelsen.chinese.phonetics.exceptions.InvalidToneInputException
|
||||
import io.kotest.assertions.throwables.shouldThrow
|
||||
import io.kotest.core.spec.style.ShouldSpec
|
||||
import io.kotest.datatest.withData
|
||||
@ -13,7 +14,7 @@ class ToneTest : ShouldSpec({
|
||||
'1' to Tone.FIRST,
|
||||
'2' to Tone.SECOND,
|
||||
'3' to Tone.THIRD,
|
||||
'4' to Tone.FORTH,
|
||||
'4' to Tone.FOURTH,
|
||||
'5' to Tone.FIFTH,
|
||||
) { (digit, expectedTone) ->
|
||||
Tone.fromDigit(digit) shouldBe expectedTone
|
||||
@ -26,7 +27,7 @@ class ToneTest : ShouldSpec({
|
||||
1 to Tone.FIRST,
|
||||
2 to Tone.SECOND,
|
||||
3 to Tone.THIRD,
|
||||
4 to Tone.FORTH,
|
||||
4 to Tone.FOURTH,
|
||||
5 to Tone.FIFTH,
|
||||
) { (number, expectedTone) ->
|
||||
Tone.fromInt(number) shouldBe expectedTone
|
||||
@ -36,12 +37,25 @@ class ToneTest : ShouldSpec({
|
||||
context("convert correctly from Zhuyin tone mark") {
|
||||
withData(
|
||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||
'¯' to Tone.FIRST,
|
||||
'ˊ' to Tone.SECOND,
|
||||
'ˇ' to Tone.THIRD,
|
||||
'ˋ' to Tone.FORTH,
|
||||
'ˋ' to Tone.FOURTH,
|
||||
'˙' to Tone.FIFTH,
|
||||
) { (zhuyinToneMark, expectedTone) ->
|
||||
Tone.fromZhuyinToneMark(zhuyinToneMark) shouldBe expectedTone
|
||||
Tone.fromZhuyinTone(zhuyinToneMark) shouldBe expectedTone
|
||||
}
|
||||
}
|
||||
|
||||
context("convert correctly from Pinyin tone mark") {
|
||||
withData(
|
||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||
'\u0304' to Tone.FIRST,
|
||||
'\u0301' to Tone.SECOND,
|
||||
'\u030C' to Tone.THIRD,
|
||||
'\u0300' to Tone.FOURTH,
|
||||
) { (pinyinToneMark, expectedTone) ->
|
||||
Tone.fromPinyinTone(pinyinToneMark) shouldBe expectedTone
|
||||
}
|
||||
}
|
||||
|
||||
@ -51,13 +65,52 @@ class ToneTest : ShouldSpec({
|
||||
Tone.FIRST to 1,
|
||||
Tone.SECOND to 2,
|
||||
Tone.THIRD to 3,
|
||||
Tone.FORTH to 4,
|
||||
Tone.FOURTH to 4,
|
||||
Tone.FIFTH to 5,
|
||||
) { (tone, expectedInteger) ->
|
||||
tone.toInt() shouldBe expectedInteger
|
||||
}
|
||||
}
|
||||
|
||||
context("convert correctly to Zhuyin tone mark") {
|
||||
withData(
|
||||
nameFn = { "${it.first} -> '${it.second}'" },
|
||||
Tone.FIRST to "",
|
||||
Tone.SECOND to "ˊ",
|
||||
Tone.THIRD to "ˇ",
|
||||
Tone.FOURTH to "ˋ",
|
||||
Tone.FIFTH to "˙",
|
||||
) { (tone, zhuyinTone) ->
|
||||
tone.toZhuyinTone() shouldBe zhuyinTone
|
||||
}
|
||||
}
|
||||
|
||||
context("convert correctly to Pinyin tone mark") {
|
||||
withData(
|
||||
nameFn = { "${it.first} -> '${it.second}'" },
|
||||
Tone.FIRST to "\u0304",
|
||||
Tone.SECOND to "\u0301",
|
||||
Tone.THIRD to "\u030C",
|
||||
Tone.FOURTH to "\u0300",
|
||||
Tone.FIFTH to "",
|
||||
) { (tone, pinyinTone) ->
|
||||
tone.toPinyinTone() shouldBe pinyinTone
|
||||
}
|
||||
}
|
||||
|
||||
context("convert correctly to Pinyin number") {
|
||||
withData(
|
||||
nameFn = { "${it.first} -> '${it.second}'" },
|
||||
Tone.FIRST to "1",
|
||||
Tone.SECOND to "2",
|
||||
Tone.THIRD to "3",
|
||||
Tone.FOURTH to "4",
|
||||
Tone.FIFTH to "5",
|
||||
) { (tone, pinyinNumber) ->
|
||||
tone.toPinyinNumber() shouldBe pinyinNumber
|
||||
}
|
||||
}
|
||||
|
||||
context("return null when converting from invalid digit ") {
|
||||
withData(
|
||||
nameFn = { "'$it' -> null" },
|
||||
@ -74,7 +127,7 @@ class ToneTest : ShouldSpec({
|
||||
|
||||
context("return null when converting from invalid int ") {
|
||||
withData(
|
||||
nameFn = { "'$it' -> throws exception" },
|
||||
nameFn = { "'$it' -> null" },
|
||||
0,
|
||||
6,
|
||||
-1,
|
||||
@ -87,7 +140,7 @@ class ToneTest : ShouldSpec({
|
||||
|
||||
context("return null when converting from invalid Zhuyin tone mark ") {
|
||||
withData(
|
||||
nameFn = { "'$it' -> throws exception" },
|
||||
nameFn = { "'$it' -> null" },
|
||||
'0',
|
||||
'6',
|
||||
'a',
|
||||
@ -95,7 +148,21 @@ class ToneTest : ShouldSpec({
|
||||
'$',
|
||||
'*',
|
||||
) { invalidZhuyinToneMark ->
|
||||
Tone.fromZhuyinToneMarkOrNull(invalidZhuyinToneMark).shouldBeNull()
|
||||
Tone.fromZhuyinToneOrNull(invalidZhuyinToneMark).shouldBeNull()
|
||||
}
|
||||
}
|
||||
|
||||
context("return null when converting from invalid Pinyin tone mark ") {
|
||||
withData(
|
||||
nameFn = { "'$it' -> null" },
|
||||
'0',
|
||||
'6',
|
||||
'a',
|
||||
'z',
|
||||
'$',
|
||||
'*',
|
||||
) { invalidPinyinToneMark ->
|
||||
Tone.fromPinyinToneOrNull(invalidPinyinToneMark).shouldBeNull()
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,7 +176,7 @@ class ToneTest : ShouldSpec({
|
||||
'$',
|
||||
'*',
|
||||
) { invalidDigit ->
|
||||
shouldThrow<IllegalArgumentException> {
|
||||
shouldThrow<InvalidToneInputException> {
|
||||
Tone.fromDigit(invalidDigit)
|
||||
}
|
||||
}
|
||||
@ -124,7 +191,7 @@ class ToneTest : ShouldSpec({
|
||||
Int.MAX_VALUE,
|
||||
Int.MIN_VALUE,
|
||||
) { invalidNumber ->
|
||||
shouldThrow<IllegalArgumentException> {
|
||||
shouldThrow<InvalidToneInputException> {
|
||||
Tone.fromInt(invalidNumber)
|
||||
}
|
||||
}
|
||||
@ -140,48 +207,25 @@ class ToneTest : ShouldSpec({
|
||||
'$',
|
||||
'*',
|
||||
) { invalidZhuyinToneMark ->
|
||||
shouldThrow<IllegalArgumentException> {
|
||||
Tone.fromZhuyinToneMark(invalidZhuyinToneMark)
|
||||
shouldThrow<InvalidToneInputException> {
|
||||
Tone.fromZhuyinTone(invalidZhuyinToneMark)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
context("format to Zhuyin correctly") {
|
||||
context("throw exception when converting from invalid Pinyin tone mark ") {
|
||||
withData(
|
||||
nameFn = { "${it.first} -> '${it.second}'" },
|
||||
Tone.FIRST to "",
|
||||
Tone.SECOND to "ˊ",
|
||||
Tone.THIRD to "ˇ",
|
||||
Tone.FORTH to "ˋ",
|
||||
Tone.FIFTH to "˙",
|
||||
) { (tone, expectedZhuyinToneMark) ->
|
||||
tone.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyinToneMark
|
||||
}
|
||||
}
|
||||
|
||||
context("format to Pinyin with tone numbers correctly") {
|
||||
withData(
|
||||
nameFn = { "${it.first} -> '${it.second}'" },
|
||||
Tone.FIRST to "1",
|
||||
Tone.SECOND to "2",
|
||||
Tone.THIRD to "3",
|
||||
Tone.FORTH to "4",
|
||||
Tone.FIFTH to "5",
|
||||
) { (tone, expectedNumber) ->
|
||||
tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedNumber
|
||||
}
|
||||
}
|
||||
|
||||
context("format to Pinyin with tone marks correctly") {
|
||||
withData(
|
||||
nameFn = { "${it.first} -> '${it.second}'" },
|
||||
Tone.FIRST to "\u0304",
|
||||
Tone.SECOND to "\u0301",
|
||||
Tone.THIRD to "\u030C",
|
||||
Tone.FORTH to "\u0300",
|
||||
Tone.FIFTH to "",
|
||||
) { (tone, expectedAccent) ->
|
||||
tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedAccent
|
||||
nameFn = { "'$it' -> throws exception" },
|
||||
'0',
|
||||
'6',
|
||||
'a',
|
||||
'z',
|
||||
'$',
|
||||
'*',
|
||||
) { invalidPinyinToneMark ->
|
||||
shouldThrow<InvalidToneInputException> {
|
||||
Tone.fromPinyinTone(invalidPinyinToneMark)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
@ -1,119 +0,0 @@
|
||||
package com.marvinelsen.chinese.transliteration.api
|
||||
|
||||
import io.kotest.core.spec.style.ShouldSpec
|
||||
import io.kotest.datatest.withData
|
||||
import io.kotest.matchers.shouldBe
|
||||
|
||||
class PinyinSyllableTest : ShouldSpec({
|
||||
context("from pinyin with tone numbers") {
|
||||
withData(
|
||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||
"sheng1" to PinyinSyllable("sheng", Tone.FIRST),
|
||||
"zhi2" to PinyinSyllable("zhi", Tone.SECOND),
|
||||
"ka3" to PinyinSyllable("ka", Tone.THIRD),
|
||||
"yao4" to PinyinSyllable("yao", Tone.FORTH),
|
||||
"me5" to PinyinSyllable("me", Tone.FIFTH),
|
||||
"Me5" to PinyinSyllable("Me", Tone.FIFTH),
|
||||
"nv3" to PinyinSyllable("nv", Tone.THIRD),
|
||||
"nü3" to PinyinSyllable("nü", Tone.THIRD),
|
||||
"nu:3" to PinyinSyllable("nu:", Tone.THIRD),
|
||||
"r5" to PinyinSyllable("r", Tone.FIFTH),
|
||||
"R5" to PinyinSyllable("R", Tone.FIFTH),
|
||||
"er2" to PinyinSyllable("er", Tone.SECOND),
|
||||
"Er2" to PinyinSyllable("Er", Tone.SECOND),
|
||||
) { (pinyinWithNumber, expectedSyllable) ->
|
||||
PinyinSyllable.fromPinyinWithToneNumber(pinyinWithNumber) shouldBe expectedSyllable
|
||||
}
|
||||
}
|
||||
|
||||
context("from zhuyin") {
|
||||
withData(
|
||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||
"ㄕㄥ" to PinyinSyllable("sheng", Tone.FIRST),
|
||||
"ㄓˊ" to PinyinSyllable("zhi", Tone.SECOND),
|
||||
"ㄎㄚˇ" to PinyinSyllable("ka", Tone.THIRD),
|
||||
"ㄧㄠˋ" to PinyinSyllable("yao", Tone.FORTH),
|
||||
"ㄇㄜ˙" to PinyinSyllable("me", Tone.FIFTH),
|
||||
"˙ㄇㄜ" to PinyinSyllable("me", Tone.FIFTH),
|
||||
"ㄋㄩˇ" to PinyinSyllable("nü", Tone.THIRD),
|
||||
) { (zhuyin, expectedSyllable) ->
|
||||
PinyinSyllable.fromZhuyin(zhuyin) shouldBe expectedSyllable
|
||||
}
|
||||
}
|
||||
|
||||
context("from invalid pinyin with tone numbers") {
|
||||
}
|
||||
|
||||
context("from invalid zhuyin") {
|
||||
}
|
||||
|
||||
context("format to zhuyin") {
|
||||
withData(
|
||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
||||
PinyinSyllable("sheng", Tone.FIRST) to "ㄕㄥ",
|
||||
PinyinSyllable("zhi", Tone.SECOND) to "ㄓˊ",
|
||||
PinyinSyllable("ka", Tone.THIRD) to "ㄎㄚˇ",
|
||||
PinyinSyllable("yao", Tone.FORTH) to "ㄧㄠˋ",
|
||||
PinyinSyllable("me", Tone.FIFTH) to "˙ㄇㄜ",
|
||||
PinyinSyllable("nü", Tone.THIRD) to "ㄋㄩˇ",
|
||||
PinyinSyllable("nu:", Tone.THIRD) to "ㄋㄩˇ",
|
||||
PinyinSyllable("nv", Tone.THIRD) to "ㄋㄩˇ",
|
||||
PinyinSyllable("r", Tone.FIFTH) to "˙ㄦ",
|
||||
PinyinSyllable("R", Tone.FIFTH) to "˙ㄦ",
|
||||
PinyinSyllable("er", Tone.SECOND) to "ㄦˊ",
|
||||
PinyinSyllable("Er", Tone.SECOND) to "ㄦˊ",
|
||||
) { (syllable, expectedZhuyin) ->
|
||||
syllable.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyin
|
||||
}
|
||||
}
|
||||
|
||||
context("format to pinyin with tone numbers") {
|
||||
withData(
|
||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
||||
PinyinSyllable("sheng", Tone.FIRST) to "sheng1",
|
||||
PinyinSyllable("zhi", Tone.SECOND) to "zhi2",
|
||||
PinyinSyllable("ka", Tone.THIRD) to "ka3",
|
||||
PinyinSyllable("yao", Tone.FORTH) to "yao4",
|
||||
PinyinSyllable("me", Tone.FIFTH) to "me5",
|
||||
PinyinSyllable("nü", Tone.THIRD) to "nü3",
|
||||
PinyinSyllable("nu:", Tone.THIRD) to "nu:3",
|
||||
PinyinSyllable("nv", Tone.THIRD) to "nv3",
|
||||
PinyinSyllable("r", Tone.FIFTH) to "r5",
|
||||
PinyinSyllable("R", Tone.FIFTH) to "R5",
|
||||
PinyinSyllable("er", Tone.SECOND) to "er2",
|
||||
PinyinSyllable("Er", Tone.SECOND) to "Er2",
|
||||
) { (syllable, expectedPinyinWithToneNumbers) ->
|
||||
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedPinyinWithToneNumbers
|
||||
}
|
||||
}
|
||||
|
||||
context("format to pinyin with tone marks") {
|
||||
withData(
|
||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
||||
PinyinSyllable("sheng", Tone.FIRST) to "shēng",
|
||||
PinyinSyllable("zhi", Tone.SECOND) to "zhí",
|
||||
PinyinSyllable("ka", Tone.THIRD) to "kǎ",
|
||||
PinyinSyllable("yao", Tone.FORTH) to "yào",
|
||||
PinyinSyllable("me", Tone.FIFTH) to "me",
|
||||
PinyinSyllable("zhui", Tone.FIRST) to "zhuī",
|
||||
PinyinSyllable("liu", Tone.FIRST) to "liū",
|
||||
PinyinSyllable("nü", Tone.THIRD) to "nǚ",
|
||||
PinyinSyllable("nu:", Tone.THIRD) to "nǚ",
|
||||
PinyinSyllable("nv", Tone.THIRD) to "nǚ",
|
||||
PinyinSyllable("r", Tone.FIFTH) to "r",
|
||||
PinyinSyllable("er", Tone.SECOND) to "ér",
|
||||
PinyinSyllable("Er", Tone.SECOND) to "Ér",
|
||||
) { (syllable, expectedPinyinWithToneMarks) ->
|
||||
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedPinyinWithToneMarks
|
||||
}
|
||||
}
|
||||
|
||||
context("format to zhuyin with invalid pinyin syllable") {
|
||||
}
|
||||
|
||||
context("format to pinyin with tone diacritics with invalid pinyin syllable") {
|
||||
}
|
||||
|
||||
context("format to pinyin with tone numbers with invalid pinyin syllable") {
|
||||
}
|
||||
})
|
Loading…
x
Reference in New Issue
Block a user