Compare commits
1 Commits
main
...
refactor/r
Author | SHA1 | Date | |
---|---|---|---|
1eb9fb1d56 |
@ -31,7 +31,7 @@ publishing {
|
|||||||
publications {
|
publications {
|
||||||
create<MavenPublication>("maven") {
|
create<MavenPublication>("maven") {
|
||||||
groupId = project.group as String
|
groupId = project.group as String
|
||||||
artifactId = "chinese-transliteration"
|
artifactId = "chinese-phonetics"
|
||||||
version = project.version as String
|
version = project.version as String
|
||||||
|
|
||||||
from(components["java"])
|
from(components["java"])
|
||||||
|
@ -1 +1 @@
|
|||||||
rootProject.name = "chinese-transliteration"
|
rootProject.name = "chinese-phonetics"
|
@ -0,0 +1,76 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinMarkSyllableFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinNumberSyllableFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.SyllableFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.ZhuyinSyllableFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.PinyinNumberSyllableParser
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.SyllableParser
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.ZhuyinSyllableParser
|
||||||
|
|
||||||
|
/**
 * Public entry point for tone extraction and for converting whitespace-separated
 * syllable sequences between Zhuyin, numbered Pinyin and Pinyin with tone marks.
 */
object ChinesePhonetics {
    /** Matches the whitespace between syllables; compiled once instead of on every call. */
    private val syllableSeparator = Regex("\\s+")

    /**
     * Returns the tone of a single numbered Pinyin syllable (e.g. `sheng1`),
     * or `null` when [numberedPinyin] cannot be parsed.
     */
    fun getToneFromNumberedPinyin(numberedPinyin: String): Tone? =
        PinyinNumberSyllableParser.parseOrNull(numberedPinyin)?.tone

    /**
     * Returns the tone of a single Zhuyin syllable, or `null` when [zhuyin] cannot be parsed.
     */
    fun getToneFromZhuyin(zhuyin: String): Tone? =
        ZhuyinSyllableParser.parseOrNull(zhuyin)?.tone

    /**
     * Returns one entry per whitespace-separated syllable of [numberedPinyinSequence];
     * an entry is `null` when that syllable cannot be parsed.
     */
    fun getTonesFromNumberedPinyinSequence(numberedPinyinSequence: String): List<Tone?> =
        splitSyllables(numberedPinyinSequence).map { getToneFromNumberedPinyin(it) }

    /**
     * Returns one entry per whitespace-separated syllable of [zhuyinSequence];
     * an entry is `null` when that syllable cannot be parsed.
     */
    fun getTonesFromZhuyinSequence(zhuyinSequence: String): List<Tone?> =
        splitSyllables(zhuyinSequence).map { getToneFromZhuyin(it) }

    /**
     * Converts a Zhuyin sequence to numbered Pinyin.
     *
     * @param strict when `true`, an unparseable syllable makes the parser throw
     *   `InvalidSyllableInputException`; when `false`, it is copied to the output unchanged.
     */
    fun zhuyinToPinyinWithNumbers(zhuyin: String, strict: Boolean = true): String = convertSyllableSequence(
        input = zhuyin,
        parser = ZhuyinSyllableParser,
        formatter = PinyinNumberSyllableFormatter,
        strict = strict
    )

    /**
     * Converts a Zhuyin sequence to Pinyin with tone marks.
     *
     * @param strict see [zhuyinToPinyinWithNumbers].
     */
    fun zhuyinToPinyinWithToneMarks(zhuyin: String, strict: Boolean = true): String = convertSyllableSequence(
        input = zhuyin,
        parser = ZhuyinSyllableParser,
        formatter = PinyinMarkSyllableFormatter,
        strict = strict
    )

    /**
     * Converts a numbered Pinyin sequence to Zhuyin.
     *
     * @param strict see [zhuyinToPinyinWithNumbers].
     */
    fun pinyinWithNumbersToZhuyin(pinyinWithNumbers: String, strict: Boolean = true): String =
        convertSyllableSequence(
            input = pinyinWithNumbers,
            parser = PinyinNumberSyllableParser,
            formatter = ZhuyinSyllableFormatter,
            strict = strict
        )

    /**
     * Converts a numbered Pinyin sequence to Pinyin with tone marks.
     *
     * @param strict see [zhuyinToPinyinWithNumbers].
     */
    fun pinyinWithNumbersToToneMarks(pinyinWithNumbers: String, strict: Boolean = true): String =
        convertSyllableSequence(
            input = pinyinWithNumbers,
            parser = PinyinNumberSyllableParser,
            formatter = PinyinMarkSyllableFormatter,
            strict = strict
        )

    /** Splits [sequence] on runs of whitespace, ignoring leading/trailing whitespace. */
    private fun splitSyllables(sequence: String): List<String> =
        sequence.trim().split(syllableSeparator).filter { it.isNotEmpty() }

    /**
     * Parses every syllable of [input] with [parser] and renders it with [formatter].
     * The converted syllables are joined with a single space; blank input yields `""`.
     */
    private fun convertSyllableSequence(
        input: String,
        parser: SyllableParser,
        formatter: SyllableFormatter,
        strict: Boolean
    ): String {
        val originalParts = splitSyllables(input)
        if (originalParts.isEmpty()) return ""

        return originalParts.joinToString(" ") { part ->
            if (strict) {
                formatter.format(parser.parse(part))
            } else {
                // Lenient mode: keep whatever could not be parsed exactly as it was written.
                parser.parseOrNull(part)?.let { formatter.format(it) } ?: part
            }
        }
    }
}
|
38
src/main/kotlin/com/marvinelsen/chinese/phonetics/Tone.kt
Normal file
38
src/main/kotlin/com/marvinelsen/chinese/phonetics/Tone.kt
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.PinyinNumberToneFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.PinyinToneFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.ZhuyinToneFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.DigitToneParser
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.IntToneParser
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.PinyinToneParser
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.ZhuyinToneParser
|
||||||
|
|
||||||
|
/**
 * The tones understood by this library's parsers and formatters.
 *
 * Every input notation has a throwing factory and an `OrNull` factory in the companion;
 * the throwing variants raise `InvalidToneInputException` for unrecognised input.
 */
enum class Tone {
    FIRST, SECOND, THIRD, FOURTH, FIFTH;

    companion object {
        /** Tone for a tone number; throws when [toneNumber] is not recognised. */
        fun fromInt(toneNumber: Int): Tone = IntToneParser.parse(toneNumber)

        /** Tone for a tone number, or `null` when [toneNumber] is not recognised. */
        fun fromIntOrNull(toneNumber: Int): Tone? = IntToneParser.parseOrNull(toneNumber)

        /** Tone for a digit character; throws when [digit] is not recognised. */
        fun fromDigit(digit: Char): Tone = DigitToneParser.parse(digit)

        /** Tone for a digit character, or `null` when [digit] is not recognised. */
        fun fromDigitOrNull(digit: Char): Tone? = DigitToneParser.parseOrNull(digit)

        /** Tone for a Pinyin tone mark character; throws when [pinyinTone] is not recognised. */
        fun fromPinyinTone(pinyinTone: Char): Tone = PinyinToneParser.parse(pinyinTone)

        /** Tone for a Pinyin tone mark character, or `null` when [pinyinTone] is not recognised. */
        fun fromPinyinToneOrNull(pinyinTone: Char): Tone? = PinyinToneParser.parseOrNull(pinyinTone)

        /** Tone for a Zhuyin tone mark character; throws when [zhuyinTone] is not recognised. */
        fun fromZhuyinTone(zhuyinTone: Char): Tone = ZhuyinToneParser.parse(zhuyinTone)

        /** Tone for a Zhuyin tone mark character, or `null` when [zhuyinTone] is not recognised. */
        fun fromZhuyinToneOrNull(zhuyinTone: Char): Tone? = ZhuyinToneParser.parseOrNull(zhuyinTone)
    }
}
|
||||||
|
|
||||||
|
/**
 * Returns the tone number of this tone: `1` for [Tone.FIRST] up to `5` for [Tone.FIFTH].
 *
 * The mapping is spelled out rather than derived from `ordinal` so that it does not
 * depend on the declaration order of the enum constants.
 */
@Suppress("MagicNumber")
fun Tone.toInt(): Int = when (this) {
    Tone.FIRST -> 1
    Tone.SECOND -> 2
    Tone.THIRD -> 3
    Tone.FOURTH -> 4
    Tone.FIFTH -> 5
}
|
||||||
|
|
||||||
|
/** Renders this tone as the digit string appended to numbered Pinyin. */
fun Tone.toPinyinNumber(): String = PinyinNumberToneFormatter.format(this)

/** Renders this tone as the mark used in Pinyin with tone marks. */
fun Tone.toPinyinTone(): String = PinyinToneFormatter.format(this)

/** Renders this tone as the mark used in Zhuyin. */
fun Tone.toZhuyinTone(): String = ZhuyinToneFormatter.format(this)
|
@ -0,0 +1,5 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics
|
||||||
|
|
||||||
|
/** Constants related to Zhuyin text. */
object Zhuyin {
    /** String placed between Zhuyin syllables. */
    const val SEPARATOR = " "
}
|
@ -0,0 +1,3 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.exceptions
|
||||||
|
|
||||||
|
/**
 * Signals that a string could not be parsed as a syllable.
 *
 * Being an [IllegalArgumentException], it can also be caught under that broader type.
 */
class InvalidSyllableInputException(
    message: String
) : IllegalArgumentException(message)
|
@ -0,0 +1,3 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.exceptions
|
||||||
|
|
||||||
|
/**
 * Signals that a value could not be parsed as a tone.
 *
 * Being an [IllegalArgumentException], it can also be caught under that broader type.
 */
class InvalidToneInputException(
    message: String
) : IllegalArgumentException(message)
|
@ -0,0 +1,8 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/**
 * Notation-independent representation of one syllable, shared by all parsers and formatters.
 *
 * @property basePinyin the syllable's Pinyin spelling without any tone information
 * @property tone the tone of the syllable
 */
internal data class Syllable(val basePinyin: String, val tone: Tone)
|
@ -0,0 +1,36 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable
|
||||||
|
|
||||||
|
import java.io.IOException
|
||||||
|
|
||||||
|
/**
 * Manages the loading and access of Pinyin-Zhuyin transcription data.
 * Intended for internal use by parsers and formatters.
 * The data is read from the bundled TSV resource when this object is first initialized.
 */
internal object TranscriptionDataRepository {
    private const val TRANSCRIPTION_RESOURCE_PATH = "/pinyin_zhuyin_transcriptions.tsv"

    /** Toneless Pinyin syllable -> Zhuyin syllable, as listed in the resource file. */
    val pinyinToZhuyin: Map<String, String> = loadTranscriptions()

    /**
     * Reverse lookup of [pinyinToZhuyin]. If several Pinyin spellings share one Zhuyin
     * syllable, the one listed last in the resource file wins.
     */
    val zhuyinToPinyin: Map<String, String> = pinyinToZhuyin.entries.associate { it.value to it.key }

    /** Whether [zhuyin] (without tone mark) is a known Zhuyin syllable. */
    fun isValidZhuyin(zhuyin: String): Boolean = zhuyin in zhuyinToPinyin

    /** Whether [pinyin] (without tone, already normalized) is a known Pinyin syllable. */
    fun isValidPinyin(pinyin: String): Boolean = pinyin in pinyinToZhuyin

    /** Lowercases [pinyin] and rewrites the ASCII spellings `v` and `u:` to `ü`. */
    fun normalize(pinyin: String): String = pinyin.lowercase()
        .replace("v", "ü")
        .replace("u:", "ü")

    /**
     * Reads the tab-separated resource into a map of first column -> second column.
     *
     * Blank lines are skipped. A non-blank line with fewer than two columns is reported
     * with its content instead of surfacing as an index error during initialization.
     *
     * @throws IllegalStateException if the resource is missing or a line is malformed
     * @throws IOException if reading the resource fails
     */
    private fun loadTranscriptions(): Map<String, String> {
        val inputStream = this::class.java.getResourceAsStream(TRANSCRIPTION_RESOURCE_PATH)
            ?: error("Cannot find transcription resource: $TRANSCRIPTION_RESOURCE_PATH")

        return try {
            // useLines closes the reader (and the underlying stream) when the block finishes.
            inputStream.bufferedReader().useLines { lines ->
                lines
                    .filter { it.isNotBlank() }
                    .associate { line ->
                        val columns = line.split('\t')
                        check(columns.size >= 2) {
                            "Malformed line in $TRANSCRIPTION_RESOURCE_PATH (expected two tab-separated columns): '$line'"
                        }
                        columns[0] to columns[1]
                    }
            }
        } catch (e: IOException) {
            throw IOException("Failed to load transcription data from $TRANSCRIPTION_RESOURCE_PATH", e)
        }
    }
}
|
@ -0,0 +1,38 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
import com.marvinelsen.chinese.phonetics.toPinyinTone
|
||||||
|
|
||||||
|
/**
 * Formats a [Syllable] as Pinyin with a combining tone mark placed after the letter
 * that carries the tone. [Tone.FIFTH] syllables are returned without any mark.
 */
internal data object PinyinMarkSyllableFormatter : SyllableFormatter {
    /**
     * Returns the index of the letter in [pinyin] that carries the tone mark.
     *
     * Priority: `a`, `o`, `e`; then `i` (or the `u` directly following it, as in `iu`);
     * then `u`; then `ü`. Syllables without any vowel fall back to a syllabic nasal
     * (`m` or `n`), so that table entries such as `m` can be formatted with a tone.
     *
     * @throws IllegalStateException if [pinyin] contains none of these letters
     */
    private fun findToneCarrierIndex(pinyin: String): Int = when {
        'a' in pinyin -> pinyin.lastIndexOf('a')
        'o' in pinyin -> pinyin.lastIndexOf('o')
        'e' in pinyin -> pinyin.lastIndexOf('e')
        'i' in pinyin -> {
            val indexOfI = pinyin.lastIndexOf('i')
            // In "iu" the mark sits on the second vowel; in "ui" (and plain "i") it sits on "i".
            if (pinyin.getOrNull(indexOfI + 1) == 'u') pinyin.lastIndexOf('u') else indexOfI
        }
        'u' in pinyin -> pinyin.lastIndexOf('u')
        'ü' in pinyin -> pinyin.lastIndexOf('ü')
        // Vowel-less syllables (e.g. "m", "n", "ng"): the nasal itself carries the mark.
        'm' in pinyin -> pinyin.indexOf('m')
        'n' in pinyin -> pinyin.indexOf('n')
        else -> error("No vowel found in Pinyin syllable '$pinyin'")
    }

    /**
     * Returns the syllable's base Pinyin with its tone mark inserted.
     *
     * @throws IllegalStateException if the tone is not [Tone.FIFTH] and the base Pinyin has
     *   no letter that can carry a tone mark
     */
    override fun format(syllable: Syllable): String {
        if (syllable.tone == Tone.FIFTH) return syllable.basePinyin

        val carrierIndex = findToneCarrierIndex(syllable.basePinyin)

        // The mark is a combining character, so it goes directly after the letter it modifies.
        return StringBuilder(syllable.basePinyin)
            .insert(carrierIndex + 1, syllable.tone.toPinyinTone())
            .toString()
    }
}
|
@ -0,0 +1,8 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
import com.marvinelsen.chinese.phonetics.toPinyinNumber
|
||||||
|
|
||||||
|
/** Formats a [Syllable] as numbered Pinyin: the base Pinyin followed by the tone digit. */
internal data object PinyinNumberSyllableFormatter : SyllableFormatter {
    override fun format(syllable: Syllable): String {
        val toneDigit = syllable.tone.toPinyinNumber()
        return "${syllable.basePinyin}$toneDigit"
    }
}
|
@ -0,0 +1,7 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
|
||||||
|
/** Renders a [Syllable] in one specific notation. */
internal sealed interface SyllableFormatter {
    /** Returns the textual form of [syllable] in this formatter's notation. */
    fun format(syllable: Syllable): String
}
|
@ -0,0 +1,18 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
||||||
|
import com.marvinelsen.chinese.phonetics.toZhuyinTone
|
||||||
|
|
||||||
|
/**
 * Formats a [Syllable] as Zhuyin. The mark for [Tone.FIFTH] is written before the
 * syllable; the marks of all other tones are written after it.
 */
internal data object ZhuyinSyllableFormatter : SyllableFormatter {
    /**
     * @throws IllegalStateException if the syllable's base Pinyin is not in the transcription table
     */
    override fun format(syllable: Syllable): String {
        // A descriptive failure instead of a bare NullPointerException from `!!`.
        val zhuyinBase = TranscriptionDataRepository.pinyinToZhuyin[syllable.basePinyin]
            ?: error("'${syllable.basePinyin}' is not a valid Pinyin syllable")
        val zhuyinToneMark = syllable.tone.toZhuyinTone()

        return when (syllable.tone) {
            Tone.FIFTH -> zhuyinToneMark + zhuyinBase
            Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FOURTH -> zhuyinBase + zhuyinToneMark
        }
    }
}
|
@ -0,0 +1,25 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
||||||
|
|
||||||
|
/**
 * Parses numbered Pinyin syllables such as `sheng1`: a Pinyin spelling followed by
 * exactly one trailing tone digit.
 */
internal data object PinyinNumberSyllableParser : SyllableParser {
    /**
     * Returns the parsed syllable, or `null` when [input] is blank, does not end in a
     * digit that denotes a tone, or its spelling is not a known Pinyin syllable after
     * normalization.
     */
    @Suppress("ReturnCount", "MagicNumber")
    override fun parseOrNull(input: String): Syllable? {
        // A trailing digit also implies the input is not blank.
        val trailingCharacter = input.lastOrNull() ?: return null
        if (!trailingCharacter.isDigit()) return null

        val parsedTone = Tone.fromDigitOrNull(trailingCharacter) ?: return null
        val spelling = TranscriptionDataRepository.normalize(input.dropLast(1))

        return if (TranscriptionDataRepository.isValidPinyin(spelling)) {
            Syllable(basePinyin = spelling, tone = parsedTone)
        } else {
            null
        }
    }
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.exceptions.InvalidSyllableInputException
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
|
||||||
|
/** Parses one syllable written in a specific notation into a [Syllable]. */
internal sealed interface SyllableParser {
    /** Returns the parsed syllable, or `null` when [input] is not valid in this notation. */
    fun parseOrNull(input: String): Syllable?

    /**
     * Returns the parsed syllable.
     *
     * @throws InvalidSyllableInputException when [parseOrNull] yields `null` for [input]
     */
    fun parse(input: String): Syllable {
        val syllable = parseOrNull(input)
        if (syllable == null) {
            throw InvalidSyllableInputException("Invalid input for syllable parsing: '$input'")
        }
        return syllable
    }
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
||||||
|
|
||||||
|
/** Parses a single Zhuyin syllable with an optional leading or trailing tone mark. */
internal data object ZhuyinSyllableParser : SyllableParser {
    private val zhuyinToneMarkRegex = """[ˊˇˋ˙¯]""".toRegex()

    /**
     * Returns the parsed syllable, or `null` when [input] with every tone mark removed
     * is not a known Zhuyin syllable.
     *
     * The tone is taken from the last character if it is a tone mark, otherwise from the
     * first character; with no mark at either end the syllable is [Tone.FIRST].
     */
    override fun parseOrNull(input: String): Syllable? {
        val bareZhuyin = input.replace(zhuyinToneMarkRegex, "")

        // One lookup doubles as the validity check: unknown syllables have no entry.
        val pinyin = TranscriptionDataRepository.zhuyinToPinyin[bareZhuyin] ?: return null

        val trailingTone = Tone.fromZhuyinToneOrNull(input.last())
        val parsedTone = trailingTone ?: Tone.fromZhuyinToneOrNull(input.first()) ?: Tone.FIRST

        return Syllable(basePinyin = pinyin, tone = parsedTone)
    }
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/** Renders a [Tone] as the digit string used in numbered Pinyin (`"1"` to `"5"`). */
internal data object PinyinNumberToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String =
        when (tone) {
            Tone.FIRST -> "1"
            Tone.SECOND -> "2"
            Tone.THIRD -> "3"
            Tone.FOURTH -> "4"
            Tone.FIFTH -> "5"
        }
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/**
 * Renders a [Tone] as the combining diacritic used in Pinyin with tone marks.
 * [Tone.FIFTH] has no mark and yields the empty string.
 */
internal data object PinyinToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String =
        when (tone) {
            Tone.FIRST -> "\u0304" // combining macron
            Tone.SECOND -> "\u0301" // combining acute accent
            Tone.THIRD -> "\u030C" // combining caron
            Tone.FOURTH -> "\u0300" // combining grave accent
            Tone.FIFTH -> ""
        }
}
|
@ -0,0 +1,7 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/** Renders a [Tone] in one specific notation. */
internal sealed interface ToneFormatter {
    /** Returns the textual form of [tone]; may be empty when the notation leaves a tone unmarked. */
    fun format(tone: Tone): String
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/**
 * Renders a [Tone] as its Zhuyin tone mark.
 * [Tone.FIRST] is left unmarked and yields the empty string.
 */
internal data object ZhuyinToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String =
        when (tone) {
            Tone.FIRST -> ""
            Tone.SECOND -> "ˊ"
            Tone.THIRD -> "ˇ"
            Tone.FOURTH -> "ˋ"
            Tone.FIFTH -> "˙"
        }
}
|
@ -0,0 +1,14 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/** Maps the digit characters `'1'` to `'5'` to their [Tone]; any other character is rejected. */
internal data object DigitToneParser : ToneParser<Char> {
    override fun parseOrNull(input: Char): Tone? =
        when (input) {
            '1' -> Tone.FIRST
            '2' -> Tone.SECOND
            '3' -> Tone.THIRD
            '4' -> Tone.FOURTH
            '5' -> Tone.FIFTH
            else -> null
        }
}
|
@ -0,0 +1,15 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/** Maps the tone numbers `1` to `5` to their [Tone]; any other number is rejected. */
@Suppress("MagicNumber")
internal data object IntToneParser : ToneParser<Int> {
    override fun parseOrNull(input: Int): Tone? =
        when (input) {
            1 -> Tone.FIRST
            2 -> Tone.SECOND
            3 -> Tone.THIRD
            4 -> Tone.FOURTH
            5 -> Tone.FIFTH
            else -> null
        }
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/**
 * Maps a combining Pinyin tone diacritic to its [Tone].
 * No character maps to [Tone.FIFTH], so this parser never returns it.
 */
internal data object PinyinToneParser : ToneParser<Char> {
    override fun parseOrNull(input: Char): Tone? =
        when (input) {
            '\u0304' -> Tone.FIRST // combining macron
            '\u0301' -> Tone.SECOND // combining acute accent
            '\u030C' -> Tone.THIRD // combining caron
            '\u0300' -> Tone.FOURTH // combining grave accent
            else -> null
        }
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
import com.marvinelsen.chinese.phonetics.exceptions.InvalidToneInputException
|
||||||
|
|
||||||
|
/**
 * Parses a value of type [T] into a [Tone].
 *
 * @param T the input representation, e.g. a number or a mark character
 */
internal sealed interface ToneParser<T> {
    /** Returns the tone denoted by [input], or `null` when [input] denotes no tone. */
    fun parseOrNull(input: T): Tone?

    /**
     * Returns the tone denoted by [input].
     *
     * @throws InvalidToneInputException when [parseOrNull] yields `null` for [input]
     */
    fun parse(input: T): Tone {
        val tone = parseOrNull(input)
        if (tone == null) {
            throw InvalidToneInputException("Invalid input for tone parsing: '$input'")
        }
        return tone
    }
}
|
@ -0,0 +1,14 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.Tone
|
||||||
|
|
||||||
|
/** Maps a Zhuyin tone mark character to its [Tone]; any other character is rejected. */
internal data object ZhuyinToneParser : ToneParser<Char> {
    override fun parseOrNull(input: Char): Tone? =
        when (input) {
            '¯' -> Tone.FIRST
            'ˊ' -> Tone.SECOND
            'ˇ' -> Tone.THIRD
            'ˋ' -> Tone.FOURTH
            '˙' -> Tone.FIFTH
            else -> null
        }
}
|
@ -1,119 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.transliteration.api
|
|
||||||
|
|
||||||
import java.io.InputStream
|
|
||||||
|
|
||||||
/**
 * A Pinyin syllable split into its toneless spelling and its tone, with conversions
 * from numbered Pinyin and Zhuyin and formatting into every [TransliterationSystem].
 *
 * @property pinyinSyllableWithoutTone the spelling as given by the caller (case is preserved)
 * @property tone the tone of the syllable
 */
@Suppress("MagicNumber", "MaximumLineLength", "MaxLineLength")
data class PinyinSyllable(
    val pinyinSyllableWithoutTone: String,
    val tone: Tone,
) {
    companion object {
        private const val TRANSCRIPTION_RESOURCE_PATH = "/pinyin_zhuyin_transcriptions.tsv"

        private val pinyinToZhuyin = parseTranscriptions(
            this::class.java.getResourceAsStream(TRANSCRIPTION_RESOURCE_PATH)
                ?: error("Cannot find transcription resource: $TRANSCRIPTION_RESOURCE_PATH")
        )
        private val zhuyinToPinyin = pinyinToZhuyin.entries.associate { it.value to it.key }
        private val zhuyinToneMarkRegex = """[ˊˇˋ˙]""".toRegex()

        /**
         * Whether [pinyinSyllable] is a known Pinyin spelling (case-insensitive) followed by
         * a tone digit `1`-`5`. Empty input is simply invalid rather than an error, and only
         * the ASCII digits accepted by [Tone.fromDigit] count as tone digits.
         */
        fun isValidPinyinWithToneNumberSyllable(pinyinSyllable: String): Boolean =
            pinyinSyllable.isNotEmpty() &&
                pinyinSyllable.last() in '1'..'5' &&
                pinyinSyllable.dropLast(1).lowercase() in pinyinToZhuyin

        /**
         * Parses numbered Pinyin such as `sheng1`.
         *
         * @throws IllegalArgumentException if the input is empty, does not end in a tone digit
         *   `1`-`5`, or its spelling is not a known Pinyin syllable
         */
        fun fromPinyinWithToneNumber(pinyinWithToneNumber: String): PinyinSyllable {
            // Checked first: taking the last character of an empty string would otherwise
            // fail with an unrelated exception type before any validation runs.
            require(pinyinWithToneNumber.isNotEmpty()) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected a non-empty string."
            }

            val pinyinWithoutNumber = pinyinWithToneNumber.dropLast(1)
            val lastCharacter = pinyinWithToneNumber.last()

            require(lastCharacter.isDigit()) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the last character to be a digit, but was '${pinyinWithToneNumber.last()}'"
            }
            require(lastCharacter.digitToInt() in 1..5) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the tone number 'n' to be in range 1 <= n <= 5, but was '${pinyinWithToneNumber.last()}'"
            }
            require(
                pinyinWithoutNumber.lowercase() in pinyinToZhuyin
            ) { "'$pinyinWithoutNumber' is not a valid Pinyin syllable." }

            return PinyinSyllable(
                pinyinSyllableWithoutTone = pinyinWithoutNumber,
                tone = Tone.fromDigit(lastCharacter)
            )
        }

        /**
         * Parses a Zhuyin syllable. The tone mark may be the last or the first character;
         * without a mark the syllable is [Tone.FIRST].
         *
         * @throws IllegalArgumentException if [zhuyin] without tone marks is not a known syllable
         */
        fun fromZhuyin(zhuyin: String): PinyinSyllable {
            val zhuyinWithoutToneMark = zhuyin.replace(zhuyinToneMarkRegex, "")

            require(zhuyinWithoutToneMark in zhuyinToPinyin) { "'$zhuyin' is not a valid Zhuyin syllable." }

            return PinyinSyllable(
                zhuyinToPinyin[zhuyinWithoutToneMark]!!,
                Tone.fromZhuyinToneMarkOrNull(zhuyin.last()) ?: Tone.fromZhuyinToneMarkOrNull(zhuyin.first())
                    ?: Tone.FIRST
            )
        }

        /** Reads tab-separated lines into a map of first column -> second column. */
        private fun parseTranscriptions(inputStream: InputStream) =
            inputStream.bufferedReader().useLines { lines ->
                lines.map { it.split('\t') }
                    .associate { it[0] to it[1] }
            }
    }

    /** Formats this syllable in the requested [transliterationSystem]. */
    fun format(transliterationSystem: TransliterationSystem) = when (transliterationSystem) {
        TransliterationSystem.ZHUYIN -> formatToZhuyin()
        TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
        TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
    }

    /** Zhuyin form; the fifth-tone mark precedes the syllable, all other marks follow it. */
    private fun formatToZhuyin(): String {
        val zhuyinSyllable = pinyinToZhuyin[pinyinSyllableWithoutTone.lowercase()]
            ?: error("$pinyinSyllableWithoutTone is not a valid Pinyin syllable")
        val zhuyinToneMark = tone.format(TransliterationSystem.ZHUYIN)

        return when (tone) {
            Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FORTH -> zhuyinSyllable + zhuyinToneMark
            Tone.FIFTH -> zhuyinToneMark + zhuyinSyllable
        }
    }

    /** Numbered Pinyin form: the spelling followed by the tone digit. */
    private fun formatToPinyinWithToneNumbers(): String {
        check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
            "'$pinyinSyllableWithoutTone' is not a valid Pinyin syllable."
        }

        return pinyinSyllableWithoutTone + tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
    }

    /**
     * Pinyin with a combining tone mark inserted after the letter that carries the tone.
     * `v` and `u:` are rewritten to `ü` first; the letter analysis is case-insensitive,
     * matching the case-insensitive validity check.
     */
    private fun formatToPinyinWithToneMarks(): String {
        check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
            "'$pinyinSyllableWithoutTone' is not a valid Pinyin syllable."
        }

        // "r" has no vowel to carry a mark; with the unmarked fifth tone nothing needs inserting.
        if (pinyinSyllableWithoutTone.lowercase() == "r" && tone == Tone.FIFTH) {
            return pinyinSyllableWithoutTone
        }

        val sanitized = pinyinSyllableWithoutTone
            .replace("v", "ü", ignoreCase = true)
            .replace("u:", "ü", ignoreCase = true)

        // Analyse a lowercased copy so upper-case input picks the same letter as lower-case input.
        val lowered = sanitized.lowercase()
        val vowelIndex = when {
            'a' in lowered -> lowered.lastIndexOf('a')
            'o' in lowered -> lowered.lastIndexOf('o')
            'e' in lowered -> lowered.lastIndexOf('e')
            'i' in lowered -> {
                val indexOfI = lowered.lastIndexOf('i')
                // In "iu" the mark sits on the "u"; otherwise on the "i".
                if (lowered.getOrNull(indexOfI + 1) == 'u') lowered.lastIndexOf('u') else indexOfI
            }
            'u' in lowered -> lowered.lastIndexOf('u')
            'ü' in lowered -> lowered.lastIndexOf('ü')
            else -> error("No vowel found in Pinyin syllable '$sanitized'")
        }

        return buildString {
            append(sanitized)
            insert(vowelIndex + 1, tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS))
        }
    }
}
|
|
@ -1,81 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.transliteration.api
|
|
||||||
|
|
||||||
/**
 * A tone, convertible from tone numbers, digit characters and Zhuyin tone marks and
 * formattable in every [TransliterationSystem].
 */
@Suppress("MagicNumber")
enum class Tone {
    FIRST, SECOND, THIRD, FORTH, FIFTH;

    companion object {
        /** @throws IllegalArgumentException if [number] is not in `1..5` */
        fun fromInt(number: Int): Tone =
            requireNotNull(fromIntOrNull(number)) { "Number $number is not a valid tone" }

        /** Tone for the numbers `1`-`5`, otherwise `null`. */
        fun fromIntOrNull(number: Int): Tone? = when (number) {
            1 -> FIRST
            2 -> SECOND
            3 -> THIRD
            4 -> FORTH
            5 -> FIFTH
            else -> null
        }

        /** @throws IllegalArgumentException if [digit] is not one of `'1'`-`'5'` */
        fun fromDigit(digit: Char): Tone =
            requireNotNull(fromDigitOrNull(digit)) { "Digit $digit is not a valid tone" }

        /** Tone for the characters `'1'`-`'5'`, otherwise `null`. */
        fun fromDigitOrNull(digit: Char): Tone? = when (digit) {
            '1' -> FIRST
            '2' -> SECOND
            '3' -> THIRD
            '4' -> FORTH
            '5' -> FIFTH
            else -> null
        }

        /** @throws IllegalArgumentException if [zhuyinToneMark] is not a known tone mark */
        fun fromZhuyinToneMark(zhuyinToneMark: Char): Tone =
            requireNotNull(fromZhuyinToneMarkOrNull(zhuyinToneMark)) {
                "Invalid zhuyin tone mark '$zhuyinToneMark'"
            }

        /** Tone for a Zhuyin tone mark, otherwise `null`; no mark maps to [FIRST]. */
        fun fromZhuyinToneMarkOrNull(zhuyinToneMark: Char): Tone? = when (zhuyinToneMark) {
            'ˊ' -> SECOND
            'ˇ' -> THIRD
            'ˋ' -> FORTH
            '˙' -> FIFTH
            else -> null
        }
    }

    /** Tone number of this tone, `1` for [FIRST] up to `5` for [FIFTH]. */
    fun toInt(): Int = when (this) {
        FIRST -> 1
        SECOND -> 2
        THIRD -> 3
        FORTH -> 4
        FIFTH -> 5
    }

    /** Textual form of this tone in the requested [transliterationSystem]. */
    fun format(transliterationSystem: TransliterationSystem): String = when (transliterationSystem) {
        TransliterationSystem.ZHUYIN -> formatToZhuyin()
        TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
        TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
    }

    /** Digit string appended to numbered Pinyin. */
    private fun formatToPinyinWithToneNumbers(): String = when (this) {
        FIRST -> "1"
        SECOND -> "2"
        THIRD -> "3"
        FORTH -> "4"
        FIFTH -> "5"
    }

    /** Combining diacritic for Pinyin with tone marks; [FIFTH] is unmarked. */
    private fun formatToPinyinWithToneMarks(): String = when (this) {
        FIRST -> "\u0304"
        SECOND -> "\u0301"
        THIRD -> "\u030C"
        FORTH -> "\u0300"
        FIFTH -> ""
    }

    /** Zhuyin tone mark; [FIRST] is unmarked. */
    private fun formatToZhuyin(): String = when (this) {
        FIRST -> ""
        SECOND -> "ˊ"
        THIRD -> "ˇ"
        FORTH -> "ˋ"
        FIFTH -> "˙"
    }
}
|
|
@ -1,5 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.transliteration.api
|
|
||||||
|
|
||||||
/** The notations a syllable or tone can be formatted in. */
enum class TransliterationSystem {
    ZHUYIN,
    PINYIN_WITH_TONE_NUMBERS,
    PINYIN_WITH_TONE_MARKS
}
|
|
@ -1,5 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.transliteration.api
|
|
||||||
|
|
||||||
/** Constants related to Zhuyin text. */
object Zhuyin {
    /** String placed between Zhuyin syllables. */
    const val SEPARATOR = " "
}
|
|
@ -195,17 +195,9 @@ lu ㄌㄨ
|
|||||||
luan ㄌㄨㄢ
|
luan ㄌㄨㄢ
|
||||||
lun ㄌㄨㄣ
|
lun ㄌㄨㄣ
|
||||||
luo ㄌㄨㄛ
|
luo ㄌㄨㄛ
|
||||||
lu: ㄌㄩ
|
|
||||||
lv ㄌㄩ
|
|
||||||
lü ㄌㄩ
|
lü ㄌㄩ
|
||||||
lu:e ㄌㄩㄝ
|
|
||||||
lve ㄌㄩㄝ
|
|
||||||
lüe ㄌㄩㄝ
|
lüe ㄌㄩㄝ
|
||||||
lu:n ㄌㄩㄣ
|
|
||||||
lvn ㄌㄩㄣ
|
|
||||||
lün ㄌㄩㄣ
|
lün ㄌㄩㄣ
|
||||||
lu:an ㄌㄩㄢ
|
|
||||||
lvan ㄌㄩㄢ
|
|
||||||
lüan ㄌㄩㄢ
|
lüan ㄌㄩㄢ
|
||||||
m ㄇ
|
m ㄇ
|
||||||
ma ㄇㄚ
|
ma ㄇㄚ
|
||||||
@ -251,11 +243,7 @@ nu ㄋㄨ
|
|||||||
nuan ㄋㄨㄢ
|
nuan ㄋㄨㄢ
|
||||||
nun ㄋㄨㄣ
|
nun ㄋㄨㄣ
|
||||||
nuo ㄋㄨㄛ
|
nuo ㄋㄨㄛ
|
||||||
nu: ㄋㄩ
|
|
||||||
nv ㄋㄩ
|
|
||||||
nü ㄋㄩ
|
nü ㄋㄩ
|
||||||
nu:e ㄋㄩㄝ
|
|
||||||
nve ㄋㄩㄝ
|
|
||||||
nüe ㄋㄩㄝ
|
nüe ㄋㄩㄝ
|
||||||
o ㄛ
|
o ㄛ
|
||||||
ou ㄡ
|
ou ㄡ
|
||||||
|
|
@ -0,0 +1,113 @@
|
|||||||
|
package com.marvinelsen.chinese.phonetics
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinMarkSyllableFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinNumberSyllableFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.ZhuyinSyllableFormatter
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.PinyinNumberSyllableParser
|
||||||
|
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.ZhuyinSyllableParser
|
||||||
|
import io.kotest.core.spec.style.ShouldSpec
|
||||||
|
import io.kotest.datatest.withData
|
||||||
|
import io.kotest.matchers.shouldBe
|
||||||
|
|
||||||
|
/**
 * Data-driven tests for the syllable parsers and formatters.
 *
 * Expected Pinyin-with-tone-mark strings are written with explicit `\u03xx` escapes:
 * the formatter inserts a combining mark after the base letter, and a visually identical
 * precomposed character would not compare equal.
 */
class ChinesePhoneticsTest : ShouldSpec({
    context("from pinyin with tone numbers") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            "sheng1" to Syllable("sheng", Tone.FIRST),
            "zhi2" to Syllable("zhi", Tone.SECOND),
            "ka3" to Syllable("ka", Tone.THIRD),
            "yao4" to Syllable("yao", Tone.FOURTH),
            "me5" to Syllable("me", Tone.FIFTH),
            "Me5" to Syllable("me", Tone.FIFTH),
            "nv3" to Syllable("nü", Tone.THIRD),
            "nü3" to Syllable("nü", Tone.THIRD),
            "nu:3" to Syllable("nü", Tone.THIRD),
            "r5" to Syllable("r", Tone.FIFTH),
            "R5" to Syllable("r", Tone.FIFTH),
            "er2" to Syllable("er", Tone.SECOND),
            "Er2" to Syllable("er", Tone.SECOND),
        ) { (pinyinWithNumber, expectedSyllable) ->
            PinyinNumberSyllableParser.parse(pinyinWithNumber) shouldBe expectedSyllable
        }
    }
    context("from zhuyin") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            "ㄕㄥ" to Syllable("sheng", Tone.FIRST),
            "ㄓˊ" to Syllable("zhi", Tone.SECOND),
            "ㄎㄚˇ" to Syllable("ka", Tone.THIRD),
            "ㄧㄠˋ" to Syllable("yao", Tone.FOURTH),
            "ㄇㄜ˙" to Syllable("me", Tone.FIFTH),
            "˙ㄇㄜ" to Syllable("me", Tone.FIFTH),
            "ㄋㄩˇ" to Syllable("nü", Tone.THIRD),
        ) { (zhuyin, expectedSyllable) ->
            ZhuyinSyllableParser.parse(zhuyin) shouldBe expectedSyllable
        }
    }

    // TODO: no cases yet.
    context("from invalid pinyin with tone numbers") {
    }

    // TODO: no cases yet.
    context("from invalid zhuyin") {
    }

    context("format to zhuyin") {
        withData(
            nameFn = { "$it -> ${it.second}" },
            Syllable("sheng", Tone.FIRST) to "ㄕㄥ",
            Syllable("zhi", Tone.SECOND) to "ㄓˊ",
            Syllable("ka", Tone.THIRD) to "ㄎㄚˇ",
            Syllable("yao", Tone.FOURTH) to "ㄧㄠˋ",
            Syllable("me", Tone.FIFTH) to "˙ㄇㄜ",
            Syllable("nü", Tone.THIRD) to "ㄋㄩˇ",
            Syllable("r", Tone.FIFTH) to "˙ㄦ",
            Syllable("er", Tone.SECOND) to "ㄦˊ",
        ) { (syllable, expectedZhuyin) ->
            ZhuyinSyllableFormatter.format(syllable) shouldBe expectedZhuyin
        }
    }

    context("format to pinyin with tone numbers") {
        withData(
            nameFn = { "$it -> ${it.second}" },
            Syllable("sheng", Tone.FIRST) to "sheng1",
            Syllable("zhi", Tone.SECOND) to "zhi2",
            Syllable("ka", Tone.THIRD) to "ka3",
            Syllable("yao", Tone.FOURTH) to "yao4",
            Syllable("me", Tone.FIFTH) to "me5",
            Syllable("nü", Tone.THIRD) to "nü3",
            Syllable("r", Tone.FIFTH) to "r5",
            Syllable("er", Tone.SECOND) to "er2",
        ) { (syllable, expectedPinyinWithToneNumbers) ->
            PinyinNumberSyllableFormatter.format(syllable) shouldBe expectedPinyinWithToneNumbers
        }
    }

    context("format to pinyin with tone marks") {
        withData(
            nameFn = { "$it -> ${it.second}" },
            // Each expectation is the base letters with the combining mark directly after its carrier.
            Syllable("sheng", Tone.FIRST) to "she\u0304ng",
            Syllable("zhi", Tone.SECOND) to "zhi\u0301",
            Syllable("ka", Tone.THIRD) to "ka\u030C",
            Syllable("yao", Tone.FOURTH) to "ya\u0300o",
            Syllable("me", Tone.FIFTH) to "me",
            Syllable("zhui", Tone.FIRST) to "zhui\u0304",
            Syllable("liu", Tone.FIRST) to "liu\u0304",
            Syllable("nü", Tone.THIRD) to "nü\u030C",
            Syllable("r", Tone.FIFTH) to "r",
            Syllable("er", Tone.SECOND) to "e\u0301r",
        ) { (syllable, expectedPinyinWithToneMarks) ->
            PinyinMarkSyllableFormatter.format(syllable) shouldBe expectedPinyinWithToneMarks
        }
    }

    // TODO: no cases yet.
    context("format to zhuyin with invalid pinyin syllable") {
    }

    // TODO: no cases yet.
    context("format to pinyin with tone diacritics with invalid pinyin syllable") {
    }

    // TODO: no cases yet.
    context("format to pinyin with tone numbers with invalid pinyin syllable") {
    }
})
|
@ -1,5 +1,6 @@
|
|||||||
package com.marvinelsen.chinese.transliteration.api
|
package com.marvinelsen.chinese.phonetics
|
||||||
|
|
||||||
|
import com.marvinelsen.chinese.phonetics.exceptions.InvalidToneInputException
|
||||||
import io.kotest.assertions.throwables.shouldThrow
|
import io.kotest.assertions.throwables.shouldThrow
|
||||||
import io.kotest.core.spec.style.ShouldSpec
|
import io.kotest.core.spec.style.ShouldSpec
|
||||||
import io.kotest.datatest.withData
|
import io.kotest.datatest.withData
|
||||||
@ -13,7 +14,7 @@ class ToneTest : ShouldSpec({
|
|||||||
'1' to Tone.FIRST,
|
'1' to Tone.FIRST,
|
||||||
'2' to Tone.SECOND,
|
'2' to Tone.SECOND,
|
||||||
'3' to Tone.THIRD,
|
'3' to Tone.THIRD,
|
||||||
'4' to Tone.FORTH,
|
'4' to Tone.FOURTH,
|
||||||
'5' to Tone.FIFTH,
|
'5' to Tone.FIFTH,
|
||||||
) { (digit, expectedTone) ->
|
) { (digit, expectedTone) ->
|
||||||
Tone.fromDigit(digit) shouldBe expectedTone
|
Tone.fromDigit(digit) shouldBe expectedTone
|
||||||
@ -26,7 +27,7 @@ class ToneTest : ShouldSpec({
|
|||||||
1 to Tone.FIRST,
|
1 to Tone.FIRST,
|
||||||
2 to Tone.SECOND,
|
2 to Tone.SECOND,
|
||||||
3 to Tone.THIRD,
|
3 to Tone.THIRD,
|
||||||
4 to Tone.FORTH,
|
4 to Tone.FOURTH,
|
||||||
5 to Tone.FIFTH,
|
5 to Tone.FIFTH,
|
||||||
) { (number, expectedTone) ->
|
) { (number, expectedTone) ->
|
||||||
Tone.fromInt(number) shouldBe expectedTone
|
Tone.fromInt(number) shouldBe expectedTone
|
||||||
@ -36,12 +37,25 @@ class ToneTest : ShouldSpec({
|
|||||||
context("convert correctly from Zhuyin tone mark") {
|
context("convert correctly from Zhuyin tone mark") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||||
|
'¯' to Tone.FIRST,
|
||||||
'ˊ' to Tone.SECOND,
|
'ˊ' to Tone.SECOND,
|
||||||
'ˇ' to Tone.THIRD,
|
'ˇ' to Tone.THIRD,
|
||||||
'ˋ' to Tone.FORTH,
|
'ˋ' to Tone.FOURTH,
|
||||||
'˙' to Tone.FIFTH,
|
'˙' to Tone.FIFTH,
|
||||||
) { (zhuyinToneMark, expectedTone) ->
|
) { (zhuyinToneMark, expectedTone) ->
|
||||||
Tone.fromZhuyinToneMark(zhuyinToneMark) shouldBe expectedTone
|
Tone.fromZhuyinTone(zhuyinToneMark) shouldBe expectedTone
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("convert correctly from Pinyin tone mark") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||||
|
'\u0304' to Tone.FIRST,
|
||||||
|
'\u0301' to Tone.SECOND,
|
||||||
|
'\u030C' to Tone.THIRD,
|
||||||
|
'\u0300' to Tone.FOURTH,
|
||||||
|
) { (pinyinToneMark, expectedTone) ->
|
||||||
|
Tone.fromPinyinTone(pinyinToneMark) shouldBe expectedTone
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,13 +65,52 @@ class ToneTest : ShouldSpec({
|
|||||||
Tone.FIRST to 1,
|
Tone.FIRST to 1,
|
||||||
Tone.SECOND to 2,
|
Tone.SECOND to 2,
|
||||||
Tone.THIRD to 3,
|
Tone.THIRD to 3,
|
||||||
Tone.FORTH to 4,
|
Tone.FOURTH to 4,
|
||||||
Tone.FIFTH to 5,
|
Tone.FIFTH to 5,
|
||||||
) { (tone, expectedInteger) ->
|
) { (tone, expectedInteger) ->
|
||||||
tone.toInt() shouldBe expectedInteger
|
tone.toInt() shouldBe expectedInteger
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
context("convert correctly to Zhuyin tone mark") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first} -> '${it.second}'" },
|
||||||
|
Tone.FIRST to "",
|
||||||
|
Tone.SECOND to "ˊ",
|
||||||
|
Tone.THIRD to "ˇ",
|
||||||
|
Tone.FOURTH to "ˋ",
|
||||||
|
Tone.FIFTH to "˙",
|
||||||
|
) { (tone, zhuyinTone) ->
|
||||||
|
tone.toZhuyinTone() shouldBe zhuyinTone
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("convert correctly to Pinyin tone mark") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first} -> '${it.second}'" },
|
||||||
|
Tone.FIRST to "\u0304",
|
||||||
|
Tone.SECOND to "\u0301",
|
||||||
|
Tone.THIRD to "\u030C",
|
||||||
|
Tone.FOURTH to "\u0300",
|
||||||
|
Tone.FIFTH to "",
|
||||||
|
) { (tone, pinyinTone) ->
|
||||||
|
tone.toPinyinTone() shouldBe pinyinTone
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("convert correctly to Pinyin number") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first} -> '${it.second}'" },
|
||||||
|
Tone.FIRST to "1",
|
||||||
|
Tone.SECOND to "2",
|
||||||
|
Tone.THIRD to "3",
|
||||||
|
Tone.FOURTH to "4",
|
||||||
|
Tone.FIFTH to "5",
|
||||||
|
) { (tone, pinyinNumber) ->
|
||||||
|
tone.toPinyinNumber() shouldBe pinyinNumber
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
context("return null when converting from invalid digit ") {
|
context("return null when converting from invalid digit ") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'$it' -> null" },
|
nameFn = { "'$it' -> null" },
|
||||||
@ -74,7 +127,7 @@ class ToneTest : ShouldSpec({
|
|||||||
|
|
||||||
context("return null when converting from invalid int ") {
|
context("return null when converting from invalid int ") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'$it' -> throws exception" },
|
nameFn = { "'$it' -> null" },
|
||||||
0,
|
0,
|
||||||
6,
|
6,
|
||||||
-1,
|
-1,
|
||||||
@ -87,7 +140,7 @@ class ToneTest : ShouldSpec({
|
|||||||
|
|
||||||
context("return null when converting from invalid Zhuyin tone mark ") {
|
context("return null when converting from invalid Zhuyin tone mark ") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'$it' -> throws exception" },
|
nameFn = { "'$it' -> null" },
|
||||||
'0',
|
'0',
|
||||||
'6',
|
'6',
|
||||||
'a',
|
'a',
|
||||||
@ -95,7 +148,21 @@ class ToneTest : ShouldSpec({
|
|||||||
'$',
|
'$',
|
||||||
'*',
|
'*',
|
||||||
) { invalidZhuyinToneMark ->
|
) { invalidZhuyinToneMark ->
|
||||||
Tone.fromZhuyinToneMarkOrNull(invalidZhuyinToneMark).shouldBeNull()
|
Tone.fromZhuyinToneOrNull(invalidZhuyinToneMark).shouldBeNull()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("return null when converting from invalid Pinyin tone mark ") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "'$it' -> null" },
|
||||||
|
'0',
|
||||||
|
'6',
|
||||||
|
'a',
|
||||||
|
'z',
|
||||||
|
'$',
|
||||||
|
'*',
|
||||||
|
) { invalidPinyinToneMark ->
|
||||||
|
Tone.fromPinyinToneOrNull(invalidPinyinToneMark).shouldBeNull()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,7 +176,7 @@ class ToneTest : ShouldSpec({
|
|||||||
'$',
|
'$',
|
||||||
'*',
|
'*',
|
||||||
) { invalidDigit ->
|
) { invalidDigit ->
|
||||||
shouldThrow<IllegalArgumentException> {
|
shouldThrow<InvalidToneInputException> {
|
||||||
Tone.fromDigit(invalidDigit)
|
Tone.fromDigit(invalidDigit)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -124,7 +191,7 @@ class ToneTest : ShouldSpec({
|
|||||||
Int.MAX_VALUE,
|
Int.MAX_VALUE,
|
||||||
Int.MIN_VALUE,
|
Int.MIN_VALUE,
|
||||||
) { invalidNumber ->
|
) { invalidNumber ->
|
||||||
shouldThrow<IllegalArgumentException> {
|
shouldThrow<InvalidToneInputException> {
|
||||||
Tone.fromInt(invalidNumber)
|
Tone.fromInt(invalidNumber)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -140,48 +207,25 @@ class ToneTest : ShouldSpec({
|
|||||||
'$',
|
'$',
|
||||||
'*',
|
'*',
|
||||||
) { invalidZhuyinToneMark ->
|
) { invalidZhuyinToneMark ->
|
||||||
shouldThrow<IllegalArgumentException> {
|
shouldThrow<InvalidToneInputException> {
|
||||||
Tone.fromZhuyinToneMark(invalidZhuyinToneMark)
|
Tone.fromZhuyinTone(invalidZhuyinToneMark)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
context("format to Zhuyin correctly") {
|
context("throw exception when converting from invalid Pinyin tone mark ") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "${it.first} -> '${it.second}'" },
|
nameFn = { "'$it' -> throws exception" },
|
||||||
Tone.FIRST to "",
|
'0',
|
||||||
Tone.SECOND to "ˊ",
|
'6',
|
||||||
Tone.THIRD to "ˇ",
|
'a',
|
||||||
Tone.FORTH to "ˋ",
|
'z',
|
||||||
Tone.FIFTH to "˙",
|
'$',
|
||||||
) { (tone, expectedZhuyinToneMark) ->
|
'*',
|
||||||
tone.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyinToneMark
|
) { invalidPinyinToneMark ->
|
||||||
|
shouldThrow<InvalidToneInputException> {
|
||||||
|
Tone.fromPinyinTone(invalidPinyinToneMark)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
context("format to Pinyin with tone numbers correctly") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first} -> '${it.second}'" },
|
|
||||||
Tone.FIRST to "1",
|
|
||||||
Tone.SECOND to "2",
|
|
||||||
Tone.THIRD to "3",
|
|
||||||
Tone.FORTH to "4",
|
|
||||||
Tone.FIFTH to "5",
|
|
||||||
) { (tone, expectedNumber) ->
|
|
||||||
tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedNumber
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to Pinyin with tone marks correctly") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first} -> '${it.second}'" },
|
|
||||||
Tone.FIRST to "\u0304",
|
|
||||||
Tone.SECOND to "\u0301",
|
|
||||||
Tone.THIRD to "\u030C",
|
|
||||||
Tone.FORTH to "\u0300",
|
|
||||||
Tone.FIFTH to "",
|
|
||||||
) { (tone, expectedAccent) ->
|
|
||||||
tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedAccent
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
})
|
@ -1,119 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.transliteration.api
|
|
||||||
|
|
||||||
import io.kotest.core.spec.style.ShouldSpec
|
|
||||||
import io.kotest.datatest.withData
|
|
||||||
import io.kotest.matchers.shouldBe
|
|
||||||
|
|
||||||
class PinyinSyllableTest : ShouldSpec({
|
|
||||||
context("from pinyin with tone numbers") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
|
||||||
"sheng1" to PinyinSyllable("sheng", Tone.FIRST),
|
|
||||||
"zhi2" to PinyinSyllable("zhi", Tone.SECOND),
|
|
||||||
"ka3" to PinyinSyllable("ka", Tone.THIRD),
|
|
||||||
"yao4" to PinyinSyllable("yao", Tone.FORTH),
|
|
||||||
"me5" to PinyinSyllable("me", Tone.FIFTH),
|
|
||||||
"Me5" to PinyinSyllable("Me", Tone.FIFTH),
|
|
||||||
"nv3" to PinyinSyllable("nv", Tone.THIRD),
|
|
||||||
"nü3" to PinyinSyllable("nü", Tone.THIRD),
|
|
||||||
"nu:3" to PinyinSyllable("nu:", Tone.THIRD),
|
|
||||||
"r5" to PinyinSyllable("r", Tone.FIFTH),
|
|
||||||
"R5" to PinyinSyllable("R", Tone.FIFTH),
|
|
||||||
"er2" to PinyinSyllable("er", Tone.SECOND),
|
|
||||||
"Er2" to PinyinSyllable("Er", Tone.SECOND),
|
|
||||||
) { (pinyinWithNumber, expectedSyllable) ->
|
|
||||||
PinyinSyllable.fromPinyinWithToneNumber(pinyinWithNumber) shouldBe expectedSyllable
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("from zhuyin") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
|
||||||
"ㄕㄥ" to PinyinSyllable("sheng", Tone.FIRST),
|
|
||||||
"ㄓˊ" to PinyinSyllable("zhi", Tone.SECOND),
|
|
||||||
"ㄎㄚˇ" to PinyinSyllable("ka", Tone.THIRD),
|
|
||||||
"ㄧㄠˋ" to PinyinSyllable("yao", Tone.FORTH),
|
|
||||||
"ㄇㄜ˙" to PinyinSyllable("me", Tone.FIFTH),
|
|
||||||
"˙ㄇㄜ" to PinyinSyllable("me", Tone.FIFTH),
|
|
||||||
"ㄋㄩˇ" to PinyinSyllable("nü", Tone.THIRD),
|
|
||||||
) { (zhuyin, expectedSyllable) ->
|
|
||||||
PinyinSyllable.fromZhuyin(zhuyin) shouldBe expectedSyllable
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("from invalid pinyin with tone numbers") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("from invalid zhuyin") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to zhuyin") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
|
||||||
PinyinSyllable("sheng", Tone.FIRST) to "ㄕㄥ",
|
|
||||||
PinyinSyllable("zhi", Tone.SECOND) to "ㄓˊ",
|
|
||||||
PinyinSyllable("ka", Tone.THIRD) to "ㄎㄚˇ",
|
|
||||||
PinyinSyllable("yao", Tone.FORTH) to "ㄧㄠˋ",
|
|
||||||
PinyinSyllable("me", Tone.FIFTH) to "˙ㄇㄜ",
|
|
||||||
PinyinSyllable("nü", Tone.THIRD) to "ㄋㄩˇ",
|
|
||||||
PinyinSyllable("nu:", Tone.THIRD) to "ㄋㄩˇ",
|
|
||||||
PinyinSyllable("nv", Tone.THIRD) to "ㄋㄩˇ",
|
|
||||||
PinyinSyllable("r", Tone.FIFTH) to "˙ㄦ",
|
|
||||||
PinyinSyllable("R", Tone.FIFTH) to "˙ㄦ",
|
|
||||||
PinyinSyllable("er", Tone.SECOND) to "ㄦˊ",
|
|
||||||
PinyinSyllable("Er", Tone.SECOND) to "ㄦˊ",
|
|
||||||
) { (syllable, expectedZhuyin) ->
|
|
||||||
syllable.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyin
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone numbers") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
|
||||||
PinyinSyllable("sheng", Tone.FIRST) to "sheng1",
|
|
||||||
PinyinSyllable("zhi", Tone.SECOND) to "zhi2",
|
|
||||||
PinyinSyllable("ka", Tone.THIRD) to "ka3",
|
|
||||||
PinyinSyllable("yao", Tone.FORTH) to "yao4",
|
|
||||||
PinyinSyllable("me", Tone.FIFTH) to "me5",
|
|
||||||
PinyinSyllable("nü", Tone.THIRD) to "nü3",
|
|
||||||
PinyinSyllable("nu:", Tone.THIRD) to "nu:3",
|
|
||||||
PinyinSyllable("nv", Tone.THIRD) to "nv3",
|
|
||||||
PinyinSyllable("r", Tone.FIFTH) to "r5",
|
|
||||||
PinyinSyllable("R", Tone.FIFTH) to "R5",
|
|
||||||
PinyinSyllable("er", Tone.SECOND) to "er2",
|
|
||||||
PinyinSyllable("Er", Tone.SECOND) to "Er2",
|
|
||||||
) { (syllable, expectedPinyinWithToneNumbers) ->
|
|
||||||
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedPinyinWithToneNumbers
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone marks") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
|
||||||
PinyinSyllable("sheng", Tone.FIRST) to "shēng",
|
|
||||||
PinyinSyllable("zhi", Tone.SECOND) to "zhí",
|
|
||||||
PinyinSyllable("ka", Tone.THIRD) to "kǎ",
|
|
||||||
PinyinSyllable("yao", Tone.FORTH) to "yào",
|
|
||||||
PinyinSyllable("me", Tone.FIFTH) to "me",
|
|
||||||
PinyinSyllable("zhui", Tone.FIRST) to "zhuī",
|
|
||||||
PinyinSyllable("liu", Tone.FIRST) to "liū",
|
|
||||||
PinyinSyllable("nü", Tone.THIRD) to "nǚ",
|
|
||||||
PinyinSyllable("nu:", Tone.THIRD) to "nǚ",
|
|
||||||
PinyinSyllable("nv", Tone.THIRD) to "nǚ",
|
|
||||||
PinyinSyllable("r", Tone.FIFTH) to "r",
|
|
||||||
PinyinSyllable("er", Tone.SECOND) to "ér",
|
|
||||||
PinyinSyllable("Er", Tone.SECOND) to "Ér",
|
|
||||||
) { (syllable, expectedPinyinWithToneMarks) ->
|
|
||||||
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedPinyinWithToneMarks
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to zhuyin with invalid pinyin syllable") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone diacritics with invalid pinyin syllable") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone numbers with invalid pinyin syllable") {
|
|
||||||
}
|
|
||||||
})
|
|
Loading…
x
Reference in New Issue
Block a user