Compare commits

..

No commits in common. "c6347a6b13fea83453ae133c5c3206af8a8f0f83" and "78020d6843b95b9de7077841065e7af1752d8d50" have entirely different histories.

4 changed files with 20 additions and 19 deletions

View File

@@ -5,7 +5,7 @@ plugins {
}
group = "com.marvinelsen"
version = "2.0.0"
version = "1.0.1"
repositories {
mavenCentral()

View File

@@ -1,12 +1,12 @@
package com.marvinelsen.cedict.api
import com.marvinelsen.cedict.internal.RegexCedictParser
import com.marvinelsen.cedict.internal.CedictParserImpl
import java.io.InputStream
interface CedictParser {
fun parseCedict(inputStream: InputStream): Sequence<CedictEntry>
fun parseCedict(inputStream: InputStream): List<CedictEntry>
companion object {
val instance: CedictParser by lazy { RegexCedictParser() }
val instance: CedictParser by lazy { CedictParserImpl() }
}
}

View File

@@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser
import com.marvinelsen.chinese.transliteration.Syllable
import java.io.InputStream
internal class RegexCedictParser : CedictParser {
internal class CedictParserImpl : CedictParser {
companion object {
private const val DEFINITION_SEPARATOR = '/'
private const val GLOSS_SEPARATOR = ';'
@@ -20,11 +20,11 @@ internal class RegexCedictParser : CedictParser {
}
override fun parseCedict(inputStream: InputStream) =
inputStream
.bufferedReader()
.lineSequence()
.filterNot(::isComment)
inputStream.bufferedReader().useLines { lines ->
lines.filterNot(::isComment)
.map(::toCedictEntry)
.toList()
}
private fun isComment(line: String) = line[0] == COMMENT_MARKER
@@ -48,5 +48,9 @@ internal class RegexCedictParser : CedictParser {
private fun toCedictDefinitions(definitions: String) = definitions
.split(DEFINITION_SEPARATOR)
.map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) }
.map {
CedictDefinition(
glosses = it.split(GLOSS_SEPARATOR).map(String::trim)
)
}
}

View File

@@ -7,9 +7,9 @@ import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe
import java.util.zip.GZIPInputStream
class RegexCedictParserTest : ShouldSpec({
class CedictParserImplTest : ShouldSpec({
should("parse lines correctly") {
val cedictParser = RegexCedictParser()
val cedictParser = CedictParserImpl()
val cedictEntry =
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
@@ -29,12 +29,9 @@ class RegexCedictParserTest : ShouldSpec({
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
val cedictParser = RegexCedictParser()
cedictFileStream.use {
val cedictParser = CedictParserImpl()
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
cedictEntries.toList().size shouldBe 122_508
}
cedictEntries.size shouldBe 122_508
}
})