Compare commits

..

No commits in common. "c6347a6b13fea83453ae133c5c3206af8a8f0f83" and "78020d6843b95b9de7077841065e7af1752d8d50" have entirely different histories.

4 changed files with 20 additions and 19 deletions

View File

@ -5,7 +5,7 @@ plugins {
} }
group = "com.marvinelsen" group = "com.marvinelsen"
version = "2.0.0" version = "1.0.1"
repositories { repositories {
mavenCentral() mavenCentral()

View File

@ -1,12 +1,12 @@
package com.marvinelsen.cedict.api package com.marvinelsen.cedict.api
import com.marvinelsen.cedict.internal.RegexCedictParser import com.marvinelsen.cedict.internal.CedictParserImpl
import java.io.InputStream import java.io.InputStream
interface CedictParser { interface CedictParser {
fun parseCedict(inputStream: InputStream): Sequence<CedictEntry> fun parseCedict(inputStream: InputStream): List<CedictEntry>
companion object { companion object {
val instance: CedictParser by lazy { RegexCedictParser() } val instance: CedictParser by lazy { CedictParserImpl() }
} }
} }

View File

@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser
import com.marvinelsen.chinese.transliteration.Syllable import com.marvinelsen.chinese.transliteration.Syllable
import java.io.InputStream import java.io.InputStream
internal class RegexCedictParser : CedictParser { internal class CedictParserImpl : CedictParser {
companion object { companion object {
private const val DEFINITION_SEPARATOR = '/' private const val DEFINITION_SEPARATOR = '/'
private const val GLOSS_SEPARATOR = ';' private const val GLOSS_SEPARATOR = ';'
@ -20,11 +20,11 @@ internal class RegexCedictParser : CedictParser {
} }
override fun parseCedict(inputStream: InputStream) = override fun parseCedict(inputStream: InputStream) =
inputStream inputStream.bufferedReader().useLines { lines ->
.bufferedReader() lines.filterNot(::isComment)
.lineSequence()
.filterNot(::isComment)
.map(::toCedictEntry) .map(::toCedictEntry)
.toList()
}
private fun isComment(line: String) = line[0] == COMMENT_MARKER private fun isComment(line: String) = line[0] == COMMENT_MARKER
@ -48,5 +48,9 @@ internal class RegexCedictParser : CedictParser {
private fun toCedictDefinitions(definitions: String) = definitions private fun toCedictDefinitions(definitions: String) = definitions
.split(DEFINITION_SEPARATOR) .split(DEFINITION_SEPARATOR)
.map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) } .map {
CedictDefinition(
glosses = it.split(GLOSS_SEPARATOR).map(String::trim)
)
}
} }

View File

@ -7,9 +7,9 @@ import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe import io.kotest.matchers.shouldBe
import java.util.zip.GZIPInputStream import java.util.zip.GZIPInputStream
class RegexCedictParserTest : ShouldSpec({ class CedictParserImplTest : ShouldSpec({
should("parse lines correctly") { should("parse lines correctly") {
val cedictParser = RegexCedictParser() val cedictParser = CedictParserImpl()
val cedictEntry = val cedictEntry =
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/") cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
@ -29,12 +29,9 @@ class RegexCedictParserTest : ShouldSpec({
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz" val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath)) val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
val cedictParser = RegexCedictParser() val cedictParser = CedictParserImpl()
cedictFileStream.use {
val cedictEntries = cedictParser.parseCedict(cedictFileStream) val cedictEntries = cedictParser.parseCedict(cedictFileStream)
cedictEntries.toList().size shouldBe 122_508 cedictEntries.size shouldBe 122_508
}
} }
}) })