Compare commits

..

8 Commits

Author SHA1 Message Date
3f8be7049b
Make toCedictDefinitions method more concise
All checks were successful
Pull Request / build (pull_request) Successful in 2m16s
2024-09-24 12:05:57 +02:00
9fda246045
Close stream properly in test 2024-09-24 12:05:57 +02:00
ebf608b2fa
Rename test 2024-09-24 12:05:57 +02:00
9d3f1ec456
Rename CedictParserImpl to RegexCedictParser 2024-09-24 12:05:57 +02:00
200cd7d06b
Fix test 2024-09-24 12:05:57 +02:00
8592f4fe67
Bump version 2024-09-24 12:05:57 +02:00
73e627b6be
Return sequence instead of list from parse method 2024-09-24 12:05:57 +02:00
7c16182d61
Do not close input stream in parse method 2024-09-24 12:05:56 +02:00
4 changed files with 19 additions and 20 deletions

View File

@ -5,7 +5,7 @@ plugins {
} }
group = "com.marvinelsen" group = "com.marvinelsen"
version = "1.0.1" version = "2.0.0"
repositories { repositories {
mavenCentral() mavenCentral()

View File

@ -1,12 +1,12 @@
package com.marvinelsen.cedict.api package com.marvinelsen.cedict.api
import com.marvinelsen.cedict.internal.CedictParserImpl import com.marvinelsen.cedict.internal.RegexCedictParser
import java.io.InputStream import java.io.InputStream
interface CedictParser { interface CedictParser {
fun parseCedict(inputStream: InputStream): List<CedictEntry> fun parseCedict(inputStream: InputStream): Sequence<CedictEntry>
companion object { companion object {
val instance: CedictParser by lazy { CedictParserImpl() } val instance: CedictParser by lazy { RegexCedictParser() }
} }
} }

View File

@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser
import com.marvinelsen.chinese.transliteration.Syllable import com.marvinelsen.chinese.transliteration.Syllable
import java.io.InputStream import java.io.InputStream
internal class CedictParserImpl : CedictParser { internal class RegexCedictParser : CedictParser {
companion object { companion object {
private const val DEFINITION_SEPARATOR = '/' private const val DEFINITION_SEPARATOR = '/'
private const val GLOSS_SEPARATOR = ';' private const val GLOSS_SEPARATOR = ';'
@ -20,11 +20,11 @@ internal class CedictParserImpl : CedictParser {
} }
override fun parseCedict(inputStream: InputStream) = override fun parseCedict(inputStream: InputStream) =
inputStream.bufferedReader().useLines { lines -> inputStream
lines.filterNot(::isComment) .bufferedReader()
.lineSequence()
.filterNot(::isComment)
.map(::toCedictEntry) .map(::toCedictEntry)
.toList()
}
private fun isComment(line: String) = line[0] == COMMENT_MARKER private fun isComment(line: String) = line[0] == COMMENT_MARKER
@ -48,9 +48,5 @@ internal class CedictParserImpl : CedictParser {
private fun toCedictDefinitions(definitions: String) = definitions private fun toCedictDefinitions(definitions: String) = definitions
.split(DEFINITION_SEPARATOR) .split(DEFINITION_SEPARATOR)
.map { .map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) }
CedictDefinition(
glosses = it.split(GLOSS_SEPARATOR).map(String::trim)
)
}
} }

View File

@ -7,9 +7,9 @@ import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe import io.kotest.matchers.shouldBe
import java.util.zip.GZIPInputStream import java.util.zip.GZIPInputStream
class CedictParserImplTest : ShouldSpec({ class RegexCedictParserTest : ShouldSpec({
should("parse lines correctly") { should("parse lines correctly") {
val cedictParser = CedictParserImpl() val cedictParser = RegexCedictParser()
val cedictEntry = val cedictEntry =
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/") cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
@ -29,9 +29,12 @@ class CedictParserImplTest : ShouldSpec({
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz" val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath)) val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
val cedictParser = CedictParserImpl() val cedictParser = RegexCedictParser()
cedictFileStream.use {
val cedictEntries = cedictParser.parseCedict(cedictFileStream) val cedictEntries = cedictParser.parseCedict(cedictFileStream)
cedictEntries.size shouldBe 122_508 cedictEntries.toList().size shouldBe 122_508
}
} }
}) })