avwx.parsing.sanitization.cleaners.joined

Cleaners where two items are joined.

 1"""Cleaners where two items are joined."""
 2
 3from __future__ import annotations
 4
 5import re
 6
 7from avwx.parsing.core import is_timerange, is_timestamp
 8from avwx.parsing.sanitization.base import SplitItem
 9from avwx.static.core import CLOUD_LIST
10from avwx.static.taf import TAF_NEWLINE, TAF_NEWLINE_STARTSWITH
11
# Regex alternation group built from every entry in CLOUD_LIST.
_CLOUD_GROUP = f"({'|'.join(CLOUD_LIST)})"

# Patterns locating the start of an element stuck to the end of a cloud item.
# Each is anchored to the end of the string and is tried in list order.
CLOUD_SPACE_PATTERNS = [
    # A second cloud layer at the end of the item, e.g. SCT010BKN021
    re.compile(f"(?=.+){_CLOUD_GROUP}" + r"\d{3}(\w{2,3})?$"),
    # A two-digit pair separated by a slash at the end, e.g. BKN01826/25
    re.compile(r"M?\d{2}\/M?\d{2}$"),
]
20
21
class JoinedCloud(SplitItem):
    """Split items that start with a cloud code and have another element attached."""

    def split_at(self, item: str) -> int | None:
        """Return the index where the attached element begins, or None.

        Only items whose first three characters are in CLOUD_LIST are
        considered. The first pattern in CLOUD_SPACE_PATTERNS that matches
        at a non-zero index decides the split position.
        """
        if item[:3] not in CLOUD_LIST:
            return None
        for regex in CLOUD_SPACE_PATTERNS:
            found = regex.search(item)
            # A match starting at index 0 leaves nothing to split off in
            # front of it, so move on to the next pattern.
            if found is not None and found.start():
                return found.start()
        return None
34
35
# (split index, validator) pairs, tried in this order by JoinedTimestamp.
_TIMESTAMP_BREAKS = ((7, is_timestamp), (9, is_timerange))


class JoinedTimestamp(SplitItem):
    """Split items where a timestamp or time range is fused to what follows."""

    def split_at(self, item: str) -> int | None:
        """Return the length of the leading time element, or None.

        For each (index, validator) pair, the item must be strictly longer
        than the index and its prefix of that length must pass the validator.
        """
        for cut, is_valid in _TIMESTAMP_BREAKS:
            if len(item) > cut and is_valid(item[:cut]):
                return cut
        return None
47
48
class JoinedWind(SplitItem):
    """Split items where text is attached directly after a "KT" wind suffix."""

    def split_at(self, item: str) -> int | None:
        """Return the index just past the first "KT", or None.

        A split is reported only when the item is longer than five
        characters, does not end with "KT", and the first "KT" begins after
        index 4.
        """
        if len(item) <= 5 or item.endswith("KT"):
            return None
        kt_pos = item.find("KT")
        # find() yields -1 when "KT" is absent, which also fails this check.
        if kt_pos > 4:
            return kt_pos + 2
        return None
58
59
class JoinedTafNewLine(SplitItem):
    """Split items where a TAF newline keyword is fused to the preceding text."""

    def split_at(self, item: str) -> int | None:
        """Return the index where a TAF newline keyword begins, or None.

        Keys from TAF_NEWLINE split wherever they first appear past the
        start of the item. Keys from TAF_NEWLINE_STARTSWITH additionally
        require that everything after the key is made up of digits.
        """
        for marker in TAF_NEWLINE:
            position = item.find(marker)
            # position > 0: the key is present and the item does not start with it.
            if position > 0:
                return position
        for prefix in TAF_NEWLINE_STARTSWITH:
            position = item.find(prefix)
            if position > 0 and item[position + len(prefix) :].isdigit():
                return position
        return None
73
74
class JoinedMinMaxTemperature(SplitItem):
    """Split items holding both a TX and a TN temperature group."""

    def split_at(self, item: str) -> int | None:
        """Return the index of whichever of "TX"/"TN" comes later, or None.

        The item must contain "TX", "TN" and "/" and must end with "Z".
        """
        required = ("TX", "TN", "/")
        if not item.endswith("Z") or any(part not in item for part in required):
            return None
        # The later of the two markers is where the second group starts.
        return max(item.find("TX"), item.find("TN"))
83
84
# "R", two digits, an optional R/C/L designator, "/", then non-space characters.
RVR_PATTERN = re.compile(r"R\d{2}[RCL]?/\S+")


class JoinedRunwayVisibility(SplitItem):
    """Split runway visibility elements that are stuck together.

    Example: R36/1500DR18/P2000
    """

    def split_at(self, item: str) -> int | None:
        """Return the index where a second RVR element begins, or None."""
        # Search from the second character so an RVR element at the very
        # start of the item is not reported as the split point.
        tail_match = RVR_PATTERN.search(item[1:])
        if tail_match is None:
            return None
        # Shift by one to convert back to an index into the full item.
        return tail_match.start() + 1
CLOUD_SPACE_PATTERNS = [re.compile('(?=.+)(FEW|SCT|BKN|OVC)\\d{3}(\\w{2,3})?$'), re.compile('M?\\d{2}\\/M?\\d{2}$')]
class JoinedCloud(avwx.parsing.sanitization.cleaners.base.SplitItem):
23class JoinedCloud(SplitItem):
24    """For items starting with cloud list."""
25
26    def split_at(self, item: str) -> int | None:
27        if item[:3] in CLOUD_LIST:
28            for pattern in CLOUD_SPACE_PATTERNS:
29                match = pattern.search(item)
30                if match is None:
31                    continue
32                if match.start():
33                    return match.start()
34        return None

For items starting with cloud list.

def split_at(self, item: str) -> int | None:
26    def split_at(self, item: str) -> int | None:
27        if item[:3] in CLOUD_LIST:
28            for pattern in CLOUD_SPACE_PATTERNS:
29                match = pattern.search(item)
30                if match is None:
31                    continue
32                if match.start():
33                    return match.start()
34        return None

Return the string index where the item should be split.

class JoinedTimestamp(avwx.parsing.sanitization.cleaners.base.SplitItem):
40class JoinedTimestamp(SplitItem):
41    """Connected timestamp."""
42
43    def split_at(self, item: str) -> int | None:
44        return next(
45            (loc for loc, check in _TIMESTAMP_BREAKS if len(item) > loc and check(item[:loc])),
46            None,
47        )

Connected timestamp.

def split_at(self, item: str) -> int | None:
43    def split_at(self, item: str) -> int | None:
44        return next(
45            (loc for loc, check in _TIMESTAMP_BREAKS if len(item) > loc and check(item[:loc])),
46            None,
47        )

Return the string index where the item should be split.

class JoinedWind(avwx.parsing.sanitization.cleaners.base.SplitItem):
50class JoinedWind(SplitItem):
51    """Connected to wind."""
52
53    def split_at(self, item: str) -> int | None:
54        if len(item) > 5 and "KT" in item and not item.endswith("KT"):
55            index = item.find("KT")
56            if index > 4:
57                return index + 2
58        return None

Connected to wind.

def split_at(self, item: str) -> int | None:
53    def split_at(self, item: str) -> int | None:
54        if len(item) > 5 and "KT" in item and not item.endswith("KT"):
55            index = item.find("KT")
56            if index > 4:
57                return index + 2
58        return None

Return the string index where the item should be split.

class JoinedTafNewLine(avwx.parsing.sanitization.cleaners.base.SplitItem):
61class JoinedTafNewLine(SplitItem):
62    """TAF newline connected to previous element."""
63
64    def split_at(self, item: str) -> int | None:
65        for key in TAF_NEWLINE:
66            if key in item and not item.startswith(key):
67                return item.find(key)
68        for key in TAF_NEWLINE_STARTSWITH:
69            if key in item and not item.startswith(key):
70                index = item.find(key)
71                if item[index + len(key) :].isdigit():
72                    return index
73        return None

TAF newline connected to previous element.

def split_at(self, item: str) -> int | None:
64    def split_at(self, item: str) -> int | None:
65        for key in TAF_NEWLINE:
66            if key in item and not item.startswith(key):
67                return item.find(key)
68        for key in TAF_NEWLINE_STARTSWITH:
69            if key in item and not item.startswith(key):
70                index = item.find(key)
71                if item[index + len(key) :].isdigit():
72                    return index
73        return None

Return the string index where the item should be split.

class JoinedMinMaxTemperature(avwx.parsing.sanitization.cleaners.base.SplitItem):
76class JoinedMinMaxTemperature(SplitItem):
77    """Connected TAF min/max temp."""
78
79    def split_at(self, item: str) -> int | None:
80        if "TX" in item and "TN" in item and item.endswith("Z") and "/" in item:
81            tx_index, tn_index = item.find("TX"), item.find("TN")
82            return max(tx_index, tn_index)
83        return None

Connected TAF min/max temp.

def split_at(self, item: str) -> int | None:
79    def split_at(self, item: str) -> int | None:
80        if "TX" in item and "TN" in item and item.endswith("Z") and "/" in item:
81            tx_index, tn_index = item.find("TX"), item.find("TN")
82            return max(tx_index, tn_index)
83        return None

Return the string index where the item should be split.

RVR_PATTERN = re.compile('R\\d{2}[RCL]?/\\S+')
class JoinedRunwayVisibility(avwx.parsing.sanitization.cleaners.base.SplitItem):
89class JoinedRunwayVisibility(SplitItem):
90    """Connected RVR elements.
91    Ex: R36/1500DR18/P2000
92    """
93
94    def split_at(self, item: str) -> int | None:
95        return match.start() + 1 if (match := RVR_PATTERN.search(item[1:])) else None

Connected RVR elements. Ex: R36/1500DR18/P2000

def split_at(self, item: str) -> int | None:
94    def split_at(self, item: str) -> int | None:
95        return match.start() + 1 if (match := RVR_PATTERN.search(item[1:])) else None

Return the string index where the item should be split.