opensextant.extractors.xtemporal

  1# -*- coding: utf-8 -*-
  2from calendar import timegm
  3from datetime import datetime
  4
  5import arrow
  6from opensextant import logger_config
  7from opensextant.FlexPat import PatternMatch, RegexPatternManager, PatternExtractor
  8
# tzinfo of the current UTC time, captured at import.
# NOTE(review): not referenced anywhere else in the visible source.
TZINFO = arrow.utcnow().tzinfo

# "Now" is sampled ONCE at import time, so YEAR, CURR_YY and
# FUTURE_YY_THRESHOLD do not advance in a long-running process.
NOW = arrow.now()
YEAR = NOW.year
MILLENNIUM = 2000
# Current year as an offset from MILLENNIUM (two digits until 2100).
CURR_YY = YEAR - MILLENNIUM
# Pivot for two-digit years in normalize_year(): values up to this threshold
# are placed in the 2000s, larger values in the 1900s.
FUTURE_YY_THRESHOLD = CURR_YY + 2
# Exclusive upper bound on the years normalize_year() accepts.
MAXIMUM_YEAR = 2040

# Negative sentinel codes returned by the normalize_*() helpers in place of a value.
INVALID_DATE = -1
INVALID_DAY = -2
# NOTE(review): the three NO_* codes are not used in the visible source.
NO_YEAR = -3
NO_MONTH = -4
NO_DAY = -5

log = logger_config("INFO", pkg=__name__)
# Module-wide locale hint; set by XTemporal(locale=...) and read by
# DateTimeMatch.normalize(). None means "no forced locale".
_default_locale = None
 26
 27def format_date(d):
 28    if isinstance(d, arrow.Arrow):
 29        return d.format("YYYY-MM-DD")
 30    else:
 31        return arrow.get(d).format("YYYY-MM-DD")
 32
 33
 34def normalize_day(slots):
 35    """
 36
 37    :param slots:
 38    :return:
 39    """
 40    if not slots:
 41        return INVALID_DAY
 42
 43    day = slots.get("DM2") or slots.get("DOM") or slots.get("DD")
 44    if day:
 45        try:
 46            day = int(day)
 47            if 0 < day <= 31:
 48                return day
 49            else:
 50                return INVALID_DAY
 51        except:
 52            pass
 53    return INVALID_DATE
 54
 55
 56def normalize_month_name(slots):
 57    """
 58
 59    :param slots:
 60    :return: name of month in English
 61    """
 62    text = slots.get("MON_ABBREV") or slots.get("MON_NAME")
 63    if not text:
 64        return INVALID_DATE
 65    tlen = len(text)
 66    if tlen < 3 or 11 < tlen:
 67        return INVALID_DATE
 68    try:
 69        short = text[0:3]
 70        return arrow.get(short, "MMM").month
 71    except:
 72        return INVALID_DATE
 73
 74
 75def normalize_month_num(slots: dict):
 76    """
 77    returns month number
 78    :param slots:
 79    :return: month num, 1-12
 80    """
 81    if not slots:
 82        return INVALID_DATE
 83
 84    month_num = slots.get("DM1") or slots.get("MM") or slots.get("MONTH")
 85    if month_num:
 86        try:
 87            num = int(month_num)
 88            if 0 < num <= 12:
 89                return num
 90        except:
 91            pass
 92    return INVALID_DATE
 93
 94
 95def test_european_locale(slots: dict, locale=None):
 96    """
 97
 98    :param slots:
 99    :return:  day, month
100    """
101    if not ("DM1" in slots and "DM2" in slots):
102        return None, None
103
104    # Matched as MDY
105    # But we test if DMY is valid based on values.
106    try:
107        day = int(slots["DM1"])
108        mon = int(slots["DM2"])
109        # First pass -- if LOCALE == "euro", then assume pattern matches DAY/MON/YYYY
110        if locale and locale == "euro":
111            if mon <= 12 and  day <= 31:
112                return day, mon
113            else:
114                return -1, -1
115        else:
116            # Otherwise -- this is a test and we're guessing. Only return
117            # a date if date pattern appears to be unambiguous.  03/05   is Mar-5th or May-3rd, for example
118            if day > 12 and mon <= 12:
119                # Valid match  31/12/...  new year's eve.
120                return day, mon
121            if day > 12 and mon > 12:
122                # Invalid date match for this pattern, e.g., 13/13/, or 30/13/...
123                return -1, -1
124    except:
125        pass
126    return None, None
127
128def _get_year(slots:dict) -> str:
129    return slots.get("YEAR") or slots.get("YY") or slots.get("YEARYY") or slots.get("YY2")
130
def normalize_year(slots) -> int:
    """
    Resolve the year of a date match to a full calendar year.

    A YEAR slot is used as-is when it lies in (0, MAXIMUM_YEAR). Otherwise
    the short slots are consulted: YY or YY2 first, then YEARYY, whose
    optional leading apostrophe ('17) is dropped.

    Values written with four or more digits must also lie in
    (0, MAXIMUM_YEAR). One- and two-digit values are windowed: values up to
    FUTURE_YY_THRESHOLD become 20xx, larger ones become 19xx. Note that a
    bare "24" could be 1924, 2024 or a day of month; this function always
    applies the window.

    :param slots: dict of matched pattern groups
    :return: the year, or INVALID_DATE if no usable year is present
    """
    if not slots:
        return INVALID_DATE

    try:
        year_str = slots.get("YEAR")
        if year_str:
            year = int(year_str)
            if 0 < year < MAXIMUM_YEAR:
                return year
            # Out-of-range YEAR: fall through and try the short slots.

        digits = slots.get("YY") or slots.get("YY2")
        if not digits:
            yearyy = slots.get("YEARYY")
            # The apostrophe only marks the value as a year; it does not
            # change how the digits are windowed.
            digits = yearyy.strip("'") if yearyy else None
        if not digits:
            return INVALID_DATE

        year = int(digits)
        width = len(digits)
    except (AttributeError, TypeError, ValueError):
        # Slot value is not digit text.
        return INVALID_DATE

    if width >= 4:
        # Written out in full: accept only the supported range. Never add
        # MILLENNIUM to a value that is already a full year.
        return year if 0 < year < MAXIMUM_YEAR else INVALID_DATE

    if not 0 <= year <= 99:
        # Three-digit or negative values are not a recognised year form.
        return INVALID_DATE

    if year <= FUTURE_YY_THRESHOLD:
        # Recent years, just past turn of century.
        return MILLENNIUM + year
    # e.g. '27 -- more likely 1927
    return 1900 + year
179
180
def normalize_tz(slots):
    """
    Parse the time zone text of a match.

    SHORT_TZ is tried first (arrow token "Z"), then LONG_TZ (arrow token
    "ZZZ"); the first non-empty slot wins.

    :param slots: dict of matched pattern groups
    :return: the object produced by ``arrow.get()`` for the zone text (callers
        read its ``tzinfo``), or None when no zone slot is filled or anything
        goes wrong while reading or parsing it
    """
    try:
        for slot_name, token in (("SHORT_TZ", "Z"), ("LONG_TZ", "ZZZ")):
            zone_text = slots.get(slot_name)
            if zone_text:
                return arrow.get(zone_text, token)
    except Exception:
        # Best effort: an unparseable zone is reported as "no zone".
        pass
    return None
199
200
def normalize_time(slots):
    """
    Derive a valid time tuple from the hh, mm and ss slots.

    No default time is substituted: without a valid hour AND minute there is
    no time at all. Seconds are optional.

    :param slots: dict of matched pattern groups
    :return: tuple of H, M, S, resolution -- S is -1 and resolution is
        Resolution.MINUTE when seconds are absent or out of range, otherwise
        resolution is Resolution.SECOND; None when slots is empty or hour or
        minute is missing, non-numeric or out of range
    """
    if not slots:
        return None

    hh_mm_ss = []
    for field in ("hh", "mm", "ss"):
        val = slots.get(field)
        try:
            # A slot can be present with value None (presumably an optional
            # group that did not take part in the match). It must still fill
            # its position, or the three-way unpack below fails.
            hh_mm_ss.append(-1 if val is None else int(val))
        except (TypeError, ValueError):
            hh_mm_ss.append(-1)

    # Time resolution is D, H, M, S ... where second is optional
    hh, mm, ss = hh_mm_ss
    if not (0 <= hh < 24 and 0 <= mm < 60):
        return None

    if 0 <= ss < 60:
        return hh, mm, ss, Resolution.SECOND
    # Report unusable seconds as -1 so callers never add a bogus value.
    return hh, mm, -1, Resolution.MINUTE
230
231
class XTemporal(PatternExtractor):
    """
    Pattern extractor wired to the date/time pattern configuration.
    """
    def __init__(self, cfg="datetime_patterns_py.cfg", debug=False, locale=None):
        """
        :param cfg: patterns config file.
        :param debug: passed to RegexPatternManager as both ``debug`` and
            ``testing``; when true the module logger is also set to DEBUG.
        :param locale: optional locale name; it is lower-cased and stored in the
            module-level ``_default_locale``, so it applies module-wide rather
            than to this instance only.
        """
        global _default_locale

        pattern_mgr = RegexPatternManager(cfg, debug=debug, testing=debug)
        PatternExtractor.__init__(self, pattern_mgr)

        if locale:
            _default_locale = locale.lower()

        if debug:
            log.setLevel("DEBUG")
244
245
class Resolution:
    """Single-letter codes naming the finest unit a matched date/time resolves to."""
    # Calendar units.
    YEAR, MONTH, WEEK, DAY = "Y", "M", "W", "D"
    # Clock units. MINUTE is lower-case "m" so it stays distinct from MONTH "M".
    HOUR, MINUTE, SECOND = "H", "m", "s"
254
255
class DateTimeMatch(PatternMatch):
    """
    DateTimeMatch puts out a matched date with attributes:

        datenorm   -- ISO yyyy-mm-dd date, as written in the text
        epoch      -- seconds from 1970-01-01 UTC
        resolution -- M (month), D (day), m (minute) or s (second)
        locale     -- "north-am" or "euro".
        timestamp  -- ISO date/time with UTC offset; only when a time was matched
        tzinfo     -- time zone; only when a time zone was matched
        error      -- message of the exception raised while building the date;
                      only on failure

        If locale is set using XTemporal(locale='euro')
        matching Euro-style dates will be forced as such through out document.
        When locale is not set, the default is to only use euro locale for dates
        that are not ambiguous, e.g., 30/05/1977.
        Ambiguous dates (with no default locale used) are parsed as "north-am".
    """
    def __init__(self, *args, **kwargs):
        PatternMatch.__init__(self, *args, **kwargs)
        self.case = PatternMatch.LOWER_CASE
        self.locale = "north-am"  # vs. "euro" vs...

    def __str__(self):
        return f"{self.text}"

    def normalize(self):
        """
        Validate the matched slots and encode them as a date/time.

        The outcome is reported through ``is_valid``, ``filtered_out`` and
        ``attrs``. The match starts out invalid/filtered and is only marked
        valid once a real calendar date has been built.

        :return: False when the match is rejected during slot validation;
            otherwise None (both on success and when building the date fails)
        """
        PatternMatch.normalize(self)
        self.is_valid = False
        self.filtered_out = True

        # Slots to capture:
        # MON_ABBREV, MON_NAME,  MONTH, MM
        # YEAR, YY, YEARYY
        # DAY_ENUM, DOM, DD
        # SHORT_TZ, LONG_TZ
        # hh, mm, ss
        slots = self.attributes()

        # TODO: TIMEX encodings

        year = normalize_year(slots)
        if year is None or year == INVALID_DATE:
            return False

        day, month = None, None
        is_short_mdy = False
        if self.pattern_id in {"MDY-01", "MDY-02"}:
            is_short_mdy = True
            day, month = test_european_locale(slots, _default_locale) # Uses DM slots only
            if day and day < 0:
                return False
            if day and month:
                # Non-zero day/month returned from test
                self.locale = "euro"

        # Month: numeric slot first, month name as the fallback.
        if not month:
            month = normalize_month_num(slots)
        if month <= 0:
            month = normalize_month_name(slots)

        if month < 0:
            return False

        resolution = Resolution.MONTH

        # Mixed separators (e.g. 12/05-2017) are not accepted.
        sep1 = slots.get("DSEP1")
        sep2 = slots.get("DSEP2")
        if sep1 and sep2 and sep1 != sep2:
            return False

        if sep1 == "." and is_short_mdy:
            # Dotted short form with a two-digit year is rejected
            # NOTE(review): presumably because it is too easily a non-date
            # number such as a version or section id -- confirm.
            year_str = _get_year(slots)
            if year_str and len(year_str) == 2:
                return False

        if not day:
            day = normalize_day(slots)
        if day == INVALID_DAY:
            return False
        elif day == INVALID_DATE:
            # Missing day
            day = 1
        else:
            resolution = Resolution.DAY

        # Simple february catch:
        if month == 2 and day > 29:
            return False

        try:
            tz_found = None
            # datetime() raises ValueError for impossible dates (31 Apr, 29 Feb
            # in a common year); that is handled below.
            date_found = arrow.get(datetime(year, month, day))
            tm = normalize_time(slots)
            if tm:
                # normalize_time() only returns a tuple for a valid hour and
                # minute; seconds are -1 when not usable.
                hr, minute, seconds, resolution = tm
                date_found = date_found.shift(hours=hr, minutes=minute)
                if seconds >= 0:
                    date_found = date_found.shift(seconds=seconds)
                tz_found = normalize_tz(slots)
                if tz_found:
                    # The matched clock time is local to the matched zone, so
                    # attach the zone. Converting with .to() would treat the
                    # clock time as UTC and move the hour -- and possibly the
                    # calendar date -- away from what the text says.
                    date_found = date_found.replace(tzinfo=tz_found.tzinfo)

            # Matchgroups are raw data from REGEX
            # Attributes are final encodings to share.
            self.attrs = {
                "datenorm": date_found.format("YYYY-MM-DD"),
                # timegm() expects UTC fields, so feed it the UTC time tuple.
                "epoch": timegm(date_found.utctimetuple()),
                "resolution": resolution,
                "locale": self.locale
            }
            if tm:
                self.attrs["timestamp"] = date_found.format("YYYY-MM-DDTHH:mm:ssZ")
            if tz_found:
                self.attrs["tzinfo"] = tz_found.format("ZZZ")

            self.is_valid = True
            self.filtered_out = False
        except Exception as parse_err:
            # For debugging purposes -- but ideally, you IGNORE
            # date/time values that are marked filtered_out = True
            self.attrs["error"] = str(parse_err)
            log.info("Parsing error: DATE: %s (YMD = %d / % d / %d )", self.text, year, month, day)
            log.debug("Exception  - ", exc_info=parse_err)
def normalize_day(slots):
35def normalize_day(slots):
36    """
37
38    :param slots:
39    :return:
40    """
41    if not slots:
42        return INVALID_DAY
43
44    day = slots.get("DM2") or slots.get("DOM") or slots.get("DD")
45    if day:
46        try:
47            day = int(day)
48            if 0 < day <= 31:
49                return day
50            else:
51                return INVALID_DAY
52        except:
53            pass
54    return INVALID_DATE

:param slots: :return:

def normalize_month_name(slots):
57def normalize_month_name(slots):
58    """
59
60    :param slots:
61    :return: name of month in English
62    """
63    text = slots.get("MON_ABBREV") or slots.get("MON_NAME")
64    if not text:
65        return INVALID_DATE
66    tlen = len(text)
67    if tlen < 3 or 11 < tlen:
68        return INVALID_DATE
69    try:
70        short = text[0:3]
71        return arrow.get(short, "MMM").month
72    except:
73        return INVALID_DATE

:param slots: :return: number of the month (not its name) parsed from the MON_ABBREV or MON_NAME slot, or INVALID_DATE if it cannot be parsed

def normalize_month_num(slots: dict):
76def normalize_month_num(slots: dict):
77    """
78    returns month number
79    :param slots:
80    :return: month num, 1-12
81    """
82    if not slots:
83        return INVALID_DATE
84
85    month_num = slots.get("DM1") or slots.get("MM") or slots.get("MONTH")
86    if month_num:
87        try:
88            num = int(month_num)
89            if 0 < num <= 12:
90                return num
91        except:
92            pass
93    return INVALID_DATE

returns month number :param slots: :return: month num, 1-12

def test_european_locale(slots: dict, locale=None):
 96def test_european_locale(slots: dict, locale=None):
 97    """
 98
 99    :param slots:
100    :return:  day, month
101    """
102    if not ("DM1" in slots and "DM2" in slots):
103        return None, None
104
105    # Matched as MDY
106    # But we test if DMY is valid based on values.
107    try:
108        day = int(slots["DM1"])
109        mon = int(slots["DM2"])
110        # First pass -- if LOCALE == "euro", then assume pattern matches DAY/MON/YYYY
111        if locale and locale == "euro":
112            if mon <= 12 and  day <= 31:
113                return day, mon
114            else:
115                return -1, -1
116        else:
117            # Otherwise -- this is a test and we're guessing. Only return
118            # a date if date pattern appears to be unambiguous.  03/05   is Mar-5th or May-3rd, for example
119            if day > 12 and mon <= 12:
120                # Valid match  31/12/...  new year's eve.
121                return day, mon
122            if day > 12 and mon > 12:
123                # Invalid date match for this pattern, e.g., 13/13/, or 30/13/...
124                return -1, -1
125    except:
126        pass
127    return None, None

:param slots: :return: day, month

def normalize_tz(slots):
182def normalize_tz(slots):
183    """
184
185    :param slots:
186    :return: arrow.tzinfo
187    """
188
189    try:
190        tz = slots.get("SHORT_TZ")
191        if tz:
192            dt_tz = arrow.get(tz, "Z")
193            return dt_tz
194        tz = slots.get("LONG_TZ")
195        if tz:
196            dt_tz = arrow.get(tz, "ZZZ")
197            return dt_tz
198    except Exception as parse_err:
199        return None

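:param slots: :return: the object returned by arrow.get() for the SHORT_TZ or LONG_TZ slot (callers read its tzinfo), or None if no time zone is present or it cannot be parsed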

def normalize_time(slots):
202def normalize_time(slots):
203    """
204    Derive a valid time tuple.
205    :param slots:
206    :return: tuple of H, M, S, resolution
207    """
208
209    # Default time is mid-day, noon. In timezone provided or UTC if no timezone.
210    hh_mm_ss = []
211    for field in ["hh", "mm", "ss"]:
212        if field in slots:
213            val = slots.get(field)
214            if val is not None:
215                hh_mm_ss.append(int(val))
216        else:
217            hh_mm_ss.append(-1)
218
219    # Time resolution is D, H, M, S ... where second is optional
220    hh, mm, ss = hh_mm_ss
221    # resolution = Resolution.DAY
222    if not (0 <= hh < 24):
223        return None
224    # resolution = Resolution.HOUR
225    if not (0 <= mm < 60):
226        return None
227    resolution = Resolution.MINUTE
228    if 0 <= ss < 60:
229        resolution = Resolution.SECOND
230    return hh, mm, ss, resolution

Derive a valid time tuple. :param slots: :return: tuple of H, M, S, resolution

class XTemporal(opensextant.FlexPat.PatternExtractor):
233class XTemporal(PatternExtractor):
234    def __init__(self, cfg="datetime_patterns_py.cfg", debug=False, locale=None):
235        """
236        :param cfg: patterns config file.
237        """
238        PatternExtractor.__init__(self, RegexPatternManager(cfg, debug=debug, testing=debug))
239        if locale:
240            global _default_locale
241            _default_locale = locale.lower()
242
243        if debug:
244            log.setLevel("DEBUG")

Discussion: Read first https://opensextant.github.io/Xponents/doc/Patterns.md

Example:

from opensextant.extractors.poli import PatternsOfLifeManager
from opensextant.FlexPat import PatternExtractor

# INIT
#=====================
# Invoke a particular REGEX rule set, here poli_patterns.cfg
# @see https://github.com/OpenSextant/Xponents/blob/master/Core/src/main/resources/poli_patterns.cfg
mgr = PatternsOfLifeManager("poli_patterns.cfg")
pex = PatternExtractor(mgr)

# DEV/TEST
#=====================
# "default_test()" is useful to run during development and
# encourages you to capture critical pattern variants in your "TEST" data.
# Look at your pass/fail situations -- what test cases are failing your rule?
test_results = pex.default_tests()
print("TEST RESULTS")
for result in test_results:
    print(repr(result))

# RUN
#=====================
real_results = pex.extract(".... text blob 1-800-123-4567...")
print("REAL RESULTS")
for result in real_results:
    print(repr(result))
    print("     RAW DICT:", render_match(result))
XTemporal(cfg='datetime_patterns_py.cfg', debug=False, locale=None)
234    def __init__(self, cfg="datetime_patterns_py.cfg", debug=False, locale=None):
235        """
236        :param cfg: patterns config file.
237        """
238        PatternExtractor.__init__(self, RegexPatternManager(cfg, debug=debug, testing=debug))
239        if locale:
240            global _default_locale
241            _default_locale = locale.lower()
242
243        if debug:
244            log.setLevel("DEBUG")

:param cfg: patterns config file.

class DateTimeMatch(opensextant.FlexPat.PatternMatch):
257class DateTimeMatch(PatternMatch):
258    """
259    DateTimeMatch puts out a matched date with attributes:
260
261        datenorm -- ISO yyyy-mm-dd date
262        epoch    -- seconds from 1970-01-01
263        resolution - D, M, h, m, s
264        locale   -- "north-am" or "euro".
265
266        If locale is set using XTemporal(locale='euro')
267        matching Euro-style dates will be forced as such through out document.
268        When locale is not set, the default is to only use euro locale for dates
269        that are not ambiguous, e.g., 30/05/1977.
270        Ambiguous dates (with no default locale used) are parsed as "north-am".
271    """
272    def __init__(self, *args, **kwargs):
273        PatternMatch.__init__(self, *args, **kwargs)
274        self.case = PatternMatch.LOWER_CASE
275        self.locale = "north-am"  # vs. "euro" vs...
276
277    def __str__(self):
278        return f"{self.text}"
279
280    def normalize(self):
281        PatternMatch.normalize(self)
282        self.is_valid = False
283        self.filtered_out = True
284
285        # Slots to capture:
286        # MON_ABBREV, MON_NAME,  MONTH, MM
287        # YEAR, YY, YEARYY
288        # DAY_ENUM, DOM, DD
289        # SHORT_TZ, LONG_TZ
290        # hh, mm, ss
291        slots = self.attributes()
292
293        # normalize_year, resolution = YEAR
294        # if separators present, validate now
295        # normalize day or day of month
296        # normalize month num or month name
297        # normalize TZ and time if present.
298        # set finest resolution Y, M, D, H, S
299        # TODO: TIMEX encodings
300
301        year = normalize_year(slots)
302        if year is None or year == INVALID_DATE:
303            return False
304
305        # resolution = Resolution.YEAR
306        day, month = None, None
307        is_short_mdy = False
308        if self.pattern_id in {"MDY-01", "MDY-02"}:
309            is_short_mdy = True
310            day, month = test_european_locale(slots, _default_locale) # Uses DM slots only
311            if day and day < 0:
312                return False
313            if day and month:
314                # Non-zero day/month returned from test
315                self.locale = "euro"
316
317        if not month:
318            month = normalize_month_num(slots)
319        if month <= 0:
320            month = normalize_month_name(slots)
321
322        if month < 0:
323            return False
324
325        resolution = Resolution.MONTH
326        sep1 = slots.get("DSEP1")
327        sep2 = slots.get("DSEP2")
328        if sep1 and sep2 and sep1 != sep2:
329            return False
330
331        if sep1 == "." and is_short_mdy:
332            year_str = _get_year(slots)
333            if len(year_str) ==2:
334                return False
335            
336        if not day:
337            day = normalize_day(slots)
338        if day == INVALID_DAY:
339            return False
340        elif day == INVALID_DATE:
341            # Missing day
342            day = 1
343        else:
344            resolution = Resolution.DAY
345
346        # Simple february catch:
347        if month == 2 and day > 29:
348            return False
349
350        try:
351            tz_found = None
352            date_found = arrow.get(datetime(year, month, day))
353            tm = normalize_time(slots)
354            if tm:
355                hr, minute, seconds, resolution = tm
356                if hr >= 0:
357                    date_found = date_found.shift(hours=hr)
358                    if minute >= 0:
359                        date_found = date_found.shift(minutes=minute)
360                        if seconds >= 0:
361                            date_found = date_found.shift(seconds=seconds)
362                tz_found = normalize_tz(slots)
363                if tz_found:
364                    date_found = date_found.to(tz_found.tzinfo)
365
366            # Matchgroups are raw data from REGEX
367            # Attributes are final encodings to share.
368            self.attrs = {
369                "datenorm": date_found.format("YYYY-MM-DD"),
370                "epoch": timegm(date_found.timetuple()),
371                "resolution": resolution,
372                "locale": self.locale
373            }
374            if tm:
375                self.attrs["timestamp"] = date_found.format("YYYY-MM-DDTHH:mm:ssZ")
376            if tz_found:
377                self.attrs["tzinfo"] = tz_found.format("ZZZ")
378
379            self.is_valid = True
380            self.filtered_out = False
381        except Exception as parse_err:
382            # For debugging purposes -- but ideally, you IGNORE
383            # date/time values that are marked filtered_out = True
384            self.attrs["error"] = str(parse_err)
385            log.info("Parsing error: DATE: %s (YMD = %d / % d / %d )", self.text, year, month, day)
386            log.debug("Exception  - ", exc_info=parse_err)

DateTimeMatch puts out a matched date with attributes:

datenorm -- ISO yyyy-mm-dd date
epoch    -- seconds from 1970-01-01
resolution -- M (month), D (day), m (minute) or s (second)
locale   -- "north-am" or "euro".

If locale is set using XTemporal(locale='euro')
matching Euro-style dates will be forced as such through out document.
When locale is not set, the default is to only use euro locale for dates
that are not ambiguous, e.g., 30/05/1977.
Ambiguous dates (with no default locale used) are parsed as "north-am".
def normalize(self):
280    def normalize(self):
281        PatternMatch.normalize(self)
282        self.is_valid = False
283        self.filtered_out = True
284
285        # Slots to capture:
286        # MON_ABBREV, MON_NAME,  MONTH, MM
287        # YEAR, YY, YEARYY
288        # DAY_ENUM, DOM, DD
289        # SHORT_TZ, LONG_TZ
290        # hh, mm, ss
291        slots = self.attributes()
292
293        # normalize_year, resolution = YEAR
294        # if separators present, validate now
295        # normalize day or day of month
296        # normalize month num or month name
297        # normalize TZ and time if present.
298        # set finest resolution Y, M, D, H, S
299        # TODO: TIMEX encodings
300
301        year = normalize_year(slots)
302        if year is None or year == INVALID_DATE:
303            return False
304
305        # resolution = Resolution.YEAR
306        day, month = None, None
307        is_short_mdy = False
308        if self.pattern_id in {"MDY-01", "MDY-02"}:
309            is_short_mdy = True
310            day, month = test_european_locale(slots, _default_locale) # Uses DM slots only
311            if day and day < 0:
312                return False
313            if day and month:
314                # Non-zero day/month returned from test
315                self.locale = "euro"
316
317        if not month:
318            month = normalize_month_num(slots)
319        if month <= 0:
320            month = normalize_month_name(slots)
321
322        if month < 0:
323            return False
324
325        resolution = Resolution.MONTH
326        sep1 = slots.get("DSEP1")
327        sep2 = slots.get("DSEP2")
328        if sep1 and sep2 and sep1 != sep2:
329            return False
330
331        if sep1 == "." and is_short_mdy:
332            year_str = _get_year(slots)
333            if len(year_str) ==2:
334                return False
335            
336        if not day:
337            day = normalize_day(slots)
338        if day == INVALID_DAY:
339            return False
340        elif day == INVALID_DATE:
341            # Missing day
342            day = 1
343        else:
344            resolution = Resolution.DAY
345
346        # Simple february catch:
347        if month == 2 and day > 29:
348            return False
349
350        try:
351            tz_found = None
352            date_found = arrow.get(datetime(year, month, day))
353            tm = normalize_time(slots)
354            if tm:
355                hr, minute, seconds, resolution = tm
356                if hr >= 0:
357                    date_found = date_found.shift(hours=hr)
358                    if minute >= 0:
359                        date_found = date_found.shift(minutes=minute)
360                        if seconds >= 0:
361                            date_found = date_found.shift(seconds=seconds)
362                tz_found = normalize_tz(slots)
363                if tz_found:
364                    date_found = date_found.to(tz_found.tzinfo)
365
366            # Matchgroups are raw data from REGEX
367            # Attributes are final encodings to share.
368            self.attrs = {
369                "datenorm": date_found.format("YYYY-MM-DD"),
370                "epoch": timegm(date_found.timetuple()),
371                "resolution": resolution,
372                "locale": self.locale
373            }
374            if tm:
375                self.attrs["timestamp"] = date_found.format("YYYY-MM-DDTHH:mm:ssZ")
376            if tz_found:
377                self.attrs["tzinfo"] = tz_found.format("ZZZ")
378
379            self.is_valid = True
380            self.filtered_out = False
381        except Exception as parse_err:
382            # For debugging purposes -- but ideally, you IGNORE
383            # date/time values that are marked filtered_out = True
384            self.attrs["error"] = str(parse_err)
385            log.info("Parsing error: DATE: %s (YMD = %d / % d / %d )", self.text, year, month, day)
386            log.debug("Exception  - ", exc_info=parse_err)

Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return: