Internal matching routine.
479{
480 DBG(
"%d:matchAt(tokenPos=%zu, str='%s', pos=%zu)\n",level,tokenPos,pos<str.length() ? str.substr(pos).c_str() :
"",pos);
481 auto isStartIdChar = [](
char c) {
return isalpha(c) || c==
'_'; };
483 auto matchCharClass = [
this,isStartIdChar,
isIdChar](
size_t tp,
char c) ->
bool
484 {
485 PToken tok =
data[tp];
487 uint16_t numFields = tok.value();
488 bool found = false;
489 for (uint16_t i=0;i<numFields;i++)
490 {
492
497 )
498 {
499 found=true;
500 break;
501 }
502 else
503 {
504 uint16_t v = static_cast<uint16_t>(c);
505 if (tok.from()<=v && v<=tok.to())
506 {
507 found=true;
508 break;
509 }
510 }
511 }
512 DBG(
"matchCharClass(tp=%zu,c=%c (x%02x))=%d\n",tp,c,c,negate?!found:found);
513 return negate ? !found : found;
514 };
515 size_t index = pos;
516 enum SequenceType { Star, Optional, OptionalRange };
517 auto processSequence = [this,&tokenPos,&tokenLen,&index,&str,&matchCharClass,
518 &isStartIdChar,&
isIdChar,&
match,&level,&pos](SequenceType type) ->
bool
519 {
520 size_t startIndex = index;
521 size_t len = str.length();
522 PToken tok =
data[++tokenPos];
523
524
526 {
527 size_t groupId = tok.value();
528 size_t innerStart = tokenPos + 1;
529
530
531 size_t tp = innerStart;
532 int depth = 1;
533 while (tp<tokenLen && depth>0)
534 {
537 tp++;
538 }
539 if (depth!=0) return false;
540 size_t endCapturePos = tp - 1;
541 size_t afterSeqPos = endCapturePos + 2;
542
543
544 Match tmp;
546 bool innerOk =
matchAt(innerStart,endCapturePos,str,tmp,index,level+1);
547 if (innerOk)
548 {
549 size_t capLen = tmp.length();
550
551
552 for (size_t gid=1; gid<tmp.size(); gid++)
553 {
554 size_t sp = tmp[gid].position();
555 size_t sl = tmp[gid].length();
556 if (sp!=std::string::npos && sl!=std::string::npos)
557 {
558 match.startCapture(gid,sp);
559 match.endCapture(gid,sp+sl);
560 }
561 }
562
563 match.startCapture(groupId,index);
564 match.endCapture(groupId,index+capLen);
565
566 bool ok =
matchAt(afterSeqPos,tokenLen,str,
match,index+capLen,level+1);
567 if (ok)
568 {
569 match.setMatch(pos,(index+capLen)-pos+
match.length());
570 return true;
571 }
572 }
573
574
575 match.startCapture(groupId,index);
576 match.endCapture(groupId,index);
577
578 bool ok2 =
matchAt(afterSeqPos,tokenLen,str,
match,index,level+1);
579 if (ok2)
580 {
582 return true;
583 }
584 return false;
585 }
586
588 {
589 char c_tok = tok.asciiValue();
590 while (index<len && str[index]==c_tok) { index++; if (type==Optional) break; }
591 tokenPos++;
592 }
593 else if (tok.isCharClass())
594 {
595 while (index<len && matchCharClass(tokenPos,str[index])) { index++; if (type==Optional) break; }
596 tokenPos+=tok.value()+1;
597 }
599 {
600 while (index<len && isStartIdChar(str[index])) { index++; if (type==Optional) break; }
601 tokenPos++;
602 }
604 {
605 while (index<len &&
isIdChar(str[index])) { index++;
if (type==Optional)
break; }
606 tokenPos++;
607 }
609 {
610 while (index<len &&
isspace(str[index])) { index++;
if (type==Optional)
break; }
611 tokenPos++;
612 }
614 {
615 while (index<len &&
isdigit(str[index])) { index++;
if (type==Optional)
break; }
616 tokenPos++;
617 }
619 {
620 if (type==Optional) index++; else index = str.length();
621 tokenPos++;
622 }
624 {
625 size_t tokenStart = ++tokenPos;
627 Match rangeMatch;
628 rangeMatch.init(str,0);
629 bool found =
matchAt(tokenStart,tokenPos,str,rangeMatch,index,level+1);
630 if (found)
631 {
632 index+=rangeMatch.length();
633 }
634 tokenPos++;
635 }
636 tokenPos++;
637 while (index>=startIndex)
638 {
639
640 bool found =
matchAt(tokenPos,tokenLen,str,
match,index,level+1);
641 if (found)
642 {
644 return true;
645 }
646 if (index==0) break;
647 index--;
648 }
649 return false;
650 };
651
652 while (tokenPos<tokenLen)
653 {
654 PToken tok =
data[tokenPos];
655 DBG(
"loop tokenPos=%zu token=%s\n",tokenPos,tok.kindStr());
657 {
658 char c_tok = tok.asciiValue();
659 if (index>=str.length() || str[index]!=c_tok) return false;
660 index++,tokenPos++;
661 }
662 else if (tok.isCharClass())
663 {
664 if (index>=str.length() || !matchCharClass(tokenPos,str[index])) return false;
665 index++,tokenPos+=tok.value()+1;
666 }
667 else
668 {
669 switch (tok.kind())
670 {
672 if (index>=str.length() || !isStartIdChar(str[index])) return false;
673 index++;
674 break;
676 if (index>=str.length() || !
isIdChar(str[index]))
return false;
677 index++;
678 break;
680 if (index>=str.length() || !
isspace(str[index]))
return false;
681 index++;
682 break;
684 if (index>=str.length() || !
isdigit(str[index]))
return false;
685 index++;
686 break;
688 if (index!=pos) return false;
689 break;
691 if (index<str.length()) return false;
692 break;
694 DBG(
"BeginOfWord: index=%zu isIdChar(%c)=%d prev.isIdChar(%c)=%d\n",
695 index,str[index],
isIdChar(str[index]),
696 index>0?str[index]-1:0,
698 if (index>=str.length() ||
700 (index>0 &&
isIdChar(str[index-1])))
return false;
701 break;
703 DBG(
"EndOfWord: index=%zu pos=%zu idIdChar(%c)=%d prev.isIsChar(%c)=%d\n",
704 index,pos,str[index],
isIdChar(str[index]),
705 index==0 ? 0 : str[index-1],
706 index==0 ? -1 :
isIdChar(str[index-1]));
707 if (index<str.length() &&
708 (
isIdChar(str[index]) || index==0 || !
isIdChar(str[index-1])))
return false;
709 break;
711 DBG(
"BeginCapture(%zu) gid=%u\n",index,tok.value());
712 match.startCapture(tok.value(),index);
713 break;
715 DBG(
"EndCapture(%zu) gid=%u\n",index,tok.value());
716 match.endCapture(tok.value(),index);
717 break;
719 if (index>=str.length()) return false;
720 index++;
721 break;
723 return processSequence(Star);
726 {
727 return processSequence(OptionalRange);
728 }
729 else
730 {
731 return processSequence(Optional);
732 }
733 default:
734 return false;
735 }
736 tokenPos++;
737 }
738 }
739 match.setMatch(pos,index-pos);
740 return true;
741}
bool matchAt(size_t tokenPos, size_t tokenLen, std::string_view str, Match &match, size_t pos, int level) const
Internal matching routine.
bool match(std::string_view str, Match &match, size_t pos=0) const
Check if a given string matches this regular expression.
static bool isalpha(char c)
static bool isspace(char c)
static bool isalnum(char c)
static bool isdigit(char c)