Skip to main content
ClaudeWave
Skill · 173 repo stars · updated 3mo ago

cwicr-quantity-matcher

Match BIM quantities to CWICR work items. Map element categories to cost codes, validate quantities, and generate cost-linked QTOs.

Install in Claude Code
Copy
git clone --depth 1 https://github.com/datadrivenconstruction/DDC_Skills_for_AI_Agents_in_Construction /tmp/cwicr-quantity-matcher && cp -r /tmp/cwicr-quantity-matcher/1_DDC_Toolkit/CWICR-Database/cwicr-quantity-matcher ~/.claude/skills/cwicr-quantity-matcher
Then start a new Claude Code session; the skill loads automatically.

SKILL.md

# CWICR Quantity Matcher

## Business Case

### Problem Statement
BIM exports contain quantities, but linking them to cost codes is difficult:
- Element categories don't match cost codes
- Manual mapping is error-prone
- Naming conventions differ between sources
- Code assignment needs to be consistent

### Solution
Intelligent matching of BIM element quantities to CWICR work items using category mapping, semantic matching, and rule-based assignment.

### Business Value
- **Automation** - Reduce manual mapping effort
- **Consistency** - Standard code assignment
- **Accuracy** - Validated quantity linkage
- **Integration** - BIM-to-cost data flow

## Technical Implementation

```python
import pandas as pd
import numpy as np
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass, field
from enum import Enum
import re
from difflib import SequenceMatcher


class MatchMethod(Enum):
    """Methods for matching BIM elements to work items.

    Identifies which strategy produced a match; stored on each
    ``QuantityMatch`` as ``match_method``. Every member's value is the
    lowercase form of its name.
    """
    EXACT = "exact"
    CATEGORY = "category"
    SEMANTIC = "semantic"
    RULE_BASED = "rule_based"
    MANUAL = "manual"


class MatchConfidence(Enum):
    """Confidence level of match.

    The score bands noted on the members are the ones applied by
    ``CWICRQuantityMatcher._get_confidence``; every lower bound is inclusive.
    """
    HIGH = "high"       # score >= 0.9
    MEDIUM = "medium"   # 0.7 <= score < 0.9
    LOW = "low"         # 0.5 <= score < 0.7
    MANUAL = "manual"   # score < 0.5 - needs review


@dataclass
class QuantityMatch:
    """Single quantity match result.

    Pairs one BIM element (id, category, description, quantity, unit) with
    the work item it was matched to, together with the method that produced
    the match and its confidence.
    """
    # --- BIM element side ---
    bim_element_id: str
    bim_category: str
    bim_description: str
    bim_quantity: float
    bim_unit: str
    # --- Work item side ---
    # NOTE(review): presumably the work item code - confirm against the code
    # that populates this field.
    matched_work_item: str
    work_item_description: str
    work_item_unit: str
    # --- Match quality ---
    match_method: MatchMethod
    confidence: MatchConfidence
    # NOTE(review): presumably the numeric score `confidence` was derived
    # from - confirm against the producer.
    confidence_score: float
    # Defaults to 1.0. NOTE(review): presumably the multiplier that expresses
    # bim_quantity in work_item_unit - confirm against the caller.
    unit_conversion_factor: float = 1.0


@dataclass
class MatchingResult:
    """Complete matching result.

    Holds integer summary counters plus the detailed per-element lists.
    How the counters relate to each other (e.g. whether
    ``matched + unmatched == total_elements``) is decided by the code that
    fills this in, which is not part of this definition.
    """
    total_elements: int
    matched: int
    unmatched: int
    high_confidence: int
    needs_review: int
    # Detailed results, one QuantityMatch per entry.
    matches: List[QuantityMatch]
    # NOTE(review): the dict schema is not defined here - check the producer.
    unmatched_elements: List[Dict[str, Any]]


# Category to work item mapping rules.
#
# Keys are lowercase BIM category keywords; values are lists of work item
# code prefixes. CWICRQuantityMatcher._get_category_codes lowercases and
# strips the incoming category and looks for these keys as substrings of it,
# so keys must stay lowercase.
CATEGORY_MAPPING = {
    # Revit categories to CWICR prefixes
    'walls': ['WALL', 'MSNR', 'PART'],
    'floors': ['CONC', 'FLOOR', 'SLAB'],
    'columns': ['CONC', 'STRL', 'COLM'],
    'beams': ['CONC', 'STRL', 'BEAM'],
    'foundations': ['CONC', 'FNDN', 'EXCV'],
    'roofs': ['ROOF', 'INSUL'],
    'doors': ['DOOR', 'CARP'],
    'windows': ['WIND', 'GLAZ'],
    'stairs': ['STAIR', 'CONC'],
    'railings': ['RAIL', 'METL'],
    'ceilings': ['CEIL', 'FINI'],
    'structural framing': ['STRL', 'STEE'],
    'structural columns': ['STRL', 'COLM'],
    'pipes': ['PLMB', 'PIPE'],
    'ducts': ['HVAC', 'DUCT'],
    'conduits': ['ELEC', 'COND'],
    'cable trays': ['ELEC', 'CABL'],
    'concrete': ['CONC'],
    'rebar': ['REBAR', 'RENF'],
    'formwork': ['FORM', 'CONC'],
}

# Unit conversion mapping.
#
# Keyed by a (from_unit, to_unit) tuple of lowercase unit strings. Each value
# is the factor that multiplies a quantity expressed in from_unit to give the
# same quantity in to_unit (e.g. 1 sf = 0.092903 m2). Both directions of every
# pair are listed explicitly rather than derived by inversion.
UNIT_CONVERSIONS = {
    ('sf', 'm2'): 0.092903,
    ('m2', 'sf'): 10.7639,
    ('cy', 'm3'): 0.764555,
    ('m3', 'cy'): 1.30795,
    ('lf', 'm'): 0.3048,
    ('m', 'lf'): 3.28084,
    ('lb', 'kg'): 0.453592,
    ('kg', 'lb'): 2.20462,
}


class CWICRQuantityMatcher:
    """Match BIM quantities to CWICR work items."""

    def __init__(self, cwicr_data: pd.DataFrame) -> None:
        """Store the work item table and build the lookup structures.

        Args:
            cwicr_data: CWICR work items. The indexing helpers look for a
                ``work_item_code`` column (code lookup) and a
                ``description`` column (keyword index); each is handled as
                optional by the helper that uses it.
        """
        # The frame is kept by reference; no copy is made here.
        self.work_items = cwicr_data
        self._index_data()
        self._build_search_index()

    def _index_data(self):
        """Index work items."""
        if 'work_item_code' in self.work_items.columns:
            self._code_index = self.work_items.set_index('work_item_code')
        else:
            self._code_index = None

    def _build_search_index(self):
        """Build search index for semantic matching."""
        self._search_index = {}

        if 'description' in self.work_items.columns:
            for _, row in self.work_items.iterrows():
                code = row.get('work_item_code', '')
                desc = str(row.get('description', '')).lower()

                # Index by keywords
                words = re.findall(r'\w+', desc)
                for word in words:
                    if len(word) > 3:
                        if word not in self._search_index:
                            self._search_index[word] = []
                        self._search_index[word].append(code)

    def _get_category_codes(self, category: str) -> List[str]:
        """Get potential work item prefixes for BIM category.

        The category is lowercased and stripped, then every key of
        ``CATEGORY_MAPPING`` that occurs in it as a substring is collected.
        A key that is itself contained in another matching key is treated as
        the less specific one and skipped, so "Structural Columns" resolves
        to the ``'structural columns'`` entry rather than to ``'columns'``.
        Among the remaining keys the first one in table order wins.

        Args:
            category: BIM element category name.

        Returns:
            A new list of work item code prefixes, or an empty list when no
            key matches. The list is a copy, so callers may modify it without
            affecting ``CATEGORY_MAPPING``.
        """
        cat_lower = category.lower().strip()

        matching_keys = [key for key in CATEGORY_MAPPING if key in cat_lower]
        for key in matching_keys:
            shadowed = any(
                key != other and key in other for other in matching_keys
            )
            if not shadowed:
                return list(CATEGORY_MAPPING[key])

        return []

    def _semantic_match(self, description: str, category: str) -> List[Tuple[str, float]]:
        """Find work items using semantic matching."""
        desc_lower = description.lower()
        words = re.findall(r'\w+', desc_lower)

        # Find candidate codes
        candidates = {}
        for word in words:
            if word in self._search_index:
                for code in self._search_index[word]:
                    if code not in candidates:
                        candidates[code] = 0
                    candidates[code] += 1

        # Score candidates
        scored = []
        for code, count in candidates.items():
            if self._code_index is not None and code in self._code_index.index:
                item_desc = str(self._code_index.loc[code].get('description', ''))
                similarity = SequenceMatcher(None, desc_lower, item_desc.lower()).ratio()
                score = (count * 0.4) + (similarity * 0.6)
                scored.append((code, score))

        return sorted(scored, key=lambda x: x[1], reverse=True)[:5]

    def _get_confidence(self, score: float) -> MatchConfidence:
        """Map a numeric match score to its confidence band.

        Bands are checked from the highest down and each lower bound is
        inclusive: 0.9 and above is HIGH, 0.7 and above is MEDIUM, 0.5 and
        above is LOW. Anything else is MANUAL.
        """
        bands = (
            (0.9, MatchConfidence.HIGH),
            (0.7, MatchConfidence.MEDIUM),
            (0.5, MatchConfidence.LOW),
        )
        for lower_bound, level in bands:
            if score >= lower_bound:
                return level
        return MatchConfidence.MANUAL

    def _get_unit_conversion(self, from_unit: str, to_unit: str) -> float: